features: add proxy checking
This commit is contained in:
parent
1df7dc94b8
commit
c4bb7a5ffa
@ -1,5 +1,6 @@
|
||||
package ru.pricepulse.parsingservice.config;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.http.HttpRequest;
|
||||
import org.springframework.http.client.ClientHttpRequestExecution;
|
||||
@ -10,32 +11,26 @@ import java.io.IOException;
|
||||
import java.net.InetSocketAddress;
|
||||
|
||||
@Slf4j
|
||||
@AllArgsConstructor
|
||||
public class DynamicProxyInterceptor implements ClientHttpRequestInterceptor {
|
||||
|
||||
private final UserAgentProvider userAgentProvider;
|
||||
private final ProxyProvider proxyProvider;
|
||||
|
||||
public DynamicProxyInterceptor(UserAgentProvider userAgentProvider, ProxyProvider proxyProvider) {
|
||||
this.userAgentProvider = userAgentProvider;
|
||||
this.proxyProvider = proxyProvider;
|
||||
}
|
||||
|
||||
/**
 * Picks a proxy and a User-Agent for the outgoing request, then delegates to the execution chain.
 *
 * <p>The proxy host and port are published through the {@code http.proxyHost} /
 * {@code http.proxyPort} system properties, and the {@code User-Agent} header of the request
 * is overwritten with a value from {@code userAgentProvider}.
 *
 * <p>NOTE(review): system properties are JVM-wide, so concurrent requests may overwrite each
 * other's proxy settings; whether the underlying request factory re-reads them for every
 * request is not visible here — confirm.
 *
 * @param request   the outgoing request whose headers are modified
 * @param body      the request body, passed through unchanged
 * @param execution the remaining interceptor chain
 * @return the response produced by {@code execution}
 * @throws IOException if executing the request fails
 */
@Override
|
||||
public ClientHttpResponse intercept(HttpRequest request, byte[] body, ClientHttpRequestExecution execution) throws IOException {
|
||||
// Obtain a random proxy
|
||||
InetSocketAddress proxyAddress = proxyProvider.getRandomProxy();
|
||||
InetSocketAddress proxyAddress = proxyProvider.getNextProxy();
|
||||
log.info("Используемый прокси: {}:{}", proxyAddress.getHostName(), proxyAddress.getPort());
|
||||
|
||||
// Set the proxy
|
||||
System.setProperty("http.proxyHost", proxyAddress.getHostName());
|
||||
System.setProperty("http.proxyPort", String.valueOf(proxyAddress.getPort()));
|
||||
|
||||
// Set a dynamic user-agent
|
||||
//Set a dynamic user-agent
|
||||
String randomUserAgent = userAgentProvider.getRandomUserAgent();
|
||||
log.info("Используемый User-Agent: {}", randomUserAgent);
|
||||
request.getHeaders().set("User-Agent", randomUserAgent);
|
||||
|
||||
// Execute the request
|
||||
return execution.execute(request, body);
|
||||
}
|
||||
}
|
||||
|
@ -1,21 +1,45 @@
|
||||
package ru.pricepulse.parsingservice.config;
|
||||
|
||||
import jakarta.annotation.PostConstruct;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.net.InetSocketAddress;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import static ru.pricepulse.parsingservice.wildberries_parser.proxy.ProxyChecker.checkProxies;
|
||||
import static ru.pricepulse.parsingservice.wildberries_parser.proxy.ProxyChecker.readProxiesFromFile;
|
||||
import static ru.pricepulse.parsingservice.wildberries_parser.proxy.ProxyChecker.saveProxiesToFile;
|
||||
|
||||
@Component
|
||||
public class ProxyProvider {
|
||||
private static final List<String> proxies = List.of(
|
||||
"85.215.64.49:80",
|
||||
"82.115.19.142:80",
|
||||
"148.113.172.51:8080"
|
||||
);
|
||||
private List<String> workingProxies;
|
||||
// File the candidate proxy list is read from during init().
// NOTE(review): this is an absolute path inside one user's home directory (/home/forever/...),
// so it will only resolve on that machine — consider a configurable or classpath-relative location.
private static final String PROXY_FILE_PATH = "/home/forever/УлГТУ/Платформы/parsing-service/src/main/resources/proxy.txt";
|
||||
// File the proxies that passed the check are written to during init().
// NOTE(review): same machine-specific absolute path concern as PROXY_FILE_PATH.
private static final String WORKING_PROXY_FILE_PATH = "/home/forever/УлГТУ/Платформы/parsing-service/src/main/resources/ok-proxy.txt";
|
||||
|
||||
public InetSocketAddress getRandomProxy() {
|
||||
String[] proxy = proxies.get(new Random().nextInt(proxies.size())).split(":");
|
||||
private final AtomicInteger currentProxyIndex = new AtomicInteger(0);
|
||||
|
||||
@PostConstruct
|
||||
public void init() {
|
||||
List<String> proxies = readProxiesFromFile(PROXY_FILE_PATH);
|
||||
System.out.println("Начата проверка проксей");
|
||||
workingProxies = checkProxies(proxies);
|
||||
System.out.println("Закончена проверка проксей");
|
||||
|
||||
saveProxiesToFile(workingProxies, WORKING_PROXY_FILE_PATH);
|
||||
|
||||
if (workingProxies.isEmpty()) {
|
||||
throw new RuntimeException("Нет доступных рабочих прокси.");
|
||||
}
|
||||
|
||||
System.out.println("Найдено рабочих прокси: " + workingProxies.size());
|
||||
}
|
||||
|
||||
public synchronized InetSocketAddress getNextProxy() {
|
||||
// Получаем текущий индекс прокси
|
||||
int currentIndex = currentProxyIndex.getAndUpdate(index -> (index + 1) % workingProxies.size());
|
||||
|
||||
String[] proxy = workingProxies.get(currentIndex).split(":");
|
||||
return new InetSocketAddress(proxy[0], Integer.parseInt(proxy[1]));
|
||||
}
|
||||
}
|
||||
|
@ -6,6 +6,8 @@ import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.http.client.ClientHttpRequestInterceptor;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
|
||||
import java.util.Collections;
|
||||
|
||||
@Configuration
|
||||
@AllArgsConstructor
|
||||
public class RestTemplateConfig {
|
||||
@ -19,9 +21,8 @@ public class RestTemplateConfig {
|
||||
ClientHttpRequestInterceptor dynamicProxyInterceptor = new DynamicProxyInterceptor(userAgentProvider, proxyProvider);
|
||||
|
||||
// Добавляем интерсептор в RestTemplate
|
||||
//restTemplate.setInterceptors(Collections.singletonList(dynamicProxyInterceptor));
|
||||
restTemplate.setInterceptors(Collections.singletonList(dynamicProxyInterceptor));
|
||||
|
||||
return restTemplate;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -25,7 +25,7 @@ public class WebClientConfig {
|
||||
return WebClient.builder()
|
||||
.filter((request, next) -> {
|
||||
// Получаем случайный прокси для каждого запроса
|
||||
InetSocketAddress proxyAddress = proxyProvider.getRandomProxy();
|
||||
InetSocketAddress proxyAddress = proxyProvider.getNextProxy();
|
||||
log.info("Используемый прокси: {}:{}", proxyAddress.getHostName(), proxyAddress.getPort());
|
||||
|
||||
HttpClient httpClient = HttpClient.create()
|
||||
|
@ -0,0 +1,114 @@
|
||||
package ru.pricepulse.parsingservice.wildberries_parser.proxy;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.InetSocketAddress;
|
||||
import java.net.Proxy;
|
||||
import java.net.URL;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
 * Static helpers for loading a list of proxies, probing each of them, and saving the ones
 * that respond.
 *
 * <p>A proxy is represented as a {@code host:port} string; files hold one proxy per line.
 */
public class ProxyChecker {
    /** Connect and read timeout, in milliseconds, applied to every probe request. */
    private static final int TIMEOUT = 2000;
    /** Upper bound on the number of probes that run concurrently. */
    private static final int THREAD_COUNT = 30;
    /** URL requested through each proxy to decide whether the proxy works. */
    private static final String PROBE_URL = "http://www.google.com";
    /** Highest valid TCP port number. */
    private static final int MAX_PORT = 65535;

    /**
     * Reads proxies from a text file, one per line.
     *
     * <p>Surrounding whitespace is removed and blank lines are dropped, so a trailing space or
     * an empty last line does not turn into an invalid entry later on.
     *
     * @param filePath path of the file to read
     * @return the entries found in the file; an empty mutable list if the file cannot be read
     */
    public static List<String> readProxiesFromFile(String filePath) {
        try {
            return normalize(Files.readAllLines(Paths.get(filePath)));
        } catch (IOException e) {
            System.err.println("Ошибка при чтении файла: " + e.getMessage());
            return new ArrayList<>();
        }
    }

    /**
     * Probes every proxy concurrently and returns the ones that answered successfully.
     *
     * <p>Entries are trimmed before probing and the trimmed form is what gets returned;
     * {@code null} and blank entries are ignored. The order of the input is preserved.
     *
     * @param proxies proxies in {@code host:port} form; may be {@code null} or empty
     * @return a mutable list of the proxies that passed the probe, possibly empty
     */
    public static List<String> checkProxies(List<String> proxies) {
        List<String> candidates = normalize(proxies);
        List<String> workingProxies = new ArrayList<>();
        if (candidates.isEmpty()) {
            return workingProxies;
        }

        ExecutorService executor =
                Executors.newFixedThreadPool(Math.min(THREAD_COUNT, candidates.size()));
        try {
            // Submit one probe per proxy to the pool.
            List<Future<String>> futures = new ArrayList<>(candidates.size());
            for (String candidate : candidates) {
                futures.add(executor.submit(() -> isProxyWorking(candidate) ? candidate : null));
            }

            // Collect the results in submission order.
            for (Future<String> future : futures) {
                try {
                    String result = future.get();
                    if (result != null) {
                        workingProxies.add(result);
                    }
                } catch (InterruptedException e) {
                    // Restore the flag and stop waiting; the finally block cancels the rest.
                    Thread.currentThread().interrupt();
                    System.err.println("Проверка прокси прервана");
                    break;
                } catch (java.util.concurrent.ExecutionException e) {
                    System.err.println("Ошибка при получении результата проверки прокси: " + e.getMessage());
                }
            }
            return workingProxies;
        } finally {
            // Always release the pool threads, including on failure or interruption.
            executor.shutdownNow();
        }
    }

    /**
     * Checks a single proxy by requesting {@link #PROBE_URL} through it.
     *
     * @param proxyAddress proxy in {@code host:port} form
     * @return {@code true} only if the request completed with HTTP status 200
     */
    private static boolean isProxyWorking(String proxyAddress) {
        String[] parts = proxyAddress.split(":");
        if (parts.length != 2 || parts[0].isEmpty()) {
            System.err.println("Некорректный формат прокси: " + proxyAddress);
            return false;
        }

        String ip = parts[0];
        int port;

        try {
            port = Integer.parseInt(parts[1]);
        } catch (NumberFormatException e) {
            System.err.println("Некорректный порт у прокси: " + proxyAddress);
            return false;
        }
        // InetSocketAddress rejects out-of-range ports with an unchecked exception; report them here instead.
        if (port < 1 || port > MAX_PORT) {
            System.err.println("Некорректный порт у прокси: " + proxyAddress);
            return false;
        }

        HttpURLConnection connection = null;
        try {
            Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(ip, port));
            URL url = new URL(PROBE_URL);
            connection = (HttpURLConnection) url.openConnection(proxy);
            connection.setConnectTimeout(TIMEOUT);
            connection.setReadTimeout(TIMEOUT);
            connection.setRequestMethod("GET");
            connection.connect();

            int responseCode = connection.getResponseCode();
            if (responseCode == HttpURLConnection.HTTP_OK) {
                System.out.println("Прокси работает (код ответа " + responseCode + "): " + proxyAddress);
                return true;
            }
            System.out.println("Прокси не отвечает (код ответа " + responseCode + "): " + proxyAddress);
            return false;
        } catch (IOException e) {
            System.out.println("Прокси не отвечает: " + proxyAddress);
            return false;
        } finally {
            // Release the socket held by the probe; thousands of proxies are checked per run.
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Writes the proxies to a file, one per line, replacing any previous content.
     *
     * <p>The file is written as UTF-8, matching the encoding used by
     * {@link #readProxiesFromFile(String)}. I/O failures are reported on stderr and not rethrown.
     *
     * @param proxies  entries to write
     * @param filePath path of the file to create or overwrite
     */
    public static void saveProxiesToFile(List<String> proxies, String filePath) {
        try {
            Files.write(Paths.get(filePath), proxies);
        } catch (IOException e) {
            System.err.println("Ошибка при записи в файл: " + e.getMessage());
        }
    }

    /** Returns a new mutable list with every non-null entry trimmed and blank entries removed. */
    private static List<String> normalize(List<String> rawEntries) {
        List<String> cleaned = new ArrayList<>();
        if (rawEntries == null) {
            return cleaned;
        }
        for (String entry : rawEntries) {
            if (entry == null) {
                continue;
            }
            String stripped = entry.strip();
            if (!stripped.isEmpty()) {
                cleaned.add(stripped);
            }
        }
        return cleaned;
    }
}
|
@ -61,8 +61,7 @@ public class ParsingService {
|
||||
});
|
||||
productService.saveData(productEntities, priceHistories);
|
||||
page++;
|
||||
// } while (page <= totalPages);
|
||||
} while (page <= 5);
|
||||
} while (page <= totalPages);
|
||||
}
|
||||
|
||||
private List<ProductInfoDto> convertMapObjectToListProductInfoDto(Map<String, Object> map) {
|
||||
|
@ -5,13 +5,12 @@ import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.core.ParameterizedTypeReference;
|
||||
import org.springframework.http.HttpEntity;
|
||||
import org.springframework.http.HttpMethod;
|
||||
import org.springframework.retry.annotation.Recover;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.retry.annotation.Retryable;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
import ru.pricepulse.parsingservice.config.MarketplacesConfig;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
|
||||
@AllArgsConstructor
|
||||
@ -22,7 +21,6 @@ public class ClientImpl implements Client {
|
||||
private final RestTemplate restTemplate;
|
||||
private final MarketplacesConfig marketplacesConfig;
|
||||
|
||||
|
||||
@Override
|
||||
@Retryable(maxAttempts = 50, value = RuntimeException.class)
|
||||
public Map<String, Object> scrapPage(int page, String shard, String query) {
|
||||
@ -30,49 +28,15 @@ public class ClientImpl implements Client {
|
||||
shard +
|
||||
query +
|
||||
"?dest=-1257786&page=" + page + "&subject=2290";
|
||||
try {
|
||||
Thread.sleep(1000);
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
return restTemplate.exchange(
|
||||
ResponseEntity<Map<String, Object>> response = restTemplate.exchange(
|
||||
url,
|
||||
HttpMethod.GET,
|
||||
HttpEntity.EMPTY,
|
||||
new ParameterizedTypeReference<Map<String, Object>>() {}
|
||||
).getBody();
|
||||
new ParameterizedTypeReference<>() {
|
||||
}
|
||||
);
|
||||
|
||||
return response.getBody();
|
||||
}
|
||||
|
||||
@Recover
|
||||
public Map<String, Object> recover(RuntimeException e, int page, String shard, String query) {
|
||||
// Логика обработки неудачи после всех попыток
|
||||
log.error("Все попытки завершились неудачей: {}", e.getMessage());
|
||||
// Можно вернуть пустую карту или другое значение по умолчанию
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
|
||||
|
||||
// @Override
|
||||
// public Map<String, Object> scrapPage(int page, String shard, String query) {
|
||||
// String url = marketplacesConfig.getWildberriesConfigProperties().getCatalogWbUrl() +
|
||||
// shard +
|
||||
// query +
|
||||
// "?dest=-1257786&page=" + page + "&subject=2290";
|
||||
//
|
||||
// try {
|
||||
// TimeUnit.MILLISECONDS.sleep(new Random().nextInt(1000) + 500);
|
||||
// } catch (InterruptedException e) {
|
||||
// Thread.currentThread().interrupt();
|
||||
// }
|
||||
//
|
||||
// return webClient.get()
|
||||
// .uri(url)
|
||||
// .retrieve()
|
||||
// .bodyToMono(new ParameterizedTypeReference<Map<String, Object>>() {
|
||||
// })
|
||||
// .retry(50)
|
||||
// .block();
|
||||
// }
|
||||
}
|
||||
|
135
parsing-service/src/main/resources/ok-proxy.txt
Normal file
135
parsing-service/src/main/resources/ok-proxy.txt
Normal file
@ -0,0 +1,135 @@
|
||||
97.74.87.226:80
|
||||
54.248.238.110:80
|
||||
3.141.217.225:80
|
||||
127.0.0.7:80
|
||||
49.12.235.70:8081
|
||||
13.38.176.104:3128
|
||||
46.51.249.135:3128
|
||||
162.223.90.130:80
|
||||
133.186.144.112:8080
|
||||
51.210.54.186:80
|
||||
101.108.123.39:8080
|
||||
3.130.65.162:3128
|
||||
80.249.112.162:80
|
||||
3.126.147.182:3128
|
||||
110.164.191.211:80
|
||||
13.208.56.180:80
|
||||
31.207.38.66:80
|
||||
116.203.27.109:80
|
||||
13.36.104.85:80
|
||||
18.228.198.164:3128
|
||||
3.123.150.192:3128
|
||||
8.219.97.248:80
|
||||
149.102.233.167:8081
|
||||
202.162.105.202:80
|
||||
165.22.77.86:80
|
||||
154.205.128.153:8888
|
||||
154.65.39.8:80
|
||||
3.124.133.93:3128
|
||||
31.40.248.2:8080
|
||||
188.253.112.218:80
|
||||
3.71.239.218:80
|
||||
159.223.92.147:8888
|
||||
3.78.92.159:3128
|
||||
54.92.168.145:8080
|
||||
50.62.183.223:80
|
||||
123.30.154.171:7777
|
||||
43.200.77.128:3128
|
||||
35.76.62.196:80
|
||||
204.57.112.5:80
|
||||
15.235.153.57:8089
|
||||
54.152.3.36:80
|
||||
47.74.152.29:8888
|
||||
0.0.0.0:80
|
||||
13.59.156.167:80
|
||||
3.127.62.252:80
|
||||
35.79.120.242:3128
|
||||
3.212.148.199:80
|
||||
3.122.84.99:3128
|
||||
45.92.177.60:8080
|
||||
23.95.216.78:34561
|
||||
82.180.146.116:3128
|
||||
52.67.10.183:80
|
||||
172.191.74.198:8080
|
||||
13.37.59.99:3128
|
||||
148.66.6.213:80
|
||||
18.134.236.231:80
|
||||
3.130.65.162:80
|
||||
103.153.154.6:80
|
||||
109.236.83.153:8888
|
||||
78.32.2.82:8080
|
||||
3.9.71.167:1080
|
||||
35.72.118.126:80
|
||||
46.47.197.210:3128
|
||||
13.37.73.214:80
|
||||
13.37.89.201:80
|
||||
110.12.211.140:80
|
||||
154.90.55.37:80
|
||||
152.89.246.197:8080
|
||||
3.37.125.76:3128
|
||||
44.218.183.55:80
|
||||
18.135.133.116:3128
|
||||
52.196.1.182:80
|
||||
94.72.152.254:80
|
||||
3.123.150.192:80
|
||||
196.11.183.160:8080
|
||||
18.133.16.21:80
|
||||
3.12.144.146:80
|
||||
49.13.173.87:80
|
||||
13.56.192.187:80
|
||||
161.35.49.68:80
|
||||
13.37.59.99:80
|
||||
3.122.84.99:80
|
||||
158.140.139.11:58100
|
||||
148.66.6.210:80
|
||||
153.19.91.77:80
|
||||
189.22.234.41:80
|
||||
52.67.10.183:3128
|
||||
41.59.90.171:80
|
||||
43.132.219.102:80
|
||||
13.40.46.249:1088
|
||||
16.163.149.249:80
|
||||
3.71.239.218:3128
|
||||
13.36.113.81:3128
|
||||
60.242.169.3:80
|
||||
49.13.173.87:8081
|
||||
35.176.148.8:1080
|
||||
18.135.133.116:80
|
||||
13.37.89.201:3128
|
||||
3.127.121.101:80
|
||||
35.178.104.4:80
|
||||
182.72.203.246:80
|
||||
13.40.239.130:1080
|
||||
65.108.207.6:80
|
||||
18.223.25.15:80
|
||||
54.233.119.172:3128
|
||||
66.97.37.164:80
|
||||
3.78.92.159:80
|
||||
110.168.213.172:8080
|
||||
49.12.235.70:80
|
||||
94.156.250.169:20128
|
||||
15.236.106.236:3128
|
||||
13.38.153.36:80
|
||||
178.128.199.145:80
|
||||
156.67.217.159:80
|
||||
148.66.6.211:80
|
||||
13.36.87.105:3128
|
||||
3.126.147.182:80
|
||||
51.222.155.142:80
|
||||
141.145.214.176:80
|
||||
184.169.154.119:80
|
||||
5.255.113.61:80
|
||||
3.124.133.93:80
|
||||
3.127.121.101:3128
|
||||
148.66.6.212:80
|
||||
176.9.239.181:80
|
||||
63.35.64.177:3128
|
||||
18.169.83.87:1080
|
||||
148.66.6.214:80
|
||||
18.228.149.161:80
|
||||
18.228.198.164:80
|
||||
106.105.118.250:80
|
||||
103.174.102.127:80
|
||||
162.0.238.147:80
|
||||
103.127.1.130:80
|
||||
185.233.187.103:80
|
3283
parsing-service/src/main/resources/proxy.txt
Normal file
3283
parsing-service/src/main/resources/proxy.txt
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user