features: add proxy checking

This commit is contained in:
Emelyanov535 2024-11-13 13:28:14 +04:00
parent 1df7dc94b8
commit c4bb7a5ffa
9 changed files with 3580 additions and 65 deletions

View File

@ -1,5 +1,6 @@
package ru.pricepulse.parsingservice.config;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.HttpRequest;
import org.springframework.http.client.ClientHttpRequestExecution;
@ -10,32 +11,26 @@ import java.io.IOException;
import java.net.InetSocketAddress;
/**
 * {@link ClientHttpRequestInterceptor} that, for every outgoing request, asks
 * {@link ProxyProvider} for a proxy address, writes it into the
 * {@code http.proxyHost} / {@code http.proxyPort} system properties, replaces the
 * {@code User-Agent} header with a value from {@link UserAgentProvider}, and then
 * lets the request proceed.
 *
 * <p>NOTE(review): {@code System.setProperty} is process-wide, so concurrent requests
 * would overwrite each other's proxy settings — confirm whether the request factory
 * used by the RestTemplate actually re-reads these properties on each request.
 */
@Slf4j
@AllArgsConstructor
public class DynamicProxyInterceptor implements ClientHttpRequestInterceptor {
// Source of the User-Agent string applied to each request.
private final UserAgentProvider userAgentProvider;
// Source of the proxy address applied to each request.
private final ProxyProvider proxyProvider;
public DynamicProxyInterceptor(UserAgentProvider userAgentProvider, ProxyProvider proxyProvider) {
this.userAgentProvider = userAgentProvider;
this.proxyProvider = proxyProvider;
}
/**
 * Selects a proxy and a User-Agent, logs both, applies them, and executes the request.
 *
 * @param request   the outgoing request; its {@code User-Agent} header is overwritten
 * @param body      the request body, passed through unchanged
 * @param execution the remaining interceptor chain / actual request execution
 * @return the response produced by {@code execution.execute(request, body)}
 * @throws IOException declared by the interceptor contract and by {@code execution.execute}
 */
@Override
public ClientHttpResponse intercept(HttpRequest request, byte[] body, ClientHttpRequestExecution execution) throws IOException {
// Pick a random proxy
InetSocketAddress proxyAddress = proxyProvider.getRandomProxy();
InetSocketAddress proxyAddress = proxyProvider.getNextProxy();
log.info("Используемый прокси: {}:{}", proxyAddress.getHostName(), proxyAddress.getPort());
// Apply the proxy via the JVM-wide HTTP proxy system properties
System.setProperty("http.proxyHost", proxyAddress.getHostName());
System.setProperty("http.proxyPort", String.valueOf(proxyAddress.getPort()));
// Set a dynamic User-Agent
// Set a dynamic User-Agent
String randomUserAgent = userAgentProvider.getRandomUserAgent();
log.info("Используемый User-Agent: {}", randomUserAgent);
request.getHeaders().set("User-Agent", randomUserAgent);
// Execute the request
return execution.execute(request, body);
}
}

View File

@ -1,21 +1,45 @@
package ru.pricepulse.parsingservice.config;
import jakarta.annotation.PostConstruct;
import org.springframework.stereotype.Component;
import java.net.InetSocketAddress;
import java.util.List;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import static ru.pricepulse.parsingservice.wildberries_parser.proxy.ProxyChecker.checkProxies;
import static ru.pricepulse.parsingservice.wildberries_parser.proxy.ProxyChecker.readProxiesFromFile;
import static ru.pricepulse.parsingservice.wildberries_parser.proxy.ProxyChecker.saveProxiesToFile;
/**
 * Spring component that hands out proxy addresses to HTTP clients.
 *
 * <p>On startup ({@link #init()}) it reads candidate proxies from {@code PROXY_FILE_PATH},
 * filters them through {@code ProxyChecker.checkProxies}, stores the survivors in
 * {@code workingProxies} and writes them to {@code WORKING_PROXY_FILE_PATH}.
 * {@link #getNextProxy()} then cycles through the surviving proxies in order.
 *
 * <p>NOTE(review): both file paths are absolute paths into one developer's home
 * directory — presumably these should come from configuration or the classpath; confirm.
 */
@Component
public class ProxyProvider {
// Hard-coded "host:port" proxy entries, read by getRandomProxy below.
private static final List<String> proxies = List.of(
"85.215.64.49:80",
"82.115.19.142:80",
"148.113.172.51:8080"
);
// Proxies that passed the startup check; assigned in init().
private List<String> workingProxies;
// Input file with candidate proxies, read in init().
private static final String PROXY_FILE_PATH = "/home/forever/УлГТУ/Платформы/parsing-service/src/main/resources/proxy.txt";
// Output file that receives the proxies that passed the check.
private static final String WORKING_PROXY_FILE_PATH = "/home/forever/УлГТУ/Платформы/parsing-service/src/main/resources/ok-proxy.txt";
// Picks a uniformly random entry from the hard-coded list and splits it on ':'.
public InetSocketAddress getRandomProxy() {
String[] proxy = proxies.get(new Random().nextInt(proxies.size())).split(":");
// Index of the entry in workingProxies that the next getNextProxy() call returns.
private final AtomicInteger currentProxyIndex = new AtomicInteger(0);
/**
 * Loads the candidate proxies, checks them, and persists the working ones.
 *
 * <p>The working list is written to disk before the emptiness check, so an empty
 * result still overwrites the output file.
 *
 * @throws RuntimeException if no proxy passed the check
 */
@PostConstruct
public void init() {
List<String> proxies = readProxiesFromFile(PROXY_FILE_PATH);
System.out.println("Начата проверка проксей");
workingProxies = checkProxies(proxies);
System.out.println("Закончена проверка проксей");
saveProxiesToFile(workingProxies, WORKING_PROXY_FILE_PATH);
if (workingProxies.isEmpty()) {
throw new RuntimeException("Нет доступных рабочих прокси.");
}
System.out.println("Найдено рабочих прокси: " + workingProxies.size());
}
/**
 * Returns the next working proxy in round-robin order.
 *
 * <p>The index wraps around via modulo {@code workingProxies.size()}. Each entry is
 * split on {@code ':'} into host and port.
 *
 * @return the address built from the selected {@code host:port} entry
 */
public synchronized InetSocketAddress getNextProxy() {
// Take the current proxy index and advance it, wrapping at the end of the list
int currentIndex = currentProxyIndex.getAndUpdate(index -> (index + 1) % workingProxies.size());
String[] proxy = workingProxies.get(currentIndex).split(":");
return new InetSocketAddress(proxy[0], Integer.parseInt(proxy[1]));
}
}

View File

@ -6,6 +6,8 @@ import org.springframework.context.annotation.Configuration;
import org.springframework.http.client.ClientHttpRequestInterceptor;
import org.springframework.web.client.RestTemplate;
import java.util.Collections;
@Configuration
@AllArgsConstructor
public class RestTemplateConfig {
@ -19,9 +21,8 @@ public class RestTemplateConfig {
ClientHttpRequestInterceptor dynamicProxyInterceptor = new DynamicProxyInterceptor(userAgentProvider, proxyProvider);
// Добавляем интерсептор в RestTemplate
//restTemplate.setInterceptors(Collections.singletonList(dynamicProxyInterceptor));
restTemplate.setInterceptors(Collections.singletonList(dynamicProxyInterceptor));
return restTemplate;
}
}

View File

@ -25,7 +25,7 @@ public class WebClientConfig {
return WebClient.builder()
.filter((request, next) -> {
// Получаем случайный прокси для каждого запроса
InetSocketAddress proxyAddress = proxyProvider.getRandomProxy();
InetSocketAddress proxyAddress = proxyProvider.getNextProxy();
log.info("Используемый прокси: {}:{}", proxyAddress.getHostName(), proxyAddress.getPort());
HttpClient httpClient = HttpClient.create()

View File

@ -0,0 +1,114 @@
package ru.pricepulse.parsingservice.wildberries_parser.proxy;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.stream.Collectors;
/**
 * Static helpers for loading a list of {@code host:port} HTTP proxies, probing each one
 * in parallel, and saving the ones that respond.
 *
 * <p>A proxy counts as working when a GET request to {@code PROBE_URL} sent through it
 * returns HTTP 200 within {@code TIMEOUT} milliseconds. Progress and errors are reported
 * on {@code System.out} / {@code System.err}.
 */
public class ProxyChecker {

    /** Connect and read timeout, in milliseconds, for a single probe request. */
    private static final int TIMEOUT = 2000;

    /** Upper bound on the number of threads probing proxies concurrently. */
    private static final int THREAD_COUNT = 30;

    /** URL requested through each proxy to decide whether the proxy works. */
    private static final String PROBE_URL = "http://www.google.com";

    /** Highest valid TCP port number. */
    private static final int MAX_PORT = 65535;

    /**
     * Reads proxy entries from a text file, one entry per line.
     *
     * <p>Surrounding whitespace is removed from every line and blank lines are skipped,
     * so trailing newlines or padded entries do not turn into bogus proxies.
     *
     * @param filePath path of the file to read
     * @return a mutable list of non-blank, trimmed lines; empty if the file cannot be read
     */
    public static List<String> readProxiesFromFile(String filePath) {
        try {
            return Files.readAllLines(Paths.get(filePath)).stream()
                    .map(String::trim)
                    .filter(line -> !line.isEmpty())
                    .collect(Collectors.toCollection(ArrayList::new));
        } catch (IOException e) {
            System.err.println("Ошибка при чтении файла: " + e.getMessage());
            return new ArrayList<>();
        }
    }

    /**
     * Probes every proxy in parallel and returns the ones that work.
     *
     * <p>The result preserves the order of {@code proxies}. If the calling thread is
     * interrupted while waiting, its interrupt flag is restored, the remaining probes are
     * cancelled, and the proxies confirmed so far are returned.
     *
     * @param proxies {@code host:port} entries to check
     * @return a mutable list of the entries that passed the check
     */
    public static List<String> checkProxies(List<String> proxies) {
        List<String> workingProxies = new ArrayList<>();
        if (proxies.isEmpty()) {
            return workingProxies;
        }

        ExecutorService executor =
                Executors.newFixedThreadPool(Math.min(THREAD_COUNT, proxies.size()));
        try {
            // Submit one probe task per proxy.
            List<Future<String>> futures = new ArrayList<>(proxies.size());
            for (String proxyAddress : proxies) {
                futures.add(executor.submit(() -> isProxyWorking(proxyAddress) ? proxyAddress : null));
            }

            // Collect results in submission order.
            for (Future<String> future : futures) {
                try {
                    String proxy = future.get();
                    if (proxy != null) {
                        workingProxies.add(proxy);
                    }
                } catch (InterruptedException e) {
                    // Restore the flag so callers can observe the interruption, then stop waiting.
                    Thread.currentThread().interrupt();
                    System.err.println("Ошибка при получении результата проверки прокси: " + e.getMessage());
                    break;
                } catch (java.util.concurrent.ExecutionException e) {
                    System.err.println("Ошибка при получении результата проверки прокси: " + e.getMessage());
                }
            }
            return workingProxies;
        } finally {
            // Always release the pool; also cancels outstanding probes after an interruption.
            executor.shutdownNow();
        }
    }

    /**
     * Checks a single proxy by sending a GET request to {@code PROBE_URL} through it.
     *
     * @param proxyAddress entry in {@code host:port} form
     * @return {@code true} only if the entry is well-formed and the probe returned HTTP 200
     */
    private static boolean isProxyWorking(String proxyAddress) {
        String[] parts = proxyAddress == null ? new String[0] : proxyAddress.split(":");
        if (parts.length != 2 || parts[0].isEmpty()) {
            System.err.println("Некорректный формат прокси: " + proxyAddress);
            return false;
        }

        String ip = parts[0];
        int port;
        try {
            port = Integer.parseInt(parts[1]);
        } catch (NumberFormatException e) {
            System.err.println("Некорректный порт у прокси: " + proxyAddress);
            return false;
        }
        // Reject ports InetSocketAddress would refuse (or that cannot be connected to).
        if (port < 1 || port > MAX_PORT) {
            System.err.println("Некорректный порт у прокси: " + proxyAddress);
            return false;
        }

        HttpURLConnection connection = null;
        try {
            Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(ip, port));
            connection = (HttpURLConnection) new URL(PROBE_URL).openConnection(proxy);
            connection.setConnectTimeout(TIMEOUT);
            connection.setReadTimeout(TIMEOUT);
            connection.setRequestMethod("GET");
            connection.connect();

            int responseCode = connection.getResponseCode();
            // Only a plain 200 counts; other statuses (403, 407, 5xx, ...) are treated as failures.
            if (responseCode == HttpURLConnection.HTTP_OK) {
                System.out.println("Прокси работает (код ответа " + responseCode + "): " + proxyAddress);
                return true;
            }
            System.out.println("Прокси не отвечает (код ответа " + responseCode + "): " + proxyAddress);
            return false;
        } catch (IOException e) {
            System.out.println("Прокси не отвечает: " + proxyAddress);
            return false;
        } finally {
            // Release the underlying socket whether the probe succeeded or not.
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Writes the given proxies to a file, one per line, replacing any existing content.
     *
     * <p>Write failures are reported on {@code System.err} and otherwise ignored.
     *
     * @param proxies  entries to write
     * @param filePath destination file path
     */
    public static void saveProxiesToFile(List<String> proxies, String filePath) {
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(filePath))) {
            for (String proxy : proxies) {
                writer.write(proxy);
                writer.newLine();
            }
        } catch (IOException e) {
            System.err.println("Ошибка при записи в файл: " + e.getMessage());
        }
    }
}

View File

@ -61,8 +61,7 @@ public class ParsingService {
});
productService.saveData(productEntities, priceHistories);
page++;
// } while (page <= totalPages);
} while (page <= 5);
} while (page <= totalPages);
}
private List<ProductInfoDto> convertMapObjectToListProductInfoDto(Map<String, Object> map) {

View File

@ -5,13 +5,12 @@ import lombok.extern.slf4j.Slf4j;
import org.springframework.core.ParameterizedTypeReference;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpMethod;
import org.springframework.retry.annotation.Recover;
import org.springframework.http.ResponseEntity;
import org.springframework.retry.annotation.Retryable;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;
import ru.pricepulse.parsingservice.config.MarketplacesConfig;
import java.util.Collections;
import java.util.Map;
@AllArgsConstructor
@ -22,7 +21,6 @@ public class ClientImpl implements Client {
private final RestTemplate restTemplate;
private final MarketplacesConfig marketplacesConfig;
@Override
@Retryable(maxAttempts = 50, value = RuntimeException.class)
public Map<String, Object> scrapPage(int page, String shard, String query) {
@ -30,49 +28,15 @@ public class ClientImpl implements Client {
shard +
query +
"?dest=-1257786&page=" + page + "&subject=2290";
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new RuntimeException(e);
}
return restTemplate.exchange(
ResponseEntity<Map<String, Object>> response = restTemplate.exchange(
url,
HttpMethod.GET,
HttpEntity.EMPTY,
new ParameterizedTypeReference<Map<String, Object>>() {}
).getBody();
new ParameterizedTypeReference<>() {
}
);
return response.getBody();
}
@Recover
public Map<String, Object> recover(RuntimeException e, int page, String shard, String query) {
// Логика обработки неудачи после всех попыток
log.error("Все попытки завершились неудачей: {}", e.getMessage());
// Можно вернуть пустую карту или другое значение по умолчанию
return Collections.emptyMap();
}
// @Override
// public Map<String, Object> scrapPage(int page, String shard, String query) {
// String url = marketplacesConfig.getWildberriesConfigProperties().getCatalogWbUrl() +
// shard +
// query +
// "?dest=-1257786&page=" + page + "&subject=2290";
//
// try {
// TimeUnit.MILLISECONDS.sleep(new Random().nextInt(1000) + 500);
// } catch (InterruptedException e) {
// Thread.currentThread().interrupt();
// }
//
// return webClient.get()
// .uri(url)
// .retrieve()
// .bodyToMono(new ParameterizedTypeReference<Map<String, Object>>() {
// })
// .retry(50)
// .block();
// }
}

View File

@ -0,0 +1,135 @@
97.74.87.226:80
54.248.238.110:80
3.141.217.225:80
127.0.0.7:80
49.12.235.70:8081
13.38.176.104:3128
46.51.249.135:3128
162.223.90.130:80
133.186.144.112:8080
51.210.54.186:80
101.108.123.39:8080
3.130.65.162:3128
80.249.112.162:80
3.126.147.182:3128
110.164.191.211:80
13.208.56.180:80
31.207.38.66:80
116.203.27.109:80
13.36.104.85:80
18.228.198.164:3128
3.123.150.192:3128
8.219.97.248:80
149.102.233.167:8081
202.162.105.202:80
165.22.77.86:80
154.205.128.153:8888
154.65.39.8:80
3.124.133.93:3128
31.40.248.2:8080
188.253.112.218:80
3.71.239.218:80
159.223.92.147:8888
3.78.92.159:3128
54.92.168.145:8080
50.62.183.223:80
123.30.154.171:7777
43.200.77.128:3128
35.76.62.196:80
204.57.112.5:80
15.235.153.57:8089
54.152.3.36:80
47.74.152.29:8888
0.0.0.0:80
13.59.156.167:80
3.127.62.252:80
35.79.120.242:3128
3.212.148.199:80
3.122.84.99:3128
45.92.177.60:8080
23.95.216.78:34561
82.180.146.116:3128
52.67.10.183:80
172.191.74.198:8080
13.37.59.99:3128
148.66.6.213:80
18.134.236.231:80
3.130.65.162:80
103.153.154.6:80
109.236.83.153:8888
78.32.2.82:8080
3.9.71.167:1080
35.72.118.126:80
46.47.197.210:3128
13.37.73.214:80
13.37.89.201:80
110.12.211.140:80
154.90.55.37:80
152.89.246.197:8080
3.37.125.76:3128
44.218.183.55:80
18.135.133.116:3128
52.196.1.182:80
94.72.152.254:80
3.123.150.192:80
196.11.183.160:8080
18.133.16.21:80
3.12.144.146:80
49.13.173.87:80
13.56.192.187:80
161.35.49.68:80
13.37.59.99:80
3.122.84.99:80
158.140.139.11:58100
148.66.6.210:80
153.19.91.77:80
189.22.234.41:80
52.67.10.183:3128
41.59.90.171:80
43.132.219.102:80
13.40.46.249:1088
16.163.149.249:80
3.71.239.218:3128
13.36.113.81:3128
60.242.169.3:80
49.13.173.87:8081
35.176.148.8:1080
18.135.133.116:80
13.37.89.201:3128
3.127.121.101:80
35.178.104.4:80
182.72.203.246:80
13.40.239.130:1080
65.108.207.6:80
18.223.25.15:80
54.233.119.172:3128
66.97.37.164:80
3.78.92.159:80
110.168.213.172:8080
49.12.235.70:80
94.156.250.169:20128
15.236.106.236:3128
13.38.153.36:80
178.128.199.145:80
156.67.217.159:80
148.66.6.211:80
13.36.87.105:3128
3.126.147.182:80
51.222.155.142:80
141.145.214.176:80
184.169.154.119:80
5.255.113.61:80
3.124.133.93:80
3.127.121.101:3128
148.66.6.212:80
176.9.239.181:80
63.35.64.177:3128
18.169.83.87:1080
148.66.6.214:80
18.228.149.161:80
18.228.198.164:80
106.105.118.250:80
103.174.102.127:80
162.0.238.147:80
103.127.1.130:80
185.233.187.103:80

File diff suppressed because it is too large Load Diff