# price-builder-backend/scraping/scrapingMain.py
#
# 83 lines
# 3.6 KiB
# Python
# Raw Blame History
#
# This file contains ambiguous Unicode characters
#
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import csv
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
def init_driver():
    """Create and return a Chrome WebDriver whose window starts maximized.

    The chromedriver binary path is obtained from
    ``ChromeDriverManager().install()`` and handed to the driver through a
    ``Service`` object.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--start-maximized")
    chrome_service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=chrome_service, options=chrome_options)
def _extract_specs(laptop):
    """Return the ``{label: value}`` spec pairs found on one product card."""
    specs = {}
    spec_items = laptop.find_elements(By.CSS_SELECTOR, 'li.app-catalog-12y5psc.e4qu3682')
    for item in spec_items:
        try:
            label = item.find_element(By.CSS_SELECTOR, 'span').text.strip()
            # Remove only the first occurrence of the label: a value that
            # happens to repeat the label text must be left intact.
            specs[label] = item.text.replace(label, '', 1).strip()
        except Exception as e:
            # Best effort: one unreadable spec line must not abort the card.
            print(f"Ошибка в характеристике: {e}")
    return specs


def scrape_all_pages(base_url, max_pages=5):
    """Scrape product specs from up to ``max_pages`` catalog pages.

    Opens ``base_url`` in a fresh driver from ``init_driver()``, reads every
    product card on the current page, then clicks the "next" button until
    ``max_pages`` pages have been processed or the button cannot be used.

    Args:
        base_url: URL of the first catalog page.
        max_pages: Maximum number of pages to scrape.

    Returns:
        A list with one ``{label: value}`` dict per product card that had at
        least one spec.

    The driver is always closed before returning. A failure while waiting
    for the product cards is not handled here and propagates to the caller
    after the driver has been closed.
    """
    card_selector = 'div.app-catalog-1o4umte.ec53oil0'
    next_button_xpath = '//*[@id="__next"]/div/main/section/div[2]/div/div/section/div[2]/div[3]/div/div[2]/div[3]/a/div'

    driver = init_driver()
    all_laptops_specs = []
    try:
        driver.get(base_url)
        current_page = 1
        while current_page <= max_pages:
            # Wait until the product cards are present in the DOM.
            WebDriverWait(driver, 15).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, card_selector))
            )

            # Scrape every product card on the current page.
            laptop_blocks = driver.find_elements(By.CSS_SELECTOR, card_selector)
            for laptop in laptop_blocks:
                specs = _extract_specs(laptop)
                if specs:
                    all_laptops_specs.append(specs)

            # The last requested page is done: do not load a page whose
            # contents would never be read.
            if current_page >= max_pages:
                break

            # Go to the next page.
            try:
                next_button = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.XPATH, next_button_xpath))
                )
                # Scroll the button into view before clicking it.
                driver.execute_script(
                    "arguments[0].scrollIntoView({block: 'center', inline: 'center'});",
                    next_button,
                )
                next_button.click()
                # The old first card going stale signals that the page changed.
                WebDriverWait(driver, 10).until(EC.staleness_of(laptop_blocks[0]))
                current_page += 1
            except Exception as e:
                print("Кнопка 'Следующая' не найдена или конец каталога:", e)
                break
    finally:
        driver.quit()
    return all_laptops_specs
def save_to_csv(data, filename):
    """Write a list of dict rows to ``filename`` as a UTF-8 CSV file.

    Args:
        data: Iterable of dicts; rows may have different key sets.
        filename: Path of the CSV file to create or overwrite.

    The header is the union of all row keys in first-seen order, so the
    column layout is the same on every run. Keys missing from a row are
    written as empty cells.
    """
    # dict.fromkeys de-duplicates while keeping insertion order; a set would
    # make the column order depend on string hashing.
    fieldnames = list(dict.fromkeys(key for row in data for key in row))

    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)
if __name__ == "__main__":
    # Script entry point: scrape the laptop catalog and dump the specs to CSV.
    url = 'https://www.citilink.ru/catalog/noutbuki/?ref=mainpage'
    # Single source of truth for the output path, so the file that is written
    # and the file named in the final message cannot drift apart.
    output_file = 'laptops_specs.csv'
    laptops = scrape_all_pages(url, max_pages=2)  # set the number of pages to scrape
    save_to_csv(laptops, output_file)
    print(f"Данные сохранены в файл '{output_file}'.")