diff --git a/scraping/scrapingMain.py b/scraping/scrapingMain.py
index c4eae8d..985ffd2 100644
--- a/scraping/scrapingMain.py
+++ b/scraping/scrapingMain.py
@@ -1,3 +1,4 @@
+import csv
 from selenium import webdriver
 from selenium.webdriver.chrome.service import Service
 from selenium.webdriver.common.by import By
@@ -5,6 +6,7 @@
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from webdriver_manager.chrome import ChromeDriverManager
+
 def init_driver():
     options = webdriver.ChromeOptions()
     options.add_argument("--start-maximized")
@@ -60,8 +62,25 @@
     return all_laptops_specs
 
 
+def save_to_csv(data, filename):
+    """Write a list of spec dicts to *filename* as CSV.
+
+    Column order is the first-seen key order across all rows, which is
+    deterministic (a plain set would give a random order per run).
+    Rows missing a column are left blank by csv.DictWriter.
+    """
+    # Collect all unique headers; dict.fromkeys keeps insertion order.
+    fieldnames = {}
+    for row in data:
+        fieldnames.update(dict.fromkeys(row))
+
+    # Write the collected rows out as CSV.
+    with open(filename, mode='w', newline='', encoding='utf-8') as file:
+        writer = csv.DictWriter(file, fieldnames=list(fieldnames))
+        writer.writeheader()
+        writer.writerows(data)
+
 if __name__ == "__main__":
     url = 'https://www.citilink.ru/catalog/noutbuki/?ref=mainpage'
-    laptops = scrape_all_pages(url, max_pages=5)  # Установите количество страниц
-    for i, specs in enumerate(laptops, 1):
-        print(f'Ноутбук {i}: {specs}')
+    laptops = scrape_all_pages(url, max_pages=2)  # set how many pages to scrape
+    save_to_csv(laptops, 'laptops_specs.csv')  # persist results to CSV
+    print("Данные сохранены в файл 'laptops_specs.csv'.")