From 2934ae7c3213cb688b2e4bfe69f70dcc6ea122d1 Mon Sep 17 00:00:00 2001
From: IsmailovRovshan
Date: Mon, 2 Dec 2024 00:09:42 +0400
Subject: [PATCH] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5?=
 =?UTF-8?q?=D0=BD=D0=B8=D0=B5=20=D1=81=D0=BE=D1=85=D1=80=D0=B0=D0=BD=D0=B5?=
 =?UTF-8?q?=D0=BD=D0=B8=D1=8F=20=D0=B4=D0=B0=D0=BD=D0=BD=D1=8B=D1=85=20?=
 =?UTF-8?q?=D0=B2=20CSV?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 scraping/scrapingMain.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/scraping/scrapingMain.py b/scraping/scrapingMain.py
index c4eae8d..985ffd2 100644
--- a/scraping/scrapingMain.py
+++ b/scraping/scrapingMain.py
@@ -1,3 +1,4 @@
+import csv
 from selenium import webdriver
 from selenium.webdriver.chrome.service import Service
 from selenium.webdriver.common.by import By
@@ -5,6 +6,7 @@ from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from webdriver_manager.chrome import ChromeDriverManager
 
+
 def init_driver():
     options = webdriver.ChromeOptions()
     options.add_argument("--start-maximized")
@@ -60,8 +62,21 @@ def scrape_all_pages(base_url, max_pages=5):
 
     return all_laptops_specs
 
+def save_to_csv(data, filename):
+    """Write the row dicts in ``data`` to ``filename`` as a UTF-8 CSV file."""
+    # Union of all keys in first-seen order; a set would reorder the columns
+    # from run to run because str hashing is randomised per process.
+    fieldnames = list(dict.fromkeys(key for row in data for key in row))
+
+    # newline='' hands line-ending control to the csv module.
+    with open(filename, mode='w', newline='', encoding='utf-8') as file:
+        writer = csv.DictWriter(file, fieldnames=fieldnames)
+        writer.writeheader()
+        # Rows missing a key get an empty cell (DictWriter's default restval).
+        writer.writerows(data)
+
 if __name__ == "__main__":
     url = 'https://www.citilink.ru/catalog/noutbuki/?ref=mainpage'
-    laptops = scrape_all_pages(url, max_pages=5) # Установите количество страниц
-    for i, specs in enumerate(laptops, 1):
-        print(f'Ноутбук {i}: {specs}')
+    laptops = scrape_all_pages(url, max_pages=2)  # Set the number of pages to scrape
+    save_to_csv(laptops, 'laptops_specs.csv')  # Save the scraped specs to a CSV file
+    print("Данные сохранены в файл 'laptops_specs.csv'.")