import json
from typing import Dict, Optional, Tuple

import pandas as pd


class CityFilter:
    """Keep only the CSV rows whose address mentions a known city.

    The city lookup is built from a JSON file: a list of objects, each with a
    ``'label'`` string. Labels are split on commas; the first two words of the
    first and of the second comma-separated piece become lookup keys that map
    to the full (stripped) label.
    """

    def __init__(self, json_file_path: str) -> None:
        """Remember the JSON path and build the city lookup from it."""
        self.json_file_path = json_file_path
        self.cities_airports = self.load_and_extract_cities_airports()

    def load_and_extract_cities_airports(self) -> Dict[str, str]:
        """Build the ``{city key: full label}`` mapping from the JSON file.

        Labels without a comma are skipped. When several labels yield the
        same key, the label seen last wins.
        """
        data = self.load_json(self.json_file_path)
        cities_airports: Dict[str, str] = {}
        for entry in data:
            label = entry['label']
            parts = label.split(',')
            if len(parts) < 2:
                continue
            full_address = label.strip()
            # Take at most the first two words of each of the first two pieces.
            for piece in parts[:2]:
                key = ' '.join(piece.strip().split()[:2])
                cities_airports[key] = full_address
        return cities_airports

    @staticmethod
    def load_json(file_path: str):
        """Return the parsed content of the UTF-8 JSON file at *file_path*."""
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)

    @staticmethod
    def find_city_and_airport(
        address, cities_airports: Dict[str, str]
    ) -> Tuple[Optional[str], Optional[str]]:
        """Return ``(key, full label)`` for the first known city in *address*.

        The address is split on commas; inside every piece each word position
        is tried first as a single word and then as a two-word phrase starting
        at that word. The first candidate present in *cities_airports* wins.

        Returns ``(None, None)`` when nothing matches or when *address* is not
        a string (pandas represents an empty CSV cell as a float NaN, which
        has no ``split`` method).
        """
        if not isinstance(address, str):
            return None, None

        for part in address.split(','):
            words = part.strip().split()
            for i, word in enumerate(words):
                # One-word candidate first, then the two-word phrase; at the
                # last word both candidates are the same string.
                for candidate in (word, ' '.join(words[i:i + 2])):
                    if candidate in cities_airports:
                        return candidate, cities_airports[candidate]
        return None, None

    def filter_cities_in_csv(self, csv_file_path: str, output_path: str) -> None:
        """Annotate a CSV with matched cities and write the matching rows.

        Reads *csv_file_path* (it must contain an ``address`` column), adds
        ``city`` and ``airport`` columns, keeps only rows where a city was
        found, writes them to *output_path* without the index and prints the
        first 15 kept rows.
        """
        df = pd.read_csv(csv_file_path)
        matches = [
            self.find_city_and_airport(address, self.cities_airports)
            for address in df['address']
        ]
        # Plain lists instead of ``zip(*matches)``: unpacking an empty zip
        # into two targets raises ValueError when the CSV has no data rows.
        df['city'] = [city for city, _ in matches]
        df['airport'] = [airport for _, airport in matches]
        df = df[df['city'].notnull()]
        df.to_csv(output_path, index=False)
        print(f"Filtered entries:\n{df.head(15)}")


# Example usage of the class.
json_file_path = 'airports.json'
csv_file_path_positive = '../neural_network/dataset/filtered/filtered_dataset_positive.csv'
csv_file_path_negative = '../neural_network/dataset/filtered/filtered_dataset_negative.csv'
# NOTE: these two names read backwards (the first holds the *negative* output
# path, the second the *positive* one). The calls below pair each input with
# the matching output; the names are kept unchanged for compatibility.
positive_output_path_negative = '../sity/sity_negative.csv'
negative_output_path_positive = '../sity/sity_positive.csv'

if __name__ == "__main__":
    # Guarded so that importing this module does not touch the filesystem.
    city_filter = CityFilter(json_file_path)
    city_filter.filter_cities_in_csv(csv_file_path_positive, negative_output_path_positive)
    city_filter.filter_cities_in_csv(csv_file_path_negative, positive_output_path_negative)