import json
from typing import Optional, Set

import pandas as pd


class CityFilter:
    """Filter CSV rows down to those whose address mentions a known city token.

    The set of known tokens is built once, at construction time, from a JSON
    file: an iterable of objects that each carry a comma-separated ``'label'``
    string.
    """

    def __init__(self, json_file_path):
        """Load the JSON file and build the lookup set of city tokens.

        Args:
            json_file_path: Path to the JSON file with the labelled entries.
        """
        self.json_file_path = json_file_path
        self.cities_set = self.load_and_extract_cities()

    def load_and_extract_cities(self) -> Set[str]:
        """Collect the first word of the first two comma-separated label parts.

        Labels without a comma are ignored. Only the first whitespace-separated
        word of each part is kept, so a multi-word name contributes just its
        first word; this matches the word-by-word lookup in ``find_city``.

        Returns:
            The set of extracted tokens. Blank parts contribute nothing.

        Raises:
            KeyError: If an entry has no ``'label'`` key.
        """
        data = self.load_json(self.json_file_path)
        cities_set = set()
        for entry in data:
            parts = entry['label'].split(',')
            if len(parts) < 2:
                continue
            for part in parts[:2]:
                words = part.split()
                # Skip blank parts (e.g. "Berlin, ") so that the empty string
                # never ends up in the lookup set.
                if words:
                    cities_set.add(words[0])
        return cities_set

    @staticmethod
    def load_json(file_path):
        """Return the parsed content of the UTF-8 encoded JSON file."""
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)

    @staticmethod
    def find_city(address, cities_set) -> Optional[str]:
        """Return the first word of ``address`` that is in ``cities_set``.

        The address is scanned left to right: first split on commas, then each
        part is split on whitespace.

        Args:
            address: Address text. Any non-string value (for example the NaN
                that pandas uses for an empty CSV cell) yields ``None``.
            cities_set: Container of known city tokens.

        Returns:
            The matching word, or ``None`` when nothing matches.
        """
        # pandas hands missing cells over as float NaN; those have no .split().
        if not isinstance(address, str):
            return None
        for part in address.split(','):
            for word in part.split():
                if word in cities_set:
                    return word
        return None

    def filter_cities_in_csv(self, csv_file_path, output_path):
        """Write the rows of a CSV whose ``address`` contains a known city.

        A ``city`` column holding the matched token is added; rows without a
        match are dropped. The result is written to ``output_path`` without
        the index, and its first 15 rows are printed.

        Args:
            csv_file_path: Input CSV; it must have an ``address`` column.
            output_path: Destination path for the filtered CSV.
        """
        df = pd.read_csv(csv_file_path)
        df['city'] = df['address'].apply(
            lambda address: self.find_city(address, self.cities_set)
        )
        df = df[df['city'].notnull()]
        df.to_csv(output_path, index=False)
        print(f"Filtered entries:\n{df.head(15)}")


# Example usage of the class.
json_file_path = 'airports.json'
csv_file_path_positive = '../neural_network/dataset/filtered/filtered_dataset_positive.csv'
csv_file_path_negative = '../neural_network/dataset/filtered/filtered_dataset_negative.csv'
# NOTE: the two names below are kept as they were for compatibility, but they
# read backwards: each one holds the output path named in its *suffix*.
positive_output_path_negative = '../sity/sity_negative.csv'
negative_output_path_positive = '../sity/sity_positive.csv'

if __name__ == "__main__":
    # Guarded so that importing this module does not touch the file system.
    city_filter = CityFilter(json_file_path)
    city_filter.filter_cities_in_csv(csv_file_path_positive, negative_output_path_positive)
    city_filter.filter_cities_in_csv(csv_file_path_negative, positive_output_path_negative)