Добавил аэропорты в сити

This commit is contained in:
maksim 2024-06-08 00:01:29 +04:00
parent e73d503017
commit 02d891c01c
2 changed files with 20 additions and 14 deletions

View File

@@ -42,6 +42,7 @@ class TripRequest(BaseModel):
class Review(BaseModel): class Review(BaseModel):
city: str city: str
airport: str
address: str address: str
name_ru: str name_ru: str

View File

@@ -4,18 +4,20 @@ import pandas as pd
class CityFilter: class CityFilter:
def __init__(self, json_file_path): def __init__(self, json_file_path):
self.json_file_path = json_file_path self.json_file_path = json_file_path
self.cities_set = self.load_and_extract_cities() self.cities_airports = self.load_and_extract_cities_airports()
def load_and_extract_cities(self): def load_and_extract_cities_airports(self):
data = self.load_json(self.json_file_path) data = self.load_json(self.json_file_path)
cities_set = set() cities_airports = {}
for entry in data: for entry in data:
parts = entry['label'].split(',') parts = entry['label'].split(',')
if len(parts) > 1: if len(parts) > 1:
city1 = parts[0].strip().split()[0] if parts[0].strip().split() else '' city1 = ' '.join(parts[0].strip().split()[:2]) # Возьмем первые два слова
city2 = parts[1].strip().split()[0] if parts[1].strip().split() else '' city2 = ' '.join(parts[1].strip().split()[:2]) # Возьмем первые два слова
cities_set.update([city1, city2]) full_address = entry['label'].strip()
return cities_set cities_airports[city1] = full_address
cities_airports[city2] = full_address
return cities_airports
@staticmethod @staticmethod
def load_json(file_path): def load_json(file_path):
@@ -24,23 +26,26 @@ class CityFilter:
return data return data
@staticmethod @staticmethod
def find_city(address, cities_set): def find_city_and_airport(address, cities_airports):
parts = address.split(',') parts = address.split(',')
for part in parts: for part in parts:
words = part.strip().split() words = part.strip().split()
for word in words: for i in range(len(words)):
if word in cities_set: city_1 = ' '.join(words[i:i+1])
return word city_2 = ' '.join(words[i:i+2])
return None if city_1 in cities_airports:
return city_1, cities_airports[city_1]
if city_2 in cities_airports:
return city_2, cities_airports[city_2]
return None, None
def filter_cities_in_csv(self, csv_file_path, output_path): def filter_cities_in_csv(self, csv_file_path, output_path):
df = pd.read_csv(csv_file_path) df = pd.read_csv(csv_file_path)
df['city'] = df['address'].apply(lambda x: self.find_city(x, self.cities_set)) df['city'], df['airport'] = zip(*df['address'].apply(lambda x: self.find_city_and_airport(x, self.cities_airports)))
df = df[df['city'].notnull()] df = df[df['city'].notnull()]
df.to_csv(output_path, index=False) df.to_csv(output_path, index=False)
print(f"Filtered entries:\n{df.head(15)}") print(f"Filtered entries:\n{df.head(15)}")
# Пример использования класса # Пример использования класса
json_file_path = 'airports.json' json_file_path = 'airports.json'
csv_file_path_positive = '../neural_network/dataset/filtered/filtered_dataset_positive.csv' csv_file_path_positive = '../neural_network/dataset/filtered/filtered_dataset_positive.csv'