Добавил аэропорты в сити
This commit is contained in:
parent
e73d503017
commit
02d891c01c
@ -42,6 +42,7 @@ class TripRequest(BaseModel):
|
|||||||
|
|
||||||
class Review(BaseModel):
|
class Review(BaseModel):
|
||||||
city: str
|
city: str
|
||||||
|
airport: str
|
||||||
address: str
|
address: str
|
||||||
name_ru: str
|
name_ru: str
|
||||||
|
|
||||||
|
@ -4,18 +4,20 @@ import pandas as pd
|
|||||||
class CityFilter:
|
class CityFilter:
|
||||||
def __init__(self, json_file_path):
|
def __init__(self, json_file_path):
|
||||||
self.json_file_path = json_file_path
|
self.json_file_path = json_file_path
|
||||||
self.cities_set = self.load_and_extract_cities()
|
self.cities_airports = self.load_and_extract_cities_airports()
|
||||||
|
|
||||||
def load_and_extract_cities(self):
|
def load_and_extract_cities_airports(self):
|
||||||
data = self.load_json(self.json_file_path)
|
data = self.load_json(self.json_file_path)
|
||||||
cities_set = set()
|
cities_airports = {}
|
||||||
for entry in data:
|
for entry in data:
|
||||||
parts = entry['label'].split(',')
|
parts = entry['label'].split(',')
|
||||||
if len(parts) > 1:
|
if len(parts) > 1:
|
||||||
city1 = parts[0].strip().split()[0] if parts[0].strip().split() else ''
|
city1 = ' '.join(parts[0].strip().split()[:2]) # Возьмем первые два слова
|
||||||
city2 = parts[1].strip().split()[0] if parts[1].strip().split() else ''
|
city2 = ' '.join(parts[1].strip().split()[:2]) # Возьмем первые два слова
|
||||||
cities_set.update([city1, city2])
|
full_address = entry['label'].strip()
|
||||||
return cities_set
|
cities_airports[city1] = full_address
|
||||||
|
cities_airports[city2] = full_address
|
||||||
|
return cities_airports
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def load_json(file_path):
|
def load_json(file_path):
|
||||||
@ -24,23 +26,26 @@ class CityFilter:
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def find_city(address, cities_set):
|
def find_city_and_airport(address, cities_airports):
|
||||||
parts = address.split(',')
|
parts = address.split(',')
|
||||||
for part in parts:
|
for part in parts:
|
||||||
words = part.strip().split()
|
words = part.strip().split()
|
||||||
for word in words:
|
for i in range(len(words)):
|
||||||
if word in cities_set:
|
city_1 = ' '.join(words[i:i+1])
|
||||||
return word
|
city_2 = ' '.join(words[i:i+2])
|
||||||
return None
|
if city_1 in cities_airports:
|
||||||
|
return city_1, cities_airports[city_1]
|
||||||
|
if city_2 in cities_airports:
|
||||||
|
return city_2, cities_airports[city_2]
|
||||||
|
return None, None
|
||||||
|
|
||||||
def filter_cities_in_csv(self, csv_file_path, output_path):
|
def filter_cities_in_csv(self, csv_file_path, output_path):
|
||||||
df = pd.read_csv(csv_file_path)
|
df = pd.read_csv(csv_file_path)
|
||||||
df['city'] = df['address'].apply(lambda x: self.find_city(x, self.cities_set))
|
df['city'], df['airport'] = zip(*df['address'].apply(lambda x: self.find_city_and_airport(x, self.cities_airports)))
|
||||||
df = df[df['city'].notnull()]
|
df = df[df['city'].notnull()]
|
||||||
df.to_csv(output_path, index=False)
|
df.to_csv(output_path, index=False)
|
||||||
print(f"Filtered entries:\n{df.head(15)}")
|
print(f"Filtered entries:\n{df.head(15)}")
|
||||||
|
|
||||||
|
|
||||||
# Пример использования класса
|
# Пример использования класса
|
||||||
json_file_path = 'airports.json'
|
json_file_path = 'airports.json'
|
||||||
csv_file_path_positive = '../neural_network/dataset/filtered/filtered_dataset_positive.csv'
|
csv_file_path_positive = '../neural_network/dataset/filtered/filtered_dataset_positive.csv'
|
||||||
|
Loading…
Reference in New Issue
Block a user