import pandas as pd
from sklearn.model_selection import train_test_split

from config import DATA_SIZE


def load_dataset():
    """Load the car-listings CSV and return a train/test split.

    Reads ``true_car_listings.csv``, keeps the first ``DATA_SIZE`` rows,
    replaces the text columns with integer codes (see ``convert_to_num``),
    and splits features and target with 5% of the rows held out for testing.

    Returns:
        A tuple ``(x_train, x_test, y_train, y_test)`` where the ``x_*``
        frames hold the feature columns and the ``y_*`` series hold ``Price``.
    """
    # Copy the slice so the in-place column assignments below operate on an
    # independent frame instead of a slice of the loaded one (avoids
    # pandas' chained-assignment / SettingWithCopy warning).
    data = pd.read_csv('true_car_listings.csv')[:DATA_SIZE].copy()

    feature_names = ['Year', 'Mileage', 'City', 'State', 'Vin', 'Make', 'Model']
    text_columns = ('City', 'State', 'Vin', 'Make', 'Model')
    for column in text_columns:
        convert_to_num(data, column)

    Y = data['Price']
    X = data[feature_names]
    # Fixed random_state keeps the row assignment to train/test reproducible.
    x_train, x_test, y_train, y_test = train_test_split(
        X, Y, test_size=0.05, random_state=42)
    return x_train, x_test, y_train, y_test


def convert_to_num(data, col):
    """Replace column ``col`` of ``data`` with integer codes, in place.

    Each distinct value is mapped to an integer in order of first appearance
    (0, 1, 2, ...), so the encoding is the same on every run for the same
    input. Missing values receive the code ``-1`` (``pd.factorize`` default).

    Args:
        data: DataFrame to modify; the column is overwritten.
        col: Name of the column to encode.

    Returns:
        None. ``data`` is mutated.
    """
    # factorize is a single hash-based pass; the previous list.index lookup
    # was a linear scan per row and depended on arbitrary set ordering.
    codes, _ = pd.factorize(data[col])
    data[col] = codes