IIS_2023_1/malkova_anastasia_lab_5/dataset.py
2023-11-17 00:28:29 +04:00

24 lines
567 B
Python

import pandas as pd
from config import DATA_SIZE
def load_dataset():
    """Load the car-listings CSV and return features, target and feature names.

    Reads ``true_car_listings.csv``, keeps the first ``DATA_SIZE`` rows and
    replaces the non-numeric feature columns with integer codes via
    ``convert_to_num``.

    Returns:
        A tuple ``(X, Y, names)`` where ``X`` is the DataFrame restricted to
        the feature columns, ``Y`` is the ``'Price'`` column and ``names`` is
        the list of feature column names used to build ``X``.
    """
    # Take an explicit copy of the slice: the columns are overwritten below,
    # and writing into a slice of another frame is what pandas warns about
    # with SettingWithCopyWarning.
    data = pd.read_csv('true_car_listings.csv')[:DATA_SIZE].copy()
    names = ['Year', 'Mileage', 'City', 'State', 'Vin', 'Make', 'Model']

    # Columns that are encoded as integer codes before being used as features.
    categorical_columns = ('City', 'State', 'Vin', 'Make', 'Model')
    for column in categorical_columns:
        convert_to_num(data, column)

    Y = data['Price']
    X = data[names]
    return X, Y, names
def convert_to_num(data, col):
    """Replace column ``col`` of ``data`` with integer category codes, in place.

    Each distinct value is mapped to an integer code; equal values always get
    the same code. Codes are assigned in order of first appearance (0, 1, 2,
    ...), so the encoding is reproducible from run to run, unlike an ordering
    derived from ``set`` iteration. Per ``pandas.factorize``, missing values
    are encoded as ``-1``.

    Args:
        data: DataFrame whose column is overwritten.
        col: Name of the column to encode.

    Returns:
        None. ``data`` is modified in place.
    """
    # factorize does a single hashed pass over the column, avoiding the
    # per-row linear ``list.index`` scan (quadratic for near-unique columns).
    codes, _ = pd.factorize(data[col])
    data[col] = codes