30 lines
940 B
Python
30 lines
940 B
Python
import pandas as pd
|
|
from sklearn.tree import DecisionTreeClassifier
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
# Train a decision tree on car listings to predict 'Price' from mileage, year
# and model, then print the test score and the per-feature importances.

# Maximum number of rows taken from the CSV.
slice_size = 30000

# Read only the first `slice_size` rows via `nrows` instead of parsing the
# whole file and slicing afterwards. The result is a standalone frame, so the
# column assignment below does not write into a slice of a larger frame.
data = pd.read_csv('true_car_listings.csv', index_col='Vin', nrows=slice_size)

# Replace the 'Model' labels with integer codes. pd.factorize numbers labels in
# order of first appearance in a single pass, so the mapping is the same on
# every run. An index lookup in list(set(...)) would depend on set iteration
# order (not stable across interpreter runs for string labels) and would
# rescan the list for every row. Missing values receive the code -1.
data['Model'] = pd.factorize(data['Model'])[0]

# NOTE(review): 'Price' looks like a continuous target; with a classifier each
# distinct price becomes its own class. A DecisionTreeRegressor is probably
# the better fit -- confirm the intent before changing the estimator.
clf = DecisionTreeClassifier(random_state=341)

# Select the target and the feature columns
Y = data['Price']
X = data[['Mileage', 'Year', 'Model']]
print(X)

# Split the dataset into training and test data (20% held out for testing)
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42)

# Fit on the training data
clf.fit(X_train, y_train)

# Model accuracy on the held-out test data
print(f'Score: {clf.score(X_test, y_test)}')

# Feature importances, in the column order of X
importances = clf.feature_importances_
print(f'Means {importances}')