"""Train a decision tree on car listings and report its score.

Reads the first ``slice_size`` rows of ``true_car_listings.csv``, encodes the
``Model`` column as integer codes, fits a ``DecisionTreeClassifier`` that
predicts ``Price`` from ``Mileage``, ``Year`` and ``Model``, then prints the
test-set score and the feature importances.
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

slice_size = 30000

# Read only the rows we need instead of parsing the whole file and slicing;
# this also avoids assigning a column on a slice of a larger frame.
data = pd.read_csv('true_car_listings.csv', index_col='Vin', nrows=slice_size)

# Encode each model name as an integer code. factorize() numbers values in
# order of first appearance, so the encoding is identical on every run
# (unlike list(set(...)), whose order depends on string hashing) and it is
# computed in one vectorised pass instead of a list scan per row.
model_codes, _ = pd.factorize(data['Model'])
data['Model'] = model_codes

# NOTE(review): Price is used as a class label here; if it is a continuous
# value, DecisionTreeRegressor is probably the intended estimator - confirm.
clf = DecisionTreeClassifier(random_state=341)

# Select the target and the features
Y = data['Price']
X = data[['Mileage', 'Year', 'Model']]
print(X)

# Split the dataset into training and test data
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42)

# Fit on the training data
clf.fit(X_train, y_train)

# Model accuracy on the held-out test data
print(f'Score: {clf.score(X_test, y_test)}')

# Feature importances, in the same order as the columns of X
importances = clf.feature_importances_
print(f'Means {importances}')