IIS_2023_1/lipatov_ilya_lab_3/lab3.py
2023-10-15 17:18:00 +04:00

33 lines
1.1 KiB
Python

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import pandas as pd
import numpy as np
# Silence pandas' chained-assignment warning for the whole script.
pd.options.mode.chained_assignment = None

# CSV file read by the script entry point (relative to the working directory).
FILE_PATH = 'boston.csv'

# Columns used as the model's input features. Kept as a list because it is
# used directly for DataFrame column selection.
REQUIRED_COLUMNS = ["CRIM", "DIS", "TAX"]

# Column the classifier is trained to predict.
TARGET_COLUMN = "RAD"
def print_classifier_info(feature_importance, feature_names=None):
    """Print each feature's importance score, rounded to two decimals.

    The output is a single dict literal mapping feature name to score,
    written to standard output.

    Args:
        feature_importance: Sequence of importance scores, one per feature,
            in the same order as the feature names (for example the
            ``feature_importances_`` array of a fitted tree model).
        feature_names: Names to pair with the scores. When omitted, the
            module-level ``REQUIRED_COLUMNS`` list is used.

    Raises:
        ValueError: If the number of scores differs from the number of names.
    """
    if feature_names is None:
        feature_names = REQUIRED_COLUMNS

    if len(feature_importance) != len(feature_names):
        raise ValueError(
            f"expected {len(feature_names)} importance scores, "
            f"got {len(feature_importance)}"
        )

    # Pair every name with its own score. The earlier implementation replaced
    # the last score with the sum of the final three entries, which for a
    # three-feature model reported the total importance instead of the last
    # feature's value.
    # float() turns NumPy scalars into plain floats so the dict prints cleanly.
    scores = {
        name: round(float(score), 2)
        for name, score in zip(feature_names, feature_importance)
    }
    print(scores)
if __name__ == '__main__':
    # Load the dataset and separate the feature columns from the target.
    dataset = pd.read_csv(FILE_PATH)
    features, target = dataset[REQUIRED_COLUMNS], dataset[TARGET_COLUMN]

    # Hold out 20% of the rows for evaluation; the fixed seed keeps the
    # split reproducible between runs.
    features_train, features_test, target_train, target_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # Fit the decision tree on the training part and report how much each
    # feature contributed.
    tree = DecisionTreeClassifier(random_state=42).fit(features_train, target_train)
    print_classifier_info(tree.feature_importances_)

    # Score the fitted model on the held-out rows.
    accuracy = tree.score(features_test, target_test)
    print("Оценка качества (задача классификации) - ", accuracy)