"""Fit one decision tree per target column of the wind data set.

For every target column the script trains a ``DecisionTreeClassifier`` on the
feature columns listed in ``REQUIRED_COLUMNS`` and prints the per-feature
importances followed by the classifier's score on the held-out half of the
rows.
"""
from typing import Optional

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Kept from the original script: turns off pandas' chained-assignment warning.
pd.options.mode.chained_assignment = None

FILE_PATH = "WindData.csv"
REQUIRED_COLUMNS = ['TI1', 'V1']
TARGET_COLUMN_1 = 'TurbulenceIntensityClassA'
TARGET_COLUMN_2 = 'TurbulenceIntensityClassB'
TARGET_COLUMN_3 = 'TurbulenceIntensityClassC'


def print_classifier_info(feature_importance) -> None:
    """Print a ``{feature name: importance}`` mapping rounded to two decimals.

    Args:
        feature_importance: Sequence or array of importances, ordered like
            ``REQUIRED_COLUMNS``. Entries beyond the number of feature names
            are ignored, as in the previous implementation.
    """
    # ``tolist()`` yields plain Python floats, so the printed dict looks the
    # same regardless of how the installed NumPy renders its scalar types.
    rounded = np.round(np.asarray(feature_importance, dtype=float), 2).tolist()
    print(dict(zip(REQUIRED_COLUMNS, rounded)))


def actions(target_column: str, data: Optional[pd.DataFrame] = None) -> None:
    """Train and evaluate a decision tree for one target column.

    Args:
        target_column: Name of the column in the data set to predict.
        data: Already-loaded data set. When omitted, ``FILE_PATH`` is read,
            which matches the previous single-argument behaviour.

    Raises:
        KeyError: If a required feature column or ``target_column`` is
            missing from the data set.
    """
    if data is None:
        data = pd.read_csv(FILE_PATH)

    X = data[REQUIRED_COLUMNS]
    y = data[target_column]

    # Fixed seeds keep both the 50/50 split and the fitted tree reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=100
    )
    classifier_tree = DecisionTreeClassifier(random_state=100)
    classifier_tree.fit(X_train, y_train)

    print_classifier_info(classifier_tree.feature_importances_)
    print("Оценка качества классификации ", target_column, " - ",
          classifier_tree.score(X_test, y_test))


def main() -> None:
    """Run the evaluation for all three target columns."""
    # Load the CSV once and share it; ``actions`` never mutates the frame.
    data = pd.read_csv(FILE_PATH)
    for target_column in (TARGET_COLUMN_1, TARGET_COLUMN_2, TARGET_COLUMN_3):
        actions(target_column, data)


if __name__ == '__main__':
    main()