"""Train decision-tree classifiers on wind data and report their quality."""

from sklearn.model_selection import train_test_split
|
||
|
from sklearn.tree import DecisionTreeClassifier
|
||
|
import pandas as pd
|
||
|
import numpy as np
|
||
|
|
||
|
pd.options.mode.chained_assignment = None
|
||
|
|
||
|
FILE_PATH = "WindData.csv"
|
||
|
REQUIRED_COLUMNS = ['TI1', 'V1']
|
||
|
TARGET_COLUMN_1 = 'TurbulenceIntensityClassA'
|
||
|
TARGET_COLUMN_2 = 'TurbulenceIntensityClassB'
|
||
|
TARGET_COLUMN_3 = 'TurbulenceIntensityClassC'
|
||
|
|
||
|
|
||
|
def print_classifier_info(feature_importance):
|
||
|
feature_names = REQUIRED_COLUMNS
|
||
|
embarked_score = feature_importance[-3:].sum()
|
||
|
scores = np.append(feature_importance[:2], embarked_score)
|
||
|
scores = map(lambda score: round(score, 2), scores)
|
||
|
print(dict(zip(feature_names, scores)))
|
||
|
|
||
|
|
||
|
def actions(target_column):
|
||
|
data = pd.read_csv(FILE_PATH)
|
||
|
|
||
|
X = data[REQUIRED_COLUMNS]
|
||
|
y = data[target_column]
|
||
|
|
||
|
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=100)
|
||
|
|
||
|
classifier_tree = DecisionTreeClassifier(random_state=100)
|
||
|
classifier_tree.fit(X_train, y_train)
|
||
|
|
||
|
print_classifier_info(classifier_tree.feature_importances_)
|
||
|
print("Оценка качества классификации ", target_column, " - ", classifier_tree.score(X_test, y_test))
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
|
||
|
actions(TARGET_COLUMN_1)
|
||
|
actions(TARGET_COLUMN_2)
|
||
|
actions(TARGET_COLUMN_3)
|