IIS_2023_1/antonov_dmitry_lab_3/lab3.py
2023-10-08 10:49:00 +04:00

36 lines
1.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
# Load the dataset from 'dataset.csv' (path is relative to the working directory).
data = pd.read_csv('dataset.csv')

# Feature matrix: the three predictor columns used by this experiment.
X = data[
    ['Gender', 'Debtor', 'Curricular units 2nd sem (approved)']
]
# Label vector: the 'Target' column is what the classifier learns to predict.
y = data['Target']

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the decision tree and train it on the training split
# (fit() returns the estimator itself, so the calls can be chained).
dt_classifier = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Rank features by the importance the fitted tree assigns to them:
# argsort is ascending, so reverse it and keep the first two indices
# (most important first).
feature_importances = dt_classifier.feature_importances_
top_features_indices = feature_importances.argsort()[::-1][:2]
top_features = X.columns[top_features_indices]

# Report the two most important feature names.
print("2 самых важных признака:", top_features)

# Predict labels for the held-out rows ...
predictions = dt_classifier.predict(X_test)

# ... and report the fraction of them that match the true labels.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print("точность модели:", accuracy)