# IIS_2023_1/antonov_dmitry_lab_5/lab5.py (48 lines, 1.5 KiB, Python)
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.linear_model import Lasso
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
# Load the dataset and select the three feature columns used by the model.
data = pd.read_csv('dataset.csv')
X = data[
    [
        'Curricular units 2nd sem (approved)',
        'Tuition fees up to date',
        'Curricular units 2nd sem (grade)',
    ]
]
# NOTE(review): Lasso is a regressor, so 'Target' is assumed to be numerically
# encoded in dataset.csv -- confirm against the data file.
y = data['Target']

# Train the model: hold out 20% of the rows for testing; the fixed seed keeps
# the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
lasso_model = Lasso(alpha=0.01)
lasso_model.fit(X_train, y_train)

# Predict on both splits so train and test scores can be compared.
y_pred_train = lasso_model.predict(X_train)
y_pred_test = lasso_model.predict(X_test)

# Lasso produces continuous outputs; they are rounded to the nearest integer
# so they can be compared with the labels through accuracy_score.
# NOTE(review): rounded values may fall outside the set of labels present in
# 'Target' -- such predictions simply count as misses.
train_accuracy = accuracy_score(y_train, np.round(y_pred_train))
test_accuracy = accuracy_score(y_test, np.round(y_pred_test))

# Report the scores.
print(f"Тренировочная Accuracy: {train_accuracy}")
print(f"Тест Accuracy: {test_accuracy}")

# The fitted coefficients serve as the feature-importance values.
coefficients = lasso_model.coef_
feature_names = X.columns

# Print one "feature: coefficient" line per selected column.
for feature, coef in zip(feature_names, coefficients):
    print(f"{feature}: {coef}")

# Horizontal bar chart of the coefficients, one bar per feature.
plt.figure(figsize=(10, 6))
plt.barh(feature_names, coefficients)
plt.xlabel('коэффициент')
plt.title('Значимости признаков по регрессии Лассо')
plt.show()