2023-10-08 14:21:51 +04:00
|
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
|
|
|
from matplotlib import pyplot as plt
|
|
|
|
from sklearn.linear_model import Lasso
|
|
|
|
from sklearn.metrics import accuracy_score
|
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
|
|
|
|
# --- Load the data -----------------------------------------------------------
# Read the dataset and select the three predictor columns plus the target.
data = pd.read_csv('dataset.csv')

feature_columns = [
    'Curricular units 2nd sem (approved)',
    'Tuition fees up to date',
    'Curricular units 2nd sem (grade)',
]
X = data[feature_columns]
y = data['Target']

# --- Train the model ---------------------------------------------------------
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and fitting are chained.
lasso_model = Lasso(alpha=0.01).fit(X_train, y_train)

# --- Evaluate the model ------------------------------------------------------
y_pred_train = lasso_model.predict(X_train)
y_pred_test = lasso_model.predict(X_test)

# NOTE(review): Lasso is a regressor, so its continuous predictions are
# rounded to the nearest integer before being scored with accuracy. This
# presumably assumes 'Target' holds integer-coded classes -- TODO confirm.
train_accuracy = accuracy_score(y_train, y_pred_train.round())
test_accuracy = accuracy_score(y_test, y_pred_test.round())

# --- Report the results ------------------------------------------------------
print(f"Тренировочная Accuracy: {train_accuracy}")
print(f"Тест Accuracy: {test_accuracy}")

# --- Feature coefficients ----------------------------------------------------
# One fitted Lasso coefficient per selected feature column.
feature_names = X.columns
coefficients = lasso_model.coef_

# Print each feature name next to its coefficient.
for feature, coef in zip(feature_names, coefficients):
    print(f"{feature}: {coef}")

# Horizontal bar chart of the coefficients, one bar per feature.
plt.figure(figsize=(10, 6))
plt.barh(feature_names, coefficients)
plt.title('Значимости признаков по регрессии Лассо')
plt.xlabel('коэффициент')
plt.show()
|