import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.linear_model import Lasso
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Columns used as predictors for the model.
feature_columns = [
    'Curricular units 2nd sem (approved)',
    'Tuition fees up to date',
    'Curricular units 2nd sem (grade)',
]

# Load the data set and separate predictors from the target column.
data = pd.read_csv('dataset.csv')
X = data[feature_columns]
# NOTE(review): Lasso is a regressor and accuracy_score needs discrete labels,
# so 'Target' is assumed to already hold integer class codes -- confirm.
y = data['Target']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit an L1-regularised linear regression on the training split.
lasso_model = Lasso(alpha=0.01)
lasso_model.fit(X_train, y_train)

# Continuous model outputs for both splits.
y_pred_train = lasso_model.predict(X_train)
y_pred_test = lasso_model.predict(X_test)

# Convert the continuous outputs into class labels: round to the nearest
# integer, then clamp into the range of labels seen during training. Without
# the clamp an output such as 2.6 on labels 0..2 would round to 3, which is
# not an existing class and could never be counted as correct.
label_min, label_max = y_train.min(), y_train.max()
train_labels = np.clip(np.round(y_pred_train), label_min, label_max)
test_labels = np.clip(np.round(y_pred_test), label_min, label_max)

# Share of exactly matching labels on each split.
train_accuracy = accuracy_score(y_train, train_labels)
test_accuracy = accuracy_score(y_test, test_labels)

# Report the scores.
print(f"Тренировочная Accuracy: {train_accuracy}")
print(f"Тест Accuracy: {test_accuracy}")

# Fitted coefficients, used here as a measure of feature importance.
# NOTE(review): the features are not standardised before fitting, so the
# coefficient magnitudes also reflect each column's scale.
coefficients = lasso_model.coef_
feature_names = X.columns

# Print one "feature: coefficient" line per predictor.
for feature, coef in zip(feature_names, coefficients):
    print(f"{feature}: {coef}")

# Horizontal bar chart of the coefficients.
plt.figure(figsize=(10, 6))
plt.barh(feature_names, coefficients)
plt.xlabel('коэффициент')
plt.title('Значимости признаков по регрессии Лассо')
# The column names are long; tight_layout widens the left margin so the
# y tick labels are not cut off at the figure edge.
plt.tight_layout()
plt.show()