"""Fit a degree-2 polynomial linear regression on the smoking/drinking dataset.

The script reads ``smoking_drinking_dataset.csv``, one-hot encodes the
``sex`` and ``DRK_YN`` columns, uses ``SMK_stat_type_cd`` as the target, and
reports the mean squared error and R^2 score on a 20% hold-out split.
"""

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

# Load the data.
data = pd.read_csv("smoking_drinking_dataset.csv")

# Prepare the data: one-hot encode the categorical columns. drop_first=True
# drops the first level of each encoded column so the dummy columns are not
# redundant with each other.
data = pd.get_dummies(data, columns=['sex', 'DRK_YN'], drop_first=True)

# Separate the features (X) from the target variable (y).
# NOTE(review): every remaining column is assumed to be numeric, since it is
# fed directly into PolynomialFeatures -- confirm against the dataset.
X = data.drop(columns=['SMK_stat_type_cd'])
y = data['SMK_stat_type_cd']

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build the polynomial features. The transformer is fitted on the training
# data only and then applied unchanged to the test data.
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Train the model.
model = LinearRegression()
model.fit(X_train_poly, y_train)

# Predict on the test data.
y_pred = model.predict(X_test_poly)

# Evaluate the model.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Print the results.
print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")