import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn.linear_model import LinearRegression, SGDRegressor, Ridge
from sklearn.preprocessing import (LabelEncoder, StandardScaler,
                                   MinMaxScaler, RobustScaler)
from sklearn.model_selection import (train_test_split, GridSearchCV,
                                     StratifiedKFold, learning_curve,
                                     ShuffleSplit)


def str_features_to_numeric(data):
    """Return a copy of *data* with every non-numeric column label-encoded.

    A column is treated as numeric when its dtype is one of the NumPy
    integer/float dtypes listed in ``numerics``; such columns are left
    untouched.  Every other column is cast to ``str`` and replaced by the
    integer codes produced by ``LabelEncoder``.

    Args:
        data: DataFrame to encode.  It is not modified.

    Returns:
        A new DataFrame with the same columns and index as *data*.
    """
    numerics = ['int8', 'int16', 'int32', 'int64',
                'float16', 'float32', 'float64']

    # Work on a copy so the caller's frame keeps its original values.
    encoded = data.copy()

    # Columns whose dtype is not in the numeric list are categorical.
    categorical_columns = [
        col for col in encoded.columns
        if encoded[col].dtype not in numerics
    ]

    # Encode each categorical column with its own LabelEncoder.
    for col in categorical_columns:
        as_text = encoded[col].astype(str).tolist()
        encoded[col] = LabelEncoder().fit_transform(as_text)

    return encoded


def _report(title, model, valid, valid_target):
    """Print the validation score, coefficients and intercept of *model*."""
    print("---" * 15, f" {title} ", "---" * 15)
    # NOTE: for regressors ``score`` returns R^2, not classification accuracy.
    print(f"Accuracy: {model.score(valid, valid_target)}")
    print(f"коэффициенты: {model.coef_}")
    print(f"Смещение относительно начала координат (bias): {model.intercept_}")


def main():
    """Fit three linear regressors that predict ``trestbps`` and report them."""
    data = pd.read_csv("..//heart_disease_uci.csv")
    data['target'] = data['trestbps']
    data = data.drop(columns=['id', 'dataset', 'trestbps'])

    data_wo_null = data.dropna()
    print(len(data_wo_null))
    encoded_data_wo_null = str_features_to_numeric(data_wo_null)
    print(len(encoded_data_wo_null))

    # Separate the label BEFORE any scaling so it can never end up among the
    # features (the previous version scaled and kept 'target' as an input).
    target_name = 'target'
    target = encoded_data_wo_null[target_name]
    features = encoded_data_wo_null.drop(columns=[target_name])

    test_train_split_part = 0.2
    random_state = 42
    train, valid, train_target, valid_target = train_test_split(
        features, target,
        test_size=test_train_split_part,
        random_state=random_state,
    )

    # Feature standardization.
    # The standard score of a sample x is calculated as:
    #   z = (x - mean) / (standard deviation)
    # The scaler is fitted on the training part only, so validation rows do
    # not influence the scaling statistics.
    scaler = StandardScaler()
    train = scaler.fit_transform(train)
    valid = scaler.transform(valid)

    reg = LinearRegression().fit(train, train_target)
    _report("LinearRegression", reg, valid, valid_target)

    # Seeded so repeated runs give the same SGD result.
    SGD_reg = SGDRegressor(max_iter=1000, tol=1e-3, random_state=random_state)
    SGD_reg.fit(train, train_target)
    _report("SGDRegressor", SGD_reg, valid, valid_target)

    Ridge_clf = Ridge(alpha=1.0)
    Ridge_clf.fit(train, train_target)
    _report("Ridge", Ridge_clf, valid, valid_target)


if __name__ == "__main__":
    main()