"""Compare several MLP classifiers on the ``heart_disease_uci.csv`` data set.

The script drops rows with missing values, label-encodes the non-numeric
columns, standardises the features, trains MLPs of different depths and
prints their test accuracy plus the prediction for one chosen test row.
"""

import argparse

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
from sklearn.model_selection import (
    GridSearchCV,
    ShuffleSplit,
    StratifiedKFold,
    learning_curve,
    train_test_split,
)
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import (
    LabelEncoder,
    MinMaxScaler,
    RobustScaler,
    StandardScaler,
)

DATA_PATH = "..//heart_disease_uci.csv"
TARGET_NAME = "target"

# Hidden-layer layouts that are trained and compared, in reporting order.
HIDDEN_LAYER_CONFIGS = (
    (100,),
    (300, 100),
    (150, 100, 50, 50),
    (100, 400, 600, 400, 100),
)
# Layout of the network used for the single-row prediction at the end.
PREDICTION_CONFIG = (150, 100, 50, 50)


def get_arguments():
    """Parse the command line.

    Returns:
        argparse.Namespace with ``id_pred`` (int, default 1): the positional
        index of the test-set row whose label will be predicted.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--id_pred',
        type=int,
        default=1,
        help='Какой id из тестовой выборки будем предсказывать',
    )
    return parser.parse_args()


def str_features_to_numeric(data):
    """Label-encode every non-numeric column of ``data``.

    Integer and float columns of any width are left untouched. Every other
    column (strings, booleans, ...) is converted to ``str`` and replaced by
    integer codes produced by ``LabelEncoder``.

    Args:
        data: DataFrame to encode. It is modified in place.

    Returns:
        The same ``data`` object, for call chaining.
    """
    for col in data.columns:
        column = data[col]
        # Booleans count as numeric for pandas, but are treated as categories
        # here so that they go through the encoder like other non-numbers.
        is_plain_number = (
            pd.api.types.is_numeric_dtype(column)
            and not pd.api.types.is_bool_dtype(column)
        )
        if is_plain_number:
            continue
        data[col] = LabelEncoder().fit_transform(column.astype(str))
    return data


def main():
    """Run the full load / encode / train / report pipeline."""
    args = get_arguments()

    data = pd.read_csv(DATA_PATH)
    data[TARGET_NAME] = data['num']
    data = data.drop(columns=['id', 'dataset', 'num'])

    # Explicit copy: the encoder below writes into the frame in place.
    data_wo_null = data.dropna().copy()
    print(len(data_wo_null))

    features = str_features_to_numeric(data_wo_null)
    # Remove the label from the feature matrix *before* scaling and
    # splitting; otherwise the networks are trained on the answer itself.
    target = features.pop(TARGET_NAME)

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # Fail before the (slow) training if the requested row does not exist.
    test_size = len(X_test)
    if not -test_size <= args.id_pred < test_size:
        raise IndexError(
            f"id_pred = {args.id_pred} вне диапазона тестовой выборки "
            f"(размер {test_size})"
        )

    # Fit the scaler on the training rows only so that no statistics of the
    # test rows leak into training.
    scaler = StandardScaler()
    X_train = pd.DataFrame(
        scaler.fit_transform(X_train),
        columns=X_train.columns,
        index=X_train.index,
    )
    X_test = pd.DataFrame(
        scaler.transform(X_test),
        columns=X_test.columns,
        index=X_test.index,
    )

    models = {}
    for layers in HIDDEN_LAYER_CONFIGS:
        clf = MLPClassifier(
            random_state=42, max_iter=300, hidden_layer_sizes=layers
        ).fit(X_train, y_train)
        label = ", ".join(str(size) for size in layers)
        print("---" * 15, f" MLPClassifier({label}) ", "---" * 15)
        print(f"Accuracy: {clf.score(X_test, y_test)}")
        models[layers] = clf

    predictor = models[PREDICTION_CONFIG]
    # Double brackets keep a one-row DataFrame, so the feature names seen at
    # fit time are preserved for predict().
    sample = X_test.iloc[[args.id_pred]]
    print("---" * 15, f" Предсказание элемента под id = {args.id_pred}", "---" * 15)
    print(f"Предсказанное значение: {predictor.predict(sample)}")
    print(f"Настоящее значение {y_test.iloc[args.id_pred]}")


if __name__ == "__main__":
    main()