"""Measure how MLPClassifier accuracy on Sberbank housing data varies with max_iter.

The script loads ``sberbank_data.csv``, derives a five-class price label from
``price_doc``, and for each iteration limit trains ten classifiers, recording
the minimum, mean and maximum test accuracy.  The three curves are then
plotted against the iteration limit.
"""
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

data = pd.read_csv('sberbank_data.csv', index_col='id')

# Work on an explicit copy: cleaning a column slice of ``data`` in place is the
# chained-assignment pattern pandas warns about and may silently not apply.
x = data[['timestamp', 'full_sq', 'floor', 'max_floor', 'build_year',
          'num_room', 'material', 'kremlin_km']].copy()
x = x.replace('NA', 0).fillna(0)

# Keep only the text before the first '-' of each timestamp and store it as an
# integer so the model receives a numeric feature instead of a string.
# NOTE(review): presumably this is the year of a 'YYYY-MM-DD' value; confirm
# against the CSV.
col_date = [int(val.split('-', 1)[0]) for val in x['timestamp']]
# Drop and re-add so the derived column ends up last, as before.
x = x.drop(columns='timestamp')
x['timestamp'] = col_date

# (exclusive upper bound, label) pairs, checked in ascending order.
_PRICE_BANDS = (
    (1500000, 'low'),
    (3000000, 'medium'),
    (5500000, 'high'),
    (10000000, 'premium'),
)


def _price_category(price):
    """Return the label of the first band whose upper bound exceeds *price*.

    Prices at or above the last bound (and values that compare false against
    every bound, such as NaN) are labelled ``'oligarch'``.
    """
    for upper_bound, label in _PRICE_BANDS:
        if price < upper_bound:
            return label
    return 'oligarch'


y = [_price_category(val) for val in data['price_doc']]

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.01, random_state=42)

# Per-experiment summaries, appended to by do_test() and plotted by start().
min_scores = []
med_scores = []
max_scores = []


def do_test(iters_num: int) -> None:
    """Train ten MLP classifiers limited to *iters_num* iterations and record scores.

    Each classifier is fitted on the module-level training split and scored on
    the test split.  Every score is printed, followed by their mean; the
    minimum, mean and maximum are appended to ``min_scores``, ``med_scores``
    and ``max_scores`` respectively.

    Args:
        iters_num: Value passed to ``MLPClassifier`` as ``max_iter``.
    """
    print("Testing iterations number {}:".format(iters_num))
    scores = []
    for i in range(10):
        # Use the requested limit; a fixed 200 here would make every
        # experiment in the sweep identical.
        neuro = MLPClassifier(max_iter=iters_num)
        neuro.fit(x_train, y_train)
        scr = neuro.score(x_test, y_test)
        print("res{}: {}".format(i + 1, scr))
        scores.append(scr)
    mean_score = np.mean(scores)
    print("Medium result: {}".format(mean_score))
    min_scores.append(np.min(scores))
    med_scores.append(mean_score)
    max_scores.append(np.max(scores))


def start() -> None:
    """Run do_test() for each iteration limit and plot min/mean/max accuracy."""
    iter_nums = [200, 400, 600, 800, 1000]
    # Reset the accumulators so a repeated call does not leave them longer
    # than iter_nums, which would make the plot calls fail.
    min_scores.clear()
    med_scores.clear()
    max_scores.clear()
    for num in iter_nums:
        do_test(num)
    plt.figure(1, figsize=(16, 9))
    # One distinct colour per curve so the mean and max lines can be told apart.
    plt.plot(iter_nums, min_scores, c='r', label='min')
    plt.plot(iter_nums, med_scores, c='b', label='mean')
    plt.plot(iter_nums, max_scores, c='g', label='max')
    plt.legend()
    plt.show()


# The experiment runs on execution/import, as in the original script.
start()