77 lines
1.8 KiB
Python
77 lines
1.8 KiB
Python
from matplotlib import pyplot as plt
|
|
from sklearn.model_selection import train_test_split
|
|
from sklearn.neural_network import MLPClassifier
|
|
import pandas as pd
|
|
import numpy as np
|
|
|
|
# Load the housing dataset; the 'id' column becomes the row index.
data = pd.read_csv('sberbank_data.csv', index_col='id')

# Columns used as classifier features.
x = data[[
    'timestamp', 'full_sq', 'floor', 'max_floor',
    'build_year', 'num_room', 'material', 'kremlin_km',
]]

# Literal 'NA' strings and missing values are both mapped to 0.
x = x.replace('NA', 0).fillna(0)

# Keep only the text before the first '-' of each timestamp
# (presumably the year -- confirm against the CSV's date format).
col_date = [stamp.split('-', 1)[0] for stamp in x['timestamp']]

# Swap the raw timestamp for the shortened value; re-adding the column
# places it last in the feature frame.
x = x.drop(columns='timestamp').assign(timestamp=col_date)
|
|
|
|
# Exclusive upper price bounds paired with their class label, in ascending order.
_PRICE_TIERS = (
    (1500000, 'low'),
    (3000000, 'medium'),
    (5500000, 'high'),
    (10000000, 'premium'),
)


def _price_label(price):
    """Return the label of the first tier whose bound exceeds ``price``."""
    for upper_bound, label in _PRICE_TIERS:
        if price < upper_bound:
            return label
    # Anything at or above the last bound (or not comparable, e.g. NaN).
    return 'oligarch'


# Target vector: one price-class label per row of ``data``.
y = [_price_label(price) for price in data['price_doc']]
|
|
|
|
# Hold out 1% of the rows for scoring; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.01,
    random_state=42,
)

# Score summaries, one entry appended per do_test() call and plotted by start().
min_scores, med_scores, max_scores = [], [], []
|
|
|
|
|
|
def do_test(iters_num):
    """Train ten MLP classifiers capped at ``iters_num`` iterations and record scores.

    Each run fits a fresh ``MLPClassifier`` on the module-level training split
    (``x_train``, ``y_train``) and scores it on the module-level test split
    (``x_test``, ``y_test``). Every score is printed; afterwards the minimum,
    mean and maximum of the ten scores are appended to the module-level
    ``min_scores``, ``med_scores`` and ``max_scores`` lists respectively.

    Args:
        iters_num: Value passed to ``MLPClassifier`` as ``max_iter``.
    """
    print(f"Testing iterations number {iters_num}:")
    scores = []

    for i in range(10):
        # Honour the requested cap; with a fixed max_iter every configuration
        # would train identically and the comparison would be meaningless.
        neuro = MLPClassifier(max_iter=iters_num)
        neuro.fit(x_train, y_train)
        scr = neuro.score(x_test, y_test)
        print(f"res{i + 1}: {scr}")
        scores.append(scr)

    mean_score = np.mean(scores)
    print(f"Medium result: {mean_score}")

    # The accumulators are mutated in place, so no ``global`` statement is needed.
    min_scores.append(np.min(scores))
    med_scores.append(mean_score)
    max_scores.append(np.max(scores))
|
|
|
|
|
|
def start():
    """Run do_test() for a range of iteration caps and plot the score summaries.

    Calls ``do_test`` once for each of 200, 400, 600, 800 and 1000, then plots
    the module-level ``min_scores``, ``med_scores`` and ``max_scores`` lists
    against those values and shows the figure.
    """
    iter_nums = [200, 400, 600, 800, 1000]

    # Start from empty accumulators so a repeated call still yields exactly
    # one point per entry of iter_nums (otherwise plotting would fail on a
    # length mismatch).
    for series in (min_scores, med_scores, max_scores):
        series.clear()

    for num in iter_nums:
        do_test(num)

    plt.figure(1, figsize=(16, 9))
    # Distinct colours plus a legend so the three curves can be told apart.
    plt.plot(iter_nums, min_scores, c='r', label='min')
    plt.plot(iter_nums, med_scores, c='b', label='mean')
    plt.plot(iter_nums, max_scores, c='g', label='max')
    plt.legend()
    plt.show()
|
|
|
|
|
|
# Run the experiment only when executed as a script, so importing this
# module does not kick off training and open a plot window.
if __name__ == "__main__":
    start()
|