41 lines
1.3 KiB
Python
41 lines
1.3 KiB
Python
from scipy.cluster import hierarchy
|
|
import pandas as pd
|
|
from matplotlib import pyplot as plt
|
|
|
|
|
|
def _count_price_bands(frame, bounds=(100000, 300000, 500000)):
    """Count rows per price-per-square-metre band.

    The price per square metre is ``price_doc / full_sq``. Bands are
    half-open: below ``bounds[0]``, then ``[bounds[0], bounds[1])``,
    ``[bounds[1], bounds[2])`` and finally ``bounds[2]`` and above.

    Args:
        frame: DataFrame with ``full_sq`` and ``price_doc`` columns.
        bounds: Three ascending thresholds separating the four bands.

    Returns:
        A ``(counts, skipped)`` tuple: ``counts`` is a list of four ints
        (low, medium, high, premium) and ``skipped`` is the number of rows
        whose ratio is NaN or infinite (missing value or zero area).
    """
    per_sq = frame['price_doc'] / frame['full_sq']

    # A zero area yields +/-inf and a missing value yields NaN. Every "<"
    # test is False for those, so a plain if/elif chain would file them
    # under the last ("premium") band; exclude them from the counts instead.
    usable = per_sq.notna() & (per_sq.abs() != float('inf'))
    valid = per_sq[usable]
    skipped = int(len(per_sq) - len(valid))

    low, mid, high = bounds
    counts = [
        int((valid < low).sum()),
        int(((valid >= low) & (valid < mid)).sum()),
        int(((valid >= mid) & (valid < high)).sum()),
        int((valid >= high).sum()),
    ]
    return counts, skipped


def start():
    """Print manual price-band counts and show a clustering dendrogram.

    Reads ``sberbank_data.csv`` (indexed by its ``id`` column), takes the
    ``full_sq`` and ``price_doc`` columns, prints how many rows fall into
    each of four price-per-square-metre bands, then draws a truncated
    single-linkage dendrogram of those two columns and blocks on
    ``plt.show()``.
    """
    data = pd.read_csv('sberbank_data.csv', index_col='id')
    x = data[['full_sq', 'price_doc']]

    # The figure and title are created first; the dendrogram below is drawn
    # onto this current figure.
    plt.figure(1, figsize=(16, 9))
    plt.title('Дендрограмма кластеризации цен')

    prices, skipped = _count_price_bands(x)

    print('Результаты подчсёта ручного распределения:')
    print('низких цен:'+str(prices[0]))
    print('средних цен:'+str(prices[1]))
    print('высоких цен:'+str(prices[2]))
    print('премиальных цен:'+str(prices[3]))
    if skipped:
        # Report excluded rows explicitly rather than dropping them silently.
        print('пропущено строк (нет данных или нулевая площадь):'+str(skipped))

    # NOTE(review): linkage is given the raw observations, so it presumably
    # computes all pairwise distances - confirm memory use on the full file.
    hierarchy.dendrogram(hierarchy.linkage(x, method='single'),
                         truncate_mode='lastp',
                         p=15,
                         orientation='top',
                         leaf_rotation=90,
                         leaf_font_size=8,
                         show_contracted=True)

    plt.show()
|
|
|
|
|
|
# Script entry point: this top-level call runs the whole analysis as soon as
# the module is executed (and also whenever it is imported).
start()
|