IIS_2023_1/lipatov_ilya_lab_4/lab4.py
2023-10-28 16:52:13 +04:00

37 lines
1.0 KiB
Python

from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Path of the CSV file holding the dataset, resolved relative to the
# working directory the script is launched from.
FILE_PATH = "boston.csv"

# Names of the CSV columns that are used as clustering features.
FEATURES = ["LSTAT", "CRIM"]
def plot_dendrogram(model, **kwargs):
    """Convert a fitted hierarchical clustering model to a linkage matrix and draw it.

    Parameters
    ----------
    model
        Fitted estimator exposing ``children_`` (one row of two child node
        indices per merge), ``distances_`` (one distance per merge) and
        ``labels_`` (one entry per original sample).
    **kwargs
        Forwarded unchanged to ``scipy.cluster.hierarchy.dendrogram``.

    Returns
    -------
    dict
        Whatever ``scipy.cluster.hierarchy.dendrogram`` returns for the
        computed linkage matrix.

    Raises
    ------
    AttributeError
        If ``model`` has no ``distances_`` attribute.
    """
    # Fail early with an actionable message instead of an opaque attribute
    # error deep inside the matrix construction below.
    if not hasattr(model, "distances_"):
        raise AttributeError(
            "model has no 'distances_' attribute; for scikit-learn's "
            "AgglomerativeClustering, fit with distance_threshold set or "
            "compute_distances=True"
        )

    n_samples = len(model.labels_)

    # counts[i] is the number of original samples under the cluster created
    # by merge i.
    counts = np.zeros(model.children_.shape[0])
    for i, merge in enumerate(model.children_):
        # Indices below n_samples are original samples (leaves); larger
        # indices refer to the cluster formed by merge ``child_idx - n_samples``,
        # whose size has already been stored in ``counts``.
        counts[i] = sum(
            1 if child_idx < n_samples else counts[child_idx - n_samples]
            for child_idx in merge
        )

    # Column layout expected by SciPy: child a, child b, distance, sample count.
    linkage_matrix = np.column_stack(
        [model.children_, model.distances_, counts]
    ).astype(float)

    return dendrogram(linkage_matrix, **kwargs)
# Load the dataset and keep only the columns used for clustering.
data = pd.read_csv(FILE_PATH)
X = data[FEATURES]

# n_clusters=None together with distance_threshold=0: per the scikit-learn
# documentation this builds the full merge tree and records the merge
# distances (``distances_``) that plot_dendrogram reads.
model = AgglomerativeClustering(n_clusters=None, distance_threshold=0).fit(X)

plt.title("Hierarchical Clustering Dendrogram for Boston House Prices")
# truncate_mode="level" with p=2 is passed through to SciPy's dendrogram to
# limit how much of the hierarchy is drawn.
plot_dendrogram(model, truncate_mode="level", p=2)
plt.show()