"""Plot a hierarchical-clustering dendrogram for two columns of a CSV file.

The script reads ``FILE_PATH``, selects the ``FEATURES`` columns, fits an
``AgglomerativeClustering`` model on them and draws the resulting merge tree
with SciPy's ``dendrogram``.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.cluster.hierarchy import dendrogram
from sklearn.cluster import AgglomerativeClustering

FILE_PATH = "boston.csv"
FEATURES = ['LSTAT', 'CRIM']


def _linkage_matrix(model):
    """Return a float linkage matrix built from a fitted clustering model.

    Args:
        model: Fitted estimator exposing ``children_``, ``distances_`` and
            ``labels_``.

    Returns:
        A float array with one row per merge, made of the ``children_``
        columns followed by the merge distance and the number of original
        samples under the merged node.
    """
    children = model.children_
    n_samples = len(model.labels_)
    counts = np.zeros(children.shape[0])

    for i, merge in enumerate(children):
        current_count = 0
        for child_idx in merge:
            if child_idx < n_samples:
                # Indices below n_samples are original samples (leaves).
                current_count += 1
            else:
                # Larger indices refer to the node created by an earlier
                # merge, whose sample count has already been stored.
                current_count += counts[child_idx - n_samples]
        counts[i] = current_count

    return np.column_stack(
        [children, model.distances_, counts]
    ).astype(float)


def plot_dendrogram(model, **kwargs):
    """Draw the dendrogram of a fitted hierarchical clustering model.

    Args:
        model: Fitted estimator exposing ``children_``, ``distances_`` and
            ``labels_``. NOTE(review): per the scikit-learn documentation,
            ``distances_`` is only available when the estimator was created
            with ``distance_threshold`` set or ``compute_distances=True``.
        **kwargs: Forwarded unchanged to ``scipy.cluster.hierarchy.dendrogram``.
    """
    dendrogram(_linkage_matrix(model), **kwargs)


def main():
    """Load the data, fit the clustering model and show the dendrogram."""
    data = pd.read_csv(FILE_PATH)
    X = data[FEATURES]

    # distance_threshold=0 together with n_clusters=None is the documented
    # scikit-learn way to have the merge distances computed for plotting.
    model = AgglomerativeClustering(distance_threshold=0, n_clusters=None)
    model = model.fit(X)

    plt.title("Hierarchical Clustering Dendrogram for Boston House Prices")
    # Only the top levels of the tree are drawn (SciPy "level" truncation).
    plot_dendrogram(model, truncate_mode="level", p=2)
    plt.show()


if __name__ == "__main__":
    main()