# File listing metadata: 44 lines, 1.5 KiB, Python.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Cluster the tutors listed in tutors.csv with K-means, print a per-cluster
# summary and the silhouette coefficient, then plot Price against Score.

# Load the table (first CSV column becomes the index) and drop every row
# that has a missing value in any column.
data = pd.read_csv('tutors.csv', index_col=0)
data = data.dropna()
features = ['Price', 'Score', 'Reviews_number', 'Experience']
X = data[features].values

# K-means clustering.
# random_state pins the centroid initialisation so that labels, centres and
# the silhouette value are reproducible between runs; n_init is set
# explicitly because its default differs between scikit-learn releases.
# NOTE(review): the features are clustered on their raw scales, so any column
# with a much larger numeric range would dominate the Euclidean distances --
# confirm whether standardising the features first is wanted.
N_CLUSTERS = 3
RANDOM_STATE = 42
kmeans = KMeans(n_clusters=N_CLUSTERS, n_init=10, random_state=RANDOM_STATE)
kmeans.fit(X)

# Cluster label of every row, in the same row order as `data`.
labels = kmeans.labels_
data['Cluster'] = labels

# Cluster centres: one row per cluster, one column per entry of `features`.
cluster_centers = kmeans.cluster_centers_

# Number of rows assigned to each cluster, keyed by cluster label.
cluster_counts = data['Cluster'].value_counts()

for cluster in sorted(data['Cluster'].unique()):
    # Labels are 0-based; the report numbers clusters starting from 1.
    print(f"-----Cluster № {cluster + 1} -----")
    print(f"Number of tutors: {cluster_counts[cluster]}")
    print("Cluster centers:")
    for f, c in zip(features, cluster_centers[cluster]):
        print(f"{f} - {c:.2f}")
    print()

# Quality assessment with the silhouette coefficient.
silhouette = silhouette_score(X, labels)
print(f"Коэффициент силуэта: {silhouette:.2f}")

# Plot the result: one point per tutor coloured by cluster, centres as red
# crosses. The centre columns are looked up by feature name so the plot stays
# correct if `features` is ever reordered.
score_col = features.index('Score')
price_col = features.index('Price')
plt.scatter(data['Score'], data['Price'], c=labels)
plt.scatter(
    cluster_centers[:, score_col],
    cluster_centers[:, price_col],
    marker='x',
    color='red',
)
plt.xlabel('Score')
plt.ylabel('Price')
plt.title('K-means clustering')
plt.show()