IIS_2023_1/martysheva_tamara_lab_4/lab4.py

41 lines
1.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from matplotlib.colors import ListedColormap
# Load the dataset; the relative path is resolved against the working directory.
data = pandas.read_csv('clean_data.csv')

# Convert the string-valued columns to numeric codes.
# pandas.factorize returns a (codes, uniques) pair for each column; the
# uniques are kept so an integer code can be mapped back to its label.
factorized_data_work, unique_values_work = pandas.factorize(data['work'])
factorized_data_gymtime, unique_values_gymtime = pandas.factorize(data['gymtime'])

# Overwrite both columns with their integer codes.
data['work'] = factorized_data_work
data['gymtime'] = factorized_data_gymtime
# Select the feature columns used for clustering.
# NOTE: every column name needs its own comma. Python implicitly concatenates
# adjacent string literals, so 'phy_ex' 'gymtime' would be read as the single
# (non-existent) column 'phy_exgymtime' and the lookup would raise KeyError.
x = data[['age', 'weight', 'work', 'phy_health', 'phy_ex', 'gymtime']]
# Number of clusters to look for.
n_clusters = 3

# Standardize the features: learn the scaling parameters from x, then apply
# them to the same data.
scaler = StandardScaler()
scaler.fit(x)
x_norm = scaler.transform(x)

# Cluster the standardized data with KMeans (k-means++ initialization, fixed
# seed) and take the label assigned to each row.
kmeans = KMeans(init='k-means++', random_state=42, n_clusters=n_clusters)
kmeans.fit(x_norm)
target = kmeans.labels_
# Project the normalized features to two dimensions with t-SNE (fixed seed)
# so the clustering result can be drawn on a plane.
tsne = TSNE(random_state=42, n_components=2)
x_tsne = tsne.fit_transform(x_norm)

# Output: scatter plot of the embedding, coloured by cluster label through a
# three-entry colormap, saved to a PNG file in the working directory.
cluster_cmap = ListedColormap(['#8b00ff', '#ff294d', '#77dd77'])
plt.scatter(x_tsne[:, 0], x_tsne[:, 1], c=target, cmap=cluster_cmap)
plt.title('t-SNE кластеризация')
plt.savefig("t-SNE.png")