import matplotlib.pyplot as plt
import numpy as np
import pandas as pb
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.linear_model import LinearRegression, Perceptron
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, OneHotEncoder
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

# Load the dataset; the file is read as comma-separated, windows-1251 encoded.
df = pb.read_csv("StudentsPerformance.csv", sep=",", encoding="windows-1251")
# NOTE(review): this is an alias of the same DataFrame object, not a copy, so
# the in-place column encoding below is visible through df1 as well. Use
# df.copy() here if an untouched snapshot was intended -- confirm.
df1 = df


def _print_head(frame):
    """Print the first five rows of *frame* without row/column truncation."""
    with pb.option_context('display.max_rows', None,
                           'display.max_columns', None,
                           'display.width', 1000):
        print(frame[:5])


def prepareStringData(columnName):
    """Replace the values of ``df[columnName]`` with integer codes, in place.

    Each distinct value is assigned the index of its position in
    ``df[columnName].unique()``: 0 for the first, 1 for the second, and so
    on. The module-level ``df`` is modified; nothing is returned.

    Args:
        columnName: Name of the column of ``df`` to encode.
    """
    codes = {value: code
             for code, value in enumerate(df[columnName].unique())}
    df[columnName] = df[columnName].map(codes)


# Printed header reads "Data before preparation:".
print("Данные без подготовки:")
_print_head(df)

print()
# Printed header reads "Data after preparation:".
print("Данные после подготовки:")
for _column in ("gender",
                "race/ethnicity",
                "parental level of education",
                "lunch",
                "test preparation course"):
    prepareStringData(_column)
_print_head(df)

# Cluster only the first 15 rows, using the three score columns as features.
X = df[:15]
X = X[["math score", "reading score", "writing score"]].values

# One leaf label per row, formatted as "(v1,v2,v3)" with no spaces.
labelList = ["(" + ",".join(str(value) for value in row) + ")" for row in X]

# Single-linkage hierarchical clustering, drawn as a top-down dendrogram.
linked = linkage(X, 'single')
plt.figure(figsize=(10, 7))
dendrogram(linked,
           orientation='top',
           labels=labelList,
           distance_sort='descending',
           show_leaf_counts=True)
plt.show()