"""Fit decision trees that predict "parental level of education".

Reads ``StudentsPerformance.csv``, replaces the values of its string columns
with integer codes, then fits a ``DecisionTreeClassifier`` and a
``DecisionTreeRegressor`` on the encoded data and prints each model's score
on the training rows and on the held-out rows.
"""
import numpy as np
import pandas as pb
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Perceptron
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MinMaxScaler
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier

# One seed shared by the split and both trees so repeated runs print the
# same scores.
RANDOM_STATE = 42

df = pb.read_csv("StudentsPerformance.csv", sep=",", encoding="windows-1251")
# Independent copy of the raw data: a plain ``df1 = df`` would only alias the
# frame, so the in-place encoding below would rewrite ``df1`` as well.
df1 = df.copy()


def _print_head(frame, rows=5):
    """Print the first *rows* rows of *frame* with no row/column truncation."""
    with pb.option_context('display.max_rows', None,
                           'display.max_columns', None,
                           'display.width', 1000):
        print(frame[:rows])


def prepareStringData(columnName):
    """Replace the values of ``df[columnName]`` with integer codes, in place.

    Codes are 0, 1, 2, ... assigned in the order in which
    ``df[columnName].unique()`` returns the distinct values. The function
    mutates the module-level ``df`` and returns ``None``.

    Args:
        columnName: Name of the column of ``df`` to encode.
    """
    codes = {value: code
             for code, value in enumerate(df[columnName].unique())}
    df[columnName] = df[columnName].map(codes)


print("Данные без подготовки:")
_print_head(df)

print()
print("Данные после подготовки:")
for column in ("gender",
               "race/ethnicity",
               "parental level of education",
               "lunch",
               "test preparation course"):
    prepareStringData(column)
_print_head(df)

feature_columns = ["gender", "race/ethnicity", "lunch",
                   "test preparation course", "math score",
                   "reading score", "writing score"]
X = df[feature_columns]
y = df["parental level of education"]

# NOTE(review): test_size=0.01 holds out only 1% of the rows, so the test
# scores below rest on very few samples -- confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.01, random_state=RANDOM_STATE)

dtc = DecisionTreeClassifier(random_state=RANDOM_STATE)
dtc = dtc.fit(X_train, y_train)

# NOTE(review): the target holds the integer codes produced by
# prepareStringData (order of first appearance), so the regressor is fitted
# on, and scored against, those code values -- confirm that is meaningful.
dtr = DecisionTreeRegressor(random_state=RANDOM_STATE)
dtr = dtr.fit(X_train, y_train)

print()
print("Результат дерева класификации на учебных данных: ", dtc.score(X_train, y_train))
print("Результат дерева класификации на тестовых данных: ", dtc.score(X_test, y_test))

print()
print("Результат дерева регрессии на учебных данных: ", dtr.score(X_train, y_train))
print("Результат дерева регрессии на тестовых данных: ", dtr.score(X_test, y_test))