47 lines
2.0 KiB
Python
47 lines
2.0 KiB
Python
|
import numpy as np
|
||
|
import pandas as pb
|
||
|
import matplotlib.pyplot as plt
|
||
|
from sklearn.model_selection import train_test_split
|
||
|
from sklearn.linear_model import LinearRegression, Perceptron
|
||
|
from sklearn.neural_network import MLPClassifier, MLPRegressor
|
||
|
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MinMaxScaler
|
||
|
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
|
||
|
|
||
|
# Read the student-performance table, decoding the file as Windows-1251.
df = pb.read_csv("StudentsPerformance.csv", sep=",", encoding="windows-1251")
# df1 is a second name bound to the same DataFrame object (not a copy).
df1 = df

print("Данные без подготовки:")
# Lift pandas' display limits for this preview only, so no rows or columns
# of the five-row sample are elided.
with pb.option_context(
    "display.max_rows", None,
    "display.max_columns", None,
    "display.width", 1000,
):
    print(df.head(5))
|
||
|
|
||
|
def prepareStringData(columnName, frame=None):
    """Replace the values of one column with integer codes, in place.

    Each distinct value in the column is numbered 0, 1, 2, ... in the order
    returned by ``Series.unique()`` (order of first appearance), and the
    column is overwritten with those numbers.

    Args:
        columnName: Name of the column to encode.
        frame: DataFrame to modify. When omitted, the module-level ``df`` is
            used, which is what the one-argument form has always operated on.

    Returns:
        The ``{original value: integer code}`` mapping that was applied, so
        callers can translate codes back to the original labels.
    """
    if frame is None:
        # Fall back to the script-wide table for existing one-argument calls.
        frame = df
    codes = {
        value: code
        for code, value in enumerate(frame[columnName].unique())
    }
    frame[columnName] = frame[columnName].map(codes)
    return codes
|
||
|
|
||
|
|
||
|
print()
print("Данные после подготовки:")
# Encode every text column as integer codes, in the same order as before.
for column_name in (
    "gender",
    "race/ethnicity",
    "parental level of education",
    "lunch",
    "test preparation course",
):
    prepareStringData(column_name)

# Show the first five encoded rows with pandas' display limits lifted.
with pb.option_context(
    "display.max_rows", None,
    "display.max_columns", None,
    "display.width", 1000,
):
    print(df.head(5))
|
||
|
|
||
|
# Features: the four encoded categorical columns plus the three exam scores.
X = df[
    [
        "gender",
        "race/ethnicity",
        "lunch",
        "test preparation course",
        "math score",
        "reading score",
        "writing score",
    ]
]
# Target: the "parental level of education" column.
y = df["parental level of education"]

# NOTE(review): test_size=0.01 holds out only 1% of the rows, so the
# "test" scores printed below are computed on very few samples; consider a
# larger hold-out if these numbers are meant to be compared.
X_train, X_Test, y_train, y_test = train_test_split(
    X, y, test_size=0.01, random_state=42
)

# Seed both trees with the same value as the split: without a fixed
# random_state scikit-learn's tree fitting is not guaranteed to be
# deterministic, so the printed scores could differ between runs.
dtc = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
# NOTE(review): the regressor is fitted on the same target as the
# classifier; if that target holds category codes rather than a measured
# quantity, the regression score treats the codes as ordered numbers.
dtr = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

print()
# Classifier .score() is mean accuracy.
print("Результат дерева класификации на учебных данных: ", dtc.score(X_train, y_train))
print("Результат дерева класификации на тестовых данных: ", dtc.score(X_Test, y_test))
print()
# Regressor .score() is the coefficient of determination (R^2).
print("Результат дерева регрессии на учебных данных: ", dtr.score(X_train, y_train))
print("Результат дерева регрессии на тестовых данных: ", dtr.score(X_Test, y_test))
|