"""Compare regression models that predict a student's math score.

The script loads ``StudentsPerformance.csv``, replaces the values of its
categorical columns with integer codes, fits four regressors (linear,
degree-4 polynomial, Lasso, Ridge) on a train split and prints the score
each one reaches on the held-out test split.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pb
from sklearn.linear_model import (
    Lasso,
    LinearRegression,
    LogisticRegression,
    Perceptron,
    Ridge,
)
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import (
    LabelEncoder,
    MinMaxScaler,
    OneHotEncoder,
    PolynomialFeatures,
)
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor


def _print_head(frame, rows=5):
    """Print the first *rows* rows of *frame* with wide display limits."""
    with pb.option_context(
        "display.max_rows", None,
        "display.max_columns", None,
        "display.width", 1000,
    ):
        print(frame[:rows])


def prepareStringData(columnName, frame=None):
    """Replace the values of one column with integer codes, in place.

    Each distinct value of the column receives a code 0, 1, 2, ... in the
    order in which the values are returned by ``Series.unique()``.

    Args:
        columnName: Name of the column to encode.
        frame: DataFrame to modify. When omitted, the module-level ``df``
            is modified, which is what the existing one-argument calls
            rely on.
    """
    target = df if frame is None else frame
    codes = {
        value: code
        for code, value in enumerate(target[columnName].unique())
    }
    target[columnName] = target[columnName].map(codes)


df = pb.read_csv("StudentsPerformance.csv", sep=",", encoding="windows-1251")
# NOTE(review): this binds a second name to the same DataFrame object, it is
# not a copy, so df1 also sees the encoding applied below. If a snapshot of
# the raw data is intended, this should be df.copy() -- confirm with usage.
df1 = df

print("Данные без подготовки:")
_print_head(df)

print()
print("Данные после подготовки:")
for column in (
    "gender",
    "race/ethnicity",
    "parental level of education",
    "lunch",
    "test preparation course",
):
    prepareStringData(column)
_print_head(df)

# Features: the five encoded categorical columns plus the two other scores.
X = df[
    [
        "gender",
        "race/ethnicity",
        "lunch",
        "test preparation course",
        "parental level of education",
        "reading score",
        "writing score",
    ]
]
y = df["math score"]

# Fixed random_state keeps the split identical from run to run.
X_train, X_Test, y_train, y_test = train_test_split(
    X, y, test_size=0.26, random_state=42
)

# fit() returns the estimator itself, so the call can be chained.
lnr = LinearRegression().fit(X_train, y_train)

poly_regression = make_pipeline(
    PolynomialFeatures(degree=4), LinearRegression()
)
poly_regression.fit(X_train, y_train)

lasso = Lasso()
lasso.fit(X_train, y_train)

ridge = Ridge()
ridge.fit(X_train, y_train)

# For these regressors score() is the coefficient of determination (R^2)
# computed on the held-out split.
for label, model in (
    ("Линейная регрессия: ", lnr),
    ("Полиномиальная регрессия: ", poly_regression),
    ("Лассо-регрессия: ", lasso),
    ("Гребневая регрессия: ", ridge),
):
    print(label, model.score(X_Test, y_test))