IIS_2023_1/madyshev_egor_lab_5/main.py

58 lines
2.1 KiB
Python
Raw Normal View History

2023-11-02 19:03:28 +04:00
import numpy as np
import pandas as pb
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Perceptron, LogisticRegression, Lasso, Ridge
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MinMaxScaler
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.preprocessing import PolynomialFeatures
# Read the comma-separated dataset, decoding the file as windows-1251.
df = pb.read_csv("StudentsPerformance.csv", sep=",", encoding="windows-1251")
# NOTE(review): df1 is a second name bound to the same DataFrame object, not a
# copy, so in-place changes made through df are visible through df1 as well.
df1 = df

print("Данные без подготовки:")
# Lift the row/column display limits and widen the output for this preview only.
with pb.option_context(
    "display.max_rows", None,
    "display.max_columns", None,
    "display.width", 1000,
):
    print(df.head(5))
def prepareStringData(columnName, frame=None):
    """Replace a column's values with integer codes, in place.

    Every distinct value in the column receives a code equal to its position
    in ``Series.unique()`` (pandas returns uniques in order of appearance),
    and the column is then overwritten with those codes.

    Args:
        columnName: Label of the column to encode.
        frame: DataFrame to modify. When omitted, the module-level ``df`` is
            used, which keeps existing one-argument calls working unchanged.

    Returns:
        None. The target DataFrame is mutated.
    """
    if frame is None:
        # Backward-compatible default: operate on the script's global frame.
        frame = df
    codes = {value: code for code, value in enumerate(frame[columnName].unique())}
    frame[columnName] = frame[columnName].map(codes)
print()
print("Данные после подготовки:")
# Turn each text column into integer codes, one column at a time.
for text_column in (
    "gender",
    "race/ethnicity",
    "parental level of education",
    "lunch",
    "test preparation course",
):
    prepareStringData(text_column)

# Preview the encoded frame with the display limits lifted.
with pb.option_context(
    "display.max_rows", None,
    "display.max_columns", None,
    "display.width", 1000,
):
    print(df.head(5))

# Features: the five encoded columns plus the reading and writing scores.
# Target: the math score.
feature_columns = [
    "gender",
    "race/ethnicity",
    "lunch",
    "test preparation course",
    "parental level of education",
    "reading score",
    "writing score",
]
X = df[feature_columns]
y = df["math score"]

# Hold out 26% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_Test, y_train, y_test = train_test_split(
    X, y, test_size=0.26, random_state=42
)

# Fit the four regressors on the training split.
lnr = LinearRegression().fit(X_train, y_train)
poly_regression = make_pipeline(PolynomialFeatures(degree=4), LinearRegression())
poly_regression.fit(X_train, y_train)
lasso = Lasso().fit(X_train, y_train)
ridge = Ridge().fit(X_train, y_train)

# Report each model's score() on the held-out split, in the original order.
for caption, fitted in (
    ("Линейная регрессия: ", lnr),
    ("Полиномиальная регрессия: ", poly_regression),
    ("Лассо-регрессия: ", lasso),
    ("Гребневая регрессия: ", ridge),
):
    print(caption, fitted.score(X_Test, y_test))