malkova_anastasia_lab_1 ready #120

Merged
Alexey merged 2 commits from malkova_anastasia_lab_1 into main 2023-11-06 21:08:35 +04:00
6 changed files with 187 additions and 0 deletions
Showing only changes of commit 1f35af8f8f - Show all commits

View File

@ -0,0 +1,44 @@
# Лабораторная работа №1
> Работа с типовыми наборами данных и различными моделями
# Задание
Сгенерировать определённый тип данных, сравнить на нём разные модели и отобразить качество на графиках.
Данные: make_classification (n_samples=500, n_features=2, n_redundant=0, n_informative=2, random_state=rs, n_clusters_per_class=1)
Модели:
* Линейная регрессия
* Персептрон
* Гребневая полиномиальная регрессия (со степенью 3, alpha = 1.0)
### Как запустить лабораторную работу
1. Установить python, numpy, sklearn, matplotlib
2. Запустить команду `python main.py` в корне проекта
### Использованные технологии
* Язык программирования `python`
* Библиотеки `numpy, sklearn, matplotlib`
* Среда разработки `PyCharm`
### Что делает программа?
Генерирует набор данных для классификации с помощью make_classification.
Обучает на них 3 модели:
- Линейную регрессию
- Персептрон
- Гребневую полиномиальную регрессию (со степенью 3, alpha = 1.0)
Собирает итоговые оценки моделей:
- Линейная регрессия - коэффициент детерминации R2
- Персептрон - средняя точность по заданным тестовым данным
- Гребневая полиномиальная регрессия - среднеквадратичная ошибка (MSE), усреднённая по перекрёстной проверке (5 блоков)
![plots screen](plots.jpg)
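Лучший результат показала модель персептрона (при этом оценки моделей получены по разным метрикам - R2, точность и MSE - и напрямую не сопоставимы)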

View File

@ -0,0 +1,16 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
def generate_dataset():
    """Build the synthetic two-feature classification dataset.

    Samples come from ``make_classification`` (500 samples, 2 informative
    features, one cluster per class, seed 0). Each coordinate is then
    shifted in place by 2.5 times a uniform draw from a ``RandomState``
    seeded with 2, so the result is reproducible between runs.

    Returns:
        A ``(features, labels)`` pair as returned by ``make_classification``,
        with the noise already added to ``features``.
    """
    features, labels = make_classification(
        n_samples=500,
        n_features=2,
        n_informative=2,
        n_redundant=0,
        n_clusters_per_class=1,
        random_state=0,
    )

    # A separate, fixed-seed generator keeps the noise reproducible.
    noise_rng = np.random.RandomState(2)
    noise = noise_rng.uniform(size=features.shape)
    features += 2.5 * noise
    return features, labels
def split_dataset(x, y):
    """Split features and labels into train and test parts.

    5% of the samples are held out for testing; the shuffle is seeded with
    42 so the split is the same on every run.

    Returns:
        The list produced by ``train_test_split``:
        ``[x_train, x_test, y_train, y_test]``.
    """
    holdout_fraction = .05
    split_seed = 42
    parts = train_test_split(
        x, y, test_size=holdout_fraction, random_state=split_seed)
    return parts

View File

@ -0,0 +1,19 @@
from dataset import generate_dataset, split_dataset
from models import launch_linear_regression, launch_perceptron, launch_ridge_poly_regression
from plots import show_plot
# Generate the data once and keep the split together so that every model
# is trained and scored on exactly the same samples.
x, y = generate_dataset()
splits = split_dataset(x, y)
x_train, x_test, y_train, y_test = splits

# Train each model; every launcher returns a (model, score) pair.
my_linear_model, linear_model_score = launch_linear_regression(*splits)
my_perceptron_model, perceptron_model_score = launch_perceptron(*splits)
my_polynomial_model, polynomial_model_score = launch_ridge_poly_regression(
    *splits)

# Render the 3x3 figure: data panels plus one panel per model.
show_plot(
    x, *splits,
    my_linear_model, linear_model_score,
    my_perceptron_model, perceptron_model_score,
    my_polynomial_model, polynomial_model_score,
)

View File

@ -0,0 +1,37 @@
from sklearn.linear_model import LinearRegression, Perceptron, Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
def launch_linear_regression(x_train, x_test, y_train, y_test):
    """Fit an ordinary least-squares model and score it on the test split.

    The score is whatever ``LinearRegression.score`` reports for the test
    data (the coefficient of determination, R^2). It is also printed.

    Returns:
        A ``(fitted_model, score)`` pair.
    """
    regressor = LinearRegression()
    regressor.fit(x_train, y_train)
    test_score = regressor.score(x_test, y_test)
    print('linear_model_score: ', test_score)
    return regressor, test_score
# Perceptron
def launch_perceptron(x_train, x_test, y_train, y_test):
    """Fit a default-configured perceptron and score it on the test split.

    The score is whatever ``Perceptron.score`` reports for the test data
    (mean classification accuracy). It is also printed.

    Returns:
        A ``(fitted_model, score)`` pair.
    """
    classifier = Perceptron()
    classifier.fit(x_train, y_train)
    test_accuracy = classifier.score(x_test, y_test)
    print('perceptron_model_score: ', test_accuracy)
    return classifier, test_accuracy
# RidgePolyRegression
def launch_ridge_poly_regression(x_train, x_test, y_train, y_test):
    """Fit a degree-3 polynomial ridge regression and cross-validate it.

    A pipeline of ``PolynomialFeatures(degree=3, include_bias=False)``
    followed by ``Ridge(alpha=1)`` is fitted on the training split. The
    reported score is the mean squared error averaged over a 5-fold
    cross-validation run on the test split; it is also printed.

    Returns:
        A ``(fitted_pipeline, score)`` pair. The whole pipeline is returned
        rather than only the feature transformer, because callers need an
        object with ``predict`` to draw the model's response surface.
    """
    pipeline = Pipeline([
        ("polynomial_features",
         PolynomialFeatures(degree=3, include_bias=False)),
        ("ridge_regression", Ridge(alpha=1)),
    ])
    pipeline.fit(x_train, y_train)

    # NOTE(review): cross_val_score clones the estimator and refits it on
    # folds of the data it is given, so this score comes from models trained
    # on parts of the test split, not from the pipeline fitted above.
    scores = cross_val_score(pipeline, x_test, y_test,
                             scoring="neg_mean_squared_error", cv=5)
    # The scorer returns negated MSE; flip the sign to report a plain MSE.
    polynomial_model_score = -scores.mean()
    print('mean polynomial_model_score: ', polynomial_model_score)
    return pipeline, polynomial_model_score

Binary file not shown.

After

Width:  |  Height:  |  Size: 194 KiB

View File

@ -0,0 +1,71 @@
import numpy as np
from matplotlib.colors import ListedColormap
from matplotlib.axes import Axes
from matplotlib import pyplot as plt
# Layout of the 3x3 figure drawn by show_plot, expressed as 0-based subplot
# indices (row-major order).
# Subplots with index below this value show the training points.
TRAIN_DATA_ROW_LENGTH = 3
# Subplots with index below this value (and not in the training row) show
# the test points. Despite the name this is an exclusive upper index bound.
TEST_DATA_ROW_LENGTH = 6
# Subplot indices of the per-model panels in the last row.
LINEAR_REGRESSION_PLOT_INDEX = 6
PERCEPTRON_REGRESSION_PLOT_INDEX = 7
RIDGE_POLY_REGRESSION_REGRESSION_PLOT_INDEX = 8
def show_plot(x, x_train, x_test, y_train, y_test, my_linear_model, linear_model_score, my_perceptron_model, perceptron_model_score, pipeline, polynomial_model_score):
    """Draw the 3x3 overview figure and display it.

    The first row of subplots shows the training points, the second row the
    test points (semi-transparent). Each subplot of the third row shows one
    model's response surface with both training and test points on top.

    Args:
        x: Full feature matrix; only used to size the mesh grid.
        x_train, x_test, y_train, y_test: The train/test split.
        my_linear_model, linear_model_score: Linear regression and its score.
        my_perceptron_model, perceptron_model_score: Perceptron and its score.
        pipeline, polynomial_model_score: Polynomial ridge model and its score.
    """
    step = .02  # spacing of the regular mesh grid

    # Pad the data range by half a unit on every side.
    first_feature, second_feature = x[:, 0], x[:, 1]
    x0_lo, x0_hi = first_feature.min() - .5, first_feature.max() + .5
    x1_lo, x1_hi = second_feature.min() - .5, second_feature.max() + .5
    xx0, xx1 = np.meshgrid(np.arange(x0_lo, x0_hi, step),
                           np.arange(x1_lo, x1_hi, step))

    surface_cmap = plt.cm.RdBu
    points_cmap = ListedColormap(['#FF0000', '#0000FF'])

    # Subplot index -> (model, title, score) for the model panels.
    model_panels = {
        LINEAR_REGRESSION_PLOT_INDEX:
            (my_linear_model, 'LinearRegression', linear_model_score),
        PERCEPTRON_REGRESSION_PLOT_INDEX:
            (my_perceptron_model, 'Perceptron', perceptron_model_score),
        RIDGE_POLY_REGRESSION_REGRESSION_PLOT_INDEX:
            (pipeline, 'RidgePolyRegression', polynomial_model_score),
    }

    for index in range(9):
        panel = plt.subplot(3, 3, index + 1)

        if index < TRAIN_DATA_ROW_LENGTH:
            panel.scatter(
                x_train[:, 0], x_train[:, 1], c=y_train, cmap=points_cmap)
            continue

        if index < TEST_DATA_ROW_LENGTH:
            panel.scatter(
                x_test[:, 0], x_test[:, 1], c=y_test, cmap=points_cmap,
                alpha=0.6)
            continue

        entry = model_panels.get(index)
        if entry is not None:
            model, title, score = entry
            if index == RIDGE_POLY_REGRESSION_REGRESSION_PLOT_INDEX:
                panel.set_title(title)
            show_gradient(model, current_subplot=panel, title=title,
                          score=score, xx0=xx0, xx1=xx1, cm=surface_cmap)

        # Overlay the data on top of the model surface.
        panel.scatter(
            x_train[:, 0], x_train[:, 1], c=y_train, cmap=points_cmap)
        panel.scatter(
            x_test[:, 0], x_test[:, 1], c=y_test, cmap=points_cmap,
            alpha=0.6)

    plt.show()
def show_gradient(model, current_subplot: Axes, title: str, score: float, xx0, xx1, cm):
    """Draw a model's response over the mesh grid as a filled contour plot.

    The model is evaluated at every grid point using the first available of
    ``decision_function``, ``predict_proba`` (second column) or ``predict``.
    If the model offers none of them, only the title is set and nothing else
    is drawn.

    Args:
        model: Fitted estimator to visualise.
        current_subplot: Axes to draw on.
        title: Title of the subplot.
        score: Model score, printed with two decimals near the lower-right
            corner of the grid.
        xx0, xx1: Coordinate matrices of the mesh grid (as produced by
            ``np.meshgrid``) for the first and second feature.
        cm: Colormap for the filled contours.
    """
    current_subplot.set_title(title)

    # Build the (n_points, 2) evaluation matrix once for all branches.
    grid_points = np.c_[xx0.ravel(), xx1.ravel()]
    if hasattr(model, "decision_function"):
        Z = model.decision_function(grid_points)
    elif hasattr(model, "predict_proba"):
        Z = model.predict_proba(grid_points)[:, 1]
    elif hasattr(model, "predict"):
        Z = model.predict(grid_points)
    else:
        return

    Z = Z.reshape(xx0.shape)
    current_subplot.contourf(xx0, xx1, Z, cmap=cm, alpha=.8)

    current_subplot.set_xlim(xx0.min(), xx0.max())
    # The vertical axis carries the second feature, so both of its limits
    # must come from xx1 (the lower one used to be taken from xx0).
    current_subplot.set_ylim(xx1.min(), xx1.max())
    current_subplot.set_xticks(())
    current_subplot.set_yticks(())
    current_subplot.text(xx0.max() - .3, xx1.min() + .3, ('%.2f' % score),
                         size=15, horizontalalignment='left')