Merge pull request 'malkova_anastasia_lab_1 ready' (#120) from malkova_anastasia_lab_1 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/120
This commit is contained in:
commit 28056f94bd
44 malkova_anastasia_lab_1/README.md Normal file
@@ -0,0 +1,44 @@
# Laboratory work No. 1

> Working with typical datasets and various models
# Task

Generate a certain type of data, compare different models on it, and display their quality on plots.
Data: make_classification (n_samples=500, n_features=2, n_redundant=0, n_informative=2, random_state=rs, n_clusters_per_class=1)
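For orientation, a minimal sketch of what this call returns (here random_state is set to 0 as a stand-in for rs, which the assignment leaves open):

```python
from sklearn.datasets import make_classification

# 500 samples, 2 informative features, binary labels
x, y = make_classification(n_samples=500, n_features=2, n_redundant=0,
                           n_informative=2, random_state=0, n_clusters_per_class=1)
print(x.shape, set(y))  # (500, 2) {0, 1}
```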
Models:

* Linear regression
* Perceptron
* Ridge polynomial regression (degree 3, alpha = 1.0)
### How to run the laboratory work

1. Install python and the numpy, sklearn, matplotlib libraries (on PyPI the sklearn package is published as `scikit-learn`)
2. Run `python main.py` in the project root
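A quick way to check the environment before running (a minimal sketch; assumes the libraries were installed as described above):

```python
# fails with ImportError if a dependency is missing
import numpy, sklearn, matplotlib
print(numpy.__version__, sklearn.__version__, matplotlib.__version__)
```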
### Technologies used

* Programming language: `python`
* Libraries: `numpy, sklearn, matplotlib`
* Development environment: `PyCharm`
### What does the program do?

Generates a classification dataset with make_classification.
Trains 3 models on it:
- Linear regression
- Perceptron
- Ridge polynomial regression (degree 3, alpha = 1.0)
Collects the final model scores:

- Linear regression: coefficient of determination R²
- Perceptron: mean accuracy on the given test data
- Ridge polynomial regression: cross-validation (mean squared error)
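For reference, a toy sketch of how these three kinds of scores are obtained in scikit-learn (illustration only; the lab's own code is in models.py below):

```python
import numpy as np
from sklearn.linear_model import LinearRegression, Perceptron
from sklearn.model_selection import cross_val_score

rng = np.random.RandomState(0)
x = rng.rand(100, 2)
y = (x[:, 0] + x[:, 1] > 1).astype(int)

reg = LinearRegression().fit(x, y)
print(reg.score(x, y))  # R^2: score() of a regressor

clf = Perceptron().fit(x, y)
print(clf.score(x, y))  # mean accuracy: score() of a classifier

# "neg_mean_squared_error" yields negated MSE, so flip the sign
print(-cross_val_score(reg, x, y, scoring="neg_mean_squared_error", cv=5).mean())
```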
![plots screen](plots.jpg)
The perceptron model showed the best result.
16 malkova_anastasia_lab_1/dataset.py Normal file
@@ -0,0 +1,16 @@
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split


def generate_dataset():
    # 500 two-feature samples, one cluster per class
    x, y = make_classification(n_samples=500, n_features=2, n_redundant=0,
                               n_informative=2, random_state=0, n_clusters_per_class=1)
    # add uniform noise so the classes are not perfectly separable
    random = np.random.RandomState(2)
    x += 2.5 * random.uniform(size=x.shape)
    return x, y


def split_dataset(x, y):
    return train_test_split(
        x, y, test_size=.05, random_state=42)
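A quick sanity check of the split (a sketch, not part of the lab files): with test_size=.05 on 500 samples, 475 go to training and 25 to testing.

```python
from dataset import generate_dataset, split_dataset

x, y = generate_dataset()
x_train, x_test, y_train, y_test = split_dataset(x, y)
print(x_train.shape, x_test.shape)  # (475, 2) (25, 2)
```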
19 malkova_anastasia_lab_1/main.py Normal file
@@ -0,0 +1,19 @@
from dataset import generate_dataset, split_dataset
from models import launch_linear_regression, launch_perceptron, launch_ridge_poly_regression
from plots import show_plot

x, y = generate_dataset()

x_train, x_test, y_train, y_test = split_dataset(x, y)

my_linear_model, linear_model_score = launch_linear_regression(
    x_train, x_test, y_train, y_test)
my_perceptron_model, perceptron_model_score = launch_perceptron(
    x_train, x_test, y_train, y_test)
my_polynomial_model, polynomial_model_score = launch_ridge_poly_regression(
    x_train, x_test, y_train, y_test)

show_plot(x, x_train, x_test, y_train, y_test,
          my_linear_model, linear_model_score,
          my_perceptron_model, perceptron_model_score,
          my_polynomial_model, polynomial_model_score)
37 malkova_anastasia_lab_1/models.py Normal file
@@ -0,0 +1,37 @@
from sklearn.linear_model import LinearRegression, Perceptron, Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline


def launch_linear_regression(x_train, x_test, y_train, y_test):
    my_linear_model = LinearRegression()
    my_linear_model.fit(x_train, y_train)
    # for a regressor, score() is the coefficient of determination R^2
    linear_model_score = my_linear_model.score(
        x_test, y_test)
    print('linear_model_score: ', linear_model_score)
    return my_linear_model, linear_model_score


# Perceptron
def launch_perceptron(x_train, x_test, y_train, y_test):
    my_perceptron_model = Perceptron()
    my_perceptron_model.fit(x_train, y_train)
    # for a classifier, score() is the mean accuracy on the test data
    perceptron_model_score = my_perceptron_model.score(
        x_test, y_test)
    print('perceptron_model_score: ', perceptron_model_score)
    return my_perceptron_model, perceptron_model_score


# RidgePolyRegression
def launch_ridge_poly_regression(x_train, x_test, y_train, y_test):
    my_polynomial_model = PolynomialFeatures(degree=3, include_bias=False)
    ridge = Ridge(alpha=1)
    pipeline = Pipeline(
        [("polynomial_features", my_polynomial_model), ("ridge_regression", ridge)])
    pipeline.fit(x_train, y_train)
    # "neg_mean_squared_error" returns negated MSE, so flip the sign below
    scores = cross_val_score(pipeline, x_test, y_test,
                             scoring="neg_mean_squared_error", cv=5)
    polynomial_model_score = -scores.mean()
    print('mean polynomial_model_score: ', polynomial_model_score)
    # return the fitted pipeline rather than the bare PolynomialFeatures step,
    # so that show_gradient() in plots.py can call predict() on it
    return pipeline, polynomial_model_score
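A short check that the returned pipeline exposes the interface the plotting code relies on (a sketch; assumes the dataset module above):

```python
from dataset import generate_dataset, split_dataset
from models import launch_ridge_poly_regression

x, y = generate_dataset()
x_train, x_test, y_train, y_test = split_dataset(x, y)
model, score = launch_ridge_poly_regression(x_train, x_test, y_train, y_test)
# show_gradient falls through to predict() for this model
print(hasattr(model, "decision_function"), hasattr(model, "predict"))  # False True
```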
BIN malkova_anastasia_lab_1/plots.jpg Normal file
Binary file not shown.
After Width: | Height: | Size: 194 KiB |
71 malkova_anastasia_lab_1/plots.py Normal file
@@ -0,0 +1,71 @@
import numpy as np
from matplotlib.colors import ListedColormap
from matplotlib.axes import Axes
from matplotlib import pyplot as plt

TRAIN_DATA_ROW_LENGTH = 3
TEST_DATA_ROW_LENGTH = 6
LINEAR_REGRESSION_PLOT_INDEX = 6
PERCEPTRON_REGRESSION_PLOT_INDEX = 7
RIDGE_POLY_REGRESSION_PLOT_INDEX = 8


def show_plot(x, x_train, x_test, y_train, y_test, my_linear_model, linear_model_score, my_perceptron_model, perceptron_model_score, pipeline, polynomial_model_score):
    h = .02  # step of the regular grid
    x0_min, x0_max = x[:, 0].min() - .5, x[:, 0].max() + .5
    x1_min, x1_max = x[:, 1].min() - .5, x[:, 1].max() + .5
    xx0, xx1 = np.meshgrid(np.arange(x0_min, x0_max, h),
                           np.arange(x1_min, x1_max, h))
    cm = plt.cm.RdBu

    cm_bright = ListedColormap(['#FF0000', '#0000FF'])

    # 3x3 grid: row 1 shows the training data, row 2 the test data,
    # row 3 the decision surface of each model
    for i in range(9):
        current_subplot = plt.subplot(3, 3, i+1)
        if i < TRAIN_DATA_ROW_LENGTH:
            current_subplot.scatter(
                x_train[:, 0], x_train[:, 1], c=y_train, cmap=cm_bright)
        elif i < TEST_DATA_ROW_LENGTH:
            current_subplot.scatter(
                x_test[:, 0], x_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)
        else:
            if i == LINEAR_REGRESSION_PLOT_INDEX:
                show_gradient(my_linear_model, current_subplot=current_subplot,
                              title='LinearRegression', score=linear_model_score, xx0=xx0, xx1=xx1, cm=cm)
            elif i == PERCEPTRON_REGRESSION_PLOT_INDEX:
                show_gradient(my_perceptron_model, current_subplot=current_subplot,
                              title='Perceptron', score=perceptron_model_score, xx0=xx0, xx1=xx1, cm=cm)
            elif i == RIDGE_POLY_REGRESSION_PLOT_INDEX:
                show_gradient(pipeline, current_subplot=current_subplot,
                              title='RidgePolyRegression', score=polynomial_model_score, xx0=xx0, xx1=xx1, cm=cm)

            current_subplot.scatter(
                x_train[:, 0], x_train[:, 1], c=y_train, cmap=cm_bright)
            current_subplot.scatter(
                x_test[:, 0], x_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)

    plt.show()


def show_gradient(model, current_subplot: Axes, title: str, score: float, xx0, xx1, cm):
    current_subplot.set_title(title)
    # use whichever prediction interface the model provides
    if hasattr(model, "decision_function"):
        Z = model.decision_function(np.c_[xx0.ravel(), xx1.ravel()])
    elif hasattr(model, "predict_proba"):
        Z = model.predict_proba(np.c_[xx0.ravel(), xx1.ravel()])[:, 1]
    elif hasattr(model, "predict"):
        Z = model.predict(np.c_[xx0.ravel(), xx1.ravel()])
    else:
        return

    Z = Z.reshape(xx0.shape)
    current_subplot.contourf(xx0, xx1, Z, cmap=cm, alpha=.8)
    current_subplot.set_xlim(xx0.min(), xx0.max())
    current_subplot.set_ylim(xx1.min(), xx1.max())
    current_subplot.set_xticks(())
    current_subplot.set_yticks(())
    current_subplot.text(xx0.max() - .3, xx1.min() + .3, ('%.2f' % score),
                         size=15, horizontalalignment='left')
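The decision-surface technique used in show_gradient, reduced to a standalone sketch (toy data and model, not the lab code):

```python
import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import Perceptron

rng = np.random.RandomState(0)
x = rng.randn(50, 2)
y = (x[:, 0] > 0).astype(int)
model = Perceptron().fit(x, y)

# evaluate the model on a regular grid, then paint the grid as a filled contour
xx0, xx1 = np.meshgrid(np.arange(-3, 3, .02), np.arange(-3, 3, .02))
Z = model.decision_function(np.c_[xx0.ravel(), xx1.ravel()]).reshape(xx0.shape)
plt.contourf(xx0, xx1, Z, cmap=plt.cm.RdBu, alpha=.8)
plt.scatter(x[:, 0], x[:, 1], c=y, cmap=plt.cm.RdBu)
plt.show()
```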