malkova_anastasia_lab_1 ready #120
44
malkova_anastasia_lab_1/README.md
Normal file
44
malkova_anastasia_lab_1/README.md
Normal file
@ -0,0 +1,44 @@
|
||||
# Лабораторная работа №1
|
||||
|
||||
> Работа с типовыми наборами данных и различными моделями
|
||||
|
||||
# Задание
|
||||
|
||||
Сгенерировать определённый тип данных, сравнить на нём разные модели и отобразить качество на графиках.
|
||||
|
||||
Данные: make_classification (n_samples=500, n_features=2, n_redundant=0, n_informative=2, random_state=rs, n_clusters_per_class=1)
|
||||
Модели:
|
||||
* Линейная регрессия
|
||||
* Персептрон
|
||||
* Гребневая полиномиальная регрессия (степень 3, alpha = 1.0)
|
||||
|
||||
### Как запустить лабораторную работу
|
||||
|
||||
1. Установить python и библиотеки numpy, scikit-learn, matplotlib (например, `pip install numpy scikit-learn matplotlib`)
|
||||
2. Запустить команду `python main.py` в корне проекта
|
||||
|
||||
### Использованные технологии
|
||||
|
||||
* Язык программирования `python`
|
||||
* Библиотеки `numpy, sklearn, matplotlib`
|
||||
* Среда разработки `PyCharm`
|
||||
|
||||
### Что делает программа?
|
||||
|
||||
Генерирует набор данных для классификации с помощью make_classification.
|
||||
Обучает на нём 3 модели:
|
||||
|
||||
- Линейную регрессию
|
||||
- Персептрон
|
||||
- Гребневую полиномиальную регрессию (со степенью 3, alpha = 1.0)
|
||||
|
||||
Собирает итоговые оценки моделей:
|
||||
|
||||
- Линейная регрессия - коэффициент детерминации R2
|
||||
- Персептрон - средняя точность по заданным тестовым данным
|
||||
- Гребневая полиномиальная регрессия - среднеквадратичная ошибка (MSE), усреднённая по 5-кратной перекрёстной проверке
|
||||
|
||||
![plots screen](plots.jpg)
|
||||
|
||||
Лучший результат показала модель персептрона
|
||||
|
16
malkova_anastasia_lab_1/dataset.py
Normal file
16
malkova_anastasia_lab_1/dataset.py
Normal file
@ -0,0 +1,16 @@
|
||||
import numpy as np
|
||||
from sklearn.datasets import make_classification
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
|
||||
def generate_dataset():
    """Create the noisy two-feature classification sample set used by the lab.

    Returns:
        A ``(x, y)`` pair produced by ``make_classification`` for 500
        samples, with uniform noise scaled by 2.5 added to ``x``.
    """
    features, labels = make_classification(
        n_samples=500,
        n_features=2,
        n_informative=2,
        n_redundant=0,
        n_clusters_per_class=1,
        random_state=0,
    )
    # A fixed seed keeps the added noise identical between runs.
    noise_source = np.random.RandomState(2)
    noise = noise_source.uniform(size=features.shape)
    features += 2.5 * noise
    return features, labels
|
||||
|
||||
|
||||
def split_dataset(x, y):
    """Split the samples into training and test parts.

    5% of the samples are held out for testing, and ``random_state`` is
    fixed at 42.

    Returns:
        The result of ``train_test_split(x, y, ...)``.
    """
    split_options = {"test_size": .05, "random_state": 42}
    return train_test_split(x, y, **split_options)
|
19
malkova_anastasia_lab_1/main.py
Normal file
19
malkova_anastasia_lab_1/main.py
Normal file
@ -0,0 +1,19 @@
|
||||
from dataset import generate_dataset, split_dataset
|
||||
from models import launch_linear_regression, launch_perceptron, launch_ridge_poly_regression
|
||||
from plots import show_plot
|
||||
|
||||
# Build the dataset and hold out a test subset.
x, y = generate_dataset()
x_train, x_test, y_train, y_test = split_dataset(x, y)
split = (x_train, x_test, y_train, y_test)

# Train and score each model on the same split.
my_linear_model, linear_model_score = launch_linear_regression(*split)
my_perceptron_model, perceptron_model_score = launch_perceptron(*split)
my_polynomial_model, polynomial_model_score = launch_ridge_poly_regression(
    *split)

# Plot the data and every model's output together with its score.
show_plot(
    x, *split,
    my_linear_model, linear_model_score,
    my_perceptron_model, perceptron_model_score,
    my_polynomial_model, polynomial_model_score,
)
|
37
malkova_anastasia_lab_1/models.py
Normal file
37
malkova_anastasia_lab_1/models.py
Normal file
@ -0,0 +1,37 @@
|
||||
from sklearn.linear_model import LinearRegression, Perceptron, Ridge
|
||||
from sklearn.preprocessing import PolynomialFeatures
|
||||
from sklearn.model_selection import cross_val_score
|
||||
from sklearn.pipeline import Pipeline
|
||||
|
||||
|
||||
def launch_linear_regression(x_train, x_test, y_train, y_test):
    """Fit a linear regression on the training data and score it on the test data.

    The score is printed to stdout.

    Returns:
        ``(model, score)`` where ``score`` is the value returned by the
        model's ``score`` method for the test split.
    """
    regressor = LinearRegression()
    regressor.fit(x_train, y_train)
    test_score = regressor.score(x_test, y_test)
    print('linear_model_score: ', test_score)
    return regressor, test_score
|
||||
|
||||
|
||||
# Perceptron
|
||||
def launch_perceptron(x_train, x_test, y_train, y_test):
    """Fit a perceptron on the training data and score it on the test data.

    The score is printed to stdout.

    Returns:
        ``(model, score)`` where ``score`` is the value returned by the
        model's ``score`` method for the test split.
    """
    classifier = Perceptron()
    classifier.fit(x_train, y_train)
    test_score = classifier.score(x_test, y_test)
    print('perceptron_model_score: ', test_score)
    return classifier, test_score
|
||||
|
||||
|
||||
# RidgePolyRegression
|
||||
def launch_ridge_poly_regression(x_train, x_test, y_train, y_test):
    """Fit a degree-3 polynomial ridge regression and estimate its error.

    The model is a pipeline of ``PolynomialFeatures(degree=3)`` followed by
    ``Ridge(alpha=1)``. The score is printed to stdout.

    Returns:
        ``(pipeline, score)``: the pipeline fitted on the training data and
        the mean squared error averaged over 5-fold cross-validation run on
        the test split.
    """
    pipeline = Pipeline([
        ("polynomial_features",
         PolynomialFeatures(degree=3, include_bias=False)),
        ("ridge_regression", Ridge(alpha=1)),
    ])
    pipeline.fit(x_train, y_train)

    # NOTE(review): cross_val_score refits clones of the estimator on folds
    # of the data it receives, so this score does not come from the pipeline
    # fitted above - confirm that evaluating on the test split is intended.
    scores = cross_val_score(pipeline, x_test, y_test,
                             scoring="neg_mean_squared_error", cv=5)
    # The scorer yields negated MSE; flip the sign to report a plain MSE.
    polynomial_model_score = -scores.mean()
    print('mean polynomial_model_score: ', polynomial_model_score)

    # Return the fitted pipeline rather than the bare PolynomialFeatures
    # step: the plotting code needs an object that can predict.
    return pipeline, polynomial_model_score
|
BIN
malkova_anastasia_lab_1/plots.jpg
Normal file
BIN
malkova_anastasia_lab_1/plots.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 194 KiB |
71
malkova_anastasia_lab_1/plots.py
Normal file
71
malkova_anastasia_lab_1/plots.py
Normal file
@ -0,0 +1,71 @@
|
||||
import numpy as np
|
||||
from matplotlib.colors import ListedColormap
|
||||
from matplotlib.axes import Axes
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
# Zero-based subplot indices of the 3x3 grid drawn by show_plot.
# Subplots with an index below this value show the training data.
TRAIN_DATA_ROW_LENGTH = 3
# Subplots with an index below this value (and not below the previous
# threshold) show the test data.
TEST_DATA_ROW_LENGTH = 6
# Subplot index used for each model's plot.
LINEAR_REGRESSION_PLOT_INDEX = 6
PERCEPTRON_REGRESSION_PLOT_INDEX = 7
RIDGE_POLY_REGRESSION_REGRESSION_PLOT_INDEX = 8
|
||||
|
||||
|
||||
def show_plot(x, x_train, x_test, y_train, y_test, my_linear_model, linear_model_score, my_perceptron_model, perceptron_model_score, pipeline, polynomial_model_score):
    """Show a 3x3 figure with the data and each model's output.

    Subplots below ``TRAIN_DATA_ROW_LENGTH`` show the training points, the
    ones below ``TEST_DATA_ROW_LENGTH`` show the test points, and the
    remaining three show one model each (via ``show_gradient``) with both
    point sets drawn on top. Blocks until the figure window is closed
    (``plt.show()``).
    """
    step = .02  # step of the regular grid
    x0_low, x0_high = x[:, 0].min() - .5, x[:, 0].max() + .5
    x1_low, x1_high = x[:, 1].min() - .5, x[:, 1].max() + .5
    grid_x0, grid_x1 = np.meshgrid(np.arange(x0_low, x0_high, step),
                                   np.arange(x1_low, x1_high, step))
    surface_cmap = plt.cm.RdBu
    points_cmap = ListedColormap(['#FF0000', '#0000FF'])

    # Subplot index -> (model, title, score) for the model panels.
    model_panels = {
        LINEAR_REGRESSION_PLOT_INDEX:
            (my_linear_model, 'LinearRegression', linear_model_score),
        PERCEPTRON_REGRESSION_PLOT_INDEX:
            (my_perceptron_model, 'Perceptron', perceptron_model_score),
        RIDGE_POLY_REGRESSION_REGRESSION_PLOT_INDEX:
            (pipeline, 'RidgePolyRegression', polynomial_model_score),
    }

    for index in range(9):
        axes = plt.subplot(3, 3, index + 1)

        if index < TRAIN_DATA_ROW_LENGTH:
            axes.scatter(
                x_train[:, 0], x_train[:, 1], c=y_train, cmap=points_cmap)
            continue

        if index < TEST_DATA_ROW_LENGTH:
            axes.scatter(
                x_test[:, 0], x_test[:, 1], c=y_test, cmap=points_cmap,
                alpha=0.6)
            continue

        if index in model_panels:
            model, title, score = model_panels[index]
            show_gradient(model, current_subplot=axes, title=title,
                          score=score, xx0=grid_x0, xx1=grid_x1,
                          cm=surface_cmap)

        # Overlay both point sets on the model panel.
        axes.scatter(
            x_train[:, 0], x_train[:, 1], c=y_train, cmap=points_cmap)
        axes.scatter(
            x_test[:, 0], x_test[:, 1], c=y_test, cmap=points_cmap,
            alpha=0.6)

    plt.show()
|
||||
|
||||
|
||||
def show_gradient(model, current_subplot: "Axes", title: str, score: float, xx0, xx1, cm):
    """Draw a model's output over a grid as a filled contour plot.

    The model is evaluated at every grid point using the first available of
    ``decision_function``, ``predict_proba`` (second column) or ``predict``.
    If the model has none of them, only the title is set.

    Args:
        model: Fitted estimator to evaluate.
        current_subplot: Axes to draw on.
        title: Title of the subplot.
        score: Model score, printed with two decimals in the lower right area.
        xx0: Grid of first-feature coordinates (as returned by ``np.meshgrid``).
        xx1: Grid of second-feature coordinates, same shape as ``xx0``.
        cm: Colormap for the filled contours.
    """
    current_subplot.set_title(title)

    # One row per grid point: (first feature, second feature).
    grid_points = np.c_[xx0.ravel(), xx1.ravel()]
    if hasattr(model, "decision_function"):
        surface = model.decision_function(grid_points)
    elif hasattr(model, "predict_proba"):
        surface = model.predict_proba(grid_points)[:, 1]
    elif hasattr(model, "predict"):
        surface = model.predict(grid_points)
    else:
        # The object cannot be evaluated on the grid; nothing to draw.
        return

    surface = surface.reshape(xx0.shape)
    current_subplot.contourf(xx0, xx1, surface, cmap=cm, alpha=.8)
    current_subplot.set_xlim(xx0.min(), xx0.max())
    # The y limits come from the second grid axis (the lower bound used to
    # be taken from xx0 by mistake).
    current_subplot.set_ylim(xx1.min(), xx1.max())
    current_subplot.set_xticks(())
    current_subplot.set_yticks(())
    current_subplot.text(xx0.max() - .3, xx1.min() + .3, ('%.2f' % score),
                         size=15, horizontalalignment='left')
|
Loading…
Reference in New Issue
Block a user