diff --git a/malkova_anastasia_lab_1/README.md b/malkova_anastasia_lab_1/README.md
new file mode 100644
index 0000000..a0f26a5
--- /dev/null
+++ b/malkova_anastasia_lab_1/README.md
@@ -0,0 +1,44 @@
+# Laboratory Work No. 1
+
+> Working with standard datasets and various models
+
+### Task
+
+Generate a specific type of data, compare different models on it, and display their quality on plots.
+
+Data: make_classification (n_samples=500, n_features=2, n_redundant=0, n_informative=2, random_state=rs, n_clusters_per_class=1)
+
+Models:
+* Linear regression
+* Perceptron
+* Ridge polynomial regression (degree 3, alpha = 1.0)
+
+### How to run the lab
+
+1. Install python, numpy, sklearn, matplotlib
+2. Run `python main.py` in the project root
+
+### Technologies used
+
+* Programming language: `python`
+* Libraries: `numpy`, `sklearn`, `matplotlib`
+* IDE: `PyCharm`
+
+### What does the program do?
+
+It generates a classification dataset with make_classification and trains 3 models on it:
+
+- Linear regression
+- Perceptron
+- Ridge polynomial regression (degree 3, alpha = 1.0)
+
+It then collects the final model scores:
+
+- Linear regression: the coefficient of determination R²
+- Perceptron: mean accuracy on the given test data
+- Ridge polynomial regression: mean squared error under 5-fold cross-validation
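+
+A minimal sketch of how each score is obtained (it mirrors the calls in models.py; the variable names match the code there):
+
+```python
+r2 = my_linear_model.score(x_test, y_test)            # coefficient of determination
+accuracy = my_perceptron_model.score(x_test, y_test)  # mean accuracy
+fold_mse = -cross_val_score(pipeline, x_test, y_test,
+                            scoring="neg_mean_squared_error", cv=5)
+mean_mse = fold_mse.mean()                            # lower is better
+```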
+
+![plots screen](plots.jpg)
+
+The perceptron model showed the best result.
+
diff --git a/malkova_anastasia_lab_1/dataset.py b/malkova_anastasia_lab_1/dataset.py
new file mode 100644
index 0000000..547d726
--- /dev/null
+++ b/malkova_anastasia_lab_1/dataset.py
@@ -0,0 +1,16 @@
+import numpy as np
+from sklearn.datasets import make_classification
+from sklearn.model_selection import train_test_split
+
+
+def generate_dataset():
+    x, y = make_classification(n_samples=500, n_features=2, n_redundant=0,
+                               n_informative=2, random_state=0, n_clusters_per_class=1)
+    # add uniform noise so the classes are not perfectly separable
+    random = np.random.RandomState(2)
+    x += 2.5 * random.uniform(size=x.shape)
+    return x, y
+
+
+def split_dataset(x, y):
+    return train_test_split(
+        x, y, test_size=.05, random_state=42)
diff --git a/malkova_anastasia_lab_1/main.py b/malkova_anastasia_lab_1/main.py
new file mode 100644
index 0000000..0e12982
--- /dev/null
+++ b/malkova_anastasia_lab_1/main.py
@@ -0,0 +1,19 @@
+from dataset import generate_dataset, split_dataset
+from models import launch_linear_regression, launch_perceptron, launch_ridge_poly_regression
+from plots import show_plot
+
+x, y = generate_dataset()
+
+x_train, x_test, y_train, y_test = split_dataset(x, y)
+
+my_linear_model, linear_model_score = launch_linear_regression(
+    x_train, x_test, y_train, y_test)
+my_perceptron_model, perceptron_model_score = launch_perceptron(
+    x_train, x_test, y_train, y_test)
+my_polynomial_pipeline, polynomial_model_score = launch_ridge_poly_regression(
+    x_train, x_test, y_train, y_test)
+
+show_plot(x, x_train, x_test, y_train, y_test,
+          my_linear_model, linear_model_score,
+          my_perceptron_model, perceptron_model_score,
+          my_polynomial_pipeline, polynomial_model_score)
diff --git a/malkova_anastasia_lab_1/models.py b/malkova_anastasia_lab_1/models.py
new file mode 100644
index 0000000..97fa9ec
--- /dev/null
+++ b/malkova_anastasia_lab_1/models.py
@@ -0,0 +1,37 @@
+from sklearn.linear_model import LinearRegression, Perceptron, Ridge
+from sklearn.preprocessing import PolynomialFeatures
+from sklearn.model_selection import cross_val_score
+from sklearn.pipeline import Pipeline
+
+
+# LinearRegression
+def launch_linear_regression(x_train, x_test, y_train, y_test):
+    my_linear_model = LinearRegression()
+    my_linear_model.fit(x_train, y_train)
+    linear_model_score = my_linear_model.score(
+        x_test, y_test)  # coefficient of determination R^2
+    print('linear_model_score: ', linear_model_score)
+    return my_linear_model, linear_model_score
+
+
+# Perceptron
+def launch_perceptron(x_train, x_test, y_train, y_test):
+    my_perceptron_model = Perceptron()
+    my_perceptron_model.fit(x_train, y_train)
+    perceptron_model_score = my_perceptron_model.score(
+        x_test, y_test)  # mean accuracy
+    print('perceptron_model_score: ', perceptron_model_score)
+    return my_perceptron_model, perceptron_model_score
+
+
+# RidgePolyRegression
+def launch_ridge_poly_regression(x_train, x_test, y_train, y_test):
+    polynomial_features = PolynomialFeatures(degree=3, include_bias=False)
+    ridge = Ridge(alpha=1.0)
+    pipeline = Pipeline(
+        [("polynomial_features", polynomial_features), ("ridge_regression", ridge)])
+    pipeline.fit(x_train, y_train)
+    # note: with test_size=.05 the test split holds only 25 samples,
+    # so each of the 5 folds is scored on roughly 5 points
+    scores = cross_val_score(pipeline, x_test, y_test,
+                             scoring="neg_mean_squared_error", cv=5)
+    polynomial_model_score = -scores.mean()
+    print('mean polynomial_model_score: ', polynomial_model_score)
+    # return the fitted pipeline (not the bare PolynomialFeatures),
+    # so the plotting code can call predict on it
+    return pipeline, polynomial_model_score
diff --git a/malkova_anastasia_lab_1/plots.jpg b/malkova_anastasia_lab_1/plots.jpg
new file mode 100644
index 0000000..0d4f55a
Binary files /dev/null and b/malkova_anastasia_lab_1/plots.jpg differ
diff --git a/malkova_anastasia_lab_1/plots.py b/malkova_anastasia_lab_1/plots.py
new file mode 100644
index 0000000..5acd136
--- /dev/null
+++ b/malkova_anastasia_lab_1/plots.py
@@ -0,0 +1,71 @@
+import numpy as np
+from matplotlib.colors import ListedColormap
+from matplotlib.axes import Axes
+from matplotlib import pyplot as plt
+
+TRAIN_DATA_ROW_LENGTH = 3
+TEST_DATA_ROW_LENGTH = 6
+LINEAR_REGRESSION_PLOT_INDEX = 6
+PERCEPTRON_PLOT_INDEX = 7
+RIDGE_POLY_REGRESSION_PLOT_INDEX = 8
+
+
+def show_plot(x, x_train, x_test, y_train, y_test, my_linear_model, linear_model_score, my_perceptron_model, perceptron_model_score, pipeline, polynomial_model_score):
+    h = .02  # step of the regular mesh grid
+    x0_min, x0_max = x[:, 0].min() - .5, x[:, 0].max() + .5
+    x1_min, x1_max = x[:, 1].min() - .5, x[:, 1].max() + .5
+    xx0, xx1 = np.meshgrid(np.arange(x0_min, x0_max, h),
+                           np.arange(x1_min, x1_max, h))
+    cm = plt.cm.RdBu
+
+    cm_bright = ListedColormap(['#FF0000', '#0000FF'])
+
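+    # 3x3 grid: the first row repeats the training data, the second row the
+    # test data, and the third row shows each model's prediction surface with
+    # the train/test points drawn on top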
+    for i in range(9):
+        current_subplot = plt.subplot(3, 3, i+1)
+        if i < TRAIN_DATA_ROW_LENGTH:
+            current_subplot.scatter(
+                x_train[:, 0], x_train[:, 1], c=y_train, cmap=cm_bright)
+        elif i < TEST_DATA_ROW_LENGTH:
+            current_subplot.scatter(
+                x_test[:, 0], x_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)
+        else:
+            if i == LINEAR_REGRESSION_PLOT_INDEX:
+                show_gradient(my_linear_model, current_subplot=current_subplot,
+                              title='LinearRegression', score=linear_model_score, xx0=xx0, xx1=xx1, cm=cm)
+            elif i == PERCEPTRON_PLOT_INDEX:
+                show_gradient(my_perceptron_model, current_subplot=current_subplot,
+                              title='Perceptron', score=perceptron_model_score, xx0=xx0, xx1=xx1, cm=cm)
+            elif i == RIDGE_POLY_REGRESSION_PLOT_INDEX:
+                show_gradient(pipeline, current_subplot=current_subplot,
+                              title='RidgePolyRegression', score=polynomial_model_score, xx0=xx0, xx1=xx1, cm=cm)
+
+            current_subplot.scatter(
+                x_train[:, 0], x_train[:, 1], c=y_train, cmap=cm_bright)
+            current_subplot.scatter(
+                x_test[:, 0], x_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)
+
+    plt.show()
+
+
+def show_gradient(model, current_subplot: Axes, title: str, score: float, xx0, xx1, cm):
+    current_subplot.set_title(title)
+    # use the finest-grained output the model offers for the colour gradient
+    if hasattr(model, "decision_function"):
+        Z = model.decision_function(np.c_[xx0.ravel(), xx1.ravel()])
+    elif hasattr(model, "predict_proba"):
+        Z = model.predict_proba(np.c_[xx0.ravel(), xx1.ravel()])[:, 1]
+    elif hasattr(model, "predict"):
+        Z = model.predict(np.c_[xx0.ravel(), xx1.ravel()])
+    else:
+        return
+
+    Z = Z.reshape(xx0.shape)
+    current_subplot.contourf(xx0, xx1, Z, cmap=cm, alpha=.8)
+    current_subplot.set_xlim(xx0.min(), xx0.max())
+    current_subplot.set_ylim(xx1.min(), xx1.max())
+    current_subplot.set_xticks(())
+    current_subplot.set_yticks(())
+    current_subplot.text(xx0.max() - .3, xx1.min() + .3, ('%.2f' % score),
+                         size=15, horizontalalignment='right')
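
For context, a standalone sketch (outside the patch) of why `launch_ridge_poly_regression` returns the fitted pipeline: `Ridge` exposes neither `decision_function` nor `predict_proba`, so `show_gradient` falls through to the `predict` branch and draws a continuous regression surface. The toy data below is illustrative, not the lab's dataset.

```python
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

# toy stand-in for the lab's two-feature dataset
rng = np.random.RandomState(0)
x_train = rng.uniform(size=(50, 2))
y_train = (x_train[:, 0] > x_train[:, 1]).astype(float)

pipeline = Pipeline([
    ("polynomial_features", PolynomialFeatures(degree=3, include_bias=False)),
    ("ridge_regression", Ridge(alpha=1.0)),
])
pipeline.fit(x_train, y_train)

# same mesh construction as show_plot, here over the unit square
xx0, xx1 = np.meshgrid(np.arange(0, 1, .02), np.arange(0, 1, .02))
# Pipeline only exposes decision_function/predict_proba when its final
# estimator has them; Ridge has neither, so predict() is used
Z = pipeline.predict(np.c_[xx0.ravel(), xx1.ravel()]).reshape(xx0.shape)
print(Z.shape)  # (50, 50)
```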