Merge pull request 'malkova_anastasia_lab_1 ready' (#120) from malkova_anastasia_lab_1 into main

Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/120
2023-11-06 21:08:34 +04:00 · 2023-11-06 21:08:34 +04:00 · 28056f94bd
commit 28056f94bd
parent 1aef95a6d9 08ed6413b9
6 changed files with 187 additions and 0 deletions
--- a/malkova_anastasia_lab_1/README.md
+++ b/malkova_anastasia_lab_1/README.md
@ -0,0 +1,44 @@
 # Лабораторная работа №1
 > Работа с типовыми наборами данных и различными моделями
 # Задание
 Сгенерировать определённый тип данных, сравнить на нём разные модели и отобразить качество на графиках.
 Данные: make_classification (n_samples=500, n_features=2, n_redundant=0, n_informative=2, random_state=rs, n_clusters_per_class=1) 
 Модели:
 * Линейная регрессия
 * Персептрон
 * Гребневая полиномиальная регрессия (степень 3, alpha = 1.0)
 ### Как запустить лабораторную работу
 1. Установить python и библиотеки numpy, scikit-learn, matplotlib (например, `pip install numpy scikit-learn matplotlib`)
 2. Запустить команду `python main.py` в корне проекта
 ### Использованные технологии
 * Язык программирования `python`
 * Библиотеки `numpy, sklearn, matplotlib`
 * Среда разработки `PyCharm`
 ### Что делает программа?
 Генерирует набор данных для классификации с помощью make_classification.
 Обучает на них 3 модели:
 - Линейную регрессию
 - Персептрон
 - Гребневую полиномиальную регрессию (со степенью 3, alpha = 1.0)
 Собирает итоговые оценки моделей:
 - Линейная регрессия - коэффициент детерминации R2
 - Персептрон - средняя точность по заданным тестовым данным
 - Гребневая полиномиальная регрессия - средняя среднеквадратичная ошибка (MSE) по 5-кратной перекрёстной проверке на тестовых данных
 ![plots screen](plots.jpg)
 Лучший результат показала модель персептрона
--- a/malkova_anastasia_lab_1/dataset.py
+++ b/malkova_anastasia_lab_1/dataset.py
@ -0,0 +1,16 @@
 import numpy as np
 from sklearn.datasets import make_classification
 from sklearn.model_selection import train_test_split
 def generate_dataset():
    """Build the two-feature synthetic classification dataset used by the lab.

    Draws 500 samples from ``make_classification`` (seed 0, two informative
    features, no redundant ones, one cluster per class) and then adds
    uniform noise, scaled by 2.5 and drawn from a ``RandomState`` seeded
    with 2, to every feature value.

    Returns:
        The ``(features, labels)`` pair, with the noise already added to
        the features.
    """
    features, labels = make_classification(
        n_samples=500,
        n_features=2,
        n_informative=2,
        n_redundant=0,
        n_clusters_per_class=1,
        random_state=0,
    )
    # A dedicated, seeded generator keeps the noise reproducible.
    noise_source = np.random.RandomState(2)
    jitter = noise_source.uniform(size=features.shape)
    features += 2.5 * jitter
    return features, labels
 def split_dataset(x, y):
    """Split features and labels into train and test parts.

    5% of the samples are held out for testing; the shuffle is seeded
    with 42 so the split is reproducible.

    Returns:
        The result of ``train_test_split(x, y, ...)``, which callers in this
        project unpack as ``x_train, x_test, y_train, y_test``.
    """
    parts = train_test_split(x, y, test_size=0.05, random_state=42)
    return parts
--- a/malkova_anastasia_lab_1/main.py
+++ b/malkova_anastasia_lab_1/main.py
@ -0,0 +1,19 @@
 from dataset import generate_dataset, split_dataset
 from models import launch_linear_regression, launch_perceptron, launch_ridge_poly_regression
 from plots import show_plot
 # Generate the data once and split it into train and test parts.
 x, y = generate_dataset()
 x_train, x_test, y_train, y_test = split_dataset(x, y)

 # Train every model on the same split; each launcher returns (model, score).
 my_linear_model, linear_model_score = launch_linear_regression(
    x_train=x_train, x_test=x_test, y_train=y_train, y_test=y_test)
 my_perceptron_model, perceptron_model_score = launch_perceptron(
    x_train=x_train, x_test=x_test, y_train=y_train, y_test=y_test)
 my_polynomial_model, polynomial_model_score = launch_ridge_poly_regression(
    x_train=x_train, x_test=x_test, y_train=y_train, y_test=y_test)

 # Keyword arguments guard against mixing up the eleven parameters.
 show_plot(
    x=x,
    x_train=x_train,
    x_test=x_test,
    y_train=y_train,
    y_test=y_test,
    my_linear_model=my_linear_model,
    linear_model_score=linear_model_score,
    my_perceptron_model=my_perceptron_model,
    perceptron_model_score=perceptron_model_score,
    pipeline=my_polynomial_model,
    polynomial_model_score=polynomial_model_score,
 )
--- a/malkova_anastasia_lab_1/models.py
+++ b/malkova_anastasia_lab_1/models.py
@ -0,0 +1,37 @@
 from sklearn.linear_model import LinearRegression, Perceptron, Ridge
 from sklearn.preprocessing import PolynomialFeatures
 from sklearn.model_selection import cross_val_score
 from sklearn.pipeline import Pipeline
 def launch_linear_regression(x_train, x_test, y_train, y_test):
    """Fit a ``LinearRegression`` on the training split and score it on the test split.

    The score is printed and also returned.

    Returns:
        ``(model, score)``: the fitted estimator and the value reported by
        its ``score`` method for ``(x_test, y_test)``.
    """
    fitted = LinearRegression().fit(x_train, y_train)
    test_score = fitted.score(x_test, y_test)
    print('linear_model_score: ', test_score)
    return fitted, test_score
 # Perceptron
 def launch_perceptron(x_train, x_test, y_train, y_test):
    """Fit a default ``Perceptron`` on the training split and score it on the test split.

    The score is printed and also returned.

    Returns:
        ``(model, score)``: the fitted classifier and the value reported by
        its ``score`` method for ``(x_test, y_test)``.
    """
    classifier = Perceptron()
    classifier.fit(x_train, y_train)
    test_score = classifier.score(x_test, y_test)
    print('perceptron_model_score: ', test_score)
    return classifier, test_score
 # RidgePolyRegression
 def launch_ridge_poly_regression(x_train, x_test, y_train, y_test):
    """Fit a degree-3 polynomial ridge pipeline and report a cross-validated MSE.

    The pipeline chains ``PolynomialFeatures(degree=3, include_bias=False)``
    with ``Ridge(alpha=1)`` and is fitted on the training split. The score
    is printed and also returned.

    Returns:
        ``(pipeline, score)``: the pipeline fitted on the training split and
        the mean squared error averaged over a 5-fold cross-validation run
        on ``(x_test, y_test)``.
    """
    polynomial_features = PolynomialFeatures(degree=3, include_bias=False)
    ridge = Ridge(alpha=1)
    pipeline = Pipeline(
        [("polynomial_features", polynomial_features), ("ridge_regression", ridge)])
    pipeline.fit(x_train, y_train)
    # NOTE(review): cross-validation is run on the test split only, so this
    # number presumably does not measure the pipeline fitted above - confirm
    # that this is the intended metric.
    scores = cross_val_score(pipeline, x_test, y_test,
                             scoring="neg_mean_squared_error", cv=5)
    # The scorer yields negated MSE; flip the sign to report a plain MSE.
    polynomial_model_score = -scores.mean()
    print('mean polynomial_model_score: ', polynomial_model_score)
    # Return the fitted pipeline, not the bare PolynomialFeatures step: the
    # transformer alone cannot predict, so anything that plots or evaluates
    # the model needs the whole pipeline.
    return pipeline, polynomial_model_score
--- a/malkova_anastasia_lab_1/plots.jpg
+++ b/malkova_anastasia_lab_1/plots.jpg
--- a/malkova_anastasia_lab_1/plots.py
+++ b/malkova_anastasia_lab_1/plots.py
@ -0,0 +1,71 @@
 import numpy as np
 from matplotlib.colors import ListedColormap
 from matplotlib.axes import Axes
 from matplotlib import pyplot as plt
 # Layout of the 3x3 subplot grid, expressed as zero-based subplot indices.
 # Despite the "ROW_LENGTH" names, the first two values are used as exclusive
 # upper bounds: indices below 3 show training data, indices from 3 up to
 # (but not including) 6 show test data.
 TRAIN_DATA_ROW_LENGTH = 3
 TEST_DATA_ROW_LENGTH = 6
 # Subplot index that hosts each model's decision surface.
 LINEAR_REGRESSION_PLOT_INDEX = 6
 PERCEPTRON_REGRESSION_PLOT_INDEX = 7
 RIDGE_POLY_REGRESSION_REGRESSION_PLOT_INDEX = 8
 def show_plot(x, x_train, x_test, y_train, y_test, my_linear_model, linear_model_score, my_perceptron_model, perceptron_model_score, pipeline, polynomial_model_score):
    """Draw a 3x3 figure with the data and one surface per model, then show it.

    Subplots 1-3 scatter the training points, subplots 4-6 scatter the test
    points, and subplots 7-9 draw the output surface of the linear,
    perceptron and ridge-polynomial models (in that order) with both point
    sets overlaid. ``x`` is only used to size the evaluation grid.
    """
    grid_step = .02  # step of the regular grid
    x0_axis = np.arange(x[:, 0].min() - .5, x[:, 0].max() + .5, grid_step)
    x1_axis = np.arange(x[:, 1].min() - .5, x[:, 1].max() + .5, grid_step)
    xx0, xx1 = np.meshgrid(x0_axis, x1_axis)
    surface_cmap = plt.cm.RdBu
    point_cmap = ListedColormap(['#FF0000', '#0000FF'])
    # Arguments shared by every show_gradient call.
    surface_kwargs = {'xx0': xx0, 'xx1': xx1, 'cm': surface_cmap}

    def scatter_train(axes):
        axes.scatter(x_train[:, 0], x_train[:, 1], c=y_train, cmap=point_cmap)

    def scatter_test(axes):
        # Test points are drawn semi-transparent to tell them from train points.
        axes.scatter(x_test[:, 0], x_test[:, 1], c=y_test,
                     cmap=point_cmap, alpha=0.6)

    for panel in range(9):
        axes = plt.subplot(3, 3, panel + 1)
        if panel < TRAIN_DATA_ROW_LENGTH:
            scatter_train(axes)
            continue
        if panel < TEST_DATA_ROW_LENGTH:
            scatter_test(axes)
            continue
        if panel == LINEAR_REGRESSION_PLOT_INDEX:
            show_gradient(my_linear_model, current_subplot=axes,
                          title='LinearRegression', score=linear_model_score,
                          **surface_kwargs)
        elif panel == PERCEPTRON_REGRESSION_PLOT_INDEX:
            show_gradient(my_perceptron_model, current_subplot=axes,
                          title='Perceptron', score=perceptron_model_score,
                          **surface_kwargs)
        elif panel == RIDGE_POLY_REGRESSION_REGRESSION_PLOT_INDEX:
            axes.set_title('RidgePolyRegression')
            show_gradient(pipeline, current_subplot=axes,
                          title='RidgePolyRegression', score=polynomial_model_score,
                          **surface_kwargs)
        # Model panels get both point sets drawn on top of the surface.
        scatter_train(axes)
        scatter_test(axes)
    plt.show()
 def show_gradient(model, current_subplot: Axes, title: str, score: float, xx0, xx1, cm):
    """Fill a subplot with the model's output over a grid and print its score.

    The model is evaluated on every ``(xx0, xx1)`` grid point using the first
    available of ``decision_function``, ``predict_proba`` (second column) or
    ``predict``. If the model has none of them, only the title is set.

    Args:
        model: Fitted estimator to evaluate.
        current_subplot: Axes to draw on.
        title: Title for the subplot.
        score: Model score, rendered with two decimals in the subplot.
        xx0: Grid of first-feature coordinates, as produced by ``np.meshgrid``.
        xx1: Grid of second-feature coordinates, same shape as ``xx0``.
        cm: Colormap for the filled contour.
    """
    current_subplot.set_title(title)
    # One row per grid point, in the column order the models were trained on.
    grid_points = np.c_[xx0.ravel(), xx1.ravel()]
    if hasattr(model, "decision_function"):
        surface = model.decision_function(grid_points)
    elif hasattr(model, "predict_proba"):
        surface = model.predict_proba(grid_points)[:, 1]
    elif hasattr(model, "predict"):
        surface = model.predict(grid_points)
    else:
        # Nothing to evaluate, so leave the subplot with just its title.
        return
    surface = surface.reshape(xx0.shape)
    current_subplot.contourf(xx0, xx1, surface, cmap=cm, alpha=.8)
    current_subplot.set_xlim(xx0.min(), xx0.max())
    # The y-axis limits must come from the second-feature grid (xx1); taking
    # the lower bound from xx0 would mix the two axes' ranges.
    current_subplot.set_ylim(xx1.min(), xx1.max())
    current_subplot.set_xticks(())
    current_subplot.set_yticks(())
    current_subplot.text(xx0.max() - .3, xx1.min() + .3, f'{score:.2f}',
                         size=15, horizontalalignment='left')