lab1 ready

2023-11-01 23:53:45 +04:00 · 2023-11-01 23:53:45 +04:00 · 1f35af8f8f
commit 1f35af8f8f
parent 06116369e5
6 changed files with 187 additions and 0 deletions
--- a/malkova_anastasia_lab_1/README.md
+++ b/malkova_anastasia_lab_1/README.md
@ -0,0 +1,44 @@
+# Лабораторная работа №1
+
+> Работа с типовыми наборами данных и различными моделями
+
+# Задание
+
+Сгенерировать определённый тип данных, сравнить на нём разные модели и отобразить качество на графиках.
+
+Данные: make_classification (n_samples=500, n_features=2, n_redundant=0, n_informative=2, random_state=rs, n_clusters_per_class=1)
+
+Модели:
+
+* Линейная регрессия
+* Персептрон
+* Гребневая полиномиальная регрессия (со степенью 3, alpha = 1.0)
+
+### Как запустить лабораторную работу
+
+1. Установить python, numpy, scikit-learn (импортируется как `sklearn`), matplotlib
+2. Запустить команду `python main.py` в корне проекта
+
+### Использованные технологии
+
+* Язык программирования `python`
+* Библиотеки `numpy, sklearn, matplotlib`
+* Среда разработки `PyCharm`
+
+### Что делает программа?
+
+Генерирует набор данных для классификации с помощью make_classification.
+Обучает на нём 3 модели:
+
+- Линейную регрессию
+- Персептрон
+- Гребневую полиномиальную регрессию (со степенью 3, alpha = 1.0)
+
+Собирает итоговые оценки моделей:
+
+- Линейная регрессия — коэффициент детерминации R2 на тестовых данных
+- Персептрон — средняя точность на тестовых данных
+- Гребневая полиномиальная регрессия — средняя среднеквадратичная ошибка (MSE) по 5-кратной перекрёстной проверке на тестовых данных
+
+![plots screen](plots.jpg)
+
+Лучший результат показала модель персептрона
+
--- a/malkova_anastasia_lab_1/dataset.py
+++ b/malkova_anastasia_lab_1/dataset.py
@ -0,0 +1,16 @@
+import numpy as np
+from sklearn.datasets import make_classification
+from sklearn.model_selection import train_test_split
+
+
+def generate_dataset():
+    x, y = make_classification(n_samples=500, n_features=2, n_redundant=0,
+                               n_informative=2, random_state=0, n_clusters_per_class=1)
+    random = np.random.RandomState(2)
+    x += 2.5 * random.uniform(size=x.shape)
+    return x, y
+
+
+def split_dataset(x, y):
+    return train_test_split(
+        x, y, test_size=.05, random_state=42)
--- a/malkova_anastasia_lab_1/main.py
+++ b/malkova_anastasia_lab_1/main.py
@ -0,0 +1,19 @@
+from dataset import generate_dataset, split_dataset
+from models import launch_linear_regression, launch_perceptron, launch_ridge_poly_regression
+from plots import show_plot
+
+x, y = generate_dataset()
+
+x_train, x_test, y_train, y_test = split_dataset(x, y)
+
+my_linear_model, linear_model_score = launch_linear_regression(
+    x_train, x_test, y_train, y_test)
+my_perceptron_model, perceptron_model_score = launch_perceptron(
+    x_train, x_test, y_train, y_test)
+my_polynomial_model, polynomial_model_score = launch_ridge_poly_regression(
+    x_train, x_test, y_train, y_test)
+
+show_plot(x, x_train, x_test, y_train, y_test,
+          my_linear_model, linear_model_score,
+          my_perceptron_model, perceptron_model_score,
+          my_polynomial_model, polynomial_model_score)
--- a/malkova_anastasia_lab_1/models.py
+++ b/malkova_anastasia_lab_1/models.py
@ -0,0 +1,37 @@
+from sklearn.linear_model import LinearRegression, Perceptron, Ridge
+from sklearn.preprocessing import PolynomialFeatures
+from sklearn.model_selection import cross_val_score
+from sklearn.pipeline import Pipeline
+
+
+def launch_linear_regression(x_train, x_test, y_train, y_test):
+    my_linear_model = LinearRegression()
+    my_linear_model.fit(x_train, y_train)
+    linear_model_score = my_linear_model.score(
+        x_test, y_test)
+    print('linear_model_score: ', linear_model_score)
+    return my_linear_model, linear_model_score
+
+
+# Perceptron
+def launch_perceptron(x_train, x_test, y_train, y_test):
+    my_perceptron_model = Perceptron()
+    my_perceptron_model.fit(x_train, y_train)
+    perceptron_model_score = my_perceptron_model.score(
+        x_test, y_test)
+    print('perceptron_model_score: ', perceptron_model_score)
+    return my_perceptron_model, perceptron_model_score
+
+
+# RidgePolyRegression
+def launch_ridge_poly_regression(x_train, x_test, y_train, y_test):
+    my_polynomial_model = PolynomialFeatures(degree=3, include_bias=False)
+    ridge = Ridge(alpha=1)
+    pipeline = Pipeline(
+        [("polynomial_features", my_polynomial_model), ("ridge_regression", ridge)])
+    pipeline.fit(x_train, y_train)
+    scores = cross_val_score(pipeline, x_test, y_test,
+                             scoring="neg_mean_squared_error", cv=5)
+    polynomial_model_score = -scores.mean()
+    print('mean polynomial_model_score: ', polynomial_model_score)
+    return my_polynomial_model, polynomial_model_score
--- a/malkova_anastasia_lab_1/plots.jpg
+++ b/malkova_anastasia_lab_1/plots.jpg
--- a/malkova_anastasia_lab_1/plots.py
+++ b/malkova_anastasia_lab_1/plots.py
@ -0,0 +1,71 @@
+import numpy as np
+from matplotlib.colors import ListedColormap
+from matplotlib.axes import Axes
+from matplotlib import pyplot as plt
+
+# Layout of the 3x3 figure drawn by show_plot, as zero-based subplot indices.
+# Subplots with an index below this bound show only the training points.
+TRAIN_DATA_ROW_LENGTH = 3
+# Subplots from the previous bound up to (not including) this one show only
+# the test points.
+TEST_DATA_ROW_LENGTH = 6
+# The remaining subplots show one model each, with both point sets overlaid.
+LINEAR_REGRESSION_PLOT_INDEX = 6
+PERCEPTRON_REGRESSION_PLOT_INDEX = 7
+RIDGE_POLY_REGRESSION_REGRESSION_PLOT_INDEX = 8
+
+
+def show_plot(x, x_train, x_test, y_train, y_test, my_linear_model, linear_model_score, my_perceptron_model, perceptron_model_score, pipeline, polynomial_model_score):
+    h = .02  # шаг регулярной сетки
+    x0_min, x0_max = x[:, 0].min() - .5, x[:, 0].max() + .5
+    x1_min, x1_max = x[:, 1].min() - .5, x[:, 1].max() + .5
+    xx0, xx1 = np.meshgrid(np.arange(x0_min, x0_max, h),
+                           np.arange(x1_min, x1_max, h))
+    cm = plt.cm.RdBu
+
+    cm_bright = ListedColormap(['#FF0000', '#0000FF'])
+
+    for i in range(9):
+        current_subplot = plt.subplot(3, 3, i+1)
+        if i < TRAIN_DATA_ROW_LENGTH:
+            current_subplot.scatter(
+                x_train[:, 0], x_train[:, 1], c=y_train, cmap=cm_bright)
+        elif i < TEST_DATA_ROW_LENGTH:
+            current_subplot.scatter(
+                x_test[:, 0], x_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)
+        else:
+            if i == LINEAR_REGRESSION_PLOT_INDEX:
+                show_gradient(my_linear_model, current_subplot=current_subplot,
+                              title='LinearRegression', score=linear_model_score, xx0=xx0, xx1=xx1, cm=cm)
+
+            elif i == PERCEPTRON_REGRESSION_PLOT_INDEX:
+                show_gradient(my_perceptron_model, current_subplot=current_subplot,
+                              title='Perceptron', score=perceptron_model_score, xx0=xx0, xx1=xx1, cm=cm)
+
+            elif i == RIDGE_POLY_REGRESSION_REGRESSION_PLOT_INDEX:
+                current_subplot.set_title('RidgePolyRegression')
+                show_gradient(pipeline, current_subplot=current_subplot,
+                              title='RidgePolyRegression', score=polynomial_model_score, xx0=xx0, xx1=xx1, cm=cm)
+
+            current_subplot.scatter(
+                x_train[:, 0], x_train[:, 1], c=y_train, cmap=cm_bright)
+            current_subplot.scatter(
+                x_test[:, 0], x_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)
+
+    plt.show()
+
+
+def show_gradient(model, current_subplot: Axes, title: str, score: float, xx0, xx1, cm):
+    current_subplot.set_title(title)
+    if hasattr(model, "decision_function"):
+        Z = model.decision_function(np.c_[xx0.ravel(), xx1.ravel()])
+    elif hasattr(model, "predict_proba"):
+        Z = model.predict_proba(np.c_[xx0.ravel(), xx1.ravel()])[:, 1]
+    elif hasattr(model, "predict"):
+        Z = model.predict(np.c_[xx0.ravel(), xx1.ravel()])
+    else:
+        return
+
+    Z = Z.reshape(xx0.shape)
+    current_subplot.contourf(xx0, xx1, Z, cmap=cm, alpha=.8)
+    current_subplot.set_xlim(xx0.min(), xx0.max())
+    current_subplot.set_ylim(xx0.min(), xx1.max())
+    current_subplot.set_xticks(())
+    current_subplot.set_yticks(())
+    current_subplot.text(xx0.max() - .3, xx1.min() + .3, ('%.2f' % score),
+                         size=15, horizontalalignment='left')