Теперь расчёты без библы.

2023-11-23 00:25:01 +04:00 · 2023-11-23 00:25:01 +04:00 · 5d6a44a23b
commit 5d6a44a23b
parent 1ce9ff58ea
2 changed files with 41 additions and 178 deletions
--- a/LabWork01/LabWork5/create_plot.py
+++ b/LabWork01/LabWork5/create_plot.py
@@ -1,13 +1,9 @@
 import os
-
 import numpy as np
 import pandas as pd
-import matplotlib
 import matplotlib.pyplot as plt
-import sns
-from sklearn import metrics
-from sklearn.model_selection import train_test_split
-from sklearn.linear_model import LinearRegression
+from sklearn.metrics import r2_score
+

 # INCH = 25.4

@@ -20,183 +16,57 @@ def create_plot_jpg(df: pd.DataFrame, nameFile):
        os.makedirs(results_dir)

    # набор атрибутов - независимых переменных - площадь
-    _X = df["Store_Area"].array
+    X = df["Store_Area"].array

    # набор меток - зависимых переменных, значение которых требуется предсказать - выручка
-    _Y = df["Store_Sales"].array
+    Y = df["Store_Sales"].array
+
+    n = df.shape[0]

    # делим датафрейм на набор тренировочных данных и данных для тестов, test_size содержит определние соотношения этих наборов
-    X_train, X_test, y_train, y_test = train_test_split(_X, _Y, test_size=0.01, random_state=0)
+    n_test = int(n * 0.01)
+    n_train = n - n_test
+    X_train, Y_train = X[:n_train], Y[:n_train]
+    X_test, Y_test = X[n_train:], Y[n_train:]

-    regressor = LinearRegression()
+    sumY_train = sum(Y_train)
+    sumX_train = sum(X_train)

-    X_train = X_train.reshape(-1, 1)
-    X_test = X_test.reshape(-1, 1)
+    sumXY_train = sum(X_train * Y_train)
+    sumXX_train = sum(X_train * X_train)

-    regressor.fit(X_train, y_train)
+    b1 = (sumXY_train - (sumY_train * sumX_train) / n_train) / (sumXX_train - sumX_train * sumX_train / n_train)
+    b0 = (sumY_train - b1 * sumX_train) / n_train

-    # массив numpy, который содержит все предсказанные значения для входных значений в серии X_test
-    y_pred = regressor.predict(X_test)
+    # Построение модели на обучающем наборе
+    plt.scatter(X_train, Y_train, alpha=0.8)
+    plt.axline(xy1=(0, b0), slope=b1, color='r', label=f'$y = {b1:.5f}x {b0:+.5f}$')

-    df.plot(x='Store_Sales', y='Store_Area', style='o')
-
-    plt.title('Зависимость продаж от площади магазина')
-    plt.xlabel('Продажи')
-    plt.ylabel('Площадь')
+    # Оценка производительности модели на тестовом наборе
+    Y_pred = b0 + b1 * X_test
+    first_half = sum((Y_pred - Y_test.mean()) ** 2)
+    second_half = sum((Y_test - Y_pred) ** 2) + first_half

+    plt.scatter(X_test, Y_test, alpha=0.8, color='g')
+    plt.legend()
    plt.savefig(results_dir + nameFile + '.jpg')
-    plt.close()

-    # MAE – это среднее абсолютное значение ошибок
-    # MSE – это среднее значение квадратов ошибок
-    # RMSE – это квадратный корень из среднего квадрата ошибок
-
-    listMessages = ['Средняя абсолютная ошибка (MAE): ' + str(metrics.mean_absolute_error(y_test, y_pred)),
-                    'Среднеквадратичная ошибка (MSE): ' + str(metrics.mean_squared_error(y_test, y_pred)),
-                    'Среднеквадратичная ошибка (RMSE): ' + str(np.sqrt(metrics.mean_squared_error(y_test, y_pred)))]
+    r2 = r_squared(Y_test, Y_pred)
+    listMessages = [f"Коэффициент по странной формуле (по википедии): {first_half/second_half}",
+                    f"Истинный коэффициент (по википедии): {r2}",
+                    f"Подсчёт по библиотеке: {r2_score(Y_test, Y_pred)}"]

    return listMessages

-# def graph_regression_plot_sns(
-#         X, Y,
-#         regression_model,
-#         Xmin=None, Xmax=None,
-#         Ymin=None, Ymax=None,
-#         display_residuals=False,
-#         title_figure=None, title_figure_fontsize=None,
-#         title_axes=None, title_axes_fontsize=None,
-#         x_label=None,
-#         y_label=None,
-#         label_fontsize=None, tick_fontsize=12,
-#         label_legend_regr_model='', label_legend_fontsize=12,
-#         s=50, linewidth_regr_model=2,
-#         graph_size=None,
-#         file_name=None):
-#     X = np.array(X)
-#     Y = np.array(Y)
-#     Ycalc = Y - regression_model(X)
-#
-#     if not (Xmin) and not (Xmax):
-#         Xmin = min(X) * 0.99
-#         Xmax = max(X) * 1.01
-#     if not (Ymin) and not (Ymax):
-#         Ymin = min(Y) * 0.99
-#         Ymax = max(Y) * 1.01
-#
-#         # график с остатками
-#     # ------------------
-#     if display_residuals:
-#         if not (graph_size):
-#             graph_size = (297 / INCH, 420 / INCH / 1.5)
-#         if not (title_figure_fontsize):
-#             title_figure_fontsize = 18
-#         if not (title_axes_fontsize):
-#             title_axes_fontsize = 16
-#         if not (label_fontsize):
-#             label_fontsize = 13
-#         if not (label_legend_fontsize):
-#             label_legend_fontsize = 12
-#         fig = plt.figure(figsize=graph_size)
-#         fig.suptitle(title_figure, fontsize=title_figure_fontsize)
-#         ax1 = plt.subplot(2, 1, 1)
-#         ax2 = plt.subplot(2, 1, 2)
-#
-#         # фактические данные
-#         ax1.set_title(title_axes, fontsize=title_axes_fontsize)
-#         sns.scatterplot(
-#             x=X, y=Y,
-#             label='data',
-#             s=s,
-#             color='red',
-#             ax=ax1)
-#         ax1.set_xlim(Xmin, Xmax)
-#         ax1.set_ylim(Ymin, Ymax)
-#         ax1.axvline(x=0, color='k', linewidth=1)
-#         ax1.axhline(y=0, color='k', linewidth=1)
-#         # ax1.set_xlabel(x_label, fontsize = label_fontsize)
-#         ax1.set_ylabel(y_label, fontsize=label_fontsize)
-#         ax1.tick_params(labelsize=tick_fontsize)
-#
-#         # график регрессионной модели
-#         nx = 100
-#         hx = (Xmax - Xmin) / (nx - 1)
-#         x1 = np.linspace(Xmin, Xmax, nx)
-#         y1 = regression_model(x1)
-#         sns.lineplot(
-#             x=x1, y=y1,
-#             color='blue',
-#             linewidth=linewidth_regr_model,
-#             legend=True,
-#             label=label_legend_regr_model,
-#             ax=ax1)
-#         ax1.legend(prop={'size': label_legend_fontsize})
-#
-#         # график остатков
-#         ax2.set_title('Residuals', fontsize=title_axes_fontsize)
-#         ax2.set_xlim(Xmin, Xmax)
-#         # ax2.set_ylim(Ymin, Ymax)
-#         sns.scatterplot(
-#             x=X, y=Ycalc,
-#             # label='фактические данные',
-#             s=s,
-#             color='orange',
-#             ax=ax2)
-#
-#         ax2.axvline(x=0, color='k', linewidth=1)
-#         ax2.axhline(y=0, color='k', linewidth=1)
-#         ax2.set_xlabel(x_label, fontsize=label_fontsize)
-#         ax2.set_ylabel(r'$ΔY = Y - Y_{calc}$', fontsize=label_fontsize)
-#         ax2.tick_params(labelsize=tick_fontsize)
-#
-#     # график без остатков
-#     # -------------------
-#     else:
-#         if not (graph_size):
-#             graph_size = (297 / INCH, 210 / INCH)
-#         if not (title_figure_fontsize):
-#             title_figure_fontsize = 18
-#         if not (title_axes_fontsize):
-#             title_axes_fontsize = 16
-#         if not (label_fontsize):
-#             label_fontsize = 14
-#         if not (label_legend_fontsize):
-#             label_legend_fontsize = 12
-#         fig, axes = plt.subplots(figsize=graph_size)
-#         fig.suptitle(title_figure, fontsize=title_figure_fontsize)
-#         axes.set_title(title_axes, fontsize=title_axes_fontsize)
-#
-#         # фактические данные
-#         sns.scatterplot(
-#             x=X, y=Y,
-#             label='фактические данные',
-#             s=s,
-#             color='red',
-#             ax=axes)
-#
-#         # график регрессионной модели
-#         nx = 100
-#         hx = (Xmax - Xmin) / (nx - 1)
-#         x1 = np.linspace(Xmin, Xmax, nx)
-#         y1 = regression_model(x1)
-#         sns.lineplot(
-#             x=x1, y=y1,
-#             color='blue',
-#             linewidth=linewidth_regr_model,
-#             legend=True,
-#             label=label_legend_regr_model,
-#             ax=axes)
-#
-#         axes.set_xlim(Xmin, Xmax)
-#         axes.set_ylim(Ymin, Ymax)
-#         axes.axvline(x=0, color='k', linewidth=1)
-#         axes.axhline(y=0, color='k', linewidth=1)
-#         axes.set_xlabel(x_label, fontsize=label_fontsize)
-#         axes.set_ylabel(y_label, fontsize=label_fontsize)
-#         axes.tick_params(labelsize=tick_fontsize)
-#         axes.legend(prop={'size': label_legend_fontsize})
-#
-#     plt.show()
-#     if file_name:
-#         fig.savefig(file_name, orientation="portrait", dpi=300)
-#
-#     return
+def r_squared(y_true, y_pred):
+    # Вычисляем среднее значение целевой переменной
+    mean_y_true = np.mean(y_true)
+
+    # Вычисляем сумму квадратов отклонений от среднего
+    ss_total = np.sum((y_true - mean_y_true) ** 2)
+
+    # Вычисляем сумму квадратов остатков
+    ss_residual = np.sum((y_true - y_pred) ** 2)
+
+    # Вычисляем коэффициент детерминации
+    return 1 - (ss_residual / ss_total)
--- a/LabWork01/LoadDB.py
+++ b/LabWork01/LoadDB.py
@@ -189,13 +189,6 @@ def get_page_showFindURL():
 # 5-я лабораторная
 @app.route('/createPlotImage',  methods=['GET', 'POST'])
 def get_plot_image():
-
-    # 99%
-    # main_df = listShops.loc[listShops['Store_ID'] <= listShops.shape[0]*0.9]
-
-    # 1%
-    # support_df = listShops.loc[listShops['Store_ID'] > listShops.shape[0]*0.9]
-
    messages = create_plot_jpg(listShops, "myPlot")

    myPlotJpg = ['myPlot.jpg']