2023-11-03 21:58:33 +04:00
|
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
|
import pandas as pd
|
|
|
|
|
import matplotlib
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
import sns
|
|
|
|
|
from sklearn import metrics
|
|
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
|
from sklearn.linear_model import LinearRegression
|
|
|
|
|
|
2023-11-03 22:14:32 +04:00
|
|
|
|
# INCH = 25.4
|
2023-11-03 21:58:33 +04:00
|
|
|
|
|
|
|
|
|
def create_plot_jpg(df: pd.DataFrame, nameFile: str) -> list:
    """Fit a linear regression on store data, save a scatter plot and report errors.

    A ``LinearRegression`` model is trained with ``Store_Area`` as the single
    feature and ``Store_Sales`` as the target. The data is split with
    ``test_size=0.01`` and ``random_state=0``, so the split is reproducible.
    The fitted model is used only for the error metrics; it is not drawn on
    the plot.

    Args:
        df: Data frame that must contain the ``Store_Area`` and
            ``Store_Sales`` columns (a missing column raises ``KeyError``).
        nameFile: File name without extension. The picture is written to
            ``<directory of this module>/../static/<nameFile>.jpg``.

    Returns:
        A list of three strings with the MAE, MSE and RMSE of the model on
        the held-out test part of the data.
    """
    # The chart is stored in the "static" folder one level above this module.
    script_dir = os.path.dirname(__file__)
    results_dir = os.path.join(script_dir, '../static/')
    # exist_ok avoids the check-then-create race of a separate isdir() test.
    os.makedirs(results_dir, exist_ok=True)

    # Independent variable (store area) as an (n_samples, 1) NumPy matrix.
    # to_numpy() is used instead of .array because .array yields a pandas
    # ExtensionArray, whose interface does not guarantee reshape().
    area = df["Store_Area"].to_numpy().reshape(-1, 1)
    # Dependent variable to be predicted (store sales).
    sales = df["Store_Sales"].to_numpy()

    # Split into training and test parts; test_size sets the share of test rows.
    X_train, X_test, y_train, y_test = train_test_split(
        area, sales, test_size=0.01, random_state=0)

    regressor = LinearRegression()
    regressor.fit(X_train, y_train)

    # NumPy array with the predicted values for every row of X_test.
    y_pred = regressor.predict(X_test)

    ax = df.plot(x='Store_Sales', y='Store_Area', style='o')
    try:
        ax.set_title('Зависимость продаж от площади магазина')
        ax.set_xlabel('Продажи')
        ax.set_ylabel('Площадь')
        ax.figure.savefig(results_dir + nameFile + '.jpg')
    finally:
        # Always release the figure, even when saving fails, so repeated
        # calls do not accumulate open figures.
        plt.close(ax.figure)

    # MAE  - mean of the absolute errors
    # MSE  - mean of the squared errors
    # RMSE - square root of the mean squared error
    mae = metrics.mean_absolute_error(y_test, y_pred)
    mse = metrics.mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)

    listMessages = [
        'Средняя абсолютная ошибка (MAE): ' + str(mae),
        'Среднеквадратичная ошибка (MSE): ' + str(mse),
        'Среднеквадратичная ошибка (RMSE): ' + str(rmse),
    ]
    return listMessages
|
|
|
|
|
|
|
|
|
|
# def graph_regression_plot_sns(
|
|
|
|
|
# X, Y,
|
|
|
|
|
# regression_model,
|
|
|
|
|
# Xmin=None, Xmax=None,
|
|
|
|
|
# Ymin=None, Ymax=None,
|
|
|
|
|
# display_residuals=False,
|
|
|
|
|
# title_figure=None, title_figure_fontsize=None,
|
|
|
|
|
# title_axes=None, title_axes_fontsize=None,
|
|
|
|
|
# x_label=None,
|
|
|
|
|
# y_label=None,
|
|
|
|
|
# label_fontsize=None, tick_fontsize=12,
|
|
|
|
|
# label_legend_regr_model='', label_legend_fontsize=12,
|
|
|
|
|
# s=50, linewidth_regr_model=2,
|
|
|
|
|
# graph_size=None,
|
|
|
|
|
# file_name=None):
|
|
|
|
|
# X = np.array(X)
|
|
|
|
|
# Y = np.array(Y)
|
|
|
|
|
# Ycalc = Y - regression_model(X)
|
|
|
|
|
#
|
|
|
|
|
# if not (Xmin) and not (Xmax):
|
|
|
|
|
# Xmin = min(X) * 0.99
|
|
|
|
|
# Xmax = max(X) * 1.01
|
|
|
|
|
# if not (Ymin) and not (Ymax):
|
|
|
|
|
# Ymin = min(Y) * 0.99
|
|
|
|
|
# Ymax = max(Y) * 1.01
|
|
|
|
|
#
|
|
|
|
|
# # график с остатками
|
|
|
|
|
# # ------------------
|
|
|
|
|
# if display_residuals:
|
|
|
|
|
# if not (graph_size):
|
|
|
|
|
# graph_size = (297 / INCH, 420 / INCH / 1.5)
|
|
|
|
|
# if not (title_figure_fontsize):
|
|
|
|
|
# title_figure_fontsize = 18
|
|
|
|
|
# if not (title_axes_fontsize):
|
|
|
|
|
# title_axes_fontsize = 16
|
|
|
|
|
# if not (label_fontsize):
|
|
|
|
|
# label_fontsize = 13
|
|
|
|
|
# if not (label_legend_fontsize):
|
|
|
|
|
# label_legend_fontsize = 12
|
|
|
|
|
# fig = plt.figure(figsize=graph_size)
|
|
|
|
|
# fig.suptitle(title_figure, fontsize=title_figure_fontsize)
|
|
|
|
|
# ax1 = plt.subplot(2, 1, 1)
|
|
|
|
|
# ax2 = plt.subplot(2, 1, 2)
|
|
|
|
|
#
|
|
|
|
|
# # фактические данные
|
|
|
|
|
# ax1.set_title(title_axes, fontsize=title_axes_fontsize)
|
|
|
|
|
# sns.scatterplot(
|
|
|
|
|
# x=X, y=Y,
|
|
|
|
|
# label='data',
|
|
|
|
|
# s=s,
|
|
|
|
|
# color='red',
|
|
|
|
|
# ax=ax1)
|
|
|
|
|
# ax1.set_xlim(Xmin, Xmax)
|
|
|
|
|
# ax1.set_ylim(Ymin, Ymax)
|
|
|
|
|
# ax1.axvline(x=0, color='k', linewidth=1)
|
|
|
|
|
# ax1.axhline(y=0, color='k', linewidth=1)
|
|
|
|
|
# # ax1.set_xlabel(x_label, fontsize = label_fontsize)
|
|
|
|
|
# ax1.set_ylabel(y_label, fontsize=label_fontsize)
|
|
|
|
|
# ax1.tick_params(labelsize=tick_fontsize)
|
|
|
|
|
#
|
|
|
|
|
# # график регрессионной модели
|
|
|
|
|
# nx = 100
|
|
|
|
|
# hx = (Xmax - Xmin) / (nx - 1)
|
|
|
|
|
# x1 = np.linspace(Xmin, Xmax, nx)
|
|
|
|
|
# y1 = regression_model(x1)
|
|
|
|
|
# sns.lineplot(
|
|
|
|
|
# x=x1, y=y1,
|
|
|
|
|
# color='blue',
|
|
|
|
|
# linewidth=linewidth_regr_model,
|
|
|
|
|
# legend=True,
|
|
|
|
|
# label=label_legend_regr_model,
|
|
|
|
|
# ax=ax1)
|
|
|
|
|
# ax1.legend(prop={'size': label_legend_fontsize})
|
|
|
|
|
#
|
|
|
|
|
# # график остатков
|
|
|
|
|
# ax2.set_title('Residuals', fontsize=title_axes_fontsize)
|
|
|
|
|
# ax2.set_xlim(Xmin, Xmax)
|
|
|
|
|
# # ax2.set_ylim(Ymin, Ymax)
|
|
|
|
|
# sns.scatterplot(
|
|
|
|
|
# x=X, y=Ycalc,
|
|
|
|
|
# # label='фактические данные',
|
|
|
|
|
# s=s,
|
|
|
|
|
# color='orange',
|
|
|
|
|
# ax=ax2)
|
|
|
|
|
#
|
|
|
|
|
# ax2.axvline(x=0, color='k', linewidth=1)
|
|
|
|
|
# ax2.axhline(y=0, color='k', linewidth=1)
|
|
|
|
|
# ax2.set_xlabel(x_label, fontsize=label_fontsize)
|
|
|
|
|
# ax2.set_ylabel(r'$ΔY = Y - Y_{calc}$', fontsize=label_fontsize)
|
|
|
|
|
# ax2.tick_params(labelsize=tick_fontsize)
|
|
|
|
|
#
|
|
|
|
|
# # график без остатков
|
|
|
|
|
# # -------------------
|
|
|
|
|
# else:
|
|
|
|
|
# if not (graph_size):
|
|
|
|
|
# graph_size = (297 / INCH, 210 / INCH)
|
|
|
|
|
# if not (title_figure_fontsize):
|
|
|
|
|
# title_figure_fontsize = 18
|
|
|
|
|
# if not (title_axes_fontsize):
|
|
|
|
|
# title_axes_fontsize = 16
|
|
|
|
|
# if not (label_fontsize):
|
|
|
|
|
# label_fontsize = 14
|
|
|
|
|
# if not (label_legend_fontsize):
|
|
|
|
|
# label_legend_fontsize = 12
|
|
|
|
|
# fig, axes = plt.subplots(figsize=graph_size)
|
|
|
|
|
# fig.suptitle(title_figure, fontsize=title_figure_fontsize)
|
|
|
|
|
# axes.set_title(title_axes, fontsize=title_axes_fontsize)
|
|
|
|
|
#
|
|
|
|
|
# # фактические данные
|
|
|
|
|
# sns.scatterplot(
|
|
|
|
|
# x=X, y=Y,
|
|
|
|
|
# label='фактические данные',
|
|
|
|
|
# s=s,
|
|
|
|
|
# color='red',
|
|
|
|
|
# ax=axes)
|
|
|
|
|
#
|
|
|
|
|
# # график регрессионной модели
|
|
|
|
|
# nx = 100
|
|
|
|
|
# hx = (Xmax - Xmin) / (nx - 1)
|
|
|
|
|
# x1 = np.linspace(Xmin, Xmax, nx)
|
|
|
|
|
# y1 = regression_model(x1)
|
|
|
|
|
# sns.lineplot(
|
|
|
|
|
# x=x1, y=y1,
|
|
|
|
|
# color='blue',
|
|
|
|
|
# linewidth=linewidth_regr_model,
|
|
|
|
|
# legend=True,
|
|
|
|
|
# label=label_legend_regr_model,
|
|
|
|
|
# ax=axes)
|
|
|
|
|
#
|
|
|
|
|
# axes.set_xlim(Xmin, Xmax)
|
|
|
|
|
# axes.set_ylim(Ymin, Ymax)
|
|
|
|
|
# axes.axvline(x=0, color='k', linewidth=1)
|
|
|
|
|
# axes.axhline(y=0, color='k', linewidth=1)
|
|
|
|
|
# axes.set_xlabel(x_label, fontsize=label_fontsize)
|
|
|
|
|
# axes.set_ylabel(y_label, fontsize=label_fontsize)
|
|
|
|
|
# axes.tick_params(labelsize=tick_fontsize)
|
|
|
|
|
# axes.legend(prop={'size': label_legend_fontsize})
|
|
|
|
|
#
|
|
|
|
|
# plt.show()
|
|
|
|
|
# if file_name:
|
|
|
|
|
# fig.savefig(file_name, orientation="portrait", dpi=300)
|
|
|
|
|
#
|
|
|
|
|
# return
|