2025-02-15 12:11:09 +04:00

250 KiB
Raw Blame History

Начало лабы №7

Загрузка даанных

In [15]:
import pandas as pd
df = pd.read_csv("..//static//csv//FINAL_USO.csv")
print(df.columns)
display(df.head())
Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
       'SP_open', 'SP_high', 'SP_low', 'SP_close', 'SP_Ajclose', 'SP_volume',
       'DJ_open', 'DJ_high', 'DJ_low', 'DJ_close', 'DJ_Ajclose', 'DJ_volume',
       'EG_open', 'EG_high', 'EG_low', 'EG_close', 'EG_Ajclose', 'EG_volume',
       'EU_Price', 'EU_open', 'EU_high', 'EU_low', 'EU_Trend', 'OF_Price',
       'OF_Open', 'OF_High', 'OF_Low', 'OF_Volume', 'OF_Trend', 'OS_Price',
       'OS_Open', 'OS_High', 'OS_Low', 'OS_Trend', 'SF_Price', 'SF_Open',
       'SF_High', 'SF_Low', 'SF_Volume', 'SF_Trend', 'USB_Price', 'USB_Open',
       'USB_High', 'USB_Low', 'USB_Trend', 'PLT_Price', 'PLT_Open', 'PLT_High',
       'PLT_Low', 'PLT_Trend', 'PLD_Price', 'PLD_Open', 'PLD_High', 'PLD_Low',
       'PLD_Trend', 'RHO_PRICE', 'USDI_Price', 'USDI_Open', 'USDI_High',
       'USDI_Low', 'USDI_Volume', 'USDI_Trend', 'GDX_Open', 'GDX_High',
       'GDX_Low', 'GDX_Close', 'GDX_Adj Close', 'GDX_Volume', 'USO_Open',
       'USO_High', 'USO_Low', 'USO_Close', 'USO_Adj Close', 'USO_Volume'],
      dtype='object')
Date Open High Low Close Adj Close Volume SP_open SP_high SP_low ... GDX_Low GDX_Close GDX_Adj Close GDX_Volume USO_Open USO_High USO_Low USO_Close USO_Adj Close USO_Volume
0 2011-12-15 154.740005 154.949997 151.710007 152.330002 152.330002 21521900 123.029999 123.199997 121.989998 ... 51.570000 51.680000 48.973877 20605600 36.900002 36.939999 36.049999 36.130001 36.130001 12616700
1 2011-12-16 154.309998 155.369995 153.899994 155.229996 155.229996 18124300 122.230003 122.949997 121.300003 ... 52.040001 52.680000 49.921513 16285400 36.180000 36.500000 35.730000 36.270000 36.270000 12578800
2 2011-12-19 155.479996 155.860001 154.360001 154.869995 154.869995 12547200 122.059998 122.320000 120.029999 ... 51.029999 51.169998 48.490578 15120200 36.389999 36.450001 35.930000 36.200001 36.200001 7418200
3 2011-12-20 156.820007 157.429993 156.580002 156.979996 156.979996 9136300 122.180000 124.139999 120.370003 ... 52.369999 52.990002 50.215282 11644900 37.299999 37.610001 37.220001 37.560001 37.560001 10041600
4 2011-12-21 156.979996 157.529999 156.130005 157.160004 157.160004 11996100 123.930000 124.360001 122.750000 ... 52.419998 52.959999 50.186852 8724300 37.669998 38.240002 37.520000 38.110001 38.110001 10728000

5 rows × 81 columns

Создание лингвистических переменных

Входные переменные: OF_Price (цены на нефть) и SF_Price (цена на серебро) .
Выходная переменная: Adj Close (цена).

In [16]:
import numpy as np
from skfuzzy import control as ctrl


# Инициализация лингвистических переменных
oil_price = ctrl.Antecedent(np.arange(df['OF_Price'].min(), df['OF_Price'].max(), 10), "oil_price")
silver_price = ctrl.Antecedent(np.arange(df['SF_Price'].min(), df['SF_Price'].max(), 1000), "silver_price")
adj_close = ctrl.Consequent(np.arange(df['Adj Close'].min(), df['Adj Close'].max(), 10), "adj_close")

Формирование нечетких переменных для лингвистических переменных и их визуализация

In [ ]:
import skfuzzy as fuzz

oil_price['low'] = fuzz.zmf(oil_price.universe, 40, 50)
oil_price['average'] = fuzz.trapmf(oil_price.universe, [60, 70, 80, 90])
oil_price['high'] = fuzz.smf(oil_price.universe, 100, 120)

silver_price['low'] = fuzz.zmf(silver_price.universe, 35000, 45000)
silver_price['average'] = fuzz.trapmf(silver_price.universe, [35000, 45000, 50000, 60000])
silver_price['high'] = fuzz.smf(silver_price.universe, 50000, 60000)

adj_close['low'] = fuzz.zmf(adj_close.universe,110, 135)
adj_close['average'] = fuzz.trapmf(adj_close.universe, [135, 145, 155, 165])
adj_close['high'] = fuzz.smf(adj_close.universe, 160, 170)

oil_price.view()
silver_price.view()
adj_close.view()
c:\Users\Алина\AppData\Local\Programs\Python\Python312\Lib\site-packages\skfuzzy\control\fuzzyvariable.py:125: UserWarning: FigureCanvasAgg is non-interactive, and thus cannot be shown
  fig.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

Формирование и визуализация базы нечетких правил

In [18]:
import numpy as np
import skfuzzy as fuzz
from skfuzzy import control as ctrl
# Нечеткие правила
rule1 = ctrl.Rule(silver_price["low"] & oil_price["low"], adj_close["low"])
rule2 = ctrl.Rule(silver_price["low"] & oil_price["average"], adj_close["low"])
rule3 = ctrl.Rule(silver_price["low"] & oil_price["high"], adj_close["low"])
rule4 = ctrl.Rule(silver_price["average"] & oil_price["low"], adj_close["low"])
rule5 = ctrl.Rule(silver_price["average"] & oil_price["average"], adj_close["low"])
rule6 = ctrl.Rule(silver_price["average"] & oil_price["high"], adj_close["low"])
rule7 = ctrl.Rule(silver_price["high"] & oil_price["low"], adj_close["average"])
rule8 = ctrl.Rule(silver_price["high"] & oil_price["average"], adj_close["high"])
rule9 = ctrl.Rule(silver_price["high"] & oil_price["high"], adj_close["high"])
rule1.view()
Out[18]:
(<Figure size 640x480 with 1 Axes>, <Axes: >)
No description has been provided for this image

Создание нечеткой системы и добавление нечетких правил в базу знаний нечеткой системы

In [19]:
price_ctrl = ctrl.ControlSystem(
    [
        rule1,
        rule2,
        rule3,
        rule4,
        rule5,
        rule6,
        rule7,
        rule8,
        rule9,
    ]
)

# Создание симулятора нечеткой системы
price_sim = ctrl.ControlSystemSimulation(price_ctrl)

Пример расчета выходной переменной adj_close на основе входных переменных silver_price и oil_price
Система также формирует подробный журнал выполнения процесса нечеткого логического вывода

In [20]:
price_sim.input['silver_price'] = 60000
price_sim.input['oil_price'] = 30
price_sim.compute()

price_sim.print_state()

price_sim.output["adj_close"]
=============
 Antecedents 
=============
Antecedent: silver_price            = 60000
  - low                             : 0.0
  - average                         : 0.014110000000000011
  - high                            : 0.99765774
Antecedent: oil_price               = 30
  - low                             : 1.0
  - average                         : 0.0
  - high                            : 0.0

=======
 Rules 
=======
RULE #0:
  IF silver_price[low] AND oil_price[low] THEN adj_close[low]
	AND aggregation function : fmin
	OR aggregation function  : fmax

  Aggregation (IF-clause):
  - silver_price[low]                                      : 0.0
  - oil_price[low]                                         : 1.0
                      silver_price[low] AND oil_price[low] = 0.0
  Activation (THEN-clause):
                                            adj_close[low] : 0.0

RULE #1:
  IF silver_price[low] AND oil_price[average] THEN adj_close[low]
	AND aggregation function : fmin
	OR aggregation function  : fmax

  Aggregation (IF-clause):
  - silver_price[low]                                      : 0.0
  - oil_price[average]                                     : 0.0
                  silver_price[low] AND oil_price[average] = 0.0
  Activation (THEN-clause):
                                            adj_close[low] : 0.0

RULE #2:
  IF silver_price[low] AND oil_price[high] THEN adj_close[low]
	AND aggregation function : fmin
	OR aggregation function  : fmax

  Aggregation (IF-clause):
  - silver_price[low]                                      : 0.0
  - oil_price[high]                                        : 0.0
                     silver_price[low] AND oil_price[high] = 0.0
  Activation (THEN-clause):
                                            adj_close[low] : 0.0

RULE #3:
  IF silver_price[average] AND oil_price[low] THEN adj_close[low]
	AND aggregation function : fmin
	OR aggregation function  : fmax

  Aggregation (IF-clause):
  - silver_price[average]                                  : 0.014110000000000011
  - oil_price[low]                                         : 1.0
                  silver_price[average] AND oil_price[low] = 0.014110000000000011
  Activation (THEN-clause):
                                            adj_close[low] : 0.014110000000000011

RULE #4:
  IF silver_price[average] AND oil_price[average] THEN adj_close[low]
	AND aggregation function : fmin
	OR aggregation function  : fmax

  Aggregation (IF-clause):
  - silver_price[average]                                  : 0.014110000000000011
  - oil_price[average]                                     : 0.0
              silver_price[average] AND oil_price[average] = 0.0
  Activation (THEN-clause):
                                            adj_close[low] : 0.0

RULE #5:
  IF silver_price[average] AND oil_price[high] THEN adj_close[low]
	AND aggregation function : fmin
	OR aggregation function  : fmax

  Aggregation (IF-clause):
  - silver_price[average]                                  : 0.014110000000000011
  - oil_price[high]                                        : 0.0
                 silver_price[average] AND oil_price[high] = 0.0
  Activation (THEN-clause):
                                            adj_close[low] : 0.0

RULE #6:
  IF silver_price[high] AND oil_price[low] THEN adj_close[average]
	AND aggregation function : fmin
	OR aggregation function  : fmax

  Aggregation (IF-clause):
  - silver_price[high]                                     : 0.99765774
  - oil_price[low]                                         : 1.0
                     silver_price[high] AND oil_price[low] = 0.99765774
  Activation (THEN-clause):
                                        adj_close[average] : 0.99765774

RULE #7:
  IF silver_price[high] AND oil_price[average] THEN adj_close[high]
	AND aggregation function : fmin
	OR aggregation function  : fmax

  Aggregation (IF-clause):
  - silver_price[high]                                     : 0.99765774
  - oil_price[average]                                     : 0.0
                 silver_price[high] AND oil_price[average] = 0.0
  Activation (THEN-clause):
                                           adj_close[high] : 0.0

RULE #8:
  IF silver_price[high] AND oil_price[high] THEN adj_close[high]
	AND aggregation function : fmin
	OR aggregation function  : fmax

  Aggregation (IF-clause):
  - silver_price[high]                                     : 0.99765774
  - oil_price[high]                                        : 0.0
                    silver_price[high] AND oil_price[high] = 0.0
  Activation (THEN-clause):
                                           adj_close[high] : 0.0


==============================
 Intermediaries and Conquests 
==============================
Consequent: adj_close                = 149.2413379761956
  low:
    Accumulate using accumulation_max : 0.014110000000000011
  average:
    Accumulate using accumulation_max : 0.99765774
  high:
    Accumulate using accumulation_max : 0.0

Out[20]:
np.float64(149.2413379761956)

Визуализация функции принадлежности для выходной переменной adj_close
Функция получена в процессе аккумуляции и используется для дефаззификации значения выходной переменной influx

In [21]:
adj_close.view(sim=price_sim)
No description has been provided for this image

Функция для автоматизации вычисления целевой переменной Y на основе вектора признаков X

In [22]:
def fuzzy_pred(row):
    price_sim.input["silver_price"] = row["SF_Price"]
    price_sim.input["oil_price"] = row["OF_Price"]
    price_sim.compute()
    return price_sim.output["adj_close"]

Создадим выборки

In [23]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
df=pd.read_csv("..//static//csv//FINAL_USO.csv")
# Разделение данных на обучающую и временную выборки
train_df, temp_df = train_test_split(df, test_size=0.4, random_state=42)

# Разделение остатка на контрольную и тестовую выборки
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Проверка размеров выборок
print("Размер обучающей выборки:", len(train_df))
print("Размер контрольной выборки:", len(val_df))
print("Размер тестовой выборки:", len(test_df))

# Сохранение выборок в файлы
train_df.to_csv("..//static//csv//train_data.csv", index=False)
val_df.to_csv("..//static//csv//val_data.csv", index=False)
test_df.to_csv("..//static//csv//test_data.csv", index=False)
Размер обучающей выборки: 1030
Размер контрольной выборки: 344
Размер тестовой выборки: 344

Тестирование нечеткой системы на обучающей выборке

In [24]:
import pandas as pd
train_df = pd.read_csv("..//static//csv//train_data.csv")

result_train = train_df.copy()


result_train["Adj_Pred"] = result_train.apply(fuzzy_pred, axis=1)
selected_cm=result_train[['Adj Close','Adj_Pred']]
selected_cm.head(15)
Out[24]:
Adj Close Adj_Pred
0 168.000000 130.562212
1 112.570000 114.718604
2 152.619995 128.227957
3 114.099998 113.722111
4 122.370003 114.119637
5 110.739998 118.652335
6 120.339996 114.292079
7 108.529999 117.657836
8 155.990005 119.316656
9 152.619995 122.122485
10 114.690002 113.144200
11 116.720001 119.858747
12 133.919998 117.724467
13 124.389999 114.499433
14 124.230003 117.682681

Тестирование нечеткой системы на тестовой выборке

In [25]:
import pandas as pd
test_df=pd.read_csv("..//static//csv//test_data.csv")
result_test = test_df.copy()

result_test["Adj_Pred"] = result_test.apply(fuzzy_pred, axis=1)

selected_cm=result_test[['Adj Close','Adj_Pred']]
selected_cm.head(25)
Out[25]:
Adj Close Adj_Pred
0 160.539993 126.992652
1 121.610001 120.454951
2 126.160004 116.328038
3 160.990005 137.572396
4 173.610001 166.750349
5 118.970001 115.560821
6 126.680000 116.500640
7 117.519997 114.245149
8 126.730003 113.759291
9 120.309998 114.218952
10 114.419998 113.384163
11 124.540001 113.769799
12 115.430000 117.661916
13 118.220001 113.783256
14 121.050003 120.408936
15 106.220001 115.897552
16 109.139999 118.876038
17 112.239998 114.777997
18 122.879997 116.584875
19 117.290001 119.730281
20 127.400002 114.648531
21 171.020004 134.490239
22 118.120003 115.509442
23 119.430000 115.422122
24 115.800003 113.634607

Тестирование нечёткой системы на контрольной выборке

In [26]:
import pandas as pd
val_df=pd.read_csv("..//static//csv//val_data.csv")
result_val = val_df.copy()

result_val["Adj_Pred"] = result_val.apply(fuzzy_pred, axis=1)

selected_cm=result_val[['Adj Close','Adj_Pred']]
selected_cm.head(25)
Out[26]:
Adj Close Adj_Pred
0 117.589996 116.428542
1 121.650002 117.405395
2 166.339996 139.323579
3 116.309998 113.383973
4 115.199997 120.439259
5 126.940002 113.266077
6 127.480003 115.502123
7 120.779999 118.493430
8 151.619995 128.041918
9 118.290001 115.096439
10 122.860001 114.249403
11 118.360001 113.150825
12 123.320000 115.117051
13 120.650002 118.467985
14 161.509995 131.978154
15 120.589996 118.541233
16 120.959999 115.595202
17 115.989998 113.315718
18 120.989998 115.774836
19 168.789993 166.732904
20 114.290001 119.828262
21 114.209999 119.669656
22 115.050003 112.657356
23 118.860001 114.268250
24 120.050003 115.009975

Оценка результатов на основе метрик для задачи регрессии

In [27]:
import math
from sklearn import metrics


rmetrics = {}
rmetrics["RMSE_train"] = math.sqrt(
    metrics.mean_squared_error(result_train["Adj Close"], result_train["Adj_Pred"])
)
rmetrics["RMSE_test"] = math.sqrt(
    metrics.mean_squared_error(result_test["Adj Close"], result_test["Adj_Pred"])
)
rmetrics["RMAE_test"] = math.sqrt(
    metrics.mean_absolute_error(result_test["Adj Close"], result_test["Adj_Pred"])
)
rmetrics["R2_test"] = metrics.r2_score(
    result_test["Adj Close"], result_test["Adj_Pred"]
)

rmetrics
Out[27]:
{'RMSE_train': 14.063806361503557,
 'RMSE_test': 13.63382917388284,
 'RMAE_test': 3.152710529049624,
 'R2_test': 0.35443568923237845}