206 KiB
206 KiB
загрузка данных¶
In [5]:
import pandas as pd
# Location of the Forbes billionaires CSV.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run on another machine until it is made relative or configurable.
csv_path = "C://Users//annal//aim//static//csv//Forbes_Billionaires.csv"
df = pd.read_csv(csv_path)
# Bare last expression so the notebook renders the loaded frame.
df
Out[5]:
создание лингвистических переменных¶
In [6]:
import skfuzzy as fuzz
from skfuzzy import control as ctrl
def _universe(column):
    """Return the distinct, non-missing values of ``column`` in ascending order.

    The raw column contains repeated values (and may contain NaN). A universe
    of discourse is used as the x-axis for membership interpolation, which
    expects increasing coordinates, and its min/max define the automatic
    membership functions, so NaN must not leak into it.

    Parameters
    ----------
    column : pandas.Series
        Numeric column of the data frame.

    Returns
    -------
    numpy.ndarray
        Sorted unique values with missing entries removed.
    """
    return column.dropna().drop_duplicates().sort_values().to_numpy()


# Linguistic variables: two inputs (age, net worth) and one output (rank).
# The "Rank " column name really does end with a space in this data set.
age_cat = ctrl.Antecedent(_universe(df["Age"]), "age_cat")
networth = ctrl.Antecedent(_universe(df["Networth"]), "networth")
rank = ctrl.Consequent(_universe(df["Rank "]), "rank")
формирование нечетких переменных для лингвистических переменных и их визуализация¶
In [7]:
# Auto-generate the fuzzy terms of every linguistic variable and plot them:
# three terms for each input and five for the output. With the "quant" naming
# the rule cell addresses them as 'low'/'average'/'high' and
# 'lower'/'low'/'average'/'high'/'higher'.
for variable, term_count in ((age_cat, 3), (networth, 3), (rank, 5)):
    variable.automf(term_count, variable_type="quant")
    variable.view()
формирование и визуализация нечётких правил¶
In [ ]:
import skfuzzy as fuzz
from skfuzzy import control as ctrl
# Rule base: (age term, net-worth term) -> rank term.
#
# The "quant" term names follow the numeric scale of the variable, so for the
# rank output 'lower' means numerically small ranks and 'higher' numerically
# large ones. Assuming rank 1 is the largest fortune (usual for a rich list --
# TODO confirm against the data), a larger net worth has to lead to a
# numerically SMALLER rank. Mapping networth['high'] to rank['higher'] would
# push the richest people to the bottom of the list, so the consequents below
# run from 'higher' (low net worth) down to 'lower' (high net worth).
# The age influence keeps the same relative ordering: for the same net worth,
# an older age group gets an equal or better (smaller) rank term.
rule1 = ctrl.Rule(age_cat['low'] & networth['low'], rank['higher'])
rule2 = ctrl.Rule(age_cat['low'] & networth['average'], rank['high'])
rule3 = ctrl.Rule(age_cat['low'] & networth['high'], rank['average'])
rule4 = ctrl.Rule(age_cat['average'] & networth['low'], rank['higher'])
rule5 = ctrl.Rule(age_cat['average'] & networth['average'], rank['average'])
rule6 = ctrl.Rule(age_cat['average'] & networth['high'], rank['low'])
rule7 = ctrl.Rule(age_cat['high'] & networth['low'], rank['average'])
rule8 = ctrl.Rule(age_cat['high'] & networth['average'], rank['low'])
rule9 = ctrl.Rule(age_cat['high'] & networth['high'], rank['lower'])
# Draw the graph of the first rule as an illustration.
rule1.view()
Out[ ]:
создание нечеткой системы и добавление нечетких правил в базу знаний нечеткой системы¶
In [19]:
# Knowledge base of the fuzzy system: all nine rules, in their defined order.
rule_base = [rule1, rule2, rule3, rule4, rule5, rule6, rule7, rule8, rule9]
rank_ctrl = ctrl.ControlSystem(rule_base)
# Simulation object through which inputs are set and the 'rank' output is read.
ranks = ctrl.ControlSystemSimulation(rank_ctrl)
пример расчёта rank на основе age_cat и networth¶
In [21]:
# Worked example: feed one (age, net worth) pair into the simulation,
# run the inference, dump its internal state and print the crisp rank.
sample_inputs = {'age_cat': 50, 'networth': 219}
for antecedent_name, crisp_value in sample_inputs.items():
    ranks.input[antecedent_name] = crisp_value
ranks.compute()
ranks.print_state()
print(ranks.output['rank'])
тестирование нечёткой системы¶
In [22]:
def fuzzy_pred(row):
    """Predict the rank for one data row with the shared fuzzy simulation.

    Reads ``row['Age']`` and ``row['Networth']``, assigns them to the
    'age_cat' and 'networth' inputs of the module-level ``ranks`` simulation,
    runs the inference and returns the resulting 'rank' output.

    Note: this mutates the global ``ranks`` object (its inputs and outputs).
    """
    for antecedent_name, column in (('age_cat', 'Age'), ('networth', 'Networth')):
        ranks.input[antecedent_name] = row[column]
    ranks.compute()
    return ranks.output['rank']
# Evaluate the system on the first 100 rows only.
# .copy() makes `res` an independent frame: assigning a new column to the
# result of .head() would otherwise raise pandas' SettingWithCopyWarning.
res = df[['Age', 'Networth', 'Rank ']].head(100).copy()
# Row-wise prediction; fuzzy_pred receives each row as a Series.
res['Pred'] = res.apply(fuzzy_pred, axis=1)
res.head(15)
Out[22]:
оценка результатов (метрики для задачи регрессии)¶
In [23]:
import math
from sklearn import metrics
# Regression metrics comparing the actual rank with the fuzzy prediction.
actual_rank, predicted_rank = res['Rank '], res['Pred']
rmetrics = {
    "RMSE": math.sqrt(metrics.mean_squared_error(actual_rank, predicted_rank)),
    # NOTE(review): this is the square root of the MAE, which is not a
    # standard metric and is not in rank units -- confirm it is intended.
    "RMAE": math.sqrt(metrics.mean_absolute_error(actual_rank, predicted_rank)),
    "R2": metrics.r2_score(actual_rank, predicted_rank),
}
rmetrics
Out[23]:
P.S. Менять датасет нет смысла: все подобные датасеты имеют одинаковые колонки. Какой ни возьми, результат нехороший.