196 KiB
196 KiB
Начало лабораторной работы
Выгрузка данных из csv файла в датафрейм
Импорт библиотек
In [102]:
import pandas as pd
import numpy as np
import skfuzzy as fuzz
import matplotlib.pyplot as plt
from skfuzzy import control as ctrl
from sklearn import metrics
import math
Загрузка данных
In [94]:
# Relative location of the salaries dataset (resolved against the notebook's working directory).
file_path = "./static/csv/ds_salaries.csv"

# Read the CSV into the frame that every later cell works on.
df = pd.read_csv(filepath_or_buffer=file_path)

# Show the available column names, then the frame itself as the cell output.
print(df.columns)
df
Out[94]:
Определение лингвистических переменных
In [95]:
# Ordinal encoding of the experience codes used in the "experience_level" column.
experience_mapping = {"EN": 1, "MI": 2, "SE": 3, "EX": 4}

# Encode only while the column still holds the raw codes. Re-running this cell
# on an already encoded (numeric) column would otherwise map every value to NaN,
# because the integers are not keys of experience_mapping.
if not pd.api.types.is_numeric_dtype(df["experience_level"]):
    df["experience_level"] = df["experience_level"].map(experience_mapping)

# Each universe spans the observed [min, max] of its column with a step of 1;
# "+ 1" makes the upper bound inclusive because np.arange excludes the stop value.
experience = ctrl.Antecedent(
    np.arange(df["experience_level"].min(), df["experience_level"].max() + 1, 1),
    "experience",
)
remote_ratio = ctrl.Antecedent(
    np.arange(df["remote_ratio"].min(), df["remote_ratio"].max() + 1, 1),
    "remote_ratio",
)
salary = ctrl.Consequent(
    np.arange(df["salary_in_usd"].min(), df["salary_in_usd"].max() + 1, 1),
    "salary",
)
Настройка лингвистических переменных
In [96]:
def _assign_quantile_terms(variable, values, labels):
    """Attach three membership functions to a fuzzy variable and plot them.

    The breakpoints are the minimum, the 0.33 and 0.66 quantiles, and the
    maximum of ``values``.

    Args:
        variable: fuzzy variable (Antecedent or Consequent) to populate.
        values: dataframe column the breakpoints are computed from.
        labels: three term names, ordered from the lowest to the highest term.
    """
    lowest, highest = values.min(), values.max()
    q33, q66 = values.quantile(0.33), values.quantile(0.66)
    first, middle, last = labels

    # zmf for the lowest term, trapmf for the middle one, smf for the highest.
    variable[first] = fuzz.zmf(variable.universe, lowest, q33)
    variable[middle] = fuzz.trapmf(variable.universe, [lowest, q33, q66, highest])
    variable[last] = fuzz.smf(variable.universe, q66, highest)
    variable.view()


# Same order as before: experience, remote_ratio, salary (one plot per variable).
for variable, column, labels in (
    (experience, "experience_level", ("junior", "mid", "senior")),
    (remote_ratio, "remote_ratio", ("low", "medium", "high")),
    (salary, "salary_in_usd", ("low", "medium", "high")),
):
    _assign_quantile_terms(variable, df[column], labels)
Создание базы нечетких правил
In [109]:
# Rule base as a table: (experience term, remote_ratio term, resulting salary term).
# NOTE(review): the "senior" + "low" row yields a "low" salary although
# "mid" + "low" yields "medium" — confirm this non-monotonic rule is intended.
_rule_table = [
    ("junior", "low", "low"),
    ("junior", "medium", "medium"),
    ("junior", "high", "medium"),
    ("mid", "low", "medium"),
    ("mid", "medium", "medium"),
    ("mid", "high", "high"),
    ("senior", "low", "low"),
    ("senior", "medium", "medium"),
    ("senior", "high", "high"),
]

# Build the rules in table order; the names rule1..rule9 are used by later cells.
rule1, rule2, rule3, rule4, rule5, rule6, rule7, rule8, rule9 = [
    ctrl.Rule(experience[exp_term] & remote_ratio[remote_term], salary[salary_term])
    for exp_term, remote_term, salary_term in _rule_table
]

print(rule1)
Создание нечеткой системы
In [108]:
# Collect the nine rules of the rule base in their numbered order.
rule_base = [rule1, rule2, rule3, rule4, rule5, rule6, rule7, rule8, rule9]

# The control system holds the rules; the simulation object is what receives
# inputs and produces outputs in the following cells.
salary_ctrl = ctrl.ControlSystem(rule_base)
salary_simulation = ctrl.ControlSystemSimulation(salary_ctrl)

# Print every rule registered in the control system, one per line.
for rule in salary_ctrl.rules:
    print(rule)
Проверка расчета выходной переменной
In [101]:
# Crisp inputs for a single manual check of the fuzzy system.
check_inputs = {"experience": 3, "remote_ratio": 50}
for input_name, input_value in check_inputs.items():
    salary_simulation.input[input_name] = input_value

# Run the inference, dump the simulation state, and show the resulting salary value.
salary_simulation.compute()
salary_simulation.print_state()
print(salary_simulation.output["salary"])

# Plot the salary variable for this simulation run.
salary.view(sim=salary_simulation)
Оценка качества предсказаний
In [103]:
def fuzzy_pred(row):
    """Compute the fuzzy-system salary output for one dataset row.

    Args:
        row: record providing the "experience_level" and "remote_ratio" values.

    Returns:
        The "salary" output of the module-level ``salary_simulation`` after
        ``compute()`` has been run on the row's inputs.
    """
    # The shared simulation object is reused, so each call overwrites its inputs.
    crisp_inputs = {
        "experience": row["experience_level"],
        "remote_ratio": row["remote_ratio"],
    }
    for input_name, input_value in crisp_inputs.items():
        salary_simulation.input[input_name] = input_value

    salary_simulation.compute()
    return salary_simulation.output["salary"]
# Predict on a copy so the source frame is not modified by the new column.
result = df.copy()
result["salary_pred"] = result.apply(fuzzy_pred, axis=1)

# Spot-check a slice of rows: inputs, true salary and predicted salary side by side.
print(result.loc[115:130, ["experience_level", "remote_ratio", "salary_in_usd", "salary_pred"]])

y_true = result["salary_in_usd"]
y_pred = result["salary_pred"]
mae = metrics.mean_absolute_error(y_true, y_pred)

rmetrics = {}
rmetrics["RMSE"] = math.sqrt(metrics.mean_squared_error(y_true, y_pred))
# MAE is in the same units as salary_in_usd and is directly comparable with RMSE.
rmetrics["MAE"] = mae
# "RMAE" (square root of MAE) is kept for backward compatibility; its units are
# the square root of the target's units, so it must not be compared with RMSE.
rmetrics["RMAE"] = math.sqrt(mae)
rmetrics["R2"] = metrics.r2_score(y_true, y_pred)
print(rmetrics)
Вывод: нечёткая система прогнозирует зарплату с низкой точностью и допускает большое количество ошибок.