2025-02-08 13:32:07 +04:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Начало лабораторной\n",
"\n",
"Выгрузка данных из csv файла в датафрейм"
]
},
{
"cell_type": "code",
2025-02-15 09:27:14 +04:00
"execution_count": 3,
2025-02-08 13:32:07 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['work_year', 'experience_level', 'employment_type', 'job_title',\n",
" 'salary', 'salary_currency', 'salary_in_usd', 'employee_residence',\n",
" 'remote_ratio', 'company_location', 'company_size'],\n",
" dtype='object')\n"
]
}
],
"source": [
"import pandas as pd\n",
"df = pd.read_csv(\"..//static//csv//ds_salaries.csv\")\n",
"print(df.columns)"
]
},
{
"cell_type": "code",
2025-02-15 10:32:36 +04:00
"execution_count": 10,
2025-02-08 13:32:07 +04:00
"metadata": {},
"outputs": [
{
"data": {
2025-02-15 10:32:36 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHHCAYAAABDUnkqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAADBm0lEQVR4nOydd3gT9R/HX0mb7g2UFiiUUjayUfZQ9lRRhiBDRX8oAuJAVJa4RUQFQXCgAgoiMhSZskSULQoUqLTslg5Kd5M29/vjuNDQQdOmuST9vp6nTy6XG5/k0uSdz9RIkiQhEAgEAoFA4CRo1TZAIBAIBAKBwJoIcSMQCAQCgcCpEOJGIBAIBAKBUyHEjUAgEAgEAqdCiBuBQCAQCAROhRA3AoFAIBAInAohbgQCgUAgEDgVQtwIBAKBQCBwKoS4EQgEAoFA4FQIcSMQCASCCoFGo2HWrFlqmyGwAULcCErNmDFj8PHxUdsMgUDggCxbtgyNRgPArl270Gg0xMbGmm2zceNGunTpQnBwMF5eXkRERDBkyBA2b96sgsVlY9asWYSHhwPmz11QPghxI7CIpKQkPv74Yx5++GF+/fVXsrKy6N27N9OmTePo0aNqmycQCJyEuXPnMnDgQDQaDdOmTePDDz9k8ODBnD17lu+//15t8wR2jqvaBggch++//55x48aRnp5OeHg4Op0OjUZDUlISH3zwAe+88w6jR49myZIluLm5qW2uQCBwUHJzc5kzZw49evRg69atBR6/du2aClaZk5GRgbe3t9pmCIpAeG4EJWLfvn2MHDmSkJAQ9u3bR0xMDN27d8fDw4ODBw9y5coVhg8fztdff81zzz0HgCRJhIeHM2jQoALHy87Oxt/fn6eeegq45ZZes2ZNgW19fHwYM2aM6b7i0s3vwj5x4gSBgYH079+f3Nxcs+0OHTpkdrzExMRCY++FrXv//ffRaDR07drVtE6xddeuXWbb9uvXr0QxfWX/ov4Ke6579uzhqaeeolKlSvj5+TFq1CiuX79udtzw8HCzfQF++OEHNBqNyR0OEBsbi0ajYe7cuQVsa9Kkidlz1ev1zJgxg1atWuHv74+3tzedOnVi586dhT43xd7b//KfPz9FvQa3hyeWL19Oq1at8PT0JCgoiGHDhnHx4kUzO7t06UJERAQJCQmm9bNmzSrg/p87dy6urq5s2rTJtK5r1640adKkgH1z584t1J5PP/2Uxo0b4+7uTrVq1XjmmWdISUkpsP9ff/1F3759CQwMxNvbm6ZNm/LRRx8Bcli3uPdB/vOW9NoWh3LdS/J6K69JYdsuW7bMbLtFixbRpEkTvLy8zLYr7H+5pCQmJpKamkqHDh0KfTw4ONi0bOl7ND/nz5/n6aefpn79+nh6elKpUiUefvjhAq+H8r7evXs3Tz/9NMHBwdSoUYOdO3ei0Wj46aefChx75cqVaDQa9u/fb9mTF1gF4bkRlIh33nkHo9HI999/T6tWrQo8XrlyZb755htOnjzJZ599xsyZMwkODmbkyJG89957JCcnExQUZNp+48aNpKamMnLkyDLbdvHiRXr37k2DBg1YvXo1rq7WeVunpKTw9ttvl2jbPXv2mH1ZloSJEyfSpk0bs3VPPPFEodtOmDCBgIAAZs2axenTp1m0aBHnz583CaXCyM3N5dVXX7XIpttJTU3l888/Z/jw4YwbN460tDS++OILevXqxYEDB2jevHmh+73++uvUrl0bgA8++KCAEMvPAw88wIMPPgjA3r17WbJkidnjb775JtOnT2fIkCE88cQTJCQk8Mknn9C5c2eOHj1KQEAAbm5urF27lrZt2/LAAw+wY8cO3N3dC5xr/fr1TJ06lfnz59O3b99SvSazZs1i9uzZdO/enfHjx5uux8GDB9m3bx86nQ6Abdu20b9/f0JDQ5k0aRIhISGcOnWKn3/+mUmTJvHUU0/RvXt303EfffRRs9cCoEqVKoXaUJZrO3z4cNNz37RpE999912R2zZo0MB0nsTERNMPF4VVq1bx9NNP07VrV5599lm8vb05deoUb731VqlsUwgODsbT05ONGzfy7LPPmn123E5p36MABw8e5I8//mDYsGHUqFGD2NhYFi1aRNeuXTl58iReXl5m2z/99NNUqVKFGTNmkJGRQdeuXQkLC2PFihU88MADZtuuWLGCOnXq0K5duzK9FoJSIgkEJSAoKEiqVauW2brRo0dL3t7eZuumT58uAdLGjRslSZKk06dPS4C0aNEis+0GDhwohYeHS0ajUZIkSdq5c6cESD/88EOBc3t7e0ujR4823f/qq68kQIqJiZGSk5OlRo0aSfXr15cSExPN9lO2O3jwoNn6hIQECZBmzpxptv72dS+99JIUHBwstWrVSurSpYtpvWLrzp07TevuueceqU+fPoUe93ZK81xbtWol6fV60/r33ntPAqT169eb1tWqVcts308//VRyd3eXunXrZnbtYmJiJEB6//33C5y/cePGZs81NzdXysnJMdvm+vXrUtWqVaXHHnuswP5LliyRAOnQoUOmdf369Svw3pEkSTIYDBIgzZ49u8DzjYmJkSRJkmJjYyUXFxfpzTffNNv3n3/+kVxdXQusP336tBQYGCiNHDlSkiRJmjlzpqR8zB09elTy9vaWnnnmmQK2dOnSRWrcuHGB9e+//76ZPdeuXZPc3Nyknj17Snl5eabtFixYIAHSl19+KUmS/LrVrl1bqlWrlnT9+nWzYyrv+dsp7r1T0mtbHGfOnJEAae7cuUU+v/x06NBB6tatm+m+8r756quvTOuGDx8uBQQESFlZWaZ1xb2/LWHGjBkSIHl7e0t9+vSR3nzzTenw4cMFtrPkPXr7a5yZmVngePv375cA6ZtvvjGtU96XHTt2lHJzc822nzZtmuTu7i6lpKSY1l27dk1ydXW942eBoPwQYSlBiUhLSzNzBRdF1apVAfnXFEC9evW45557WLFihWmb5ORkfv31V0aMGFHA65CWlkZiYqLZX1FkZ2czcOBAEhIS2Lx5M5UqVSrNUyuUy5cv88knnzB9+vQ7VoStXbuWgwcP8s4771jt/Lfz5JNPmjwCAOPHjy8QWslPZmYmr7/+OhMmTKBmzZpFbnP7a52Xl2e2jYuLiyl/ymg0kpycTG5uLq1bt+bIkSMFjpmdnQ2Ah4fHHZ+TXq8HKNTDorB27VqMRiNDhgwxszMkJIS6desWCD3Uq1ePH3/8kRUrVvDGG2+Y1l+9epUBAwbQrl07U1jodvLy8gq8HpmZmWbbbN++Hb1ez+TJk9Fqb318jhs3Dj8/P3755RcAjh49SkxMDJMnTyYgIMDsGGWtkinJtS0MS64NyNenuGsD8v+rl5dXiY9pCbNnz2blypW0aNGCLVu28Oqrr9KqVStatmzJqVOnTNtZ+h7Nj6enp2nZYDCQlJREZGQkAQEBhe47btw4XFxczNaNGjWKnJwcszDcqlWryM3NtYpnWlA6hLgRlIhq1arx33//3XG76OhoAKpXr25aN2rUKPbt28f58+cBOVfAYDDw6KOPFtj/scceo0qVKmZ/GRkZhZ5r7Nix/P7776SlpZnybKzFzJkzqVatmiknqCjy8vJ45ZVXGDFiBE2bNrWqDfmpW7eu2X0fHx9CQ0MLzZUAmDdvHtnZ2bzyyitFHnPmzJkFXuuoqKgC23399dc0bdoUDw8PKlWqRJUqVfjll1+4ceNGgW0VMerv73/H56TkqBQnHs+ePYskSdStW7eAradOnSo0sTQhIQFJkpgxY4ZJbAwaNIhLly4Vm4gaFRVV4BwzZ84020Z5D9evX99svZubGxEREabHlf+VwvJ4ykpJrm1hWHJtQL4+dxL27dq148qVK8yaNYsLFy6QmJhY6PuitAwfPpy9e/dy/fp1tm7dyiOPPMLRo0cZMGCASayBZe/R/GRlZTFjxgzCwsJwd3encuXKVKlShZSUlEL3VUKt+WnQoAFt2rQx+wG3YsUK2rZtS2RkZBmevaAsiJwbQYno378/Cxcu5IsvvuDxxx8vdJv4+Hi+/vprqlSpQtu2bU3rhw0bxnPPPceKFSt
2025-02-08 13:32:07 +04:00
"text/plain": [
2025-02-08 13:44:18 +04:00
"<Figure size 640x480 with 1 Axes>"
2025-02-08 13:32:07 +04:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
2025-02-08 13:44:18 +04:00
"import numpy as np\n",
"import skfuzzy as fuzz\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Загрузка датасета\n",
2025-02-08 13:32:07 +04:00
"df = pd.read_csv(\"..//static//csv//ds_salaries.csv\")\n",
"\n",
2025-02-08 13:44:18 +04:00
"# Определение диапазона значений для переменной \"salary_in_usd\"\n",
"x_salary = np.linspace(df['salary_in_usd'].min(), df['salary_in_usd'].max(), 100)\n",
"\n",
2025-02-15 10:32:36 +04:00
"low_mean = 25000\n",
"low_sigma = 10000\n",
"low = fuzz.gaussmf(x_salary, low_mean, low_sigma)\n",
"\n",
2025-02-08 13:44:18 +04:00
"# Средний\n",
2025-02-15 10:32:36 +04:00
"medium_mean = 70000\n",
"medium_sigma = 15000\n",
"medium = fuzz.gaussmf(x_salary, medium_mean, medium_sigma)\n",
"\n",
2025-02-08 13:44:18 +04:00
"# Высокий\n",
2025-02-15 10:32:36 +04:00
"high_mean = 150000\n",
"high_sigma = 30000\n",
"high = fuzz.gaussmf(x_salary, high_mean, high_sigma)\n",
"\n",
"# Очень высокий (сигмоидная функция, неограниченная вправо)\n",
"veryhigh_center = 300000 # Точка, где функция достигает 0.5\n",
"veryhigh_slope = 0.00002 # Наклон функции (чем меньше, тем плавнее рост)\n",
"veryhigh = fuzz.sigmf(x_salary, veryhigh_center, veryhigh_slope)\n",
2025-02-08 13:32:07 +04:00
"\n",
2025-02-08 13:44:18 +04:00
"# Визуализация функций принадлежности\n",
"plt.figure()\n",
"plt.plot(x_salary, low, label='Низкий', color='blue')\n",
"plt.plot(x_salary, medium, label='Средний', color='green')\n",
"plt.plot(x_salary, high, label='Высокий', color='red')\n",
2025-02-15 10:23:21 +04:00
"plt.plot(x_salary, veryhigh, label='Очень Высокий', color='purple')\n",
2025-02-08 13:44:18 +04:00
"plt.title('Функции принадлежности для \"Salary\"')\n",
"plt.xlabel('Зарплата в USD')\n",
"plt.ylabel('Степень принадлежности')\n",
"plt.legend()\n",
"plt.grid()\n",
2025-02-08 13:32:07 +04:00
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2025-02-08 13:44:18 +04:00
"База нечетких правил"
2025-02-08 13:32:07 +04:00
]
},
{
"cell_type": "code",
2025-02-15 10:32:36 +04:00
"execution_count": 7,
2025-02-08 13:32:07 +04:00
"metadata": {},
"outputs": [
2025-02-08 13:44:18 +04:00
{
"name": "stdout",
"output_type": "stream",
"text": [
"Нечеткие правила:\n",
"Если Опыт Низкий И Тип занятости Частичная, Тогда Зарплата Низкая.\n",
"Если Опыт Средний И Тип занятости Частичная, Тогда Зарплата Средняя.\n",
"Если Опыт Высокий И Тип занятости Полная, Тогда Зарплата Высокая.\n",
"Если Опыт Низкий И Тип занятости Полная, Тогда Зарплата Средняя.\n",
"Если Опыт Средний И Тип занятости Полная, Тогда Зарплата Высокая.\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAk4AAASmCAYAAADYniQgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3gU1dfA8e+m91BDAgYChC5ICSAdpIReFUGUIlJFmgX5KU1UEFFRgdCLooIignQCCKGIEKp0AqETOoT0svP+Me+uhCSQTXYzye75PE+e7M5OOXdnk5zMPXOvTlEUBSGEEEII8Ux2WgcghBBCCJFfSOIkhBBCCJFFkjgJIYQQQmSRJE5CCCGEEFkkiZMQQgghRBZJ4iSEEEIIkUWSOAkhhBBCZJEkTkIIIYQQWSSJkxBCCJFHPHjwgIiICFJSUrQORWRCEichhBBCI8nJyUybNo0XXngBZ2dnChYsSLly5di2bZvWoYlMSOJkg/r27YuHh4fWYQgh8rETJ07w+uuvU6JECZydnSlevDi9evXixIkTWoeWbyQmJtKiRQvGjRtH06ZN+e233wgNDWX79u3Uq1dP6/BEJhy0DkDkjrt37/LTTz+xa9cuwsLCiI+Pp3Xr1tSoUYPu3btTo0YNrUMUQuQTq1atomfPnhQqVIj+/ftTunRpLl68yMKFC1m5ciXLly+nS5cuWoeZ533xxRf8888/bN68maZNm2odjsginUzya/2WL1/OgAEDiImJISAggOTkZKKioqhRowZHjx4lOTmZPn36MG/ePJycnLQOVwiRh50/f55q1apRsmRJwsLCKFq0qPG1O3fu0KhRI65cucKxY8coU6aMhpHmbSkpKfj4+DBkyBA+++wzrcMRJpCuOiu3Z88eXn/9dXx9fdmzZw+RkZG0aNECFxcXDhw4wPXr1+nZsydLly5l1KhRACiKQkBAAJ06dUq3v4SEBLy9vRk0aBAAO3bsQKfTsXLlynTrenh40LdvX+PzJUuWoNPpuHjxonHZiRMnKFiwIO3btzcWQxrWCw8PT7O/O3fuoNPpmDhxYprlGS378ssv0el0af6LM8S6Y8eONOu2a9cuw308ybB9Zl8ZtTUsLIxBgwZRuHBhvLy86N27N/fv30+z34CAgDTbAvz222/odDoCAgKMyy5evIhOp2P69OnpYnv++efTtDUpKYnx48dTq1YtvL29cXd3p1GjRvz1118Zts0Q75Nfjx//cZm9B4+fW4Bly5ZRq1YtXF1dKVSoED169ODKlStp4mzSpAllypTh9u3bxuUTJ05Ep9Ol2df06dNxcHBgw4YNxmVNmzbl+eefTxff9OnTM4xn9uzZVKlSxdi19Pbbb/PgwYN02//zzz+0bduWggUL4u7uTrVq1fj2228Btav7aZ+Dx4+b1XP7NIbznpX32/CeZLTukiVL0qwXEhLC888/j5ubW5r1MvpZftyXX35JXFwc8+bNS5M0ARQpUoS5c+cSGxvLtGnTjMsN5zOzr8djy0opweM/r8/a9+M/87t27eKVV16hZMmSODs74+/vz6hRo4iPj3/q8cC0n+k1a9bQrl07ihcvjrOzM2XLlmXy5MmkpqYa1zlz5gz379/H09OTJk2a4Obmhre3N+3bt+f48ePpjn/48GHatGmDl5cXHh4eNG/enH379qWLLyvvs6nvsUhLuuqs3NSpU9Hr9SxfvpxatWqle71IkSL88MMPnDx5krlz5zJhwgR8fHx4/fXXmTZtGvfu3aNQoULG9deuXUt0dDSvv/56jmO7cuUKrVu3pmLFivz66684OJjn4/jgwQOmTJmSpXXDwsLS/CHOiuHDh1O7du00y956660M1x02bBgFChRg4sSJnDlzhpCQEC5dumRMwjKSkpLCRx99ZFJMT4qOjmbBggX07NmTAQMG8OjRIxYuXEhwcDD79++nevXqGW73ySefULp0aQC++uqrdH8QHtelSxe6du0KqH+Q5s2bl+b1zz77jHHjxtG9e3feeustbt++zffff0/jxo05fPgwBQoUwMnJiVWrVvHiiy/SpUsXtm3bhrOzc7pjrVmzhjFjxjBjxgzatm2brfdk4sSJTJo0iRYtWjBkyBDj+Thw4AB79uzB0dERgNDQUNq3b4+fnx8jRozA19eXU6dOsW7dOkaMGMGgQYNo0aKFcb9vvPFGmvcCSJdQGOTk3Pbs2dPY9g0bNvDLL79kum7FihWNx7lz547xnyKDFStWMHToUJo2bco777yDu7s7p06d4vPPP39mHGvXriUgIIBGjRpl+Hrjxo0JCAhg/fr16V4LCQlJ8wc7MjKS8ePHP/OYT9O1a1cCAwONz0eNGkWlSpUYOHCgcVmlSpUANWmNi4tjyJAhFC5cmP379/P9999z9epVfvvttywdLys/00uWLMHDw4PRo0fj4eHB9u3bGT9+PNHR0Xz55ZeAWj4BMHbsWMqVK8ekSZNISEhg1qxZNGjQgAMHDlC+fHlA/QezUaNGeHl58cEHH+Do6MjcuXNp2rQpO3fupG7dujRu3Jgff/zRGKfhKtbjn7f69eub/P6KDCjCqhUqVEgpVapUmmV9+vRR3N3d0ywbN26cAihr165VFEVRzpw5owBKSEhImvU6duyoBAQEKHq9XlEURfnrr78UQPntt9/SHdvd3V3p06eP8fnixYsVQImMjFTu3bunVK5cWalQoYJy586dNNsZ1jtw4ECa5bdv31YAZcKECWmWP7nsgw8+UHx8fJRatWopTZo0MS43xPrXX38Zl9WtW1dp06ZNhvt9UnbaWqtWLSUpKcm4fNq0aQqgrFmzxrisVKlSabadPXu24uzsrDRr1izNuYuMjFQA5csvv0x3/CpVqqRpa0pKipKYmJhmnfv37yvFihVT3nzzzXTbz5s3TwGU8PBw47J27dql++woiqIkJycrgDJp0qR07Y2MjFQURVEuXryo2NvbK5999lmabf/991/FwcEh3fIzZ84oBQsWVF5//XVFURRlwoQJiuHX0+HDhxV3d3fl7bffThdLkyZNlCpVqqRb/uWXX6aJ59atW4qTk5PSqlUrJTU11bjezJkzFUBZtGiRoijq+1a6dGmlVKlSyv3799Ps0/CZf9LTPjtZPbdPc/bsWQVQpk+fnmn7HtegQQOlWbNmxueGz83ixYuNy3r27KkUKFBAiY+PNy572ufb4MGDBwqgdOrU6akxd+zYUQGU6OhoRVH+O5+3b99Os96BAwfSxZbR76cnmfKePy4uLi7dsilTpig6nU65dOnSU49pys90RscZNGiQ4ubmpiQkJCiK8t/7XaRIkTS/A8+ePas4Ojoq3bp1My7r3Lmz4uTkpJw/f9647Pr164qnp6fSuHHjDONt0qRJmt8Jj8vpe2zrpKvOyj169AgfH59nrlesWDFAvVIBUL58eerWrctPP/1kXOfevXts3LiRXr16pbta8ujRI+7cuZPmKzMJCQl07NiR27dvs2nTJgoXLpydpmXo2rVrfP/994wbN+6Zl6JXrVrFgQMHmDp1qtmO/6SBAwcar2QADBkyJF130+Pi4uL45JNPGDZsGCVLlsx0nSff68e7AADs7e2N9Wp6vZ579+6RkpJCUFAQhw4dSrfPhIQEAFxcXJ7ZpqSkJIAMrwwZrFq1Cr1eT/fu3dPE6evrS7ly5dJ1GZYvX57ff/+dn376iU8//dS4/MaNG3To0IF69eoZu8qelJqamu79iIuLS7PO1q1bSUpKYuTIkdjZ/fdrb8CAAXh5eRmvjhw+fJjIyEhGjhxJgQIF0uwjsyuEWZWVc5sRU84NqOfnaecG1J9XNze3LO/z8e0APD09n7qe4XXD75PsMJxLQ/vNwdXV1fg4NjaWO3fuUL9+fRRF4fDhw1naR1Z+ph8/juF3Y6NGjYiLi+P06dNp9tevX780vwPLlStHx44d2bRpE6mpqaSmprJlyxY6d+6cpmbMz8+P1157jd27d2f7fbbEe2wLJHGycsWLF+f8+fPPXC8iIgKAEiVKGJf17t2bPXv2cOnSJUC9zJ2cnMwbb7yRbvs333yTokWLpvmKjY3N8Fj9+vVj9+7
"text/plain": [
"<Figure size 600x1200 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
2025-02-08 13:32:07 +04:00
{
"data": {
2025-02-08 13:44:18 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA90AAAJOCAYAAACqS2TfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3yN1xvAv+/NTm6GUYmREhLE3luNIlaoUjVaQqlWrQpCa1Ojtau0KDGL1iilZs3Qmtfeo6gdstcd7++P676/3NybhUjC+X4+L7nvOe95n/ec847nPM95jiTLsoxAIBAIBAKBQCAQCASCl44quwUQCAQCgUAgEAgEAoHgdUUo3QKBQCAQCAQCgUAgEGQRQukWCAQCgUAgEAgEAoEgixBKt0AgEAgEAoFAIBAIBFmEULoFAoFAIBAIBAKBQCDIIoTSLRAIBAKBQCAQCAQCQRYhlG6BQCAQCAQCgUAgEAiyCKF0CwQCgUAgEAgEAoFAkEUIpVsgEAgEAoFAIBAIBIIsQijdAoFAIBAIBAKBQCAQZBFC6RYIBII3kNDQUCRJMtsKFChAo0aN+PPPP7NbPIFAIBAIBILXBtvsFkAgEAgE2cf48ePx8fFBlmUePHhAaGgoLVu2ZPPmzbRu3Tq7xRMIBAKBQCDI9QilWyAQCN5gWrRoQbVq1ZTfn3zyCZ6envzyyy9C6RYIBAKBQCB4CQj3coFAIBAoeHh44OTkhK3t/8dkb968iSRJhIaGmuX94osvkCSJoKAgZd/69eupUaMGefPmxcnJidKlSzN16lRkWQZgz549SJLEhg0bLM69atUqJEni8OHDAJw+fZqgoCCKFy+Oo6MjXl5e9OzZk/DwcKuyFytWzMJlXpIk9u7da5YnubwAv/76K5IkUaxYMWXfpUuXaNy4MV5eXjg4OODt7c1nn33GkydPlDxJSUmMHj2aqlWr4u7ujouLC/Xr12fPnj1m5Zvqb9q0aRYylytXjoYNG5rta9iwocW+o0ePKteTnJiYGIKDgylevDh2dnZm1/348WOr9ZT8POXKlbPYP23aNCRJ4ubNm2b7//zzT+rXr4+Liwuurq60atWKc+fOmeWxVr979+61aAeAf/75h+bNm+Pu7o6zszMNGjQgLCxMSR87dqzV9rTWttbq7JtvvkGlUrFq1Sqza85I3S5ZsgRJkli8eLFZ3kmTJiFJElu3brWot5T1kJbcyZEkiX79+rFy5UpKlSqFo6MjVatWZf/+/Wb5/v33X/r27UupUqVwcnIiX758fPDBBxbtlHLqiLOzM+XLl2fRokVm+YKCgsz6PMDt27dxcnKy2v6mdky5pSxDIBAIBJYIS7dAIBC8wURGRvL48WNkWebhw4d8//33xMTE8NFHH6V53NWrV1m4cKHF/qioKGrWrEn37t2xs7Nj27ZtDB8+HFtbW4KDg2nYsCHe3t6sXLmSdu3amR27cuVKSpQoQe3atQHYuXMn169fp0ePHnh5eXHu3DkWLFjAuXPn+Pvvvy2UF4D69evz6aefAnDhwgUmTZqU5nXodDq+/vpri/2xsbEUKVKEwMBA3NzcOHv2LD/88AP//fcfmzdvVq510aJFdO7cmd69exMdHc3PP/9MQEAAR44coVKlSmmeOzOEhIRY3T906FB+/PFHPvnkE+rWrYudnR3r16+3OqjxIixfvpzu3bsTEBDA1KlTiYuLY/78+dSrV4+TJ09mWvH666+/aNGiBVWrVmXMmDGoVCqWLFlC48aNOXDgADVq1OD999/H19dXOebLL7/E399faV8Af39/q+UvWbKEkSNHMn36dLp06ZKmLNbqtkePHqxfv57BgwfTtGlTvL29OXPmDOPGjeOTTz6hZcuW6V5jpUqVCA4ONtu3bNkydu7caZF33759rFmzhgEDBuDg4MC8efNo3rw5R44cUQZGjh49yqFDh+jUqRNFihTh5s2bzJ8/n4YNG3L+/HmcnZ3Nypw5cyb58+cnKiqKxYsX07t3b4oVK0aTJk1SlXn06NEkJCSkeV1fffWVUu8LFizg1q1b6daFQCAQvPHIAoFAIHjjWLJkiQxYbA4ODnJoaKhZ3hs3bsiAvGTJEmVfx44d5XLlysne3t5y9+7d0zxXmTJl5NatWyu/R4wYITs4OMgRERHKvocPH8q2trbymDFjlH1xcXEWZf3yyy8yIO/fv98irXDhwnKPHj2U33v27JEBec+ePcq+okWLmsk7b9482cHBQW7UqJFctGjRNK+jb9++slqtVn7rdDo5MTHRLM/Tp09lT09PuWfPnso+U/199913FmWWLVtWbtCggdm+Bg0amO3bunWrDMjNmzeXU762CxYsKAcEBJjtGzNmjAzIjx49SvN6GjRoIJctW9Zi/3fffScD8o0bN2RZluXo6GjZw8ND7t27t1m++/fvy+7u7mb7fXx85G7dupnlS9kOBoNB9vPzkwMCAmSDwaDki4uLk318fOSmTZtalTdl26W8FlOdbdmyRba1tZWDg4PTzCfLadftvXv35Lx588pNmzaVExMT5cqVK8tvv/22HBkZaVWGlLK2atXKYv8XX3xhcR7TvXfs2DFl37///is7OjrK7dq1U/ZZux8OHz4sA/KyZcuUfaZ729R+sizLly9flgH522+/VfZ1797drM+fPXtWVqlUcosWLSyOl2VZ3rlzpwzI+/btS7UMgUAgEFhHuJcLBALBG8wPP/zAzp072blzJytWrKBRo0b06tWL9evXp3rM8ePH+fXXX5k8eTIqlfXXyOPHj7lz5w6hoaFcvXqVd955R0nr1q0biYmJ/Pbbb8q+NWvWoNPpzCzsTk5Oyt8JCQk8fvyYWrVqAXDixAmLcyYlJeHg4JDha4+Li2P8+PH069ePt99+22qeyMhIHjx4wO7du9myZYvZddjY2GBvbw+AwWDgyZMn6HQ6qlWrZlW+50GWZUaMGEH79u2pWbOmRXp0dDT58uV77vL1ej2PHz822+Li4szy7Ny5k4iICDp37myWz8bGhpo1a5q50xcoUIA7d+6keU6NRsOVK1fo0qUL4eHhSnmxsbG8++677N+/H4PB8FzXc+TIETp27Ej79u357rvv0sybXt16eXkp90f9+vXRaDQsXrwYNze355ItLWrXrk3VqlWV32+//TZt27Zl+/bt6PV6wPx+0Gq1hIeH4+vri4eHh9X+9vTpUx4/fsz169eZOXMmNjY2NGjQIFUZRowYQZUqVfjggw+spiclJQFk6h4TCAQCgRHhXi4QCARvMDVq1DALpNa5c2cqV65Mv379aN26taJUJmf48OHUr1+f1q1b069fP4v0hIQE3nrrLcA4X/Wrr75i6NChSnrp0qWpXr06K1eu5JNPPgGMruW1atUycyd+8uQJ48aNY/Xq1Tx8+NDsHJGRkRbnjYyMRK1WZ/jaZ8yYQUJCAl999RWDBw+2micgIIB//vkHgObNm7NmzRqz9KVLlzJ9+nQuXryIVqtV9vv4+GRYjrRYuXIl586dY+3atWZzk03Url2bDRs28Ntvvynu5SmV5rS4ePGi0lapceXKFQAaN25sNT25ElqnTh3mzJnD6tWrady4MSqVyqKtTOV179491XNGRkaSJ0+eDF2Dif/++49WrVoRGxtLeHi41ekHyUmvbgE6derEihUr2LJlC59++invvvtupmTKKH5+fhb7SpYsSVxcHI8ePcLLy4v4+HgmT57MkiVL+O+//5Q4CWD9fqhSpYryt4ODA3PnzqVGjRpWz3/w4EE2b97M7t27U3UXj4iIAMjUPSYQCAQCI0LpFggEAoGCSqWiUaNGzJ49mytXrlC2bFmz9B07drBr1y4l2Jk17O3t2blzJ3FxcRw4cICpU6fi7e1Nnz59lDzdunVj4MCB3Llzh8TERP7++2/mzp1rVk7Hjh05dOgQQ4cOpVKlSqjVagwGA82bN7ewhD558oSkpCS8vLwydJ2PHz/mu+++Y8SIEeTNmzfVfN9//z2PHz/m/PnzTJ48mc8++4wVK1YAsGLFCoKCgnjvvfcYOnQoBQoUwMbGhsmTJ3Pt2rUMyZEWSUlJjBo1ik8++YSSJUt
2025-02-08 13:32:07 +04:00
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2025-02-08 13:44:18 +04:00
"import pandas as pd\n",
"import numpy as np\n",
"import skfuzzy as fuzz\n",
2025-02-08 13:32:07 +04:00
"import matplotlib.pyplot as plt\n",
"\n",
2025-02-08 13:44:18 +04:00
"# Загрузка датасета\n",
"df = pd.read_csv(\"..//static//csv//ds_salaries.csv\")\n",
"\n",
"# Определение диапазонов значений для входных переменных\n",
"x_experience = np.linspace(0, 40, 100) # Опыт работы в годах\n",
"x_employment = np.array([0, 1]) # 0 - Частичная занятость, 1 - Полная занятость\n",
"x_salary = np.linspace(0, df['salary_in_usd'].max(), 100) # Зарплата в USD\n",
"\n",
"# Определение функций принадлежности для опыта работы\n",
"low_experience = fuzz.trimf(x_experience, [0, 0, 5])\n",
"medium_experience = fuzz.trimf(x_experience, [3, 10, 20])\n",
"high_experience = fuzz.trimf(x_experience, [15, 40, 40])\n",
"\n",
"# Определение функций принадлежности для типа занятости\n",
"part_time = fuzz.trimf(x_employment, [0, 0, 1]) # Частичная занятость\n",
"full_time = fuzz.trimf(x_employment, [0, 1, 1]) # Полная занятость\n",
"\n",
"# Определение функций принадлежности для зарплаты\n",
"low_salary = fuzz.trimf(x_salary, [0, 0, 50000])\n",
"medium_salary = fuzz.trimf(x_salary, [30000, 70000, 100000])\n",
"high_salary = fuzz.trimf(x_salary, [70000, 150000, df['salary_in_usd'].max()])\n",
"\n",
"# Определение нечетких правил\n",
"fuzzy_rules = [\n",
" (\"Низкий\", \"Частичная\", \"Низкая\"),\n",
" (\"Средний\", \"Частичная\", \"Средняя\"),\n",
" (\"Высокий\", \"Полная\", \"Высокая\"),\n",
" (\"Низкий\", \"Полная\", \"Средняя\"),\n",
" (\"Средний\", \"Полная\", \"Высокая\")\n",
"]\n",
"\n",
"# Вывод правил\n",
"print(\"Нечеткие правила:\")\n",
"for rule in fuzzy_rules:\n",
" print(f\"Если Опыт {rule[0]} И Тип занятости {rule[1]}, Тогда Зарплата {rule[2]}.\")\n",
"\n",
"# Визуализация функций принадлежности\n",
"fig, axs = plt.subplots(3, 1, figsize=(6, 12))\n",
"\n",
"# Опыт работы\n",
"axs[0].plot(x_experience, low_experience, label='Низкий', color='blue')\n",
"axs[0].plot(x_experience, medium_experience, label='Средний', color='green')\n",
"axs[0].plot(x_experience, high_experience, label='Высокий', color='red')\n",
"axs[0].set_title('Функции принадлежности для Опыта работы')\n",
"axs[0].set_xlabel('Опыт работы (годы)')\n",
"axs[0].set_ylabel('Степень принадлежности')\n",
"axs[0].legend()\n",
"axs[0].grid()\n",
"\n",
"# Тип занятости\n",
"axs[1].plot(x_employment, part_time, label='Частичная занятость', color='orange')\n",
"axs[1].plot(x_employment, full_time, label='Полная занятость', color='purple')\n",
"axs[1].set_title('Функции принадлежности для Типа занятости')\n",
"axs[1].set_xlabel('Тип занятости')\n",
"axs[1].set_ylabel('Степень принадлежности')\n",
"axs[1].legend()\n",
"axs[1].grid()\n",
"\n",
"# Зарплата\n",
"axs[2].plot(x_salary, low_salary, label='Низкая', color='blue')\n",
"axs[2].plot(x_salary, medium_salary, label='Средняя', color='green')\n",
"axs[2].plot(x_salary, high_salary, label='Высокая', color='red')\n",
"axs[2].set_title('Функции принадлежности для Зарплаты')\n",
"axs[2].set_xlabel('Зарплата в USD')\n",
"axs[2].set_ylabel('Степень принадлежности')\n",
"axs[2].legend()\n",
"axs[2].grid()\n",
"\n",
"plt.tight_layout()\n",
"plt.show()\n",
"\n",
"# Визуализация нечетких правил\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"\n",
"# Заполнение областей для каждого правила с новыми цветами\n",
"ax.fill_between(x_experience, low_experience, 0, color='lightblue', alpha=0.5, \n",
" label='Правило 1: Низкий опыт, Частичная занятость => Низкая Зарплата')\n",
"ax.fill_between(x_experience, medium_experience, medium_salary, color='lightgreen', alpha=0.5, \n",
" label='Правило 2: Средний опыт, Частичная занятость => Средняя Зарплата')\n",
"ax.fill_between(x_experience, high_experience, high_salary, color='lightpink', alpha=0.5, \n",
" label='Правило 3: Высокий опыт, Полная занятость => Высокая Зарплата')\n",
"ax.fill_between(x_experience, low_experience, medium_salary, color='lightcoral', alpha=0.5, \n",
" label='Правило 4: Низкий опыт, Полная занятость => Средняя Зарплата')\n",
"ax.fill_between(x_experience, medium_experience, high_salary, color='lightyellow', alpha=0.5, \n",
" label='Правило 5: Средний опыт, Полная занятость => Высокая Зарплата')\n",
"\n",
"ax.set_title('Визуализация нечетких правил')\n",
"ax.set_xlabel('Опыт работы (годы)')\n",
"ax.set_ylabel('Зарплата')\n",
"ax.legend()\n",
"ax.grid()\n",
"\n",
"plt.tight_layout()\n",
2025-02-08 13:32:07 +04:00
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2025-02-08 13:44:18 +04:00
"Оценка качества полученной нечеткой системы"
2025-02-08 13:32:07 +04:00
]
},
{
"cell_type": "code",
2025-02-15 09:27:14 +04:00
"execution_count": 6,
2025-02-08 13:32:07 +04:00
"metadata": {},
"outputs": [
{
2025-02-08 13:44:18 +04:00
"name": "stdout",
"output_type": "stream",
"text": [
"Опыт работы | Тип занятости | Эталонная зарплата | Оцененная зарплата\n",
" 2 | Частичная | 40000 | 16666.67\n",
" 5 | Частичная | 60000 | 66585.98\n",
" 10 | Полная | 80000 | 0.00\n",
" 15 | Полная | 120000 | 223308.38\n",
"\n",
"Средняя абсолютная ошибка (MAE): 53306.92\n",
"Среднеквадратичная ошибка (RMSE): 66446.29\n"
]
2025-02-08 13:32:07 +04:00
}
],
"source": [
2025-02-08 13:44:18 +04:00
"import pandas as pd\n",
"import numpy as np\n",
"import skfuzzy as fuzz\n",
2025-02-08 13:32:07 +04:00
"import matplotlib.pyplot as plt\n",
"\n",
2025-02-08 13:44:18 +04:00
"# Загрузка датасета\n",
"df = pd.read_csv(\"..//static//csv//ds_salaries.csv\")\n",
2025-02-08 13:32:07 +04:00
"\n",
2025-02-08 13:44:18 +04:00
"# Определение диапазонов значений для входных переменных\n",
"x_experience = np.linspace(0, 40, 100) # Опыт работы в годах\n",
"x_employment = np.array([0, 1]) # 0 - Частичная занятость, 1 - Полная занятость\n",
"x_salary = np.linspace(0, df['salary_in_usd'].max(), 100) # Зарплата в USD\n",
2025-02-08 13:32:07 +04:00
"\n",
2025-02-08 13:44:18 +04:00
"# Определение функций принадлежности для опыта работы\n",
"low_experience = fuzz.trimf(x_experience, [0, 0, 5])\n",
"medium_experience = fuzz.trimf(x_experience, [3, 10, 20])\n",
"high_experience = fuzz.trimf(x_experience, [15, 40, 40])\n",
2025-02-08 13:32:07 +04:00
"\n",
2025-02-08 13:44:18 +04:00
"# Определение функций принадлежности для типа занятости\n",
"part_time = fuzz.trimf(x_employment, [0, 0, 1]) # Частичная занятость\n",
"full_time = fuzz.trimf(x_employment, [0, 1, 1]) # Полная занятость\n",
"\n",
"# Определение функций принадлежности для зарплаты\n",
"low_salary = fuzz.trimf(x_salary, [0, 0, 50000])\n",
"medium_salary = fuzz.trimf(x_salary, [30000, 70000, 100000])\n",
"high_salary = fuzz.trimf(x_salary, [70000, 150000, df['salary_in_usd'].max()])\n",
"\n",
"# Создаем тестовые данные (опыт работы, тип занятости, эталонная зарплата)\n",
"test_data = [\n",
" (2, 0, 40000), # Низкий опыт, частичная занятость => Низкая зарплата\n",
" (5, 0, 60000), # Низкий опыт, частичная занятость => Средняя зарплата\n",
" (10, 1, 80000), # Средний опыт, полная занятость => Средняя зарплата\n",
" (15, 1, 120000), # Высокий опыт, полная занятость => Высокая зарплата\n",
"]\n",
"\n",
"# Функция для вычисления нечеткой оценки\n",
"def fuzzy_inference(experience, employment):\n",
" # Определение степени принадлежности\n",
" exp_low = fuzz.interp_membership(x_experience, low_experience, experience)\n",
" exp_medium = fuzz.interp_membership(x_experience, medium_experience, experience)\n",
" exp_high = fuzz.interp_membership(x_experience, high_experience, experience)\n",
"\n",
" emp_part_time = fuzz.interp_membership(x_employment, part_time, employment)\n",
" emp_full_time = fuzz.interp_membership(x_employment, full_time, employment)\n",
"\n",
" # Применяем правила\n",
" salary_low = np.fmin(exp_low, emp_part_time)\n",
" salary_medium = np.fmin(exp_medium, emp_part_time)\n",
" salary_high = np.fmin(exp_high, emp_full_time)\n",
"\n",
" # Вычисляем выходные значения при наличии ненулевых областей\n",
" salary0 = low_salary * salary_low\n",
" salary1 = medium_salary * salary_medium\n",
" salary2 = high_salary * salary_high\n",
"\n",
" # Проверка на ненулевые области перед дефуззацией\n",
" if salary_low > 0:\n",
" salary0 = fuzz.defuzz(x_salary, salary0, 'centroid')\n",
" else:\n",
" salary0 = 0 # Значение по умолчанию\n",
"\n",
" if salary_medium > 0:\n",
" salary1 = fuzz.defuzz(x_salary, salary1, 'centroid')\n",
" else:\n",
" salary1 = 0 # Значение по умолчанию\n",
"\n",
" if salary_high > 0:\n",
" salary2 = fuzz.defuzz(x_salary, salary2, 'centroid')\n",
" else:\n",
" salary2 = 0 # Значение по умолчанию\n",
"\n",
" return max(salary0, salary1, salary2)\n",
"\n",
"# Список для хранения результатов\n",
"results = []\n",
"\n",
"# Оценка системы на тестовом наборе данных\n",
"for experience, employment, actual_salary in test_data:\n",
" inferred_salary = fuzzy_inference(experience, employment)\n",
" results.append((experience, employment, actual_salary, inferred_salary))\n",
"\n",
"# Вывод результатов\n",
"print(\"Опыт работы | Тип занятости | Эталонная зарплата | Оцененная зарплата\")\n",
"for experience, employment, actual_salary, inferred_salary in results:\n",
" employment_type = \"Частичная\" if employment == 0 else \"Полная\"\n",
" print(f\"{experience:12} | {employment_type:13} | {actual_salary:20} | {inferred_salary:.2f}\")\n",
"\n",
"# Вычисление метрик качества\n",
"actual_salaries = [actual for _, _, actual, _ in results]\n",
"inferred_salaries = [inferred for _, _, _, inferred in results]\n",
"\n",
"mae = np.mean(np.abs(np.array(actual_salaries) - np.array(inferred_salaries)))\n",
"rmse = np.sqrt(np.mean((np.array(actual_salaries) - np.array(inferred_salaries)) ** 2))\n",
"\n",
"print(f\"\\nС р е дняя абсолютная ошибка (MAE): {mae:.2f}\")\n",
"print(f\"Среднеквадратичная ошибка (RMSE): {rmse:.2f}\")"
2025-02-08 13:32:07 +04:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2025-02-08 13:44:18 +04:00
"\n",
"Полученные результаты указывают на необходимость значительных улучшений в проектировании нечеткой системы. Высокие значения средней абсолютной ошибки (MAE) и среднеквадратичной ошибки (RMSE) свидетельствуют о том, что система неэффективно обрабатывает входные данные. Особенно заметна проблема нулевой оценки для случая с 10 годами опыта, что указывает на серьезные недостатки в определении правил и функций принадлежности. Это требует дальнейшей работы над уточнением правил и пересмотром функций принадлежности для достижения более точных оценок зарплат. "
2025-02-08 13:32:07 +04:00
]
}
],
"metadata": {
"kernelspec": {
"display_name": "aimenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}