387 lines
345 KiB
Plaintext
Raw Normal View History

2025-02-08 13:32:07 +04:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Начало лабораторной\n",
"\n",
"Выгрузка данных из csv файла в датафрейм"
]
},
{
"cell_type": "code",
2025-02-15 09:27:14 +04:00
"execution_count": 3,
2025-02-08 13:32:07 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['work_year', 'experience_level', 'employment_type', 'job_title',\n",
" 'salary', 'salary_currency', 'salary_in_usd', 'employee_residence',\n",
" 'remote_ratio', 'company_location', 'company_size'],\n",
" dtype='object')\n"
]
}
],
"source": [
"import pandas as pd\n",
"df = pd.read_csv(\"..//static//csv//ds_salaries.csv\")\n",
"print(df.columns)"
]
},
{
"cell_type": "code",
2025-02-15 09:27:14 +04:00
"execution_count": 4,
2025-02-08 13:32:07 +04:00
"metadata": {},
"outputs": [
{
"data": {
2025-02-15 09:27:14 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHHCAYAAABDUnkqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACdKElEQVR4nOzdd3QUZRfA4d+mN1IgJKEEQm9KV6qAShEEBBsiSBFRelUR6SBVqlQFERQQPhFQBBVEmoiCVJEuoZNQQkgjdef7Y9xNQuomu5ndzX3OyclkdnbmZmeT3LzlvjpFURSEEEIIIeyEg9YBCCGEEEKYkyQ3QgghhLArktwIIYQQwq5IciOEEEIIuyLJjRBCCCHsiiQ3QgghhLArktwIIYQQwq5IciOEEEIIuyLJjRBCCCHsiiQ3QgghCgWdTsfEiRO1DkMUAEluRJ716tULLy8vrcMQQtigVatWodPpANizZw86nY7Lly+nO2br1q00b96cgIAAPDw8KF++PK+++io//fSTBhHnz8SJEwkJCQHSf+/CMiS5ESa5d+8en3zyCa+88go//vgjDx8+5LnnnmP06NEcO3ZM6/CEEHZi9uzZdOzYEZ1Ox+jRo5k3bx4vvfQSFy5cYP369VqHJ6yck9YBCNuxfv16+vbtS0xMDCEhITg7O6PT6bh37x5z5sxhxowZ9OzZk88++wwXFxetwxVC2Kjk5GSmTJlCq1at2LFjR4bHb9++rUFU6cXGxuLp6al1GCIL0nIjcuXAgQN0796doKAgDhw4QGhoKC1btsTNzY3Dhw9z8+ZNunbtyurVqxk+fDgAiqIQEhLCCy+8kOF88fHx+Pj48M477wCpzdIbN27McKyXlxe9evUyfm1o0k3bhP3PP//g5+dH+/btSU5OTnfcX3/9le58d+/ezbTvPbN9H3/8MTqdjhYtWhj3GWLds2dPumOff/75XPXpG56f1Udm3+u+fft45513KFasGN7e3vTo0YP79++nO29ISEi65wJ888036HQ6Y3M4wOXLl9HpdMyePTtDbI899li67zUxMZHx48dTr149fHx88PT05KmnnmL37t2Zfm+GeB/9SHv9tLJ6DR7tnlizZg316tXD3d2dokWL8tprr3Ht2rV0cTZv3pzy5ctz584d4/6JEydmaP6fPXs2Tk5ObN++3bivRYsWPPbYYxnimz17dqbxLFmyhBo1auDq6krJkiUZOHAgkZGRGZ7/559/0q5dO/z8/PD09KRmzZosWLAAULt1s3sfpL1ubu9tdgz3PTevt+E1yezYVatWpTtu6dKlPPbYY3h4eKQ7LrOf5dy6e/cuUVFRNGnSJNPHAwICjNumvkfTunLlCgMGDKBKlSq4u7tTrFgxXnnllQyvh+F9vXfvXgYMGEBAQAClS5dm9+7d6HQ6Nm/enOHc69atQ6fTcfDgQdO+eWEW0nIjcmXGjBno9XrWr19PvXr1Mjzu7+/Pl19+yenTp/n000+ZMGECAQEBdO/enVmzZhEREUHRokWNx2/dupWoqCi6d++e79iuXbvGc889R9WqVfnf//6Hk5N53taRkZFMnz49V8fu27cv3R/L3BgyZAhPPPFEun1vvfVWpscOGjQIX19fJk6cyLlz51i6dClXrlwxJkqZSU5OZsyYMSbF9KioqChWrFhB165d6du3L9HR0Xz++ee0adOGQ4cOUbt27UyfN3nyZMqVKwfAnDlzMiRiaXXu3JkXX3wRgP379/PZZ5+le3zq1KmMGzeOV199lbfeeos7d+6wcOFCmjVrxrFjx/D19cXFxYVNmzbRsGFDOnfuzK5du3B1dc1wre+++45Ro0Yxf/582rVrl6fXZOLEiUyaNImWLVvSv39/4/04fPgwBw4cwNnZGYCdO3fSvn17SpQowdChQwkKCuLMmTP88MMPDB06lHfeeYeWLVsaz/vGG2+key0AihcvnmkM+bm3Xbt2NX7v27dv5+uvv87y2KpVqxqvc/fuXeM/LgYbNmxgwIABtGjRgsGDB+Pp6cmZM2eYNm1anmIzCAgIwN3dna1btzJ48OB0vzseldf3KMDhw4f5/fffee211yhdujSXL19m6dKltGjRgtOnT+Ph4ZHu+AEDBlC8eHHGjx9PbGwsLVq0IDg4mLVr19K5c+d0x65du5YKFSrQqFGjfL0WIo8UIXKhaNGiStmyZdPt69mzp+Lp6Zlu37hx4xRA2bp1q6IoinLu3DkFUJYuXZruuI4dOyohISGKXq9XFEVRdu/erQDKN998k+Hanp6eSs+ePY1ff/HFFwqghIaGKhEREUr16tWVKlWqKHfv3k33PMNxhw8fTrf/zp07CqBMmDAh3f5H973//vtKQECAUq9ePaV58+bG/YZYd+/ebdzXoEEDpW3btpme91F5+V7r1aunJCYmGvfPmjVLAZTvvvvOuK9s2bLpnrtkyRLF1dVVefrpp9Pdu9DQUAVQPv744wzXr1GjRrrvNTk5WUlISEh3zP3795XAwEDlzTffzPD8zz77TAGUv/76y7jv+eefz/DeURRFSUpKUgBl0qRJGb7f0NBQRVEU5fLly4qjo6MyderUdM/9+++/FScnpwz7z507p/j5+Sndu3dXFEVRJkyYoBh+zR07dkzx9PRUBg4cmCGW5s2bKzVq1Miw/+OPP04Xz+3btxUXFxeldevWSkpKivG4RYsWKYCycuVKRVHU161cuXJK2bJllfv376c7p+E9/6js3ju5vbfZOX/+vAIos2fPzvL7S6tJkybK008/bfza8L754osvjPu6du2q+Pr6Kg8fPjTuy+79bYrx48crgOLp6am0bdtWmTp1qnLkyJEMx5nyHn30NY6Li8twvoMHDyqA8uWXXxr3Gd6XTZs2VZKTk9MdP3r0aMXV1VWJjIw07rt9+7bi5OSU4+8CYTnSLSVyJTo6Ol1TcFYCAwMB9b8pgMqVK9OgQQPWrl1rPCYiIoIff/yRbt26ZWh1iI6O5u7du+k+shIfH0/Hjh25c+cOP/30E8WKFcvLt5apGzdusHDhQsaNG5fjjLBNmzZx+PBhZsyYYbbrP+rtt982tggA9O/fP0PXSlpxcXFMnjyZQYMGUaZMmSyPefS1TklJSXeMo6OjcfyUXq8nIiKC5ORk6tevz9GjRzOcMz4+HgA3N7ccv6fExESATFtYDDZt2oRer+fVV19NF2dQUBCVKlXK0PVQuXJlvv32W9auXctHH31k3H/r1i06dOhAo0aNjN1Cj0pJScnwesTFxaU75pdffiExMZFhw4bh4JD667Nv3754e3uzbds2AI4dO0ZoaCjDhg3D19c33TnyO0smN/c2M6bcG1DvT3b3BtSfVw8Pj1yf0xSTJk1i3bp11KlTh59//pkxY8ZQr1496taty5kzZ4zHmfoeTcvd3d24nZSUxL1796hYsSK+vr6ZPrdv3744Ojqm29ejRw8SEhLSdcNt2LCB5ORks7RMi7yR5EbkSsmSJfn3339zPO7ixYsAlCpVyrivR48eHDhwgCtXrgDqWIGkpCTeeOONDM9/8803KV68eLqP2NjYTK/Vu3dvfvvtN6Kjo43jbMxlwoQJlCxZ0jgmKCspKSl8+OGHdOvWjZo1a5o1hrQqVaqU7msvLy9KlCiR6VgJgLlz5xIfH8+HH36Y5TknTJiQ4bU+e/ZshuNWr15NzZo1cXNzo1ixYhQvXpxt27bx4MGDDMcaklEfH58cvyfDGJXskscLFy6gKAqVKlXKEOuZM2cyHVh6584dFEVh/PjxxmTjhRde4Pr169kORD179myGa0yYMCHdMYb3cJUqVdLtd3FxoXz58sbHDT8rmY3jya/c3NvMmHJvQL0/OSX2jRo14ubNm0ycOJGrV69y9+7dTN8XedW1a1f279/P/fv32bFjB6+//jrHjh2jQ4cOxmQNTHuPpvXw4UPGjx9PcHAwrq6u+Pv7U7x4cSIjIzN9rqGrNa2qVavyxBNPpPsHbu3atTRs2JCKFSvm47sX+SFjbkSutG/fnsWLF/P555/Tp0+fTI8JDw9n9erVFC9enIYNGxr3v/baawwfPpy1a9fy4YcfsmbNGur
2025-02-08 13:32:07 +04:00
"text/plain": [
2025-02-08 13:44:18 +04:00
"<Figure size 640x480 with 1 Axes>"
2025-02-08 13:32:07 +04:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
2025-02-08 13:44:18 +04:00
"import numpy as np\n",
"import skfuzzy as fuzz\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Загрузка датасета\n",
2025-02-08 13:32:07 +04:00
"df = pd.read_csv(\"..//static//csv//ds_salaries.csv\")\n",
"\n",
2025-02-08 13:44:18 +04:00
"# Определение диапазона значений для переменной \"salary_in_usd\"\n",
"x_salary = np.linspace(df['salary_in_usd'].min(), df['salary_in_usd'].max(), 100)\n",
"\n",
"# Определение функций принадлежности для термов\n",
"# Низкий\n",
"low = fuzz.trimf(x_salary, [0, 0, 50000])\n",
"# Средний\n",
"medium = fuzz.trimf(x_salary, [30000, 70000, 100000])\n",
"# Высокий\n",
2025-02-15 09:27:14 +04:00
"high = fuzz.trimf(x_salary, [70000, 250000, 400000])\n",
2025-02-08 13:32:07 +04:00
"\n",
2025-02-08 13:44:18 +04:00
"# Визуализация функций принадлежности\n",
"plt.figure()\n",
"plt.plot(x_salary, low, label='Низкий', color='blue')\n",
"plt.plot(x_salary, medium, label='Средний', color='green')\n",
"plt.plot(x_salary, high, label='Высокий', color='red')\n",
"plt.title('Функции принадлежности для \"Salary\"')\n",
"plt.xlabel('Зарплата в USD')\n",
"plt.ylabel('Степень принадлежности')\n",
"plt.legend()\n",
"plt.grid()\n",
2025-02-08 13:32:07 +04:00
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2025-02-08 13:44:18 +04:00
"База нечетких правил"
2025-02-08 13:32:07 +04:00
]
},
{
"cell_type": "code",
2025-02-15 09:27:14 +04:00
"execution_count": 5,
2025-02-08 13:32:07 +04:00
"metadata": {},
"outputs": [
2025-02-08 13:44:18 +04:00
{
"name": "stdout",
"output_type": "stream",
"text": [
"Нечеткие правила:\n",
"Если Опыт Низкий И Тип занятости Частичная, Тогда Зарплата Низкая.\n",
"Если Опыт Средний И Тип занятости Частичная, Тогда Зарплата Средняя.\n",
"Если Опыт Высокий И Тип занятости Полная, Тогда Зарплата Высокая.\n",
"Если Опыт Низкий И Тип занятости Полная, Тогда Зарплата Средняя.\n",
"Если Опыт Средний И Тип занятости Полная, Тогда Зарплата Высокая.\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAk4AAASmCAYAAADYniQgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3gU1dfA8e+m91BDAgYChC5ICSAdpIReFUGUIlJFmgX5KU1UEFFRgdCLooIignQCCKGIEKp0AqETOoT0svP+Me+uhCSQTXYzye75PE+e7M5OOXdnk5zMPXOvTlEUBSGEEEII8Ux2WgcghBBCCJFfSOIkhBBCCJFFkjgJIYQQQmSRJE5CCCGEEFkkiZMQQgghRBZJ4iSEEEIIkUWSOAkhhBBCZJEkTkIIIYQQWSSJkxBCCJFHPHjwgIiICFJSUrQORWRCEichhBBCI8nJyUybNo0XXngBZ2dnChYsSLly5di2bZvWoYlMSOJkg/r27YuHh4fWYQgh8rETJ07w+uuvU6JECZydnSlevDi9evXixIkTWoeWbyQmJtKiRQvGjRtH06ZN+e233wgNDWX79u3Uq1dP6/BEJhy0DkDkjrt37/LTTz+xa9cuwsLCiI+Pp3Xr1tSoUYPu3btTo0YNrUMUQuQTq1atomfPnhQqVIj+/ftTunRpLl68yMKFC1m5ciXLly+nS5cuWoeZ533xxRf8888/bN68maZNm2odjsginUzya/2WL1/OgAEDiImJISAggOTkZKKioqhRowZHjx4lOTmZPn36MG/ePJycnLQOVwiRh50/f55q1apRsmRJwsLCKFq0qPG1O3fu0KhRI65cucKxY8coU6aMhpHmbSkpKfj4+DBkyBA+++wzrcMRJpCuOiu3Z88eXn/9dXx9fdmzZw+RkZG0aNECFxcXDhw4wPXr1+nZsydLly5l1KhRACiKQkBAAJ06dUq3v4SEBLy9vRk0aBAAO3bsQKfTsXLlynTrenh40LdvX+PzJUuWoNPpuHjxonHZiRMnKFiwIO3btzcWQxrWCw8PT7O/O3fuoNPpmDhxYprlGS378ssv0el0af6LM8S6Y8eONOu2a9cuw308ybB9Zl8ZtTUsLIxBgwZRuHBhvLy86N27N/fv30+z34CAgDTbAvz222/odDoCAgKMyy5evIhOp2P69OnpYnv++efTtDUpKYnx48dTq1YtvL29cXd3p1GjRvz1118Zts0Q75Nfjx//cZm9B4+fW4Bly5ZRq1YtXF1dKVSoED169ODKlStp4mzSpAllypTh9u3bxuUTJ05Ep9Ol2df06dNxcHBgw4YNxmVNmzbl+eefTxff9OnTM4xn9uzZVKlSxdi19Pbbb/PgwYN02//zzz+0bduWggUL4u7uTrVq1fj2228Btav7aZ+Dx4+b1XP7NIbznpX32/CeZLTukiVL0qwXEhLC888/j5ubW5r1MvpZftyXX35JXFwc8+bNS5M0ARQpUoS5c+cSGxvLtGnTjMsN5zOzr8djy0opweM/r8/a9+M/87t27eKVV16hZMmSODs74+/vz6hRo4iPj3/q8cC0n+k1a9bQrl07ihcvjrOzM2XLlmXy5MmkpqYa1zlz5gz379/H09OTJk2a4Obmhre3N+3bt+f48ePpjn/48GHatGmDl5cXHh4eNG/enH379qWLLyvvs6nvsUhLuuqs3NSpU9Hr9SxfvpxatWqle71IkSL88MMPnDx5krlz5zJhwgR8fHx4/fXXmTZtGvfu3aNQoULG9deuXUt0dDSvv/56jmO7cuUKrVu3pmLFivz66684OJjn4/jgwQOmTJmSpXXDwsLS/CHOiuHDh1O7du00y956660M1x02bBgFChRg4sSJnDlzhpCQEC5dumRMwjKSkpLCRx99ZFJMT4qOjmbBggX07NmTAQMG8OjRIxYuXEhwcDD79++nevXqGW73ySefULp0aQC++uqrdH8QHtelSxe6du0KqH+Q5s2bl+b1zz77jHHjxtG9e3feeustbt++zffff0/jxo05fPgwBQoUwMnJiVWrVvHiiy/SpUsXtm3bhrOzc7pjrVmzhjFjxjBjxgzatm2brfdk4sSJTJo0iRYtWjBkyBDj+Thw4AB79uzB0dERgNDQUNq3b4+fnx8jRozA19eXU6dOsW7dOkaMGMGgQYNo0aKFcb9vvPFGmvcCSJdQGOTk3Pbs2dPY9g0bNvDLL79kum7FihWNx7lz547xnyKDFStWMHToUJo2bco777yDu7s7p06d4vPPP39mHGvXriUgIIBGjRpl+Hrjxo0JCAhg/fr16V4LCQlJ8wc7MjKS8ePHP/OYT9O1a1cCAwONz0eNGkWlSpUYOHCgcVmlSpUANWmNi4tjyJAhFC5cmP379/P9999z9epVfvvttywdLys/00uWLMHDw4PRo0fj4eHB9u3bGT9+PNHR0Xz55ZeAWj4BMHbsWMqVK8ekSZNISEhg1qxZNGjQgAMHDlC+fHlA/QezUaNGeHl58cEHH+Do6MjcuXNp2rQpO3fupG7dujRu3Jgff/zRGKfhKtbjn7f69eub/P6KDCjCqhUqVEgpVapUmmV9+vRR3N3d0ywbN26cAihr165VFEVRzpw5owBKSEhImvU6duyoBAQEKHq9XlEURfnrr78UQPntt9/SHdvd3V3p06eP8fnixYsVQImMjFTu3bunVK5cWalQoYJy586dNNsZ1jtw4ECa5bdv31YAZcKECWmWP7nsgw8+UHx8fJRatWopTZo0MS43xPrXX38Zl9WtW1dp06ZNhvt9UnbaWqtWLSUpKcm4fNq0aQqgrFmzxrisVKlSabadPXu24uzsrDRr1izNuYuMjFQA5csvv0x3/CpVqqRpa0pKipKYmJhmnfv37yvFihVT3nzzzXTbz5s3TwGU8PBw47J27dql++woiqIkJycrgDJp0qR07Y2MjFQURVEuXryo2NvbK5999lmabf/991/FwcEh3fIzZ84oBQsWVF5//XVFURRlwoQJiuHX0+HDhxV3d3fl7bffThdLkyZNlCpVqqRb/uWXX6aJ59atW4qTk5PSqlUrJTU11bjezJkzFUBZtGiRoijq+1a6dGmlVKlSyv3799Ps0/CZf9LTPjtZPbdPc/bsWQVQpk+fnmn7HtegQQOlWbNmxueGz83ixYuNy3r27KkUKFBAiY+PNy572ufb4MGDBwqgdOrU6akxd+zYUQGU6OhoRVH+O5+3b99Os96BAwfSxZbR76cnmfKePy4uLi7dsilTpig6nU65dOnSU49pys90RscZNGiQ4ubmpiQkJCiK8t/7XaRIkTS/A8+ePas4Ojoq3bp1My7r3Lmz4uTkpJw/f9647Pr164qnp6fSuHHjDONt0qRJmt8Jj8vpe2zrpKvOyj169AgfH59nrlesWDFAvVIBUL58eerWrctPP/1kXOfevXts3LiRXr16pbta8ujRI+7cuZPmKzMJCQl07NiR27dvs2nTJgoXLpydpmXo2rVrfP/994wbN+6Zl6JXrVrFgQMHmDp1qtmO/6SBAwcar2QADBkyJF130+Pi4uL45JNPGDZsGCVLlsx0nSff68e7AADs7e2N9Wp6vZ579+6RkpJCUFAQhw4dSrfPhIQEAFxcXJ7ZpqSkJIAMrwwZrFq1Cr1eT/fu3dPE6evrS7ly5dJ1GZYvX57ff/+dn376iU8//dS4/MaNG3To0IF69eoZu8qelJqamu79iIuLS7PO1q1bSUpKYuTIkdjZ/fdrb8CAAXh5eRmvjhw+fJjIyEhGjhxJgQIF0uwjsyuEWZWVc5sRU84NqOfnaecG1J9XNze3LO/z8e0APD09n7qe4XXD75PsMJxLQ/vNwdXV1fg4NjaWO3fuUL9+fRRF4fDhw1naR1Z+ph8/juF3Y6NGjYiLi+P06dNp9tevX780vwPLlStHx44d2bRpE6mpqaSmprJlyxY6d+6cpmbMz8+P1157jd27d2f7fbbEe2wLJHGycsWLF+f8+fPPXC8iIgKAEiVKGJf17t2bPXv2cOnSJUC9zJ2cnMwbb7yRbvs333yTokWLpvmKjY3N8Fj9+vVj9+7
"text/plain": [
"<Figure size 600x1200 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
2025-02-08 13:32:07 +04:00
{
"data": {
2025-02-08 13:44:18 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA90AAAJOCAYAAACqS2TfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3yN1xvAv+/NTm6GUYmREhLE3luNIlaoUjVaQqlWrQpCa1Ojtau0KDGL1iilZs3Qmtfeo6gdstcd7++P676/3NybhUjC+X4+L7nvOe95n/ec847nPM95jiTLsoxAIBAIBAKBQCAQCASCl44quwUQCAQCgUAgEAgEAoHgdUUo3QKBQCAQCAQCgUAgEGQRQukWCAQCgUAgEAgEAoEgixBKt0AgEAgEAoFAIBAIBFmEULoFAoFAIBAIBAKBQCDIIoTSLRAIBAKBQCAQCAQCQRYhlG6BQCAQCAQCgUAgEAiyCKF0CwQCgUAgEAgEAoFAkEUIpVsgEAgEAoFAIBAIBIIsQijdAoFAIBAIBAKBQCAQZBFC6RYIBII3kNDQUCRJMtsKFChAo0aN+PPPP7NbPIFAIBAIBILXBtvsFkAgEAgE2cf48ePx8fFBlmUePHhAaGgoLVu2ZPPmzbRu3Tq7xRMIBAKBQCDI9QilWyAQCN5gWrRoQbVq1ZTfn3zyCZ6envzyyy9C6RYIBAKBQCB4CQj3coFAIBAoeHh44OTkhK3t/8dkb968iSRJhIaGmuX94osvkCSJoKAgZd/69eupUaMGefPmxcnJidKlSzN16lRkWQZgz549SJLEhg0bLM69atUqJEni8OHDAJw+fZqgoCCKFy+Oo6MjXl5e9OzZk/DwcKuyFytWzMJlXpIk9u7da5YnubwAv/76K5IkUaxYMWXfpUuXaNy4MV5eXjg4OODt7c1nn33GkydPlDxJSUmMHj2aqlWr4u7ujouLC/Xr12fPnj1m5Zvqb9q0aRYylytXjoYNG5rta9iwocW+o0ePKteTnJiYGIKDgylevDh2dnZm1/348WOr9ZT8POXKlbPYP23aNCRJ4ubNm2b7//zzT+rXr4+Liwuurq60atWKc+fOmeWxVr979+61aAeAf/75h+bNm+Pu7o6zszMNGjQgLCxMSR87dqzV9rTWttbq7JtvvkGlUrFq1Sqza85I3S5ZsgRJkli8eLFZ3kmTJiFJElu3brWot5T1kJbcyZEkiX79+rFy5UpKlSqFo6MjVatWZf/+/Wb5/v33X/r27UupUqVwcnIiX758fPDBBxbtlHLqiLOzM+XLl2fRokVm+YKCgsz6PMDt27dxcnKy2v6mdky5pSxDIBAIBJYIS7dAIBC8wURGRvL48WNkWebhw4d8//33xMTE8NFHH6V53NWrV1m4cKHF/qioKGrWrEn37t2xs7Nj27ZtDB8+HFtbW4KDg2nYsCHe3t6sXLmSdu3amR27cuVKSpQoQe3atQHYuXMn169fp0ePHnh5eXHu3DkWLFjAuXPn+Pvvvy2UF4D69evz6aefAnDhwgUmTZqU5nXodDq+/vpri/2xsbEUKVKEwMBA3NzcOHv2LD/88AP//fcfmzdvVq510aJFdO7cmd69exMdHc3PP/9MQEAAR44coVKlSmmeOzOEhIRY3T906FB+/PFHPvnkE+rWrYudnR3r16+3OqjxIixfvpzu3bsTEBDA1KlTiYuLY/78+dSrV4+TJ09mWvH666+/aNGiBVWrVmXMmDGoVCqWLFlC48aNOXDgADVq1OD999/H19dXOebLL7/E399faV8Af39/q+UvWbKEkSNHMn36dLp06ZKmLNbqtkePHqxfv57BgwfTtGlTvL29OXPmDOPGjeOTTz6hZcuW6V5jpUqVCA4ONtu3bNkydu7caZF33759rFmzhgEDBuDg4MC8efNo3rw5R44cUQZGjh49yqFDh+jUqRNFihTh5s2bzJ8/n4YNG3L+/HmcnZ3Nypw5cyb58+cnKiqKxYsX07t3b4oVK0aTJk1SlXn06NEkJCSkeV1fffWVUu8LFizg1q1b6daFQCAQvPHIAoFAIHjjWLJkiQxYbA4ODnJoaKhZ3hs3bsiAvGTJEmVfx44d5XLlysne3t5y9+7d0zxXmTJl5NatWyu/R4wYITs4OMgRERHKvocPH8q2trbymDFjlH1xcXEWZf3yyy8yIO/fv98irXDhwnKPHj2U33v27JEBec+ePcq+okWLmsk7b9482cHBQW7UqJFctGjRNK+jb9++slqtVn7rdDo5MTHRLM/Tp09lT09PuWfPnso+U/199913FmWWLVtWbtCggdm+Bg0amO3bunWrDMjNmzeXU762CxYsKAcEBJjtGzNmjAzIjx49SvN6GjRoIJctW9Zi/3fffScD8o0bN2RZluXo6GjZw8ND7t27t1m++/fvy+7u7mb7fXx85G7dupnlS9kOBoNB9vPzkwMCAmSDwaDki4uLk318fOSmTZtalTdl26W8FlOdbdmyRba1tZWDg4PTzCfLadftvXv35Lx588pNmzaVExMT5cqVK8tvv/22HBkZaVWGlLK2atXKYv8XX3xhcR7TvXfs2DFl37///is7OjrK7dq1U/ZZux8OHz4sA/KyZcuUfaZ729R+sizLly9flgH522+/VfZ1797drM+fPXtWVqlUcosWLSyOl2VZ3rlzpwzI+/btS7UMgUAgEFhHuJcLBALBG8wPP/zAzp072blzJytWrKBRo0b06tWL9evXp3rM8ePH+fXXX5k8eTIqlfXXyOPHj7lz5w6hoaFcvXqVd955R0nr1q0biYmJ/Pbbb8q+NWvWoNPpzCzsTk5Oyt8JCQk8fvyYWrVqAXDixAmLcyYlJeHg4JDha4+Li2P8+PH069ePt99+22qeyMhIHjx4wO7du9myZYvZddjY2GBvbw+AwWDgyZMn6HQ6qlWrZlW+50GWZUaMGEH79u2pWbOmRXp0dDT58uV77vL1ej2PHz822+Li4szy7Ny5k4iICDp37myWz8bGhpo1a5q50xcoUIA7d+6keU6NRsOVK1fo0qUL4eHhSnmxsbG8++677N+/H4PB8FzXc+TIETp27Ej79u357rvv0sybXt16eXkp90f9+vXRaDQsXrwYNze355ItLWrXrk3VqlWV32+//TZt27Zl+/bt6PV6wPx+0Gq1hIeH4+vri4eHh9X+9vTpUx4/fsz169eZOXMmNjY2NGjQIFUZRowYQZUqVfjggw+spiclJQFk6h4TCAQCgRHhXi4QCARvMDVq1DALpNa5c2cqV65Mv379aN26taJUJmf48OHUr1+f1q1b069fP4v0hIQE3nrrLcA4X/Wrr75i6NChSnrp0qWpXr06K1eu5JNPPgGMruW1atUycyd+8uQJ48aNY/Xq1Tx8+NDsHJGRkRbnjYyMRK1WZ/jaZ8yYQUJCAl999RWDBw+2micgIIB//vkHgObNm7NmzRqz9KVLlzJ9+nQuXryIVqtV9vv4+GRYjrRYuXIl586dY+3atWZzk03Url2bDRs28Ntvvynu5SmV5rS4ePGi0lapceXKFQAaN25sNT25ElqnTh3mzJnD6tWrady4MSqVyqKtTOV179491XNGRkaSJ0+eDF2Dif/++49WrVoRGxtLeHi41ekHyUmvbgE6derEihUr2LJlC59++invvvtupmTKKH5+fhb7SpYsSVxcHI8ePcLLy4v4+HgmT57MkiVL+O+//5Q4CWD9fqhSpYryt4ODA3PnzqVGjRpWz3/w4EE2b97M7t27U3UXj4iIAMjUPSYQCAQCI0LpFggEAoGCSqWiUaNGzJ49mytXrlC2bFmz9B07drBr1y4l2Jk17O3t2blzJ3FxcRw4cICpU6fi7e1Nnz59lDzdunVj4MCB3Llzh8TERP7++2/mzp1rVk7Hjh05dOgQQ4cOpVKlSqjVagwGA82bN7ewhD558oSkpCS8vLwydJ2PHz/mu+++Y8SIEeTNmzfVfN9//z2PHz/m/PnzTJ48mc8++4wVK1YAsGLFCoKCgnjvvfcYOnQoBQoUwMbGhsmTJ3Pt2rUMyZEWSUlJjBo1ik8++YSSJUt
2025-02-08 13:32:07 +04:00
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2025-02-08 13:44:18 +04:00
"import pandas as pd\n",
"import numpy as np\n",
"import skfuzzy as fuzz\n",
2025-02-08 13:32:07 +04:00
"import matplotlib.pyplot as plt\n",
"\n",
2025-02-08 13:44:18 +04:00
"# Загрузка датасета\n",
"df = pd.read_csv(\"..//static//csv//ds_salaries.csv\")\n",
"\n",
"# Определение диапазонов значений для входных переменных\n",
"x_experience = np.linspace(0, 40, 100) # Опыт работы в годах\n",
"x_employment = np.array([0, 1]) # 0 - Частичная занятость, 1 - Полная занятость\n",
"x_salary = np.linspace(0, df['salary_in_usd'].max(), 100) # Зарплата в USD\n",
"\n",
"# Определение функций принадлежности для опыта работы\n",
"low_experience = fuzz.trimf(x_experience, [0, 0, 5])\n",
"medium_experience = fuzz.trimf(x_experience, [3, 10, 20])\n",
"high_experience = fuzz.trimf(x_experience, [15, 40, 40])\n",
"\n",
"# Определение функций принадлежности для типа занятости\n",
"part_time = fuzz.trimf(x_employment, [0, 0, 1]) # Частичная занятость\n",
"full_time = fuzz.trimf(x_employment, [0, 1, 1]) # Полная занятость\n",
"\n",
"# Определение функций принадлежности для зарплаты\n",
"low_salary = fuzz.trimf(x_salary, [0, 0, 50000])\n",
"medium_salary = fuzz.trimf(x_salary, [30000, 70000, 100000])\n",
"high_salary = fuzz.trimf(x_salary, [70000, 150000, df['salary_in_usd'].max()])\n",
"\n",
"# Определение нечетких правил\n",
"fuzzy_rules = [\n",
" (\"Низкий\", \"Частичная\", \"Низкая\"),\n",
" (\"Средний\", \"Частичная\", \"Средняя\"),\n",
" (\"Высокий\", \"Полная\", \"Высокая\"),\n",
" (\"Низкий\", \"Полная\", \"Средняя\"),\n",
" (\"Средний\", \"Полная\", \"Высокая\")\n",
"]\n",
"\n",
"# Вывод правил\n",
"print(\"Нечеткие правила:\")\n",
"for rule in fuzzy_rules:\n",
" print(f\"Если Опыт {rule[0]} И Тип занятости {rule[1]}, Тогда Зарплата {rule[2]}.\")\n",
"\n",
"# Визуализация функций принадлежности\n",
"fig, axs = plt.subplots(3, 1, figsize=(6, 12))\n",
"\n",
"# Опыт работы\n",
"axs[0].plot(x_experience, low_experience, label='Низкий', color='blue')\n",
"axs[0].plot(x_experience, medium_experience, label='Средний', color='green')\n",
"axs[0].plot(x_experience, high_experience, label='Высокий', color='red')\n",
"axs[0].set_title('Функции принадлежности для Опыта работы')\n",
"axs[0].set_xlabel('Опыт работы (годы)')\n",
"axs[0].set_ylabel('Степень принадлежности')\n",
"axs[0].legend()\n",
"axs[0].grid()\n",
"\n",
"# Тип занятости\n",
"axs[1].plot(x_employment, part_time, label='Частичная занятость', color='orange')\n",
"axs[1].plot(x_employment, full_time, label='Полная занятость', color='purple')\n",
"axs[1].set_title('Функции принадлежности для Типа занятости')\n",
"axs[1].set_xlabel('Тип занятости')\n",
"axs[1].set_ylabel('Степень принадлежности')\n",
"axs[1].legend()\n",
"axs[1].grid()\n",
"\n",
"# Зарплата\n",
"axs[2].plot(x_salary, low_salary, label='Низкая', color='blue')\n",
"axs[2].plot(x_salary, medium_salary, label='Средняя', color='green')\n",
"axs[2].plot(x_salary, high_salary, label='Высокая', color='red')\n",
"axs[2].set_title('Функции принадлежности для Зарплаты')\n",
"axs[2].set_xlabel('Зарплата в USD')\n",
"axs[2].set_ylabel('Степень принадлежности')\n",
"axs[2].legend()\n",
"axs[2].grid()\n",
"\n",
"plt.tight_layout()\n",
"plt.show()\n",
"\n",
"# Визуализация нечетких правил\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"\n",
"# Заполнение областей для каждого правила с новыми цветами\n",
"ax.fill_between(x_experience, low_experience, 0, color='lightblue', alpha=0.5, \n",
" label='Правило 1: Низкий опыт, Частичная занятость => Низкая Зарплата')\n",
"ax.fill_between(x_experience, medium_experience, medium_salary, color='lightgreen', alpha=0.5, \n",
" label='Правило 2: Средний опыт, Частичная занятость => Средняя Зарплата')\n",
"ax.fill_between(x_experience, high_experience, high_salary, color='lightpink', alpha=0.5, \n",
" label='Правило 3: Высокий опыт, Полная занятость => Высокая Зарплата')\n",
"ax.fill_between(x_experience, low_experience, medium_salary, color='lightcoral', alpha=0.5, \n",
" label='Правило 4: Низкий опыт, Полная занятость => Средняя Зарплата')\n",
"ax.fill_between(x_experience, medium_experience, high_salary, color='lightyellow', alpha=0.5, \n",
" label='Правило 5: Средний опыт, Полная занятость => Высокая Зарплата')\n",
"\n",
"ax.set_title('Визуализация нечетких правил')\n",
"ax.set_xlabel('Опыт работы (годы)')\n",
"ax.set_ylabel('Зарплата')\n",
"ax.legend()\n",
"ax.grid()\n",
"\n",
"plt.tight_layout()\n",
2025-02-08 13:32:07 +04:00
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2025-02-08 13:44:18 +04:00
"Оценка качества полученной нечеткой системы"
2025-02-08 13:32:07 +04:00
]
},
{
"cell_type": "code",
2025-02-15 09:27:14 +04:00
"execution_count": 6,
2025-02-08 13:32:07 +04:00
"metadata": {},
"outputs": [
{
2025-02-08 13:44:18 +04:00
"name": "stdout",
"output_type": "stream",
"text": [
"Опыт работы | Тип занятости | Эталонная зарплата | Оцененная зарплата\n",
" 2 | Частичная | 40000 | 16666.67\n",
" 5 | Частичная | 60000 | 66585.98\n",
" 10 | Полная | 80000 | 0.00\n",
" 15 | Полная | 120000 | 223308.38\n",
"\n",
"Средняя абсолютная ошибка (MAE): 53306.92\n",
"Среднеквадратичная ошибка (RMSE): 66446.29\n"
]
2025-02-08 13:32:07 +04:00
}
],
"source": [
2025-02-08 13:44:18 +04:00
"import pandas as pd\n",
"import numpy as np\n",
"import skfuzzy as fuzz\n",
2025-02-08 13:32:07 +04:00
"import matplotlib.pyplot as plt\n",
"\n",
2025-02-08 13:44:18 +04:00
"# Загрузка датасета\n",
"df = pd.read_csv(\"..//static//csv//ds_salaries.csv\")\n",
2025-02-08 13:32:07 +04:00
"\n",
2025-02-08 13:44:18 +04:00
"# Определение диапазонов значений для входных переменных\n",
"x_experience = np.linspace(0, 40, 100) # Опыт работы в годах\n",
"x_employment = np.array([0, 1]) # 0 - Частичная занятость, 1 - Полная занятость\n",
"x_salary = np.linspace(0, df['salary_in_usd'].max(), 100) # Зарплата в USD\n",
2025-02-08 13:32:07 +04:00
"\n",
2025-02-08 13:44:18 +04:00
"# Определение функций принадлежности для опыта работы\n",
"low_experience = fuzz.trimf(x_experience, [0, 0, 5])\n",
"medium_experience = fuzz.trimf(x_experience, [3, 10, 20])\n",
"high_experience = fuzz.trimf(x_experience, [15, 40, 40])\n",
2025-02-08 13:32:07 +04:00
"\n",
2025-02-08 13:44:18 +04:00
"# Определение функций принадлежности для типа занятости\n",
"part_time = fuzz.trimf(x_employment, [0, 0, 1]) # Частичная занятость\n",
"full_time = fuzz.trimf(x_employment, [0, 1, 1]) # Полная занятость\n",
"\n",
"# Определение функций принадлежности для зарплаты\n",
"low_salary = fuzz.trimf(x_salary, [0, 0, 50000])\n",
"medium_salary = fuzz.trimf(x_salary, [30000, 70000, 100000])\n",
"high_salary = fuzz.trimf(x_salary, [70000, 150000, df['salary_in_usd'].max()])\n",
"\n",
"# Создаем тестовые данные (опыт работы, тип занятости, эталонная зарплата)\n",
"test_data = [\n",
" (2, 0, 40000), # Низкий опыт, частичная занятость => Низкая зарплата\n",
" (5, 0, 60000), # Низкий опыт, частичная занятость => Средняя зарплата\n",
" (10, 1, 80000), # Средний опыт, полная занятость => Средняя зарплата\n",
" (15, 1, 120000), # Высокий опыт, полная занятость => Высокая зарплата\n",
"]\n",
"\n",
"# Функция для вычисления нечеткой оценки\n",
"def fuzzy_inference(experience, employment):\n",
" # Определение степени принадлежности\n",
" exp_low = fuzz.interp_membership(x_experience, low_experience, experience)\n",
" exp_medium = fuzz.interp_membership(x_experience, medium_experience, experience)\n",
" exp_high = fuzz.interp_membership(x_experience, high_experience, experience)\n",
"\n",
" emp_part_time = fuzz.interp_membership(x_employment, part_time, employment)\n",
" emp_full_time = fuzz.interp_membership(x_employment, full_time, employment)\n",
"\n",
" # Применяем правила\n",
" salary_low = np.fmin(exp_low, emp_part_time)\n",
" salary_medium = np.fmin(exp_medium, emp_part_time)\n",
" salary_high = np.fmin(exp_high, emp_full_time)\n",
"\n",
" # Вычисляем выходные значения при наличии ненулевых областей\n",
" salary0 = low_salary * salary_low\n",
" salary1 = medium_salary * salary_medium\n",
" salary2 = high_salary * salary_high\n",
"\n",
" # Проверка на ненулевые области перед дефуззацией\n",
" if salary_low > 0:\n",
" salary0 = fuzz.defuzz(x_salary, salary0, 'centroid')\n",
" else:\n",
" salary0 = 0 # Значение по умолчанию\n",
"\n",
" if salary_medium > 0:\n",
" salary1 = fuzz.defuzz(x_salary, salary1, 'centroid')\n",
" else:\n",
" salary1 = 0 # Значение по умолчанию\n",
"\n",
" if salary_high > 0:\n",
" salary2 = fuzz.defuzz(x_salary, salary2, 'centroid')\n",
" else:\n",
" salary2 = 0 # Значение по умолчанию\n",
"\n",
" return max(salary0, salary1, salary2)\n",
"\n",
"# Список для хранения результатов\n",
"results = []\n",
"\n",
"# Оценка системы на тестовом наборе данных\n",
"for experience, employment, actual_salary in test_data:\n",
" inferred_salary = fuzzy_inference(experience, employment)\n",
" results.append((experience, employment, actual_salary, inferred_salary))\n",
"\n",
"# Вывод результатов\n",
"print(\"Опыт работы | Тип занятости | Эталонная зарплата | Оцененная зарплата\")\n",
"for experience, employment, actual_salary, inferred_salary in results:\n",
" employment_type = \"Частичная\" if employment == 0 else \"Полная\"\n",
" print(f\"{experience:12} | {employment_type:13} | {actual_salary:20} | {inferred_salary:.2f}\")\n",
"\n",
"# Вычисление метрик качества\n",
"actual_salaries = [actual for _, _, actual, _ in results]\n",
"inferred_salaries = [inferred for _, _, _, inferred in results]\n",
"\n",
"mae = np.mean(np.abs(np.array(actual_salaries) - np.array(inferred_salaries)))\n",
"rmse = np.sqrt(np.mean((np.array(actual_salaries) - np.array(inferred_salaries)) ** 2))\n",
"\n",
"print(f\"\\nСредняя абсолютная ошибка (MAE): {mae:.2f}\")\n",
"print(f\"Среднеквадратичная ошибка (RMSE): {rmse:.2f}\")"
2025-02-08 13:32:07 +04:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2025-02-08 13:44:18 +04:00
"\n",
"Полученные результаты указывают на необходимость значительных улучшений в проектировании нечеткой системы. Высокие значения средней абсолютной ошибки (MAE) и среднеквадратичной ошибки (RMSE) свидетельствуют о том, что система неэффективно обрабатывает входные данные. Особенно заметна проблема нулевой оценки для случая с 10 годами опыта, что указывает на серьезные недостатки в определении правил и функций принадлежности. Это требует дальнейшей работы над уточнением правил и пересмотром функций принадлежности для достижения более точных оценок зарплат. "
2025-02-08 13:32:07 +04:00
]
}
],
"metadata": {
"kernelspec": {
"display_name": "aimenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}