diff --git a/lab_4/lab4.ipynb b/lab_4/lab4.ipynb index 9b7c97e..cc304b7 100644 --- a/lab_4/lab4.ipynb +++ b/lab_4/lab4.ipynb @@ -4,12 +4,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Начало лабораторной работы" + "# Вариант задания: Прогнозирование выручки в магазинах\n", + "### Бизнес-цели:\n", + "Цель: Разработать модель машинного обучения, которая позволит прогнозировать продажи магазина в зависимости от его ассортимента.\n", + "\n", + "### Цели технического проекта:\n", + "\n", + "Сбор и подготовка данных:\n", + "Очистка данных от пропусков, выбросов и дубликатов.\n", + "Преобразование категориальных переменных в числовые.\n", + "Разделение данных на обучающую и тестовую выборки.\n" ] }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 62, "metadata": {}, "outputs": [ { @@ -23,8 +32,11 @@ } ], "source": [ - "import pandas as pd\n", - "df = pd.read_csv(\".//static//csv//Stores.csv\")\n", + "import pandas as pn\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib\n", + "import matplotlib.ticker as ticker\n", + "df = pn.read_csv(\".//static//csv//Stores.csv\")\n", "print(df.columns)" ] }, @@ -32,241 +44,539 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Бизнес-цели" + "Разделим на 3 выборки\n" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Размер обучающей выборки: 572\n", + "Размер контрольной выборки: 144\n", + "Размер тестовой выборки: 180\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "# Разделение данных на обучающую и тестовую выборки (80% - обучение, 20% - тест)\n", + "train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)\n", + "\n", + "# Разделение обучающей выборки на обучающую и контрольную (80% - обучение, 20% - контроль)\n", + "train_data, val_data = train_test_split(train_data, test_size=0.2, 
random_state=42)\n", + "\n", + "print(\"Размер обучающей выборки:\", len(train_data))\n", + "print(\"Размер контрольной выборки:\", len(val_data))\n", + "print(\"Размер тестовой выборки:\", len(test_data))" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjIAAAHHCAYAAACle7JuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB5Z0lEQVR4nO3dd3gU1f4G8HdbdlM3vZFCSCEh9NACIgihCVhAsMAV1IvoBRW44r2oiB27iKJcvQh4BVFUUBRBCEVK6DUQQktISEjZ9LpJds/vj5D9sSRACBtmN3k/z7PPw86cnXmzs5t8OXPOjEwIIUBERERkg+RSByAiIiJqKhYyREREZLNYyBAREZHNYiFDRERENouFDBEREdksFjJERERks1jIEBERkc1iIUNEREQ2i4UMEVETFBYW4uzZs6ipqZE6ClmQEAL5+fk4c+aM1FGokVjIEBE1QnV1Nd577z106dIFarUabm5uCA8PR3x8vNTRbEJiYiLWrl1ren7kyBH8/vvv0gW6QklJCV5++WW0b98ednZ28PDwQEREBJKTk6WORo2glDoA3bply5bhscceMz1Xq9UICgrC0KFDMXfuXPj4+EiYjsj26fV6DB06FHv27MFTTz2FN954Aw4ODlAoFIiJiZE6nk0oKSnB1KlT4evrCw8PDzz33HMYMWIERo4cKWmuvLw8DBgwAGlpaXjmmWfQr18/2NnZQaVSoW3btpJmo8ZhIdOCvP766wgJCUFlZSV27tyJL774AuvXr0diYiIcHBykjkdks959913s3bsXGzduxMCBA6WOY5NiY2NNDwCIiIjAlClTJE4FzJ49G5cuXUJCQgKio6OljkNNwEKmBRkxYgR69OgBAPj73/8ODw8PfPTRR/jll1/w8MMPS5yOyDbV1NRgwYIF+Oc//8ki5hatXbsWJ0+eREVFBTp16gQ7OztJ8+Tk5GD58uVYvHgxixgbxjEyLdigQYMAACkpKQCA/Px8PP/88+jUqROcnJzg4uKCESNG4OjRo/VeW1lZiVdffRURERHQaDTw8/PDmDFjcO7cOQBAamoqZDLZNR9X/sLftm0bZDIZvv/+e7z44ovw9fWFo6Mj7rnnHqSnp9fb9969ezF8+HBotVo4ODhgwIAB2LVrV4M/48CBAxvc/6uvvlqv7bfffouYmBjY29vD3d0dDz30UIP7v97PdiWj0YgFCxYgOjoaGo0GPj4+mDp1KgoKCszatW3bFqNGjaq3n+nTp9fbZkPZ33///XrvKVB7umPevHkICwuDWq1GYGAgXnjhBej1+gbfqysNHDgQHTt2rLf8gw8+gEwmQ2pqqtnywsJCzJgxA4GBgVCr1QgLC8O7774Lo9FoalP3vn3wwQf1ttuxY8cGPxM//vjjNTNOnjy5UV37bdu2NR0fuVwOX19fPPjgg0hLS7vhawHg888/R3R0NNRqNfz9/TFt2jQUFhaa1icnJ6OgoADOzs4YMGAAHBwcoNVqMWrUKCQmJprabd26FTKZDGvWrKm3j5UrV0ImkyEhIcGUefLkyWZt6t6Tbdu2mZbt2LED48aNQ1BQkOkYz5w5ExUVFWavffXVV+t9llasWIGuXbtCo9HAw8MDDz/8cL33ZPLkyXBycjJb9uOPP9bLAQBOTk71MgON+14NHDjQdPw7dOi
AmJgYHD16tMHvVUOu/p57enpi5MiRZu8/UPv9mT59+jW3s2zZMrPP9/79+2E0GlFVVYUePXpc970CgC1btqB///5wdHSEq6sr7r33XiQlJZm1qTsWp06dwvjx4+Hi4mI6lVZZWVkv75Xf95qaGtx9991wd3fHyZMnzdo29vdXa8QemRasrujw8PAAAJw/fx5r167FuHHjEBISguzsbPznP//BgAEDcPLkSfj7+wMADAYDRo0ahfj4eDz00EN47rnnUFJSgk2bNiExMRGhoaGmfTz88MO4++67zfY7Z86cBvO89dZbkMlk+Ne//oWcnBwsWLAAcXFxOHLkCOzt7QHU/qIYMWIEYmJiMG/ePMjlcixduhSDBg3Cjh070KtXr3rbDQgIwPz58wEApaWlePrppxvc99y5czF+/Hj8/e9/R25uLj799FPceeedOHz4MFxdXeu95sknn0T//v0BAD///HO9P1BTp041jU969tlnkZKSgs8++wyHDx/Grl27oFKpGnwfbkZhYaHpZ7uS0WjEPffcg507d+LJJ59EVFQUjh8/jo8//hinT582G1R5q8rLyzFgwABkZGRg6tSpCAoKwu7duzFnzhxcunQJCxYssNi+mqp///548sknYTQakZiYiAULFiAzMxM7duy47uteffVVvPbaa4iLi8PTTz+N5ORkfPHFF9i/f7/pGObl5QGo/VyHh4fjtddeQ2VlJRYtWoR+/fph//79iIiIwMCBAxEYGIgVK1bg/vvvN9vPihUrEBoaajqt0lirV69GeXk5nn76aXh4eGDfvn349NNPcfHiRaxevfqar1u5ciUmTpyILl26YP78+cjLy8PChQuxc+dOHD58GJ6enjeV41qa8r2q869//eum9hUZGYmXXnoJQgicO3cOH330Ee6+++5GF6wNqTu206dPR0xMDN555x3k5uY2+F5t3rwZI0aMQLt27fDqq6+ioqICn376Kfr164dDhw7VK7rHjx+Ptm3bYv78+dizZw8WLlyIgoICfPPNN9fM8/e//x3btm3Dpk2b0KFDB9PyW3mfWwVBNm/p0qUCgNi8ebPIzc0V6enpYtWqVcLDw0PY29uLixcvCiGEqKysFAaDwey1KSkpQq1Wi9dff9207OuvvxYAxEcffVRvX0aj0fQ6AOL999+v1yY6OloMGDDA9Hzr1q0CgGjTpo0oLi42Lf/hhx8EAPHJJ5+Yth0eHi6GDRtm2o8QQpSXl4uQkBAxZMiQevvq27ev6Nixo+l5bm6uACDmzZtnWpaamioUCoV46623zF57/PhxoVQq6y0/c+aMACCWL19uWjZv3jxx5ddlx44dAoBYsWKF2Ws3bNhQb3lwcLAYOXJkvezTpk0TV38Fr87+wgsvCG9vbxETE2P2nv7vf/8Tcrlc7Nixw+z1ixcvFgDErl276u3vSgMGDBDR0dH1lr///vsCgEhJSTEte+ONN4Sjo6M4ffq0Wdt///vfQqFQiLS0NCFE0z4Tq1evvmbGSZMmieDg4Ov+HELUvr+TJk0yW/bII48IBweH674uJydH2NnZiaFDh5p9Lz777DMBQHz99ddmWT09PYVOpzO1O336tFCpVGLs2LGmZXPmzBFqtVoUFhaa7UepVJod15CQEPHoo4+a5anbz9atW03LysvL6+WeP3++kMlk4sKFC6ZlV34+a2pqhI+PjwgNDRWlpaWmNtu2bRMAxD//+U/TskmTJglHR0ez7a9evbpeDiGEcHR0NHufb+Z7NWDAALPjv379egFADB8+vN53oCFXv14IIV588UUBQOTk5JiWARDTpk275nbqflfWfb7rnnfo0MHsva47Fle+V127dhXe3t4iLy/PtOzo0aNCLpebHcu6Y3HPPfeY7fsf//iHACCOHj1qlrfuczFnzhyhUCjE2rVrzV53s7+/WiOeWmpB4uLi4OXlhcDAQDz00ENwcnLCmjVr0KZNGwC1s5nk8tpDbjAYkJeXBycnJ7Rv3x6HDh0
ybeenn36Cp6cnnnnmmXr7aEw38LU8+uijcHZ2Nj1/4IEH4Ofnh/Xr1wOonY555swZPPLII8jLy4NOp4NOp0NZWRkGDx6Mv/76y+xUBlB7Ckyj0Vx3vz///DOMRiPGjx9v2qZOp4Ovry/Cw8OxdetWs/ZVVVUAat+va1m9ejW0Wi2GDBlits2YmBg4OTnV22Z1dbVZO51OV6+b+WoZGRn49NNPMXfu3Hrd/6tXr0ZUVBQiIyPNtll3OvHq/d+K1atXo3///nBzczPbV1xcHAwGA/766y+z9uXl5fV+VoPB0OC2S0pKoNPpzE7lNIVer4dOp0NOTg42bdqELVu2YPDgwdd9zebNm1FVVYUZM2aYvhcAMGXKFLi4uNSbGvzYY4+ZejcBIDw8HPfccw82bNhg+vkeffRR6PV6s1Nm33//PWpqajBx4kTTMm9vb1y8ePGGP1ddTyUAlJWVQafToW/fvhBC4PDhw/Xa63Q6bNu2DdnZ2Zg6dSocHR1N6wYMGICYmBiLTXm+2e9VHSEE5syZg7Fjx6J3796N3l/ddyg3NxcJCQlYs2YNOnfuXK93qbKyEjqdDnl5efV+X1zLtGnTzN7rgQMHmr1Xly5dwpEjRzB58mS4u7ub2nXu3BlDhgwx/Q67eptXqvt92lDbzz77DPPnz8fChQtx7733mq1r6vvcmvDUUguyaNEiREREQKlUwsfHB+3btzf7BW00GvHJJ5/g888/R0pKitkflyt/QZ87dw7t27eHUmnZj0d4eLjZc5lMhrCwMNP56roLUE2aNOma2ygqKoKbm5vpuU6nq7fdq505cwZCiGu2u/oUUN0f1auLh6u3WVRUBG9v7wbX5+TkmD3/888/4eXldd2cV5s3bx78/f0xderUemNJzpw5g6SkpGtu8+r934ozZ87g2LFjjd7XvHnzMG/evHrtGroMwOOPP276t5OTE0aPHo2PP/74pi8ZsGrVKqxatcr0vGfPnvjvf/973ddcuHABANC+fXuz5XZ2dmjXrp1pfV3xHhkZWW8bUVFR+Omnn6DT6eDj44PIyEj07NkTK1aswBNPPAGg9rRSnz59EBYWZnpd3759sXDhQqxatQqDBg2CXC5HUVFRve2npaXhlVdewa+//lpv7FVD7a88Rlf/XHV5rzcu6Wbc7PeqzooVK3DixAn88MMPWLlyZaP3t3v3brOfLzw8HGvXrq33n6slS5ZgyZIlAGqPZe/evfHRRx+ZJkJc6UbHtu69utZnpa7dxo0bUVZWZlY4Xv2+hIaGQi6X1xt/9scff+DAgQMAascxXq2p73NrwkKmBenVq1eDX9Y6b7/9NubOnYvHH38cb7zxBtzd3SGXyzFjxoxG/8+lOdVleP/999G1a9cG21xZXFRVVeHSpUsYMmTIDbcrk8nwxx9/QKFQXHebAJCVlQUA8PX1ve42vb29sWLFigbXX/1Hv3fv3njzzTfNln322Wf45ZdfGnx9UlISli1bhm+//bbBX1RGoxGdOnXCRx991ODrAwMDr5n9ZhmNRgwZMgQvvPBCg+sjIiLMnj/55JMYN26c2bJrTbN95ZVX0L9/f1RXV+PgwYN4/fXXUVhY2OD/Wq9n6NChmD17NgDg4sWLePfdd3HXXXfhwIEDZv/Tboqbff2jjz6K5557DhcvXoRer8eePXvw2WefmbV58cUXsWvXruvOJjQYDBgyZAjy8/Pxr3/9C5GRkXB0dERGRgYmT57c4Hd206ZNSEhIwCuvvHJTmZviZr9XQO13du7cuXjiiSfqfW5upHPnzvjwww8BwDSOZeDAgTh06JDZd/Xee+/F9OnTIYRASkoKXn/9dYwaNarBK/Xe6mfjZlyrN3vfvn2YMmUKHB0d8eabb2LcuHFmBVNT3ufWhoVMK/Ljjz/irrvuMv1vpU5hYaFZ92xoaCj27t2L6upqi1b7V/8iEULg7Nmz6Ny5s2m/AODi4oK4uLgbbu/o0aOorq6+bvFWt10hBEJCQhr1y/P
kyZOQyWQN/u/rym1u3rwZ/fr1a9QvQ09Pz3o/0/UG5M6ZMwddu3bFgw8+eM39Hz16FIMHD76l032NERoaitLS0kYdE6D2f6JXt73yf6pX6tSpk6ntiBEjkJaWhuXLl9/0Zf/9/PzM9tm+fXv07dsXa9euvWaxEBwcDKB2VlK7du1My6uqqpCSkmLaXkhIiKnd1U6dOgVHR0ez789DDz2EWbNm4bvvvkNFRQVUKlW94+jp6YmEhAScPHnSVDgfPXoUzz//vKnN8ePHcfr0aSxfvhyPPvqoafmmTZuu+T7ExcVBq9XilVdeuWZeS13k7Wa/V0DtDLGcnJwGZxXeiJubm9kxHjhwIPz9/bF06VKzCQYBAQFm7ZycnDBhwoQGT8VdeWzrTsvWufK9uvKzcrVTp07B09Oz3mf8zJkzpu0DwNmzZ2E0Guu9/0OGDMEXX3yByspKrF27Fk8++aRpBhvQtPe5teEYmVZEoVBACGG2bPXq1cjIyDBbNnbsWOh0unr/iwRQ7/U345tvvkFJSYnp+Y8//ohLly5hxIgRAICYmBiEhobigw8+QGlpab3X5+bm1suuUCganNp8pTFjxkChUOC1116rl18IYZq5ANROf/zpp5/Qq1ev6/5PZ/z48TAYDHjjjTfqraupqbmlMR8JCQn45Zdf8M4771yzSBk/fjwyMjLw1Vdf1VtXUVGBsrKyJu+/oX0lJCRg48aN9dYVFhZa9F5DRqMRcrn8louzuunJ15uKHhcXBzs7OyxcuNDsc7FkyRIUFRWZrjjr5eWFHj16YPny5Wand86dO4dff/0VI0aMMPufsqenJ0aMGIFvv/0WK1aswPDhwxucJSSXy9GxY0fExcUhLi6u3hWC67Z5ZTYhBD755JPr/uxdu3aFj48PvvrqK5SXl5uW79ixAwcOHLjh96WxbuZ7BdSOh3rrrbcwc+bM6/Z2NlZjjjHw/z29DfVmdOvWDb6+vli8eLHZdq5+r/z8/NC1a1csX77c7LudmJiIP//8s97MTaD2VP+VPv30UwAw/b6r07dvXygUCjg6OmLx4sX466+/zL7XN/s+t0bskWlFRo0ahddffx2PPfYY+vbti+PHj2PFihVm/xsFarvGv/nmG8yaNQv79u1D//79UVZWhs2bN+Mf//hHvcFojeXu7o477rgDjz32GLKzs7FgwQKEhYWZTjvI5XL897//xYgRIxAdHY3HHnsMbdq0QUZGBrZu3QoXFxesW7cOZWVlWLRoERYuXIiIiAiz613UFUDHjh1DQkICYmNjERoaijfffBNz5sxBamoq7rvvPjg7OyMlJQVr1qzBk08+ieeffx6bN2/G3LlzcezYMaxbt+66P8uAAQMwdepUzJ8/H0eOHMHQoUOhUqlw5swZrF69Gp988gkeeOCBJr1Pf/75J4YMGXLdHpC//e1v+OGHH/DUU09h69at6NevHwwGA06dOoUffvgBGzduvGFPVWlpKTZs2GC2rO5/nNu3b4dKpUKbNm0we/Zs/Prrrxg1ahQmT56MmJgYlJWV4fjx4/jxxx+Rmpra5Om8R44cgZOTE2pqanDw4EF88803uPfeexv8o3M958+fx7fffgugdpD0Z599BhcXl+sO+PXy8sKcOXPw2muvYfjw4bjnnnuQnJyMzz//HD179jQbnPvee+9h6NChiI2Nxd///nfT9GuNRoO33nqr3rYfffRR0/FvqNhtjMjISISGhuL5559HRkYGXFxc8NNPP9UbK3M1lUqFd999F5MnT0a/fv0wadIk5Ofn45NPPkGbNm3qTXs2GAxmn4MjR44AqD3lceWAdIPBgIyMDOzbtw+9evVq9PeqzqFDh+Dp6XnNU5Q3kp2dbTrGOp0O//nPf6BUKusVZmlpadiwYYPp1NJbb72F4OBgdOvWrV6vsFKpxHvvvYdHH30U/fv3x4QJE0ynrQICAszeq/fffx8jRoxAbGwsnnjiCdP0a61W22APU0pKCu655x4MHz4cCQk
J+Pbbb/HII4+gS5cu1/wZhw0bhokTJ+KFF17A6NGj4efnd9Pvc6t0O6dIUfOom0K4f//+67arrKwU//znP4Wfn5+wt7cX/fr1EwkJCQ1ObSwvLxcvvfSSCAkJESqVSvj6+ooHHnhAnDt3TgjRtKm23333nZgzZ47w9vYW9vb2YuTIkWZTSOscPnxYjBkzRnh4eAi1Wi2Cg4PF+PHjRXx8vNm+b/S4ekruTz/9JO644w7h6OgoHB0dRWRkpJg2bZpITk4WQgjxzDPPiDvvvFNs2LChXqarp1/X+fLLL0VMTIywt7cXzs7OolOnTuKFF14QmZmZpjY3O/1aJpOJgwcPmi1v6BhVVVWJd999V0RHRwu1Wi3c3NxETEyMeO2110RRUVG9/V29vRu9f0uXLjW1LykpEXPmzBFhYWHCzs5OeHp6ir59+4oPPvhAVFVVCSGa9pmoeyiVShEcHCyeffZZUVBQIIS4uenXV27L09NTDB06VCQkJNzwtULUTreOjIwUKpVK+Pj4iKefftqU4Urx8fGiX79+wt7eXri4uIiRI0eK48ePN7hNvV4v3NzchFarFRUVFY3K0dD065MnT4q4uDjh5OQkPD09xZQpU8TRo0frHZ+GPp+rVq0SXbt2NX02HnzwQZGammrWZtKkSY36Ll35uPpzeKPvlRD//3n7+OOPzV57re/V1a7+vLq6uop+/fqJ9evXm7W7so1MJhO+vr5izJgxIikpSQhRf/p1nR9++EF069ZNqNVq4e7uLh5++OEGfzdt3rzZ7DMwevRocfLkyQZ/ppMnT4oHHnhAODs7Czc3NzF9+vR6nwVcdbkFIYTQ6XTCy8tL3H///WbLG/M+t1YyIW7hXAFRI2zbtg133XUXVq9e3eReiiulpqYiJCQEKSkp1zzf/+qrryI1NRXLli275f21Rm3btsWrr77a4JVc6cZqamrg7++P0aNH1xuTZsuWLVuGZcuW1bvqL/2/uoss5ubmWuzCg3R9HCNDRGRha9euRW5urtkgXSJqHhwjQzanbhbC9Qbjdu7c2XTLBbp5AwYMMF1IkRpv7969OHbsGN544w1069YNAwYMkDqSRbVp06bB24QQSYmFDNkcT09P06C/axkzZsxtStMyLV++XOoINumLL77At99+i65du7bI05pDhgy54XWbiG43jpEhIiIim8UxMkRERGSzWMgQERGRzWrxY2SMRiMyMzPh7Ozc7JdyJyIiIssQQqCkpAT+/v5mN0C+WosvZDIzMy16Az0iIiK6fdLT0xEQEHDN9S2+kHF2dgZQ+0a4uLhInIaIiIgao7i4GIGBgaa/49fS4guZutNJLi4uLGSIiIhszI2GhXCwLxEREdksFjJERERks1jIEBERkc1iIUNEREQ2i4UMERER2SwWMkRERGSzWMgQERGRzWIhQ0RERDaLhQwRERHZLBYyREREZLNYyBAREZHNYiFDRERENouFDBEREdksFjJERERks5RSByCi+tLS0qDT6W77fj09PREUFHTb90tE1FQsZIisTFpaGiKjolBRXn7b923v4IBTSUksZojIZrCQIbIyOp0OFeXlmPCv9+ETFHrb9puddg4r3p0NnU7HQoaIbAYLGSIr5RMUioDwaKljEBFZNQ72JSIiIpvFQoaIiIhslqSFTNu2bSGTyeo9pk2bBgCorKzEtGnT4OHhAScnJ4wdOxbZ2dlSRiYiIiIrImkhs3//fly6dMn02LRpEwBg3LhxAICZM2di3bp1WL16NbZv347MzEyMGTNGyshERERkRSQd7Ovl5WX2/J133kFoaCgGDBiAoqIiLFmyBCtXrsSgQYMAAEuXLkVUVBT27NmDPn36SBGZiIiIrIjVjJGpqqrCt99+i8cffxwymQwHDx5EdXU14uLiTG0iIyMRFBSEhISEa25Hr9ejuLjY7EFEREQtk9UUMmvXrkVhYSEmT54MAMjKyoKdnR1cXV3N2vn4+CArK+ua25k/fz60Wq3pERg
Y2IypiYiISEpWU8gsWbIEI0aMgL+//y1tZ86cOSgqKjI90tPTLZSQiIiIrI1VXBDvwoUL2Lx5M37++WfTMl9fX1RVVaGwsNCsVyY7Oxu+vr7X3JZarYZarW7OuERERGQlrKJHZunSpfD29sbIkSNNy2JiYqBSqRAfH29alpycjLS0NMTGxkoRk4iIiKyM5D0yRqMRS5cuxaRJk6BU/n8crVaLJ554ArNmzYK7uztcXFzwzDPPIDY2ljOWiIiICIAVFDKbN29GWloaHn/88XrrPv74Y8jlcowdOxZ6vR7Dhg3D559/LkFKIiIiskaSFzJDhw6FEKLBdRqNBosWLcKiRYtucyoiIiKyBVYxRoaIiIioKSTvkSEi6ZVUViOtTA73IU/j9e15KNm6DeVVBshlMmhUcgS6OyDY3QHdg90Q284D3i4aqSMTEQFgIUPUaulrDDidVYqTl4qRVVwJQAnn7iNxJLsKQJVZ23O5ZQCA5QkXAAChbioMDLbHHUEaaDWKW87i6emJoKCgW94OEbU+LGSIWhl9jQFH0gtxOK0Q+hojAEAGwFlehfQ9v6E69wIMxbkw6ssAmRxytQOUWh+oPIOgDuwIO592OFdQjXMF1fhqfy7Kjm9C0d6fYSjOaXImewcHnEpKYjFDRDeNhQxRKyGEQFJWCXae0aGi2gAAcHNQoaO/Fu19nXFq53oc3/o1Rk59Ce07x1xzO5WGGlwsl+NCqRyFUMO5+yi4dB+Jdk5GdNAaYHeTHTTZaeew4t3Z0Ol0LGSI6KaxkCFqBYorqrEpKRsXCyoA1BYwvUM8EO7jBLlMZtbWwz8YAeHR191eGGoLo4sFFThwoQBp+eU4V6rAxUoV7gjzRLS/C2RXbZeIqDmwkCFq4c7llmLTyWzoa4xQymXo3c4d3QLdoJDfWqEhk8kQ6O6AQHcHpOWX46/Tucgrq0L8qRyczi7B4CgfaO1VFvopiIgaxunXRC2UEAI7z+rw27FL0NcY4eOixsQ+wegR7H7LRczVgtwd8EivIPQP94RSLkN6QQVW7k1DclaJRfdDRHQ19sgQtUDVBiM2nsgyzTbqHuSKvqGeFi9griSXy9A9yA0hno7YdDIbl4oqseFEFtLyy3FXey8oFfx/ExFZHn+zELUwldUG/HwoA+dyy6CQyTAs2gf9w72atYi5kpuDHR7oHoBebd0BACcvFePHQxdRqq+5LfsnotaFhQxRC1JeVYOfD2Ugq7gSaqUc93drg0hfl9ueQy6XITbUA/d3awONUo7sYj1W7UtDVlHlbc9CRC0bCxmiFqK8qgY/HcpAbqkeDnYKPBATgDZu9pJmCnJ3wIM9A+HuaIeyKgN+PHQRpy4VS5qJiFoWFjJELUBltQFrDmcgv6wKTmolHugeAE8ntdSxAACuDnYY3yMAIZ6OMBgFNp7Mxu5zumveLJaI6GawkCGycVU1RvxyJBO60io42CkwpnsbuDnaSR3LjFqpwOjOfujZ1g0AsD+1AJuSsmEwspgholvDWUtENsxoFPgj8ZJpTMx9XdvAzcG6ipg6MpkMfUM94WKvwpZTOUi6VILyKgO6Snv2i4hsHAsZIhslhMDW5Byk5pVDKZfh3q7+8HK2jtNJ19PRXwtHOyXWH7+EC3nlKLZTQm5/+wckE1HLwFNLRDbqYFoBEjNrB84O7+gLP63tdG2EeDpiTPc20KjkKKiSw3fiB8gu5fRsIrp5LGSIbNCFvDLsOpsHABgQ4YVQLyeJE908P609xscEwkEhoHL3x4tb8nAqizOaiOjmsJAhsjFFFdX4IzELABDt74IuAVqJEzWdm6MdBvpUoyo3FQWVRoxfnID9qflSxyIiG8JChsiGVBuM+O1YpuneSQMjvGz+LtP2SiB7xb8Q6alCcWUNJv53L7acypY6FhHZCBYyRDZCCIH4UznQlVbBXqXAyE5+Leb+RUZ9Gebd6YFBkd7Q1xgx5ZuD+PnQRaljEZENaBm/BYlagSPphUjOKoFMBtz
dyRfOGpXUkSxKrZThP3+LwZhubWAwCsz64Sj+u+O81LGIyMpx+jWRDbhUVIEdZ3UAgP5hnghwc5A4keUlJSUBAB4JE6guc8S602V48/cknDyXhgmdnJvlFJqnpyeCgoIsvl0iun1YyBBZOX2NARsSsyAEEOHthK6BrlJHsqji/FwAwMSJE82Wu/R+AG4DJ+PnU2VY/v3PyN+4CBBGi+7b3sEBp5KSWMwQ2TAWMkRWbmtyLoora+CiUWJQlLfND+69WkVp7ZTrkVNfQvvOMWbrUkprcChfAecuw9C+zxD08qiBpYYFZaedw4p3Z0On07GQIbJhLGSIrNipS8WmcTHDO/pCrVRIHanZePgHIyA82mxZAAC/nFJsSMxCZoUce4pdMLqLHxzs+KuLiGpxsC+RlSqtqe2NAYDeIe42deVeSwrzdsJ93fyhVsqRVVyJ7/enI69UL3UsIrISLGSIrJFcgf06JaoMRvi7atCzrbvUiSQV4OaAB3sEQmtfe62ZHw5eRFp+udSxiMgKsJAhskLavg8hv0oOO6Ucw6J9IW9h42Kaws3RDg/2CIS/VoOqGiN+OZKB4xeLIISQOhoRSYiFDJGVOZtfBW3seADA4EhvuLSw68XcCns7Be7v3gbtfZ1hFMCW5BxsTspBjcGys5mIyHawkCGyIvoaAz7dVwSZXIEABwMifJyljmR1lHI5hnXwQb9QD8gAnLxUjO8PpKOwvErqaEQkARYyRFZkYfwZpBfXwFBWgK5uBqnjWC2ZTIYebd1xf7c2sFcpoCutwnf703E+t1TqaER0m7GQIbISxy4WYvH22kvy5/35OdQtd6a1xQS6O+CRXkHwuzxuZt2xS9h6KgfVPNVE1GqwkCGyAvoaA55ffRQGo0C/QA0qTidIHclmOGmUGNs9wHTF42MZRVixNw2ZhRXSBiOi24KFDJEVWBh/BqezS+HhaIcp3bVSx7E5CrkMAyK8cF9XfziplSiqqMbqgxex86yOA4GJWjgWMkQSS8woMp1SevO+jnBR82vZVMEejpjYOwhRfrWDpA9eKMCKfWlIzSuTOBkRNRf+xiSSUI3BiH//fAwGo8DIzn4Y0clP6kg2T61SYGgHX4zu7AcHOwUKy6vxy5FMrDuaiaKKaqnjEZGF8YYlRBJannABiRnFcNEoMW90B6njtCjtvJzQxs0ee1PycTS9EOd1ZbiQX47uQa6ICXaTOh4RWYjkPTIZGRmYOHEiPDw8YG9vj06dOuHAgQOm9UIIvPLKK/Dz84O9vT3i4uJw5swZCRMTWUZGYQU+/DMZAPDvEVHwdtZInKjlUSsVuDPcCxN6ByPI3QEGo8D+1AIs25WKU0VyyFR8z4lsnaSFTEFBAfr16weVSoU//vgDJ0+exIcffgg3t///39J7772HhQsXYvHixdi7dy8cHR0xbNgwVFZWSpic6NYIITDvl0SUVxnQI9gND/UMlDpSi+buaIf7uvpjZCc/uDmoUFljxIkiJdo8tQRrTpWivKpG6ohE1ESSnlp69913ERgYiKVLl5qWhYSEmP4thMCCBQvw8ssv49577wUAfPPNN/Dx8cHatWvx0EMP3fbMRJaw8UQWNiflQKWQ4e0xnSCX815KzU0mkyHM2wntvBxxOrsEu5KzUOqgxf+OleC3s1swsU8w/hYbzJ4xIhsjaSHz66+/YtiwYRg3bhy2b9+ONm3a4B//+AemTJkCAEhJSUFWVhbi4uJMr9FqtejduzcSEhIaLGT0ej30er3peXFxcfP/IEQ3oaSyGvN+PQEAmHpnKG9DcJvJZTJE+rrAoTgdXy3+DBH3P4f8cuDTLWfxxbazuDPIHqPbOyJY23z3uPL09ERQUFCzbZ+oNZG0kDl//jy++OILzJo1Cy+++CL279+PZ599FnZ2dpg0aRKysrIAAD4+Pmav8/HxMa272vz58/Haa681e3aipvrwz9PILtajrYcDpg8KkzpOq1VakIuyxC04fGIbHML7wLnX/dC0icK
W1ApsSa1ARcohFO9bg8rUwxbft72DA04lJbGYIbIASQsZo9GIHj164O233wYAdOvWDYmJiVi8eDEmTZrUpG3OmTMHs2bNMj0vLi5GYCDHH5B1OJFZhG8SUgEAb93fCRoV70MglYrS2t7akU/OQfvOMQCAPH01zhQrkFEhg31Id9iHdIeLyohwZyMCHY1QWOAMYHbaOax4dzZ0Oh0LGSILkLSQ8fPzQ4cO5lNOo6Ki8NNPPwEAfH19AQDZ2dnw8/v/62tkZ2eja9euDW5TrVZDrVY3T2CiW1A7wPcEjAIY1dkP/cI8pY5EADz8gxEQHg0ACADQBUBRRTWOpBXixKUiFFfLcTBfjqRSBboEuKJTgBb2LECJrIaks5b69euH5ORks2WnT59GcHAwgNqBv76+voiPjzetLy4uxt69exEbG3tbsxLdqrVHMnDgQgHsVQq8NDJK6jh0HVp7FQa098IT/ULQL8wDTmolyqsMSDifh693pmDrqRwUlFdJHZOIIHGPzMyZM9G3b1+8/fbbGD9+PPbt24cvv/wSX375JYDaWQYzZszAm2++ifDwcISEhGDu3Lnw9/fHfffdJ2V0optSUlmNt9efAgA8MzgMflp7iRNRY6hVCvQIdke3QDecyS7BobRC5JbqcSyjCMcyihDh44Q+IR5wc7STOipRqyVpIdOzZ0+sWbMGc+bMweuvv46QkBAsWLAAEyZMMLV54YUXUFZWhieffBKFhYW44447sGHDBmg0nCJJtmNh/BnklugR4umIJ+4IufELyKoo5DJE+rmgva8zLhZU4FBaAVLzynE6uxRnsksR6eeM3iEe0No330wnImqY5LcoGDVqFEaNGnXN9TKZDK+//jpef/3125iKyHLOZJdg6a5UAMC80R2gVnJ8ha2SyWQIdHdAoLsDckv02HM+D+d1ZUi6VILkrBJE+2sR284D9nY8xkS3i+SFDFFLJoTAq+tOoMYoEBflg4HtvaWORBbi5azG6C7+yCqqxJ7zebiQX47jGUU4k12CvmGeiPZ3gVzGCx0SNTfJ77VE1JJtPJGFXWfzYKeU45VRvClkS+Sr1eC+bm3wQPcAeDrZobLGiC2ncvDDgXRkFfNWKkTNjYUMUTPR1xgw/4/aAb5T72yHIA8HiRNRc2rjZo+HewbhznBP2CnkyC7W4/v96dh+Ohc1BqPU8YhaLBYyRM3kfwkXcCGvHF7Oajw1IFTqOHQbyOUydAtyw6OxwWjvW3vriSPphVi5Lw3Z7J0hahYsZIiaQX5ZFT6JPwMAmD20PRzVHI7WmjiqlRge7Yt7uvjD0U6BgvJq/HAgHYfTCiCE1OmIWhYWMkTNYGH8GZRU1iDKzwVjYwKkjkMSCfF0xMQ+wQjzcoJRAH+d0WGPTgmZHU8zElkKCxkiCzubU4r/7bkAAHh5ZBQUcs5cac00KgXu7uSLgRFeUMhkyKyQw+/RD5FZUiN1NKIWgYUMkYW980cSDEaBwZHevJ8SAai9/kyXQFeM6xEAe4WAyiMQL2zWYfdZndTRiGweCxkiC9p1VofNSTlQyGWYczfvp0TmfFw0GORbjcqLJ1BeLTB56X78fuyS1LGIbBoLGSILMRoF3vo9CQAwsXcQwrydJE5E1kijALJXvYzYAA2qDEZM/+4QvklIlToWkc1iIUNkIeuOZeLkpWI4q5V4Li5C6jhkzQzVmNXHFRP7BEEI4JVfTuDDP5MhOKWJ6KaxkCGygKoaIz788zQA4Mk728Gdd0OmG1DIZXjj3o6Yebno/XTLWby0NhFGI4sZopvBQobIAr4/kI60/HJ4Otnhcd7dmhpJJpPhubhwvHV/R8hlwMq9aXh13Qn2zBDdBBYyRLeoosqAhZcvfjf9rjBe/I5u2oTewfhgXBfIZMA3CRfw1u9JLGaIGomFDNEtWro7BbklegS42ePh3kFSxyEbNaZ7AN6+vxMA4L87U0ynKono+ljIEN2CovJqLN52DgAwa0gE1EqFxInIlj3cKwi
v3xsNAPhs61l8ermnj4iujYUM0S1Y/Nc5FFfWIMLHCfd2bSN1HGoBHo1ti5dH1l6D6MNNp7F0V4rEiYisG0/mE11HWloadLqGr76aX2HAkh05AIAxYSocPXLYIvtMSkqyyHbIdv29fztUVBnw4abTeP23k2jjao+h0b5SxyKySixkiK4hLS0NkVFRqCgvb3C92+An4dLjHlRmJOHpe2ZbfP+lpaUW3ybZjumDwpBZVInv9qXh2VWHserJWHQNdJU6FpHVYSFDdA06nQ4V5eWY8K/34RMUarauogbYkKmCEcCQbmHwjv3ZYvtN2rcdfyz/BJWVlRbbJtkemUyGN+6NxqWiCmxLzsXfl+/Hmn/0Q6A775xNdCUWMkQ34BMUioDwaLNl25JzYEQR/LUadOsYBpnMcne4zk47Z7FtkW1TKuT47JHuGL84AScvFWPS0n34+em+cHXgBReJ6nCwL9FNKtXXIDGzGADQu52HRYsYoqs5qZVY+lhP+Gk1OJ9bhif/dxBVNUapYxFZDRYyRDfpYGoBDEYBP60GgW72UsehVsDHRYOlj/WEk1qJfSn5eOv3k1JHIrIaLGSIbkKZvgbHM4sAAH3YG0O3UaSvCz5+sCsAYHnCBaw+kC5tICIrwUKG6CYcuMDeGJLOkA4+mBEXDgB4aW0ijl0slDYQkRVgIUPUSGX6GhzPqO2N6R3izt4YksSzg8IRF+WDqhojnvrfQehK9VJHIpIUCxmiRrqyNyaIU2BJInK5DB892AXtvByRWVSJaSsOodrAwb/UerGQIWqE8qoaJLI3hqyEi0aFL//WA05qJfam5OO9DaekjkQkGRYyRI1wNL0INUYBHxc1e2PIKoR5O+GDcV0AAF/tSEF8UrbEiYikwUKG6AaqjcDRy4MqY4Ld2BtDVmN4R1881q8tAOCfq48is7BC2kBEEmAhQ3QDKaVy6GuMcHVQIdTLSeo4RGbmjIhC5wAtCsur8ex3h1HD8TLUyvAWBUTXI1fibIkCABAT5AY5e2PIQix5l/OnOtvh+WwZDlwowAv/+wsTO7s02M7T0xNBQUEW2y+RNWAhQ3QdjtEDUWGQwdFOgUg/Z6njUAtQnJ8LAJg4caJFt+vQvh+87puDn0+V4YvX/4nKlEP12tg7OOBUUhKLGWpRWMgQXYNRCGh7jwUAdAtyg1LOM7F06ypKa+/TNXLqS2jfOcai2z6cb8D5UgWCHnoNcX7V0Cj+f1122jmseHc2dDodCxlqUVjIEF3D/kw9VB6BUMkEOrZpuKueqKk8/IPr3VX9VvkajPj+QDp0pVU4UemKe7r4c3A6tXj8LyZRA4QQ+DmpFADQztkItVJxg1cQSU+pkGNYtC8UchlS88pNV6ImaslYyBA1YG9KPs7kV0PUVCHM2SB1HKJG83RSo1+oBwBgxxkdCsqqJE5E1LxYyBA14L87UgAApcfjzcYZENmCroGuCHS3R41RYMOJLBiMQupIRM1G0kLm1VdfhUwmM3tERkaa1ldWVmLatGnw8PCAk5MTxo4di+xsXr2Smleqrgzxp2o/Z8UH1kobhqgJZDIZhkb5Qq2UI6dEj70peVJHImo2kvfIREdH49KlS6bHzp07TetmzpyJdevWYfXq1di+fTsyMzMxZswYCdNSa7BsdyqEALr7qVGTnyF1HKImcdIoMTjSGwBwILUAOj0H/VLLJPmsJaVSCV9f33rLi4qKsGTJEqxcuRKDBg0CACxduhRRUVHYs2cP+vTpc7ujUitQXFmN1QfSAQCjwh2xRuI8RLci3McZUboyJGWV4ECeEjKlWupIRBYneY/MmTNn4O/vj3bt2mHChAlIS0sDABw8eBDV1dWIi4sztY2MjERQUBASEhKuuT29Xo/i4mKzB1Fj/bA/HWVVBoR7O6GLj53UcYhu2YD2XnBSK1FWI4PrgEeljkNkcZIWMr1798ayZcuwYcMGfPHFF0hJSUH//v1RUlKCrKws2NnZwdXV1ew1Pj4+yMrKuuY
258+fD61Wa3oEBgY2809BLYXBKLBsdyoA4PE7Qnj9DWoR1EoF4qJqTzE5x4zGyVzOYqKWRdJCZsSIERg3bhw6d+6MYcOGYf369SgsLMQPP/zQ5G3OmTMHRUVFpkd6eroFE1NLtulkNi4WVMDNQYX7u7WROg6RxQR7OKKtowEymRyf7S9EeVWN1JGILEbyU0tXcnV1RUREBM6ePQtfX19UVVWhsLDQrE12dnaDY2rqqNVquLi4mD2IGuPrXbVTrh/pHQSNinOuqWXp7GZATXEuskoNeG9DstRxiCzGqgqZ0tJSnDt3Dn5+foiJiYFKpUJ8fLxpfXJyMtLS0hAbGythSmqJEjOKsC8lH0q5DH/r01bqOEQWp5IDeRs+BVA7M2/veU7JppZB0kLm+eefx/bt25Gamordu3fj/vvvh0KhwMMPPwytVosnnngCs2bNwtatW3Hw4EE89thjiI2N5Ywlsrilu1IBAHd38oOvViNtGKJmUplyCHEh9gCA2T8e4ykmahEknX598eJFPPzww8jLy4OXlxfuuOMO7NmzB15eXgCAjz/+GHK5HGPHjoVer8ewYcPw+eefSxmZWiBdqR7rjmYCqB3kS9SSTe7qgpP5Amn55Viw+QxevDtK6khEt0TSQmbVqlXXXa/RaLBo0SIsWrToNiWi1uiHA+moMhjRJdAVXQNdpY5D1KwcVHK8cV9HPLH8AP674zzu6eKPjm20UsciajKrGiNDdLsZjAIr9tReu2hi7yCJ0xDdHoOjfDCqsx+MAvjXT8dQYzBKHYmoyVjIUKu2/XQOMgoroLVXYXQXf6njEN0280ZHQ2uvwonMYtOMPSJbxEKGWrVvL/fGjIsJ4JRralW8nNV46fL4mI82nUZaXrnEiYiahoUMtVrp+eXYmpwDAJjQJ1jiNES337geAYht54HKaiNeXHMcQgipIxHdNBYy1Gqt3JcGIYA7wjwR4ukodRyi204mk+HtMZ1gp5Rj51kdfj7Eu72T7WEhQ62SvsaAH/bX3r5iYh8O8qXWK8TTEc8NDgcAvL0+CYXlvBcT2RYWMtQqbUjMQl5ZFXxc1IiL8pE6DpGkpvRvh3BvJ+SVVeH9jbx9AdkWSa8jQySVb/dcAAA83CsISgXreWo9kpKSGlz+aLQd5uYAK/emobNTGcI97Cy2T09PTwQFseeTmgcLGWp1TmUVY39qARRyGR7qyV+u1DoU5+cCACZOnHjNNh53z4RTp8GYsWIvsr6ZBQjLXF/G3sEBp5KSWMxQs2AhQ63Oyr21U66HRPnwvkrUalSUFgMARk59Ce07xzTYptIA/JkpAN8w3DP/Z4Q533ohk512DivenQ2dTsdChpoFCxlqVSqrDVhzuHZmxiO8ki+1Qh7+wQgIj77m+nKnQmxNzkVSsR16RgfDUc0/E2TdODiAWpUNiVkoqaxBG1d73BHmKXUcIqvTsY0WPi5qVBmM2HFWJ3UcohtiqU02IS0tDTrdrf9S/e+2PADAHW0UOHLk8HXbXmtQJFFLJpfJcFd7b3y/Px3JWSXo6O+CADcHqWMRXRMLGbJ6aWlpiIyKQkX5rV1CXenqizZT/wshjPhw2ji8V5LbqNeVlpbe0n6JbI2Piwad2mhxLKMI20/n4uGeQZDLZVLHImoQCxmyejqdDhXl5Zjwr/fhExTa5O0kFiqQXAz42gMPvPOfG7ZP2rcdfyz/BJWVlU3eJ5Gt6hPqgdPZJdCVVuF4ZhG6BLhKHYmoQSxkyGb4BIVed5Di9RiNAht2pQAwICbMHwE+zjd8TXbauSbti6glsFcp0CfUA9uSc5FwLg8RPs6w541VyQpxsC+1Cqn5ZSirMsBepUCIF++rRNQYnfy18HSyg77GiIRzeVLHIWoQCxlqFU5k1F5DI9LXGUo5P/ZEjSGXyzAgwgsAkJhRhNwSvcSJiOrjb3Rq8cr0NUjJKwMARPu7SJyGyLYEuDkgwtsJAsD207kQQkgdicgMCxlq8ZKyiiEE4OuigYe
TWuo4RDbnjnBPKOUyZBRW4HQ2Z/GRdWEhQy2aEAInMmtPK0W3YW8MUVM4a1To0dYNALDzrA41Bsvcg4nIEljIUIt2qagSheXVUClkiPC+8UwlImpYTJAbnNRKlOprcCi9UOo4RCYsZKhFS7pU2xsT5u0EOyU/7kRNpVTI0S/MAwBwIDUfZfoaiRMR1eJvdmqxagxGnM6pPZ8f5cvTSkS3qr2PM3xc1Kg2COw5z+nYZB1YyFCLdV5XhqoaI5w1SgS42Usdh8jmyWQy3BleOx37RGYxp2OTVWAhQy1W3WmlSF9nyGS8TwyRJfi72iP88nTsHWc5HZukx0KGWqQyfQ0u5NfeZDLKj6eViCypX5gnFDIZ0vMrkJp3azdzJbpVLGSoRUrOKjFdO8bNwU7qOEQtitZehS6BWgDAjjO5MBjZK0PSYSFDLVJSVu1ppSg/Trkmag692rpDo5KjoLwaJy9fq4lICixkqMXJLdFDV1oFhUyGiEbc5ZqIbp5apUDvkNrp2HtT8lDNi+SRRFjIUItz8vIg3xAvR2hUConTELVcHdu4wEWjRFmVAYd5kTySCAsZalEMRoHkrBIAPK1E1NyUcjliQ2t7ZQ6mFqCi2iBxImqNWMhQi3IhvwwV1QbYqxQIdneUOg5Ri9fexxmeTnaoMhixPzVf6jjUCrGQoRbl1KXa3pj2vs5QyHntGKLmJpPJ0C/MEwBwLL0IxRXVEiei1oaFDLUY+hoDzuvKAABRvjytRHS7BLs7IMDNHgYhsCeFty6g26tJhUy7du2Ql1f/w1pYWIh27drdciiipjiXUwaDUcDNQQUvZ7XUcYhaDZlMhn6htb0ySZdKoCvlrQvo9mlSIZOamgqDof6gLr1ej4yMjFsORdQUp7LrbkngwlsSEN1mvloNwrydAAC7z7FXhm4f5c00/vXXX03/3rhxI7Rarem5wWBAfHw82rZta7FwRI1Vpq/BxfwKALXjY4jo9usb6oFzuaVI0ZUho6ACbXizVroNbqpH5r777sN9990HmUyGSZMmmZ7fd999eOihh7Bp0yZ8+OGHTQryzjvvQCaTYcaMGaZllZWVmDZtGjw8PODk5ISxY8ciOzu7Sdunli05uwQCgJ9WA629Suo4RK2Sm4Mdov1r722265yON5Sk2+KmChmj0Qij0YigoCDk5OSYnhuNRuj1eiQnJ2PUqFE3HWL//v34z3/+g86dO5stnzlzJtatW4fVq1dj+/btyMzMxJgxY256+9Ty1V07pj2v5EskqT4hHlDKZbhUVGkafE/UnJo0RiYlJQWenp4WCVBaWooJEybgq6++gpubm2l5UVERlixZgo8++giDBg1CTEwMli5dit27d2PPnj0W2Te1DAVlVcgp0UMmA8J9nKSOQ9SqOaqV6BbkCgDYfTYP7JSh5nZTY2SuFB8fj/j4eFPPzJW+/vrrRm9n2rRpGDlyJOLi4vDmm2+alh88eBDV1dWIi4szLYuMjERQUBASEhLQp0+fBren1+uh1///iPniYt7MrKU7dbk3JtjdAQ52Tf5IE5GFxAS74fjFIuSXVyHNnlf5oObVpE/Ya6+9hqFDhyI+Ph46nQ4FBQVmj8ZatWoVDh06hPnz59dbl5WVBTs7O7i6upot9/HxQVZW1jW3OX/+fGi1WtMjMDCw0XnI9gghkJz9/xfBIyLpqZUKxATX9rAnFSkAOe95Rs2nSf99Xbx4MZYtW4a//e1vTd5xeno6nnvuOWzatAkajabJ27nanDlzMGvWLNPz4uJiFjMtWFZxJYoqqqFSyBDqxdNKRNaiS6ArDqUVoqzaAKeOg6WOQy1Yk3pkqqqq0Ldv31va8cGDB5GTk4Pu3btDqVRCqVRi+/btWLhwIZRKJXx8fFBVVYXCwkKz12VnZ8PX1/ea21Wr1XBxcTF7UMtVN8i3nZcTVAp2YRNZC5VCjh5ta3tltH0fQrWBg2WoeTTpN//f//53rFy58pZ2PHjwYBw/fhxHjhwxPXr06IE
JEyaY/q1SqRAfH296TXJyMtLS0hAbG3tL+6aWwWgUOJ1dCgCI5GwlIqvTuY0WGoWAUuuNzefLpY5DLVSTTi1VVlbiyy+/xObNm9G5c2eoVObX7fjoo49uuA1nZ2d07NjRbJmjoyM8PDxMy5944gnMmjUL7u7ucHFxwTPPPIPY2NhrDvSl1iW9oNx0p+tAdwep4xDRVZQKOSJdDDhSoMSPSaX4Z7UBGhXHy5BlNamQOXbsGLp27QoASExMNFtnyUvDf/zxx5DL5Rg7diz0ej2GDRuGzz//3GLbJ9tW1xsT5u3EO10TWam2TkYcSM1GAXzw7Z4L+Ht/3o+PLKtJhczWrVstnQMAsG3bNrPnGo0GixYtwqJFi5plf2S7DEaBc7m1hQwvgkdkvRQyoGj3KniMeA5fbDuHh3sFwVHNyySQ5XB0JNmkC/ll0NcY4ahWwN/VcrPeiMjyShO3wNdJgbyyKixPSJU6DrUwTSqL77rrruueQtqyZUuTAxE1Rt1ppXBvZ97pmsjaGQ14MNoJn+wtwn+2n8fEPsFw0fCeaGQZTeqR6dq1K7p06WJ6dOjQAVVVVTh06BA6depk6YxEZmoMRpy/fFopgrckILIJdwTaI8zbCUUV1fh6Z4rUcagFaVKPzMcff9zg8ldffRWlpaW3FIjoRlLzylFtEHDWKOHrwtNKRLZAIZdhRlw4pq88jCU7UjC5b1u4OthJHYtaAIuOkZk4ceJN3WeJqClOX74lQQRPKxHZlLs7+iHS1xkl+hp8+dd5qeNQC2HRQiYhIcGitxsgulpVjREpujIAvNM1ka2Ry2WYNSQCALB8dyoKyqokTkQtQZNOLY0ZM8bsuRACly5dwoEDBzB37lyLBCNqSIquDDVGAa29Ct7OaqnjENFNGtLBB9H+LjiRWYz/7jyP2cMipY5ENq5JPTJX3l1aq9XC3d0dAwcOxPr16zFv3jxLZyQyOZNz+bSSjxNPKxHZIJlMhmcHhwMAlu++gMJy9srQrWlSj8zSpUstnYPohqqNtQN9ASCCF8EjsllDO/ggys8FSZeKsWRnCv45tL3UkciG3dIYmYMHD+Lbb7/Ft99+i8OHD1sqE1GDMsvlMBgF3B3s4OHI2Q5Etkomk+G5wWEAgGW7UlFUXi1xIrJlTeqRycnJwUMPPYRt27bB1dUVAFBYWIi77roLq1atgpeXlyUzEgEALpbX1t3hPK1EZPOGdvBFex9nJGeX4OtdKZh5eRAw0c1qUo/MM888g5KSEpw4cQL5+fnIz89HYmIiiouL8eyzz1o6IxHkGidkV9YWLzytRGT75PL/Hyvz9a4UFFWwV4aapkmFzIYNG/D5558jKirKtKxDhw5YtGgR/vjjD4uFI6rjEBELARk8nezgztNKRC3CiI6+iPBxQkllDZbtSpU6DtmoJhUyRqMRKlX9+2SoVCoYjcZbDkV0NYeoOwGwN4aoJZHLZXhmUG2vzJKd51FcyV4ZunlNKmQGDRqE5557DpmZmaZlGRkZmDlzJgYPHmyxcEQAUFhpgCaoMwAWMkQtzd2d/BDm7YTiyhp8sztV6jhkg5pUyHz22WcoLi5G27ZtERoaitDQUISEhKC4uBiffvqppTNSK7fnYiVkcgXc7IzQ2vOOuUQtiUIuwzODamcw/XdnCkr1NRInIlvTpFlLgYGBOHToEDZv3oxTp04BAKKiohAXF2fRcEQAsCu9EgAQ4MDTlkQt0ajO/vgk/gzO55Zh+e5UTLsrTOpIZENuqkdmy5Yt6NChA4qLiyGTyTBkyBA888wzeOaZZ9CzZ09ER0djx44dzZWVWqHs4kqczK298icLGaKWyaxXZsd5lLFXhm7CTRUyCxYswJQpU+Di4lJvnVarxdSpU/HRRx9ZLBzR78cuQQCovHgSDk3qPyQiWzC6sz9CPB1RUF6NbxIuSB2HbMhNFTJHjx7F8OHDr7l+6NChOHjw4C2HIqrz27HaAeXlSX9JnISImpNSIcf0y6eUluw8j4o
qg8SJyFbcVCGTnZ3d4LTrOkqlErm5ubcciggALhaU41BaIWQAypN3SR2HiJrZPV39EeBmD11pFVbtT5M6DtmIm+qsb9OmDRITExEW1vBArGPHjsHPz88iwYh+P3YJABDtZYfUsgKJ0xDRrUhKSmpUu5HtVPjPwQp8tvkUOtjlQ6Vo+u1IPD09ERQU1OTXk224qULm7rvvxty5czF8+HBoNBqzdRUVFZg3bx5GjRpl0YDUev12uZDpF6jB7xJnIaKmKc6v7aWfOHFi416gUKLN1CXIgwfipryE0qMbm7xvewcHnEpKYjHTwt1UIfPyyy/j559/RkREBKZPn4727WtvvX7q1CksWrQIBoMBL730UrMEpdYlVVeG4xlFUMhl6BOgufELiMgqVZQWAwBGTn0J7TvHNOo1Z4rlOFYIBI2ajqFTpkLehE6Z7LRzWPHubOh0OhYyLdxNFTI+Pj7YvXs3nn76acyZMwdCCAC1t2QfNmwYFi1aBB8fn2YJSq1L3SDfvqEe0GoUEqcholvl4R+MgPDoRrX1MRhxZlcqyqoNKHcJRKRv/ZmyRHVuekJrcHAw1q9fj4KCApw9exZCCISHh8PNza058lErVXdaaXRnfwAcQE7UmqgUcnQNckXCuTzsTy1Aex9nyGRNHytDLVuTblEAAG5ubujZsyd69erFIoYs6mxOCU5llUClkGFYtK/UcYhIAl0CtLBTypFfVoVzuWVSxyEr1uRChqi5rDta2xvTP9wLWgfeW4moNVIrFega4AoA2JeabxrKQHQ1FjJkVYQQWHd5fMzoLpzKT9SadQ10hUohQ26JHhfyyqWOQ1aKhQxZlaRLJTifWwY7pRxxURw4TtSa2dsp0KmNFgB7ZejaWMiQVambrXRXey84a3haiai16x7kBoVchktFlcgorJA6DlkhFjJkNYQQptlKozr7S5yGiKyBo1qJaL/a6df7UvMlTkPWiIUMWY1jF4uQll8Oe5UCg6O8pY5DRFYiJtgNchmQnl+BrKJKqeOQlWEhQ1aj7rTS4ChvONjd9CWOiKiFcrFXob2vMwD2ylB9LGTIKhiNwnSTSJ5WIqKr9WzrDgBI0ZUht0QvcRqyJixkyCocTi9AZlElnNRKDGzvJXUcIrIybg52iPB2AgDsZ68MXYGFDFmFuovgDe3gA42K91Yiovp6XO6VOZNTivyyKonTkLVgIUOSMxgFfj9++bQSL4JHRNfg5axGO09HAMCBC+yVoVqSFjJffPEFOnfuDBcXF7i4uCA2NhZ//PGHaX1lZSWmTZsGDw8PODk5YezYscjOzpYwMTWHfSn5yC3RQ2uvwh1hPK1ERNdWN1bmVFYJiiuqJU5D1kDSQiYgIADvvPMODh48iAMHDmDQoEG49957ceLECQDAzJkzsW7dOqxevRrbt29HZmYmxowZI2VkagZ1s5WGRfvATslOQiK6Nl+tBoHu9hACOHChQOo4ZAUkneM6evRos+dvvfUWvvjiC+zZswcBAQFYsmQJVq5ciUGDBgEAli5diqioKOzZswd9+vSRIjJZWI3BiD8SswAAo7twthIR3Vivtu5Iz8/AyUvF6B3iDkc1L9fQmlnNf38NBgNWrVqFsrIyxMbG4uDBg6iurkZcXJypTWRkJIKCgpCQkHDN7ej1ehQXF5s9yHrtPpeH/LIqeDjaIbadh9RxiMgGtHG1h59WA4NR4FAae2VaO8kLmePHj8PJyQlqtRpPPfUU1qxZgw4dOiArKwt2dnZwdXU1a+/j44OsrKxrbm/+/PnQarWmR2BgYDP/BHQr6k4rDe/oC6VC8o8jEdkAmUxmGitzPKMIFdUGiRORlCT/y9G+fXscOXIEe/fuxdNPP41Jkybh5MmTTd7enDlzUFRUZHqkp6dbMC1ZUlWNERsun1biRfCI6Ga09XCAl7Ma1QaBI2mFUschCUl+YtHOzg5hYWEAgJiYGOzfvx+ffPIJHnzwQVRVVaGwsNCsVyY7Oxu+vr7X3J5arYZ
arW7u2GQBf53ORXFlDbyd1egV4i51HCKyITKZDD2D3bA+MQtHLxaie7Ar1Epeg6o1krxH5mpGoxF6vR4xMTFQqVSIj483rUtOTkZaWhpiY2MlTEiW8uvR2tNKozr7QyGXSZyGiGxNmLcT3BxU0NcYcexikdRxSCKS9sjMmTMHI0aMQFBQEEpKSrBy5Ups27YNGzduhFarxRNPPIFZs2bB3d0dLi4ueOaZZxAbG8sZSy1AeVUNNp2svSbQPV15WomIbl7dWJk/T2bjcFohuga6QsWxdq2OpIVMTk4OHn30UVy6dAlarRadO3fGxo0bMWTIEADAxx9/DLlcjrFjx0Kv12PYsGH4/PPPpYxMFrI5KQcV1QYEezigS4BW6jhEZKMifJyx53weiitrcCKzGF0DXaWORLeZpIXMkiVLrrteo9Fg0aJFWLRo0W1KRLfLr0dqTyuN7uwPmYynlYioaRRyGWKC3bA1ORcHLxSgUxstT1W3MuyDo9uuqLwa20/nAOBpJSK6dR38XOBop0CpvgZJWbx2WGvDQoZuuw0nLqHaIBDp64wIH2ep4xCRjVMq5Oge7AYAOJBaAKNRSJyIbicWMnTb1c1W4i0JiMhSOrXRQqOSo6iiGmdySqWOQ7cRCxm6rXJKKpFwLg8AcA8LGSKyEJVCjm6Btb0y+1PzIdgp02qwkKHb6vdjl2AUQLcgVwS6O0gdh4hakC4BWtgp5Mgrq8KlCg74bS1YyNBtZTqtxFsSEJGFqVUKdL58OYdTxbzKb2vBQoZum/T8chxOK4RcBozq7Cd1HCJqgboFuUIpl6GgSg5N265Sx6HbgIUM3TZ1vTF92nnA20UjcRoiaokc7JTo2Ka2V0Yb+6DEaeh2YCFDt826y4UMB/kSUXPqHuQKGQQ0QZ2QpKuSOg41MxYydFuczi7BqawSqBQyjOjI00pE1HycNSoEOxoBAD+d5FTslo6FDN0Wdb0xAyK8oHVQSZyGiFq69i4GCKMBh7L0SMzgnbFbMhYy1OyEELwIHhHdVk4qoDxpBwDg821nJU5DzYmFDDW7YxeLcCGvHPYqBYZ08JE6DhG1EkV7VgMA/kjMwtmcEonTUHNhIUPNrq43Jq6DDxzsJL3hOhG1ItW6C+jVRg0hgM+3nZM6DjUTFjLUrAxGwdlKRCSZB6KcAAC/HMlEen65xGmoObCQoWa166wOOSV6uDmoMCDCS+o4RNTKhLnboX+4JwxGgcXb2SvTErGQoWa15nAGAGBUZ3/YKflxI6Lbb/pdYQCA1QcuIru4UuI0ZGn8y0LNpkxfgw2JWQCA+7u3kTgNEbVWvdt5oGdbN1QZjPjqr/NSxyELYyFDzWZDYhYqqg0I8XREt0BXqeMQUSs27XKvzIq9acgv49V+WxIWMtRs6k4r3d+tDWQymcRpiKg1GxDhhU5ttKioNmDprhSp45AFsZChZpFVVIld53QAagsZIiIpyWQyTLsrFACwbHcqiiurJU5ElsJChprFL0cyIATQs60bAt0dpI5DRIShHXwR7u2Eksoa/C/hgtRxyEJYyFCz+P/TSgESJyEiqiWXy0xjZb7acR6l+hqJE5ElsJAhizuZWYxTWSWwU8oxshPvdE1E1mN0F3+083JEYXk1lu9OlToOWQALGbK4nw9dBADERXnzTtdEZFUUchmeGxwOoLZXpoRjZWweCxmyqBqDEb9cviUBTysRkTUa1dkfoeyVaTF4Bz+6KWlpadDpdNdcfzhLj9wSPZztZHApu4hDhzJueZ9JSUm3vA0iojoKuQzPDg7Hc6uO4KsdKZjUty2cNew9tlUsZKjR0tLSEBkVhYrya994zfOeF+AYdScyEn5Fnzf+Y9H9l5aWWnR7RNR6jersj0+3nMXZnFIs25WKZy6fbiLbw0KGGk2n06GivBwT/vU+fIJC663XG4D1GSoYAdw/cjhc7x9mkf0m7duOP5Z/gspK3iOFiCyjrlfm2e8O4787UzCpX1u4sFfGJrGQoZvmExS
KgPDoesuPpBfCiFx4O6vRMdpy/7vJTuMda4nI8kZ28sPC+DOmXpln2StjkzjYlyxCCIETmUUAgA5+LhKnISK6sStnMP13x3le7ddGsZAhi8gp0UNXWgWFXIb2vs5SxyEiapS7O/kh3NsJxZU1WLozVeo41AQsZMgiTmYWAwBCvRyhUSkkTkNE1DgKuQzPxdX2yizZeR5FFeyVsTUsZOiW1RiMOJVdAgCI9tdKnIaI6Obc3dEPET6Xe2V4Z2ybw0KGbtnZ3FJU1RjhrFEi0M1e6jhERDdFLpfhucERAIAlO1PYK2NjWMjQLTtx+bRSBz8XyGQyidMQEd28ER190d7HGSWVNfjvjvNSx6GbwEKGbklRRTUuFlQA4GwlIrJdcrkMM4f8f69Mbole4kTUWCxk6JbUDfINcneAiz0vJkVEtmtYtA+6BLqivMqARVvPSh2HGknSQmb+/Pno2bMnnJ2d4e3tjfvuuw/JyclmbSorKzFt2jR4eHjAyckJY8eORXZ2tkSJ6UpGo8DJS7WFTLQ/e2OIyLbJZDK8MKw9AGDl3jRcLLj27VjIekhayGzfvh3Tpk3Dnj17sGnTJlRXV2Po0KEoKysztZk5cybWrVuH1atXY/v27cjMzMSYMWMkTE11UvLKUKqvgb1KgXaejlLHISK6Zf3CPNEvzANVBiMWbD4jdRxqBElvUbBhwwaz58uWLYO3tzcOHjyIO++8E0VFRViyZAlWrlyJQYMGAQCWLl2KqKgo7NmzB3369JEiNl127OLlK/n6u0Cp4FlKImoZZg+LxK6zu/DzoYuYemc7hPvwIp/WzKr++hQV1f5hdHd3BwAcPHgQ1dXViIuLM7WJjIxEUFAQEhISGtyGXq9HcXGx2YMsr7C8Cmn5td2undrw2jFE1HJ0DXTF0A4+MArgwz9PSx2HbsBqChmj0YgZM2agX79+6NixIwAgKysLdnZ2cHV1NWvr4+ODrKysBrczf/58aLVa0yMwMLC5o7dKxzNqi862Hg7QcpAvEbUwzw9rD5kM2HAiC0fTC6WOQ9dhNYXMtGnTkJiYiFWrVt3SdubMmYOioiLTIz093UIJqY7B+P/Xjukc4CptGCKiZhDh44z7u7UBALy38RSEEBInomuxikJm+vTp+O2337B161YEBASYlvv6+qKqqgqFhYVm7bOzs+Hr69vgttRqNVxcXMweZFkXy+XQX76Sb7CHg9RxiIiaxcy4CNgp5Nh1Ng/bT+dKHYeuQdJCRgiB6dOnY82aNdiyZQtCQkLM1sfExEClUiE+Pt60LDk5GWlpaYiNjb3dcemyc6W1H5tObbSQ80q+RNRCBbo7YFLfYADA2+uTUGMwSpyIGiLprKVp06Zh5cqV+OWXX+Ds7Gwa96LVamFvbw+tVosnnngCs2bNgru7O1xcXPDMM88gNjaWM5YkYucTioIqORQyGa8dQ0RWLykp6ZZe39/DiO/sZDidXYoP1+zGkHY37oX29PREUFDQLe2XGk/SQuaLL74AAAwcONBs+dKlSzF58mQAwMcffwy5XI6xY8dCr9dj2LBh+Pzzz29zUqrj1O1uAECYtxMc7CT9+BARXVNxfu2poIkTJ97ytpx73AP3wU/i020pmPPIkxDVlddtb+/ggFNJSSxmbhNJ/xI1ZvCURqPBokWLsGjRotuQiK6ntMoIxw4DAACdAzjlmoisV0Vp7YSEkVNfQvvOMbe0LaMA/rwkUObkjmGvfo8OroZrts1OO4cV786GTqdjIXOb8L/U1GhbUsohV2mgVRnhp9VIHYeI6IY8/IMREB59y9sZ4FqC9cezcKZUib6dwuCk4Z9Pa2EVs5bI+tUYjPjtTO0F8EKdjZBxkC8RtSJhXk7w02pQYxRIOJ8ndRy6AgsZapT1iVnQlRtgKCtAkCNH7hNR6yKTydA/3BMAcPJSMXJL9BInojosZOiGhBD4747zAICSQ79Dwc4YImqF/LT2CPd2AgBsP53Li+RZCRYydEP7UvJx7GI
R7BRAyeH1UschIpLMHeGeUMplyCiswOnsUqnjEFjIUCN8dbk3ZmCwA4wVvAknEbVeLhoVeratvbHxjrO5qKrhqXapsZCh6zqXW4rNSTmQyYDREY5SxyEiklz3IFdo7VUo0xuwLzVf6jitHgsZuq4lO1MAAIMjfdDGhdMNiYiUCjnuvDzw93BaAQrKqyRO1LqxkKFryivV46eDFwEAU/qH3KA1EVHrEeLpiLYeDjAKYHsyB/5KiYUMXdO3e9KgrzGic4AWvULcpY5DRGQ1ZDIZ7ozwgkImw4X8cpzXlUkdqdViIUMNKq+qwTcJqQCAv/dvxwvgERFdxc3BDt2DXQEAf53ORTXvji0JFjLUoBV70pBXVoUgdwfc3dFX6jhERFapZ1t3OGuUKK6swR5e8VcSLGSonooqA/7z1zkAwPS7wqBU8GNCRNQQlUKOu9p7AwAOpxWioIq917cb/0JRPSv2XoCutAqB7va4v3sbqeMQEVm1EE9HRPg4QQA4mKcA5AqpI7UqLGTITGW1Af/5q/YCeNMGhkHF3hgiohsaEOEFjVKOomo5XHrcJ3WcVoV/pcjMyr1pyC3Ro42rPcZ0D5A6DhGRTXCwU6J/hBcAQHvHI7hUUiNxotaDhQyZVFYbsHh77diYaXeFwU7JjwcRUWNF+TrDW2OEXKXGfw4W8doytwn/UpHJd/vSkHO5N+aBGPbGEBHdDJlMhm5uNTBWV+JYThV+OJAudaRWgYUMATDvjXl6YCh7Y4iImsBJBRTtXAEAeH3dSaTnl0ucqOXjXysCUNsbk12sh59Wg3E92BtDRNRUxft/QQdPO5RVGTDrhyMwGHmKqTmxkCGU6mvw2ZazAIDpg8KgVnLqIBFRkwkjnu2thZNaif2pBfjy8kxQah4sZAhf/XUeeWVVaOfpiPE9AqWOQ0Rk87wdlXhldAcAwEebknEis0jiRC0XC5lWLrdEj6921P5vYfaw9rxuDBGRhYyLCcDQDj6oNgjM/P4IKqsNUkdqkfhXq5VbGH8G5VUGdAl0xXDeU4mIyGJkMhnmj+kETyc7nM4uxfsbk6WO1CKxkGnFTmeXYOW+NADAnBGRvMM1EZGFeTip8e7YzgCAJTtTsOVUtsSJWh4WMq2UEAJv/HYSBqPAsGgf9GnnIXUkIqIWaXCUDybFBgMAZn5/FBcLOCXbkljItFLbknOx44wOKoUML94dJXUcIqIW7cWRUegcoEVRRTWmrzyMqhqj1JFaDBYyrVBVjRFv/H4SAPB4vxAEezhKnIiIqGVTKxVY9Eh3uGiUOJJeiPl/JEkdqcVgIdMKLdmZgvO5ZfB0ssO0QWFSxyEiahUC3R3w4fiuAIClu1Kx/vglaQO1ECxkWpmMwgosjD8DAJgzIgouGpXEiYiIWo8hHXww9c52AIAXfjyGFF2ZxIlsHwuZVuaNdSdRUW1Ar7buGNO9jdRxiIhaneeHtUfPtm4o1ddgyjcHUFxZLXUkm8ZCphXZciobG05kQSGX4fX7ojndmohIAiqFHIse6Q5fFw3O5pTimZWHUWPg4N+mYiHTSpTqa/DSmkQAwBN3hCDS10XiRERErZe3iwb/ndQDGpUc20/n4u31p6SOZLNYyLQS7204hUtFlQhyd8DMuAip4xARtXod22jx0eXBv1/vSsF3ly9QSjeHhUwrcCA1H//bcwEAMH9MJ9jb8e7WRETW4O5Ofpg1pPY/l3PXJiLhXJ7EiWwPC5kWrryqBv9cfRRCAON7BKBfmKfUkYiI6ArPDArD6C7+qDEKPPXtQZzNKZE6kk1hIdPCzV9/ChfyyuGn1eClkR2kjkNERFeRyWR4/4HO6BroiqKKavxtyT5kFlZIHctmsJBpwXacyTWdUnr/gS7Q2vOaMURE1kijUuDryT0R6uWIS0WV+NuSvSgoq5I6lk2QtJD566+/MHr0aPj7+0Mmk2Ht2rVm64UQeOWVV+Dn5wd7e3vExcXhzJkz0oS1MfllVXh+9VEAwKOxwbgjnKe
UiIismbujHb55ojf8tBqcyy3DY8v2o7yqRupYVk/SQqasrAxdunTBokWLGlz/3nvvYeHChVi8eDH27t0LR0dHDBs2DJWVlbc5qW0RQuCFH48iu1iPdl6O+PeISKkjERFRI7Rxtcc3j/eC1l6FI+mFeOrbQ7zB5A1IWsiMGDECb775Ju6///5664QQWLBgAV5++WXce++96Ny5M7755htkZmbW67khc8t3p2JzUg7sFHJ8+nA3ONgppY5ERESNFO7jjK8n94S9SoG/Tudi5vdHeMG867DaMTIpKSnIyspCXFycaZlWq0Xv3r2RkJBwzdfp9XoUFxebPVqTYxcLTRdWevHuSET7ayVORERENysm2A1fTOwOlUKG349fwgwWM9dktYVMVlYWAMDHx8dsuY+Pj2ldQ+bPnw+tVmt6BAYGNmtOa1JQVoWnvz2EKoMRQzr4YFLftlJHIiKiJhrY3htfTIiBSiHDb8cuYeYPR1nMNMBqC5mmmjNnDoqKikyP9PR0qSPdFkajwMwfjiCjsALBHg74YFwX3kuJiMjGxXXwwaJHukMpl2Hd0Uz8c/VRGIxC6lhWxWoLGV9fXwBAdna22fLs7GzTuoao1Wq4uLiYPVqDD/5MxrbkXKiVcnwxIYZTrYmIWoih0b5YNKG2mPnlSCaeX82emStZbSETEhICX19fxMfHm5YVFxdj7969iI2NlTCZ9Vl9IB2fbzsHAHhnbCd08G8dxRsRUWsxLNoXn13umVlzOAPTVx6GvsYgdSyrIOl0ltLSUpw9e9b0PCUlBUeOHIG7uzuCgoIwY8YMvPnmmwgPD0dISAjmzp0Lf39/3HfffdKFthJpaWnQ6XQ4kavHa9vzAQAPRDkhWOTg0KGcZtlnUlJSs2yXiKilaY7fl94Ano91xQcJBdhwIgvjP92CF/q6wV5V2yfh6emJoKAgi+/X2klayBw4cAB33XWX6fmsWbMAAJMmTcKyZcvwwgsvoKysDE8++SQKCwtxxx13YMOGDdBoNFJFtgppaWmIjIpCtZ0Wvo9+CIW9C8pO7cCH776HD9H8505LS0ubfR9ERLaoOD8XADBx4sRm24cmqDO8xs7F0Wzggc+2I2f1PBj1ZbB3cMCppKRWV8xIWsgMHDgQQlz7D69MJsPrr7+O119//Tamsn46nQ56gwwRUz9DBdRwszPi3rjeUA79qVn3m7RvO/5Y/gkvSEhEdA0VpbWX/Bg59SW07xzTbPvJ18uwK1cAbSIRPfs7RFafw+p3Z0Kn07GQIetXZRDwuv9FVEANJ7USY3sGwlHd/IcyO+1cs++DiKgl8PAPRkB4dLNtPwCAf6keaw9noLgKSFSEQekR0Gz7s2ZWO9iXGlZtMOKDhAJogrtAKRO4p4v/bSliiIjIung6qTGuRyBc7VUoN8jgO+F9nMjVSx3rtmMhY0MMRoF//nAUBzL1MFbr0derBl7OaqljERGRRLT2KozvEQh3OyMU9s54bXs+fj2aKXWs24qFjI0QQuClNcfx69FMKOVA7tq34aXhRZGIiFo7ezsF7vSuQVnyLtQYgWe/O4zF289ddwxqS8JCxgYIIfDm70lYtT8dchkwo7crKs8flDoWERFZCYUc0P3yLkaFOwIA3vnjFF748ViruNYMCxkrJ4TAO3+cwpKdKQCAd8d2Rt9Ae4lTERGR1RFGPN7NBfNGd4BcBqw+eBEPf7kHOSUte6YpR4negrqL0jUXoxD46lAxNp4rBwBM6eaCUHkuL0xHREQNSkpKQpeoKLzc3x0fJhTgUFohRny0Ff++wx2hbs1z6xqpL8THQqaJ6i5KV1Fe3jw7kMnhcfcMOHUcBCGMyN/wKV5+dxNevqIJL0xHRERAwxfiU7r5w3vsXOR5BOKf6y8ib/0nKD+1w+L7lvpCfCxkmkin06GivBwT/vU+fIJCLbptowD26ZTIqJBDBoFenkYETn0awNMAeGE6IiIyd60L8VUbgb06I7Khgde9/0Lk355HB60BMpl
l9puddg4r3p0t6YX4WMjcIp+gUIte9Kiqxoj1xy8ho6IcCpkMIzr5IdTLyawNL0xHREQNaehCfMERArvP5uFgWgFOFStQZeeCodE+UCsVEqW0LA72tSKllTVYfTAdF/LLoZTLMLpL/SKGiIjoZshlMtwR7omhHXygkMtwXleG7/alI7ekZVw8j4WMlcgt0eP7A+nQlVbBXqXA2O4BCPZwlDoWERG1EFF+LhgXEwBnjRJFFdX4/kA6TmYWSx3rlrGQsQIX8sqw+mA6SvU1cHeww4M9A+Grbd13+CYiIsvzcdHg4V5BCPZwgMEosCkpG/FJ2agxGKWO1mQsZCQkhMCR9EL8cjQT1QaBADd7jOsRAK1980yRIyIislcpcG8Xf/Rp5w4ASMwsxuqDF1FUUS1xsqZhISORGoMRf57MxvbTuRACiPJ1xn1d20CjahmDr4iIyHrJZDL0DvHAfV39oVHJkVOix3f70nA+1/Yu68FCRgLFFdX44eBFnMoqgUwG9A/3xJDLg7CIiIhul2APRzzcKwi+Lhroa4xYd+wStiXn2NSpJhYyt1lafjm+25+G3BI97FUK3N+1DboHuUFmqUn9REREN8FFo8IDMQHoGugKADh6sQjfH0hHXqltzGpiIXObGI0Cu8/psOZwBiqrjfB2VuOhXoEIdHeQOhoREbVyCrkMAyK8cE8Xf9irFNCVVmHV/nQczyiy+rtos5C5DYorqvHjoYvYn1oAAIj2r50C56LhoF4iIrIeIZ6OmNA7CEHuDqgxCmw5lYPfj19CZbX13kWbV/ZtZmeyS7D5VA6qaoywU8gxOMobET7OUsciIiJqkKNaifu6+uNQWiF2n9PhXG4ZsovTMCzaBwFu1ncWgYVMM9FXG/DXGR1OXqq92JCviwbDO/pyajUREVk9mUyGmGA3BLjZ44/ELBRVVOOnQxnoGuiKvqEeUCms54QOC5lmkJpXhvikHJTqawAAPYLd0KedB2clERGRTfFx0eCRXkHYcSYXiZnFOJJeiNS8Mgzt4AM/rb3U8QCwkLEofY0BO87ocOLyJZ+19ioM7eADf1frONhEREQ3y04px+AoH4R6OWHzqWwUlldj9YGLtT02VjAOmIWMBQghcC63DNtP55p6Yayx+42IiKip2no6YmLvYGw/nYtTWSU4cKEAp5QqqNt0kDQXC5lbVFYD/Ho0E6l55QBqe2HiorytckAUERHRrdCoFBgW7YtQLydsTc5BaZUBvhPfw89JpejeXZpMLGSaqNog4NJnHDZdUsEgyiGXATHBbujZ1p29MERE1KKFeTshwM0eGw+eQUqpDFFedpJl4V/cJnprRz7cBkyCQcgQ4GqPCb2D0TfUk0UMERG1ChqVAjEeBmR+9RSiPFnI2Jy4dg4wlBWip0cNxnRvA3dH6Q4iERGRVGoKMiXdPwuZJuoXqEHGl1MQ5GjkfZKIiIgkwkKmiWQyGURVhdQxiIiIWjUWMkRERGSzWMgQERGRzWIhQ0RERDaLhQwRERHZLBYyREREZLNYyBAREZHNYiFDRERENouFDBEREdksmyhkFi1ahLZt20Kj0aB3797Yt2+f1JGIiIjIClh9IfP9999j1qxZmDdvHg4dOoQuXbpg2LBhyMnJkToaERERSczqC5mPPvoIU6ZMwWOPPYYOHTpg8eLFcHBwwNdffy11NCIiIpKYUuoA11NVVYWDBw9izpw5pmVyuRxxcXFISEho8DV6vR56vd70vKioCABQXFxs0WylpaUAgItnTkBfUW7Rbd9Idto5AEBW6mmcc3Tgfrlf7pf75X65X0n2m3sxBUDt30RL/52t254Q4voNhRXLyMgQAMTu3bvNls+ePVv06tWrwdfMmzdPAOCDDz744IMPPlrAIz09/bq1glX3yDTFnDlzMGvWLNNzo9GI/Px8eHh4QCaTSZjs2oqLixEYGIj09HS4uLhIHYfAY2KteFysE4+L9WkJx0QIgZKSEvj7+1+3nVUXMp6enlAoFMj
OzjZbnp2dDV9f3wZfo1aroVarzZa5uro2V0SLcnFxsdkPXEvFY2KdeFysE4+L9bH1Y6LVam/YxqoH+9rZ2SEmJgbx8fGmZUajEfHx8YiNjZUwGREREVkDq+6RAYBZs2Zh0qRJ6NGjB3r16oUFCxagrKwMjz32mNTRiIiISGJWX8g8+OCDyM3NxSuvvIKsrCx07doVGzZsgI+Pj9TRLEatVmPevHn1TomRdHhMrBOPi3XicbE+remYyIS40bwmIiIiIutk1WNkiIiIiK6HhQwRERHZLBYyREREZLNYyBAREZHNYiHTBPPnz0fPnj3h7OwMb29v3HfffUhOTjZrU1lZiWnTpsHDwwNOTk4YO3ZsvQv7paWlYeTIkXBwcIC3tzdmz56Nmpoaszbbtm1D9+7doVarERYWhmXLltXLs2jRIrRt2xYajQa9e/fGvn37LP4z25p33nkHMpkMM2bMMC3jMZFGRkYGJk6cCA8PD9jb26NTp044cOCAab0QAq+88gr8/Pxgb2+PuLg4nDlzxmwb+fn5mDBhAlxcXODq6oonnnjCdL+zOseOHUP//v2h0WgQGBiI9957r16W1atXIzIyEhqNBp06dcL69eub54e2cgaDAXPnzkVISAjs7e0RGhqKN954w+yeNjwuzeuvv/7C6NGj4e/vD5lMhrVr15qtt6b3vzFZJHXLN0RqhYYNGyaWLl0qEhMTxZEjR8Tdd98tgoKCRGlpqanNU089JQIDA0V8fLw4cOCA6NOnj+jbt69pfU1NjejYsaOIi4sThw8fFuvXrxeenp5izpw5pjbnz58XDg4OYtasWeLkyZPi008/FQqFQmzYsMHUZtWqVcLOzk58/fXX4sSJE2LKlCnC1dVVZGdn3543wwrt27dPtG3bVnTu3Fk899xzpuU8Jrdffn6+CA4OFpMnTxZ79+4V58+fFxs3bhRnz541tXnnnXeEVqsVa9euFUePHhX33HOPCAkJERUVFaY2w4cPF126dBF79uwRO3bsEGFhYeLhhx82rS8qKhI+Pj5iwoQJIjExUXz33XfC3t5e/Oc//zG12bVrl1AoFOK9994TJ0+eFC+//LJQqVTi+PHjt+fNsCJvvfWW8PDwEL/99ptISUkRq1evFk5OTuKTTz4xteFxaV7r168XL730kvj5558FALFmzRqz9db0/jcmi5RYyFhATk6OACC2b98uhBCisLBQqFQqsXr1alObpKQkAUAkJCQIIWo/xHK5XGRlZZnafPHFF8LFxUXo9XohhBAvvPCCiI6ONtvXgw8+KIYNG2Z63qtXLzFt2jTTc4PBIPz9/cX8+fMt/4PagJKSEhEeHi42bdokBgwYYCpkeEyk8a9//Uvccccd11xvNBqFr6+veP/9903LCgsLhVqtFt99950QQoiTJ08KAGL//v2mNn/88YeQyWQiIyNDCCHE559/Ltzc3EzHqW7f7du3Nz0fP368GDlypNn+e/fuLaZOnXprP6QNGjlypHj88cfNlo0ZM0ZMmDBBCMHjcrtdXchY0/vfmCxS46klCygqKgIAuLu7AwAOHjyI6upqxMXFmdpERkYiKCgICQkJAICEhAR06tTJ7MJ+w4YNQ3FxMU6cOGFqc+U26trUbaOqqgoHDx40ayOXyxEXF2dq09pMmzYNI0eOrPe+8ZhI49dff0WPHj0wbtw4eHt7o1u3bvjqq69M61NSUpCVlWX2fmm1WvTu3dvsuLi6uqJHjx6mNnFxcZDL5di7d6+pzZ133gk7OztTm2HDhiE5ORkFBQWmNtc7dq1J3759ER8fj9OnTwMAjh49ip07d2LEiBEAeFykZk3vf2OySI2FzC0yGo2YMWMG+vXrh44dOwIAsrKyYGdnV+9mlT4+PsjKyjK1ufrqxHXPb9SmuLgYFRUV0Ol0MBgMDbap20ZrsmrVKhw6dAjz58+vt47HRBrnz5/HF198gfDwcGzcuBFPP/00nn32WSxfvhzA/7+v13u/srKy4O3tbbZ
eqVTC3d3dIseuNR6Xf//733jooYcQGRkJlUqFbt26YcaMGZgwYQIAHhepWdP735gsUrP6WxRYu2nTpiExMRE7d+6UOkqrlp6ejueeew6bNm2CRqOROg5dZjQa0aNHD7z99tsAgG7duiExMRGLFy/GpEmTJE7Xev3www9YsWIFVq5ciejoaBw5cgQzZsyAv78/jwvZHPbI3ILp06fjt99+w9atWxEQEGBa7uvri6qqKhQWFpq1z87Ohq+vr6nN1TNm6p7fqI2Liwvs7e3h6ekJhULRYJu6bbQWBw8eRE5ODrp37w6lUgmlUont27dj4cKFUCqV8PHx4TGRgJ+fHzp06GC2LCoqCmlpaQD+/3293vvl6+uLnJwcs/U1NTXIz8+3yLFrjcdl9uzZpl6ZTp064W9/+xtmzpxp6s3kcZGWNb3/jckiNRYyTSCEwPTp07FmzRps2bIFISEhZutjYmKgUqkQHx9vWpacnIy0tDTExsYCAGJjY3H8+HGzD+KmTZvg4uJi+sUfGxtrto26NnXbsLOzQ0xMjFkbo9GI+Ph4U5vWYvDgwTh+/DiOHDlievTo0QMTJkww/ZvH5Pbr169fvUsTnD59GsHBwQCAkJAQ+Pr6mr1fxcXF2Lt3r9lxKSwsxMGDB01ttmzZAqPRiN69e5va/PXXX6iurja12bRpE9q3bw83NzdTm+sdu9akvLwccrn5r3+FQgGj0QiAx0Vq1vT+NyaL5KQebWyLnn76aaHVasW2bdvEpUuXTI/y8nJTm6eeekoEBQWJLVu2iAMHDojY2FgRGxtrWl831Xfo0KHiyJEjYsOGDcLLy6vBqb6zZ88WSUlJYtGiRQ1O9VWr1WLZsmXi5MmT4sknnxSurq5mM29aqytnLQnBYyKFffv2CaVSKd566y1x5swZsWLFCuHg4CC+/fZbU5t33nlHuLq6il9++UUcO3ZM3HvvvQ1OM+3WrZvYu3ev2LlzpwgPDzebZlpYWCh8fHzE3/72N5GYmChWrVolHBwc6k0zVSqV4oMPPhBJSUli3rx5rWKab0MmTZok2rRpY5p+/fPPPwtPT0/xwgsvmNrwuDSvkpIScfjwYXH48GEBQHz00Ufi8OHD4sKFC0II63r/G5NFSixkmgBAg4+lS5ea2lRUVIh//OMfws3NTTg4OIj7779fXLp0yWw7qampYsSIEcLe3l54enqKf/7zn6K6utqszdatW0XXrl2FnZ2daNeundk+6nz66aciKChI2NnZiV69eok9e/Y0x49tc64uZHhMpLFu3TrRsWNHoVarRWRkpPjyyy/N1huNRjF37lzh4+Mj1Gq1GDx4sEhOTjZrk5eXJx5++GHh5OQkXFxcxGOPPSZKSkrM2hw9elTccccdQq1WizZt2oh33nmnXpYffvhBRERECDs7OxEdHS1+//13y//ANqC4uFg899xzIigoSGg0GtGuXTvx0ksvmU3T5XFpXlu3bm3w78ikSZOEENb1/jcmi5RkQlxxKUciIiIiG8IxMkRERGSzWMgQERGRzWIhQ0RERDaLhQwRERHZLBYyREREZLNYyBAREZHNYiFDRERENouFDBEREdksFjJERFeZPHky7rvvPqljEFEjsJAhohvKzc3F008/jaCgIKjVavj6+mLYsGHYtWsXAEAmk2Ht2rXShrzs6NGjuOeee+Dt7Q2NRoO2bdviwQcfrHenYCJqGZRSByAi6zd27FhUVVVh+fLlaNeuHbKzsxEfH4+8vDyL7qe6uhoqlarJr8/NzcXgwYMxatQobNy4Ea6urkhNTcWvv/6KsrIyCyYlIqsh9c2eiMi6FRQUCABi27ZtDa4PDg42u+ldcHCwad3nn38u2rVrJ1QqlYiIiBDffPON2WsBiM8//1yMHj1aODg4iHnz5gkhhFi7dq3o1q2bUKvVIiQkRLz66qv1bt7ZkDVr1gilUnndtjU1NeLxxx8Xbdu2FRqNRkRERIgFCxaYtZk0aZK49957Tc8NBoN4++2
3Ta/p3LmzWL16tWl9fn6+eOSRR4Snp6fQaDQiLCxMfP311zfMS0S3jj0yRHRdTk5OcHJywtq1a9GnTx+o1Wqz9fv374e3tzeWLl2K4cOHQ6FQAADWrFmD5557DgsWLEBcXBx+++03PPbYYwgICMBdd91lev2rr76Kd955BwsWLIBSqcSOHTvw6KOPYuHChejfvz/OnTuHJ598EgAwb96862b19fVFTU0N1qxZgwceeAAymaxeG6PRiICAAKxevRoeHh7YvXs3nnzySfj5+WH8+PENbnf+/Pn49ttvsXjxYoSHh+Ovv/7CxIkT4eXlhQEDBmDu3Lk4efIk/vjjD3h6euLs2bOoqKi4qfeZiJpI6kqKiKzfjz/+KNzc3IRGoxF9+/YVc+bMEUePHjWtByDWrFlj9pq+ffuKKVOmmC0bN26cuPvuu81eN2PGDLM2gwcPFm+//bbZsv/973/Cz8+vUVlffPFFoVQqhbu7uxg+fLh47733RFZW1nVfM23aNDF27FjT8yt7ZCorK4WDg4PYvXu32WueeOIJ8fDDDwshhBg9erR47LHHGpWPiCyLg32J6IbGjh2LzMxM/Prrrxg+fDi2bduG7t27Y9myZdd8TVJSEvr162e2rF+/fkhKSjJb1qNHD7PnR48exeuvv27qCXJycsKUKVNw6dIllJeX3zDrW2+9haysLCxevBjR0dFYvHgxIiMjcfz4cVObRYsWISYmBl5eXnBycsKXX36JtLS0Brd39uxZlJeXY8iQIWaZvvnmG5w7dw4A8PTTT2PVqlXo2rUrXnjhBezevfuGOYnIMljIEFGjaDQaDBkyBHPnzsXu3bsxefLkG57qaQxHR0ez56WlpXjttddw5MgR0+P48eM4c+YMNBpNo7bp4eGBcePG4YMPPkBSUhL8/f3xwQcfAABWrVqF559/Hk888QT+/PNPHDlyBI899hiqqqoa3FZpaSkA4PfffzfLdPLkSfz4448AgBEjRuDChQuYOXMmMjMzMXjwYDz//PNNfUuI6CZwjAwRNUmHDh1MU65VKhUMBoPZ+qioKOzatQuTJk0yLdu1axc6dOhw3e12794dycnJCAsLs0hOOzs7hIaGmmYt7dq1C3379sU//vEPU5u6npWGdOjQAWq1GmlpaRgwYMA123l5eWHSpEmYNGkS+vfvj9mzZ5uKJyJqPixkiOi68vLyMG7cODz++OPo3LkznJ2dceDAAbz33nu49957AQBt27ZFfHw8+vXrB7VaDTc3N8yePRvjx49Ht27dEBcXh3Xr1uHnn3/G5s2br7u/V155BaNGjUJQUBAeeOAByOVyHD16FImJiXjzzTev+9rffvsNq1atwkMPPYSIiAgIIbBu3TqsX78eS5cuBQCEh4fjm2++wcaNGxESEoL//e9/2L9/P0JCQhrcprOzM55//nnMnDkTRqMRd9xxB4qKirBr1y64uLhg0qRJeOWVVxATE4Po6Gjo9Xr89ttviIqKasK7TUQ3TepBOkRk3SorK8W///1v0b17d6HVaoWDg4No3769ePnll0V5ebkQQohff/1VhIWFCaVSedPTr68eJCyEEBs2bBB9+/YV9vb2wsXFRfTq1Ut8+eWXN8x67tw5MWXKFBERESHs7e2Fq6ur6Nmzp1i6dKnZzzN58mSh1WqFq6urePrpp8W///1v0aVLF1Obq6dfG41GsWDBAtG+fXuhUqmEl5eXGDZsmNi+fbsQQog33nhDREVFCXt7e+Hu7i7uvfdecf78+Ru/uUR0y2RCCCF1MUVERETUFBzsS0RERDaLhQwR2YwVK1aYTYG+8hEdHS11PCKSAE8tEZHNKCkpQXZ2doPrVCoVgoODb3MiIpIaCxkiIiKyWTy1RERERDaLhQwRERHZLBYyREREZLNYyBAREZHNYiFDRERENouFDBEREdksFjJERERks/4PaH4KVuID6bkAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Гистограмма распределения цены в обучающей выборке\n", + "sns.histplot(train_data[\"Store_Sales\"], kde=True)\n", + "plt.title('Распределение цены в обучающей выборке')\n", + "plt.show()\n", + "\n", + "# Гистограмма распределения цены в контрольной выборке\n", + "sns.histplot(val_data[\"Store_Sales\"], kde=True)\n", + "plt.title('Распределение цены в контрольной выборке')\n", + "plt.show()\n", + "\n", + "# Гистограмма распределения цены в тестовой выборке\n", + "sns.histplot(test_data[\"Store_Sales\"], kde=True)\n", + "plt.title('Распределение цены в тестовой выборке')\n", + "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "1. Прогнозирование посетителей в магазине:\n", + "## Процесс конструирования признаков\n", "\n", - "Цель: Разработать модель, которая будет предсказывать посещение клиентами магазина на основе его характеристик (размер, распродажи, количество ассортимента).\n", "\n", - "Применение:\n", - "Предсказывание посещения магазинов клиентами.\n", "\n", - "2. Оптимизация параметров магазина:\n", + "### Унитарное кодирование категориальных признаков (one-hot encoding)\n", "\n", - "Цель: Определить оптимальные коэффициенты для различных факторов, влияющих на посещаемость магазина чтобы максимизировать прибыль компании при наименьших затратах на пространство магазина и его ассортиментт.\n", + "One-hot encoding: Преобразование категориальных признаков в бинарные векторы." + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", "\n", - "Применение:\n", - "Создавать магазин с максимальной посещаемостью." 
+ "# Пример категориальных признаков\n", + "categorical_features = [\n", + " \"Store ID \",\n", + " \"Store_Area\"\n", + "]\n", + "\n", + "# Применение one-hot encoding\n", + "train_data_encoded = pd.get_dummies(train_data, columns=categorical_features)\n", + "val_data_encoded = pd.get_dummies(val_data, columns=categorical_features)\n", + "test_data_encoded = pd.get_dummies(test_data, columns=categorical_features)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "1. Прогнозирование посетителей в магазине" + "### Дискретизация числовых признаков " + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Store_AreaStore_Area
01659(1259.667, 1744.333]
11461(1259.667, 1744.333]
21340(1259.667, 1744.333]
31451(1259.667, 1744.333]
41770(1744.333, 2229.0]
51442(1259.667, 1744.333]
61542(1259.667, 1744.333]
71261(1259.667, 1744.333]
81090(775.0, 1259.667]
91030(775.0, 1259.667]
101187(775.0, 1259.667]
111751(1744.333, 2229.0]
121746(1744.333, 2229.0]
131615(1259.667, 1744.333]
141469(1259.667, 1744.333]
151644(1259.667, 1744.333]
161578(1259.667, 1744.333]
171703(1259.667, 1744.333]
181438(1259.667, 1744.333]
191940(1744.333, 2229.0]
\n", + "
" + ], + "text/plain": [ + " Store_Area Store_Area\n", + "0 1659 (1259.667, 1744.333]\n", + "1 1461 (1259.667, 1744.333]\n", + "2 1340 (1259.667, 1744.333]\n", + "3 1451 (1259.667, 1744.333]\n", + "4 1770 (1744.333, 2229.0]\n", + "5 1442 (1259.667, 1744.333]\n", + "6 1542 (1259.667, 1744.333]\n", + "7 1261 (1259.667, 1744.333]\n", + "8 1090 (775.0, 1259.667]\n", + "9 1030 (775.0, 1259.667]\n", + "10 1187 (775.0, 1259.667]\n", + "11 1751 (1744.333, 2229.0]\n", + "12 1746 (1744.333, 2229.0]\n", + "13 1615 (1259.667, 1744.333]\n", + "14 1469 (1259.667, 1744.333]\n", + "15 1644 (1259.667, 1744.333]\n", + "16 1578 (1259.667, 1744.333]\n", + "17 1703 (1259.667, 1744.333]\n", + "18 1438 (1259.667, 1744.333]\n", + "19 1940 (1744.333, 2229.0]" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.preprocessing import OneHotEncoder\n", + "import numpy as np\n", + "\n", + "\n", + "labels = [\"small\", \"medium\", \"large\"]\n", + "num_bins = 3\n", + "\n", + "hist1, bins1 = np.histogram(\n", + " df[\"Store_Area\"].fillna(df[\"Store_Area\"].median()), bins=num_bins\n", + ")\n", + "bins1, hist1\n", + "\n", + "pd.concat([df[\"Store_Area\"], pd.cut(df[\"Store_Area\"], list(bins1))], axis=1).head(20)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Store_AreaStore_Area
01659medium
11461medium
21340medium
31451medium
41770large
51442medium
61542medium
71261medium
81090small
91030small
101187small
111751large
121746large
131615medium
141469medium
151644medium
161578medium
171703medium
181438medium
191940large
\n", + "
" + ], + "text/plain": [ + " Store_Area Store_Area\n", + "0 1659 medium\n", + "1 1461 medium\n", + "2 1340 medium\n", + "3 1451 medium\n", + "4 1770 large\n", + "5 1442 medium\n", + "6 1542 medium\n", + "7 1261 medium\n", + "8 1090 small\n", + "9 1030 small\n", + "10 1187 small\n", + "11 1751 large\n", + "12 1746 large\n", + "13 1615 medium\n", + "14 1469 medium\n", + "15 1644 medium\n", + "16 1578 medium\n", + "17 1703 medium\n", + "18 1438 medium\n", + "19 1940 large" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat(\n", + " [df[\"Store_Area\"], pd.cut(df[\"Store_Area\"], list(bins1), labels=labels)], axis=1\n", + ").head(20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Ручной синтез" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Среднее значение поля 'Daily_Customer_Count': 786.3504464285714\n", - " Store ID Store_Area Items_Available Daily_Customer_Count Store_Sales \\\n", - "0 1 1659 1961 530 66490 \n", - "1 2 1461 1752 210 39820 \n", - "2 3 1340 1609 720 54010 \n", - "3 4 1451 1748 620 53730 \n", - "4 5 1770 2111 450 46620 \n", - "\n", - " above_average_count customers_volatility \n", - "0 0 1550 \n", - "1 0 1550 \n", - "2 0 1550 \n", - "3 0 1550 \n", - "4 0 1550 \n" - ] - } - ], + "outputs": [], "source": [ - "import pandas as pd\n", - "\n", - "# Загружаем набор данных\n", - "df = pd.read_csv(\".//static//csv//Stores.csv\")\n", - "\n", - "# Устанавливаем случайное состояние\n", - "random_state = 42\n", - "\n", - "# Рассчитываем среднее значение посещаемости\n", - "average_count = df['Daily_Customer_Count'].mean()\n", - "print(f\"Среднее значение поля 'Daily_Customer_Count': {average_count}\")\n", - "\n", - "# Создаем новую переменную, указывающую, превышает ли посещаемость среднюю\n", - "df[\"above_average_count\"] = (df[\"Daily_Customer_Count\"] > 
average_count).astype(int)\n", - "\n", - "# Рассчитываем волатильность (разницу между максимальной и минимальной посещаемостью)\n", - "df[\"customers_volatility\"] = df[\"Daily_Customer_Count\"].max() - df[\"Daily_Customer_Count\"].min()\n", - "\n", - "# Выводим первые строки измененной таблицы для проверки\n", - "print(df.head())" + "# Пример синтеза признака коэффициента отношения размера ассортимента к его распродажам\n", + "train_data_encoded[\"koeff\"] = (\n", + " train_data_encoded[\"Items_Available\"] / train_data_encoded[\"Store_Sales\"]\n", + ")\n", + "val_data_encoded[\"koeff\"] = (\n", + " val_data_encoded[\"Items_Available\"] / val_data_encoded[\"Store_Sales\"]\n", + ")\n", + "test_data_encoded[\"koeff\"] = (\n", + " test_data_encoded[\"Items_Available\"] / test_data_encoded[\"Store_Sales\"]\n", + ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "2. Оптимизация параметров магазина:" + "Масштабирование признаков - это процесс преобразования числовых признаков таким образом, чтобы они имели одинаковый масштаб. Это важно для многих алгоритмов машинного обучения, которые чувствительны к масштабу признаков, таких как линейная регрессия, метод опорных векторов (SVM) и нейронные сети." ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Средняя посещаемость для 'Store_Area':\n", - "Store_Area\n", - "775 1090.0\n", - "780 790.0\n", - "854 660.0\n", - "869 850.0\n", - "891 630.0\n", - " ... \n", - "2063 810.0\n", - "2067 790.0\n", - "2169 600.0\n", - "2214 740.0\n", - "2229 660.0\n", - "Name: Daily_Customer_Count, Length: 583, dtype: float64\n", - "\n", - "Средняя посещаемость для 'Items_Available':\n", - "Items_Available\n", - "932 1090.0\n", - "951 790.0\n", - "1018 660.0\n", - "1050 850.0\n", - "1059 870.0\n", - " ... 
\n", - "2492 790.0\n", - "2493 810.0\n", - "2617 600.0\n", - "2647 740.0\n", - "2667 660.0\n", - "Name: Daily_Customer_Count, Length: 616, dtype: float64\n", - "\n", - "Средняя посещаемость для 'Store_Sales':\n", - "Store_Sales\n", - "14920 990.0\n", - "16370 880.0\n", - "17670 660.0\n", - "20270 870.0\n", - "21300 850.0\n", - " ... \n", - "101820 820.0\n", - "102310 1310.0\n", - "102920 680.0\n", - "105150 980.0\n", - "116320 860.0\n", - "Name: Daily_Customer_Count, Length: 816, dtype: float64\n", - "\n", - "Средняя посещаемость для комбинации 'Store_Area' и 'Items_Available':\n", - "Store_Area Items_Available\n", - "775 932 1090.0\n", - "780 951 790.0\n", - "854 1018 660.0\n", - "869 1050 850.0\n", - "891 1073 630.0\n", - " ... \n", - "2063 2493 810.0\n", - "2067 2492 790.0\n", - "2169 2617 600.0\n", - "2214 2647 740.0\n", - "2229 2667 660.0\n", - "Name: Daily_Customer_Count, Length: 892, dtype: float64\n", - "\n", - "Средняя посещаемость для комбинации 'Store_Sales' и 'Items_Available':\n", - "Store_Sales Items_Available\n", - "14920 1508 990.0\n", - "16370 1790 880.0\n", - "17670 1877 660.0\n", - "20270 1946 870.0\n", - "21300 1686 850.0\n", - " ... \n", - "101820 1758 820.0\n", - "102310 1587 1310.0\n", - "102920 1638 680.0\n", - "105150 2104 980.0\n", - "116320 2414 860.0\n", - "Name: Daily_Customer_Count, Length: 896, dtype: float64\n", - "\n", - "Средняя посещаемость для комбинации 'Store_Sales' и 'Store_Area':\n", - "Store_Sales Store_Area\n", - "14920 1250 990.0\n", - "16370 1477 880.0\n", - "17670 1537 660.0\n", - "20270 1624 870.0\n", - "21300 1397 850.0\n", - " ... 
\n", - "101820 1486 820.0\n", - "102310 1303 1310.0\n", - "102920 1365 680.0\n", - "105150 1775 980.0\n", - "116320 1989 860.0\n", - "Name: Daily_Customer_Count, Length: 896, dtype: float64\n", - "\n" - ] - } - ], + "outputs": [], "source": [ - "import pandas as pd\n", + "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n", "\n", - "# Загружаем набор данных\n", - "df = pd.read_csv(\".//static//csv//Stores.csv\")\n", + "# Пример масштабирования числовых признаков\n", + "numerical_features = [\"Daily_Customer_Count\", \"Items_Available\"]\n", "\n", - "# Устанавливаем случайное состояние\n", - "random_state = 42\n", - "\n", - "# Рассчитываем среднюю посещаемость для каждого значения каждого признака\n", - "for column in [\n", - " \"Store_Area\",\n", - " \"Items_Available\",\n", - " \"Store_Sales\"\n", - "]:\n", - " print(f\"Средняя посещаемость для '{column}':\")\n", - " print(df.groupby(column)[\"Daily_Customer_Count\"].mean())\n", - " print()\n", - "\n", - "\n", - "print(\"Средняя посещаемость для комбинации 'Store_Area' и 'Items_Available':\")\n", - "print(df.groupby([\"Store_Area\", \"Items_Available\"])[\"Daily_Customer_Count\"].mean())\n", - "print()\n", - "\n", - "\n", - "print(\"Средняя посещаемость для комбинации 'Store_Sales' и 'Items_Available':\")\n", - "print(df.groupby([\"Store_Sales\", \"Items_Available\"])[\"Daily_Customer_Count\"].mean())\n", - "print()\n", - "\n", - "\n", - "print(\"Средняя посещаемость для комбинации 'Store_Sales' и 'Store_Area':\")\n", - "print(df.groupby([\"Store_Sales\", \"Store_Area\"])[\"Daily_Customer_Count\"].mean())\n", - "print()" + "scaler = StandardScaler()\n", + "train_data_encoded[numerical_features] = scaler.fit_transform(train_data_encoded[numerical_features])\n", + "val_data_encoded[numerical_features] = scaler.transform(val_data_encoded[numerical_features])\n", + "test_data_encoded[numerical_features] = scaler.transform(test_data_encoded[numerical_features])" ] }, { "cell_type": "markdown", "metadata": 
{}, "source": [ - "### Выбор ориентира:\n", - "1. Прогнозирование стоимости страховых взносов:\n", - "Ориентир:\n", - "\n", - "R² (коэффициент детерминации): 0.75 - 0.85\n", - "\n", - "MAE (средняя абсолютная ошибка): 150 - 300 человек\n", - "\n", - "RMSE (среднеквадратичная ошибка): 175 - 315 человек\n" + "### Конструирование признаков с применением фреймворка Featuretools" ] }, { @@ -274,283 +584,62 @@ "execution_count": 70, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "MAE: 241.24369535006045\n", - "MSE: 82946.49105226391\n", - "RMSE: 288.004324711043\n", - "R²: -0.008816097180501359\n", - "Ориентиры для прогнозирования не достигнуты.\n", - "Средняя посещаемость 'Store_Area':\n", - "Store_Area\n", - "775 1090.0\n", - "780 790.0\n", - "854 660.0\n", - "869 850.0\n", - "891 630.0\n", - " ... \n", - "2063 810.0\n", - "2067 790.0\n", - "2169 600.0\n", - "2214 740.0\n", - "2229 660.0\n", - "Name: Daily_Customer_Count, Length: 583, dtype: float64\n", - "\n", - "Средняя посещаемость 'Items_Available':\n", - "Items_Available\n", - "932 1090.0\n", - "951 790.0\n", - "1018 660.0\n", - "1050 850.0\n", - "1059 870.0\n", - " ... \n", - "2492 790.0\n", - "2493 810.0\n", - "2617 600.0\n", - "2647 740.0\n", - "2667 660.0\n", - "Name: Daily_Customer_Count, Length: 616, dtype: float64\n", - "\n", - "Средняя посещаемость 'Store_Sales':\n", - "Store_Sales\n", - "14920 990.0\n", - "16370 880.0\n", - "17670 660.0\n", - "20270 870.0\n", - "21300 850.0\n", - " ... \n", - "101820 820.0\n", - "102310 1310.0\n", - "102920 680.0\n", - "105150 980.0\n", - "116320 860.0\n", - "Name: Daily_Customer_Count, Length: 816, dtype: float64\n", - "\n", - "Средняя стоимость страховых взносов для комбинации 'Store_Area' и 'Items_Available':\n", - "Store_Area Items_Available\n", - "775 932 1090.0\n", - "780 951 790.0\n", - "854 1018 660.0\n", - "869 1050 850.0\n", - "891 1073 630.0\n", - " ... 
\n", - "2063 2493 810.0\n", - "2067 2492 790.0\n", - "2169 2617 600.0\n", - "2214 2647 740.0\n", - "2229 2667 660.0\n", - "Name: Daily_Customer_Count, Length: 892, dtype: float64\n", - "\n", - "Средняя стоимость страховых взносов для комбинации 'Items_Available' и 'Store_Sales':\n", - "Items_Available Store_Sales\n", - "932 42530 1090.0\n", - "951 25600 790.0\n", - "1018 77740 660.0\n", - "1050 52540 850.0\n", - "1059 75110 870.0\n", - " ... \n", - "2492 70230 790.0\n", - "2493 51480 810.0\n", - "2617 67080 600.0\n", - "2647 65900 740.0\n", - "2667 87410 660.0\n", - "Name: Daily_Customer_Count, Length: 896, dtype: float64\n", - "\n" - ] - }, { "name": "stderr", "output_type": "stream", "text": [ - "d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n", - " warnings.warn(\n" + "d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\featuretools\\entityset\\entityset.py:1733: UserWarning: index id not found in dataframe, creating new integer column\n", + " warnings.warn(\n", + "d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\featuretools\\synthesis\\deep_feature_synthesis.py:169: UserWarning: Only one dataframe in entityset, changing max_depth to 1 since deeper features cannot be created\n", + " warnings.warn(\n", + "d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\featuretools\\computational_backends\\feature_set_calculator.py:143: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. 
To retain the old behavior, exclude the relevant entries before the concat operation.\n", + " df = pd.concat([df, default_df], sort=True)\n", + "d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\woodwork\\logical_types.py:841: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n", + " series = series.replace(ww.config.get_option(\"nan_values\"), np.nan)\n", + "d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\featuretools\\computational_backends\\feature_set_calculator.py:143: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n", + " df = pd.concat([df, default_df], sort=True)\n", + "d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\woodwork\\logical_types.py:841: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. 
import featuretools as ft


def build_shop_entityset(frame, entityset_id):
    """Wrap one data split in its own single-table featuretools EntitySet.

    Parameters
    ----------
    frame : pandas.DataFrame
        Encoded split (train, validation or test) to register as the
        'shops' dataframe.
    entityset_id : str
        Identifier given to the created EntitySet.

    Returns
    -------
    featuretools.EntitySet
        EntitySet containing a single dataframe named 'shops' indexed by 'id'.
    """
    # Register a copy rather than the caller's frame. The UserWarning recorded
    # for this notebook ("A Woodwork-initialized DataFrame was provided ...
    # ignored: index") shows that re-running with the same frame makes
    # featuretools ignore `index='id'`; a fresh copy keeps the cell idempotent.
    shops = frame.copy()
    if 'id' not in shops.columns:
        # NOTE(review): assumes the pandas index uniquely identifies a shop
        # when no explicit 'id' column exists -- confirm against the cell that
        # builds the *_encoded frames.
        shops['id'] = shops.index
    entityset = ft.EntitySet(id=entityset_id)
    return entityset.add_dataframe(dataframe_name='shops', dataframe=shops, index='id')


# Define the entity set on the training split only; feature definitions are
# learned from training data.
es = build_shop_entityset(train_data_encoded, 'shop_data')

# Generate features. With a single dataframe featuretools lowers max_depth to 1
# itself (and warns about it), so request depth 1 explicitly.
feature_matrix, feature_defs = ft.dfs(entityset=es, target_dataframe_name='shops', max_depth=1)

# Apply the SAME feature definitions to the validation and test splits.
# Each split needs its own EntitySet: the training EntitySet does not contain
# the validation/test rows, so asking it for their instance ids only yields
# default (empty) feature rows instead of real values.
val_es = build_shop_entityset(val_data_encoded, 'shop_data_val')
test_es = build_shop_entityset(test_data_encoded, 'shop_data_test')
val_feature_matrix = ft.calculate_feature_matrix(features=feature_defs, entityset=val_es)
test_feature_matrix = ft.calculate_feature_matrix(features=feature_defs, entityset=test_es)
Прогнозирование посещаемости магазинов:\n", - "Задача: Регрессия\n", + "### Оценка качества каждого набора признаков\n", + "Предсказательная способность\n", + "Метрики: RMSE, MAE, R²\n", "\n", - "Свойства алгоритмов:\n", + "Методы: Обучение модели на обучающей выборке и оценка на контрольной и тестовой выборках.\n", "\n", - "Линейная регрессия:\n", - "Применимость: Хорошо подходит для задач, где зависимость между признаками и целевой переменной линейна.\n", - "Преимущества: Проста в реализации, интерпретируема.\n", - "Недостатки: Может плохо работать, если зависимость нелинейна.\n", + "Скорость вычисления\n", + "Методы: Измерение времени выполнения генерации признаков и обучения модели.\n", "\n", - "Деревья решений (регрессия):\n", - "Применимость: Подходит для задач с нелинейными зависимостями.\n", - "Преимущества: Может обрабатывать категориальные признаки, не требует масштабирования данных.\n", - "Недостатки: Подвержены переобучению, могут давать нестабильные результаты.\n", + "Надежность\n", + "Методы: Кросс-валидация, анализ чувствительности модели к изменениям в данных.\n", "\n", - "Случайный лес (регрессия):\n", - "Применимость: Хорошо подходит для задач с нелинейными зависимостями и большим количеством признаков.\n", - "Преимущества: Устойчив к переобучению, может обрабатывать категориальные признаки.\n", - "Недостатки: Менее интерпретируем, чем линейная регрессия.\n", + "Корреляция\n", + "Методы: Анализ корреляционной матрицы признаков, удаление мультиколлинеарных признаков.\n", "\n", - "Градиентный бустинг (регрессия):\n", - "Применимость: Подходит для задач с нелинейными зависимостями и сложными взаимосвязями между признаками.\n", - "Преимущества: Может достигать высокой точности, устойчив к переобучению.\n", - "Недостатки: Сложнее в настройке, чем случайный лес, менее интерпретируем.\n", - "\n", - "Нейронные сети (регрессия):\n", - "Применимость: Подходит для задач с очень сложными зависимостями и большим количеством данных.\n", - "Преимущества: Может 
моделировать очень сложные зависимости.\n", - "Недостатки: Требует большого количества данных, сложнее в настройке и интерпретации.\n", - "\n", - "Вывод:\n", - "\n", - "Линейная регрессия: Может быть хорошим выбором для начала, особенно если зависимость между признаками и целевой переменной линейна.\n", - "\n", - "Деревья решений и случайный лес: Подходят для задач с нелинейными зависимостями.\n", - "\n", - "Градиентный бустинг: Может давать более высокую точность, чем случайный лес, но требует больше времени на настройку.\n", - "\n", - "Нейронные сети: Могут быть излишними для этой задачи, если данных недостаточно много.\n", - "\n", - "2. Оптимизация тарифной сетки:\n", - "Задача: Классификация (группировка клиентов по группам риска)\n", - "\n", - "Свойства алгоритмов:\n", - "\n", - "Логистическая регрессия:\n", - "Применимость: Хорошо подходит для задач бинарной классификации, где зависимость между признаками и целевой переменной линейна.\n", - "Преимущества: Проста в реализации, интерпретируема.\n", - "Недостатки: Может плохо работать, если зависимость нелинейна.\n", - "\n", - "Деревья решений (классификация):\n", - "Применимость: Подходит для задач с нелинейными зависимостями.\n", - "Преимущества: Может обрабатывать категориальные признаки, не требует масштабирования данных.\n", - "Недостатки: Подвержены переобучению, могут давать нестабильные результаты.\n", - "\n", - "Случайный лес (классификация):\n", - "Применимость: Хорошо подходит для задач с нелинейными зависимостями и большим количеством признаков.\n", - "Преимущества: Устойчив к переобучению, может обрабатывать категориальные признаки.\n", - "Недостатки: Менее интерпретируем, чем линейная регрессия.\n", - "\n", - "Градиентный бустинг (классификация):\n", - "Применимость: Подходит для задач с нелинейными зависимостями и сложными взаимосвязями между признаками.\n", - "Преимущества: Может достигать высокой точности, устойчив к переобучению.\n", - "Недостатки: Сложнее в настройке, чем случайный лес, менее 
интерпретируем.\n", - "\n", - "Нейронные сети (классификация):\n", - "Применимость: Подходит для задач с очень сложными зависимостями и большим количеством данных.\n", - "Преимущества: Может моделировать очень сложные зависимости.\n", - "Недостатки: Требует большого количества данных, сложнее в настройке и интерпретации.\n", - "\n", - "Вывод:\n", - "\n", - "Логистическая регрессия: Может быть хорошим выбором для начала, особенно если зависимость между признаками и целевой переменной линейна.\n", - "\n", - "Деревья решений и случайный лес: Подходят для задач с нелинейными зависимостями.\n", - "\n", - "Градиентный бустинг: Может давать более высокую точность, чем случайный лес, но требует больше времени на настройку.\n", - "\n", - "Нейронные сети: Могут быть излишними для этой задачи, если данных недостаточно много.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "1. Прогнозирование стоимости страховых взносов:\n", - "Выбранные модели:\n", - "\n", - "Линейная регрессия\n", - "\n", - "Случайный лес (регрессия)\n", - "\n", - "Градиентный бустинг (регрессия)\n", - "\n", - "2. Оптимизация тарифной сетки:\n", - "Выбранные модели:\n", - "\n", - "Логистическая регрессия\n", - "\n", - "Случайный лес (классификация)\n", - "\n", - "Градиентный бустинг (классификация)" + "Цельность\n", + "Методы: Проверка логической связи между признаками и целевой переменной, интерпретация результатов модели." 
] }, { @@ -558,156 +647,38 @@ "execution_count": 71, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Результаты для задачи регрессии:\n", - "Model: Linear Regression\n", - "MAE: 241.24369535006045\n", - "MSE: 82946.49105226391\n", - "RMSE: 288.004324711043\n", - "R²: -0.008816097180501359\n", - "\n" - ] - }, { "name": "stderr", "output_type": "stream", "text": [ - "d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n", + "d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\featuretools\\entityset\\entityset.py:724: UserWarning: A Woodwork-initialized DataFrame was provided, so the following parameters were ignored: index\n", " warnings.warn(\n", - "d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: Random Forest Regression\n", - "MAE: 240.68666666666667\n", - "MSE: 85748.043\n", - "RMSE: 292.82766775016324\n", - "R²: -0.042889276963148815\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. 
To calculate the root mean squared error, use the function'root_mean_squared_error'.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: Gradient Boosting Regression\n", - "MAE: 243.53822748120598\n", - "MSE: 86937.70201509264\n", - "RMSE: 294.85200018838714\n", - "R²: -0.05735820927548918\n", - "\n", - "Результаты для задачи классификации:\n", - "Model: Logistic Regression\n", - "Accuracy: 0.43333333333333335\n", - "\n", - "Model: Random Forest Classification\n", - "Accuracy: 0.4777777777777778\n", - "\n", - "Model: Gradient Boosting Classification\n", - "Accuracy: 0.4888888888888889\n", - "\n" + "d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\featuretools\\synthesis\\deep_feature_synthesis.py:169: UserWarning: Only one dataframe in entityset, changing max_depth to 1 since deeper features cannot be created\n", + " warnings.warn(\n", + "d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\featuretools\\computational_backends\\feature_set_calculator.py:143: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n", + " df = pd.concat([df, default_df], sort=True)\n", + "d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\woodwork\\logical_types.py:841: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. 
import featuretools as ft

# NOTE(review): this cell repeats the feature-generation cell that appears
# earlier in the notebook; consider keeping only one of them.


def build_shop_entityset(frame, entityset_id):
    """Create a single-table EntitySet ('shops', indexed by 'id') for one split.

    Parameters
    ----------
    frame : pandas.DataFrame
        Encoded data split to register.
    entityset_id : str
        Identifier of the new EntitySet.

    Returns
    -------
    featuretools.EntitySet
        The EntitySet with the 'shops' dataframe added.
    """
    # A copy is registered so that the caller's frame is left untouched. The
    # UserWarning captured in this cell's output shows `index` being ignored
    # when an already Woodwork-initialized frame is passed again.
    shops = frame.copy()
    if 'id' not in shops.columns:
        # NOTE(review): falls back to the pandas index as the shop id --
        # verify this matches how the *_encoded frames are built.
        shops['id'] = shops.index
    entityset = ft.EntitySet(id=entityset_id)
    return entityset.add_dataframe(dataframe_name='shops', dataframe=shops, index='id')


# Define the entity set from the training split.
es = build_shop_entityset(train_data_encoded, 'shop_data')

# Generate features (depth 1: featuretools cannot go deeper with one dataframe
# and would otherwise warn and lower the depth on its own).
feature_matrix, feature_defs = ft.dfs(entityset=es, target_dataframe_name='shops', max_depth=1)

# Compute the same features for the validation and test splits from their own
# data. The training EntitySet has no rows for those ids, so it cannot be
# reused for them.
val_es = build_shop_entityset(val_data_encoded, 'shop_data_val')
test_es = build_shop_entityset(test_data_encoded, 'shop_data_test')
val_feature_matrix = ft.calculate_feature_matrix(features=feature_defs, entityset=val_es)
test_feature_matrix = ft.calculate_feature_matrix(features=feature_defs, entityset=test_es)
To calculate the root mean squared error, use the function'root_mean_squared_error'.\n", " warnings.warn(\n" ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", - "from sklearn.preprocessing import StandardScaler\n", - "from sklearn.linear_model import LinearRegression\n", - "from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.compose import ColumnTransformer\n", - "from sklearn.preprocessing import OneHotEncoder\n", - "from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n", - "\n", - "# Загружаем набор данных\n", - "df = pd.read_csv(\".//static//csv//Stores.csv\")\n", - "\n", - "\n", - "numerical_cols = [\"Store_Area\", \"Items_Available\", \"Store_Sales\"]\n", - "\n", - "\n", - "preprocessor = ColumnTransformer(\n", - " transformers=[\n", - " ('num', StandardScaler(), numerical_cols)\n", - " ])\n", - "\n", - "# Список моделей для задачи регрессии\n", - "models_reg = {\n", - " \"Linear Regression\": LinearRegression(),\n", - " \"Random Forest Regression\": RandomForestRegressor(),\n", - " \"Gradient Boosting Regression\": GradientBoostingRegressor()\n", - "}\n", - "\n", - "# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n", - "X_reg = df[numerical_cols]\n", - "y_reg = df[\"Daily_Customer_Count\"]\n", - "\n", - "# Разделяем данные на обучающую и тестовую выборки для задачи регрессии\n", - "X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n", - "\n", - "# Обучаем и оцениваем модели для задачи регрессии\n", - "print(\"Результаты для задачи регрессии:\")\n", - "for name, model in models_reg.items():\n", - " pipeline = Pipeline(steps=[\n", - " ('preprocessor', preprocessor),\n", - " ('model', model)\n", - " ])\n", - " pipeline.fit(X_train_reg, y_train_reg)\n", - " y_pred_reg = pipeline.predict(X_test_reg)\n", - " mae = mean_absolute_error(y_test_reg, 
y_pred_reg)\n", - " mse = mean_squared_error(y_test_reg, y_pred_reg)\n", - " rmse = mean_squared_error(y_test_reg, y_pred_reg, squared=False)\n", - " r2 = r2_score(y_test_reg, y_pred_reg)\n", - " print(f\"Model: {name}\")\n", - " print(f\"MAE: {mae}\")\n", - " print(f\"MSE: {mse}\")\n", - " print(f\"RMSE: {rmse}\")\n", - " print(f\"R²: {r2}\")\n", - " print()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "2. Оптимизация характеристик магазина:\n", - "Конвейер для задачи классификации:" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Результаты для задачи классификации:\n", - "Model: Logistic Regression\n", - "Accuracy: 0.46111111111111114\n", - "\n", - "Model: Random Forest Classification\n", - "Accuracy: 0.4722222222222222\n", - "\n", - "Model: Gradient Boosting Classification\n", - "Accuracy: 0.4722222222222222\n", - "\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.preprocessing import StandardScaler\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.compose import ColumnTransformer\n", - "from sklearn.preprocessing import OneHotEncoder\n", - "from sklearn.metrics import accuracy_score\n", - "\n", - "# Загружаем набор данных\n", - "df = pd.read_csv(\".//static//csv//Stores.csv\")\n", - "\n", - "numerical_cols = [\"Store_Area\", \"Items_Available\", \"Store_Sales\"]\n", - "\n", - "# Создаем преобразователь для категориальных и числовых столбцов\n", - "preprocessor = ColumnTransformer(\n", - " transformers=[\n", - " ('num', StandardScaler(), numerical_cols)\n", - " ])\n", - "\n", - "# Список моделей для задачи классификации\n", - "models_class = {\n", - " \"Logistic Regression\": 
LogisticRegression(),\n", - " \"Random Forest Classification\": RandomForestClassifier(),\n", - " \"Gradient Boosting Classification\": GradientBoostingClassifier()\n", - "}\n", - "\n", - "# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n", - "X_class = df[numerical_cols]\n", - "y_class = (df[\"Daily_Customer_Count\"] > df[\"Daily_Customer_Count\"].mean()).astype(int)\n", - "\n", - "# Разделяем данные на обучающую и тестовую выборки для задачи классификации\n", - "X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)\n", - "\n", - "# Обучаем и оцениваем модели для задачи классификации\n", - "print(\"Результаты для задачи классификации:\")\n", - "for name, model in models_class.items():\n", - " pipeline = Pipeline(steps=[\n", - " ('preprocessor', preprocessor),\n", - " ('model', model)\n", - " ])\n", - " pipeline.fit(X_train_class, y_train_class)\n", - " y_pred_class = pipeline.predict(X_test_class)\n", - " accuracy = accuracy_score(y_test_class, y_pred_class)\n", - " print(f\"Model: {name}\")\n", - " print(f\"Accuracy: {accuracy}\")\n", - " print()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "1. Прогнозирование посещения:\n", - "\n", - "Настройка гиперпараметров для задачи регрессии:" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Результаты для задачи регрессии:\n", - "Model: Linear Regression\n", - "Best Parameters: {}\n", - "MAE: 240.99246411452697\n", - "MSE: 82771.10925011222\n", - "RMSE: 287.6996858707222\n", - "R²: -0.0066830595689202354\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. 
To calculate the root mean squared error, use the function'root_mean_squared_error'.\n", - " warnings.warn(\n", - "d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: Random Forest Regression\n", - "Best Parameters: {'model__max_depth': 10, 'model__n_estimators': 100}\n", - "MAE: 242.55834193962204\n", - "MSE: 87591.55194330998\n", - "RMSE: 295.9586997256712\n", - "R²: -0.06531049664000643\n", - "\n", - "Model: Gradient Boosting Regression\n", - "Best Parameters: {'model__learning_rate': 0.01, 'model__max_depth': 3, 'model__n_estimators': 100}\n", - "MAE: 241.05326789654333\n", - "MSE: 82428.16277986151\n", - "RMSE: 287.1030525436146\n", - "R²: -0.0025120582972431027\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. 
To calculate the root mean squared error, use the function'root_mean_squared_error'.\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "from sklearn.model_selection import train_test_split, GridSearchCV\n", - "from sklearn.preprocessing import StandardScaler\n", - "from sklearn.linear_model import LinearRegression\n", - "from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.compose import ColumnTransformer\n", - "from sklearn.preprocessing import OneHotEncoder\n", - "from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n", - "\n", - "# Загружаем набор данных\n", - "df = pd.read_csv(\".//static//csv//Stores.csv\")\n", - "\n", - "# Определяем категориальные и числовые столбцы\n", - "\n", - "numerical_cols = [\"Store_Area\", \"Items_Available\", \"Store_Sales\"]\n", - "\n", - "# Создаем преобразователь для категориальных и числовых столбцов\n", - "preprocessor = ColumnTransformer(\n", - " transformers=[\n", - " ('num', StandardScaler(), numerical_cols)\n", - " ])\n", - "\n", - "# Список моделей и их гиперпараметров для задачи регрессии\n", - "models_reg = {\n", - " \"Linear Regression\": (LinearRegression(), {}),\n", - " \"Random Forest Regression\": (RandomForestRegressor(), {\n", - " 'model__n_estimators': [100, 200],\n", - " 'model__max_depth': [None, 10, 20]\n", - " }),\n", - " \"Gradient Boosting Regression\": (GradientBoostingRegressor(), {\n", - " 'model__n_estimators': [100, 200],\n", - " 'model__learning_rate': [0.01, 0.1],\n", - " 'model__max_depth': [3, 5]\n", - " })\n", - "}\n", - "\n", - "# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n", - "X_reg = df[numerical_cols]\n", - "y_reg = df['Daily_Customer_Count']\n", - "\n", - "# Разделяем данные на обучающую и тестовую выборки для задачи регрессии\n", - "X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, 
y_reg, test_size=0.2, random_state=42)\n", - "\n", - "# Обучаем и оцениваем модели для задачи регрессии\n", - "print(\"Результаты для задачи регрессии:\")\n", - "for name, (model, params) in models_reg.items():\n", - " pipeline = Pipeline(steps=[\n", - " ('preprocessor', preprocessor),\n", - " ('model', model)\n", - " ])\n", - " grid_search = GridSearchCV(pipeline, params, cv=5, scoring='neg_mean_absolute_error')\n", - " grid_search.fit(X_train_reg, y_train_reg)\n", - " best_model = grid_search.best_estimator_\n", - " y_pred_reg = best_model.predict(X_test_reg)\n", - " mae = mean_absolute_error(y_test_reg, y_pred_reg)\n", - " mse = mean_squared_error(y_test_reg, y_pred_reg)\n", - " rmse = mean_squared_error(y_test_reg, y_pred_reg, squared=False)\n", - " r2 = r2_score(y_test_reg, y_pred_reg)\n", - " print(f\"Model: {name}\")\n", - " print(f\"Best Parameters: {grid_search.best_params_}\")\n", - " print(f\"MAE: {mae}\")\n", - " print(f\"MSE: {mse}\")\n", - " print(f\"RMSE: {rmse}\")\n", - " print(f\"R²: {r2}\")\n", - " print()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "2. 
Оптимизация характеристик:\n", - "\n", - "Настройка гиперпараметров для задачи классификации:" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Результаты для задачи классификации:\n", - "Model: Logistic Regression\n", - "Best Parameters: {'model__C': 10, 'model__solver': 'lbfgs'}\n", - "Accuracy: 0.46111111111111114\n", - "\n", - "Model: Random Forest Classification\n", - "Best Parameters: {'model__max_depth': 10, 'model__n_estimators': 100}\n", - "Accuracy: 0.49444444444444446\n", - "\n", - "Model: Gradient Boosting Classification\n", - "Best Parameters: {'model__learning_rate': 0.1, 'model__max_depth': 3, 'model__n_estimators': 100}\n", - "Accuracy: 0.4777777777777778\n", - "\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "from sklearn.model_selection import train_test_split, GridSearchCV\n", - "from sklearn.preprocessing import StandardScaler\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.compose import ColumnTransformer\n", - "from sklearn.preprocessing import OneHotEncoder\n", - "from sklearn.metrics import accuracy_score\n", - "\n", - "# Загружаем набор данных\n", - "df = pd.read_csv(\".//static//csv//Stores.csv\")\n", - "\n", - "# Определяем категориальные и числовые столбцы\n", - "\n", - "numerical_cols = [\"Store_Area\", \"Items_Available\", \"Store_Sales\"]\n", - "\n", - "# Создаем преобразователь для категориальных и числовых столбцов\n", - "preprocessor = ColumnTransformer(\n", - " transformers=[\n", - " ('num', StandardScaler(), numerical_cols)\n", - " ])\n", - "\n", - "# Список моделей и их гиперпараметров для задачи классификации\n", - "models_class = {\n", - " \"Logistic Regression\": (LogisticRegression(), {\n", - " 'model__C': [0.1, 1, 10],\n", - " 'model__solver': 
['liblinear', 'lbfgs']\n", - " }),\n", - " \"Random Forest Classification\": (RandomForestClassifier(), {\n", - " 'model__n_estimators': [100, 200],\n", - " 'model__max_depth': [None, 10, 20]\n", - " }),\n", - " \"Gradient Boosting Classification\": (GradientBoostingClassifier(), {\n", - " 'model__n_estimators': [100, 200],\n", - " 'model__learning_rate': [0.01, 0.1],\n", - " 'model__max_depth': [3, 5]\n", - " })\n", - "}\n", - "\n", - "# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n", - "X_class = df[numerical_cols]\n", - "y_class = (df['Daily_Customer_Count'] > df['Daily_Customer_Count'].mean()).astype(int)\n", - "\n", - "# Разделяем данные на обучающую и тестовую выборки для задачи классификации\n", - "X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)\n", - "\n", - "# Обучаем и оцениваем модели для задачи классификации\n", - "print(\"Результаты для задачи классификации:\")\n", - "for name, (model, params) in models_class.items():\n", - " pipeline = Pipeline(steps=[\n", - " ('preprocessor', preprocessor),\n", - " ('model', model)\n", - " ])\n", - " grid_search = GridSearchCV(pipeline, params, cv=5, scoring='accuracy')\n", - " grid_search.fit(X_train_class, y_train_class)\n", - " best_model = grid_search.best_estimator_\n", - " y_pred_class = best_model.predict(X_test_class)\n", - " accuracy = accuracy_score(y_test_class, y_pred_class)\n", - " print(f\"Model: {name}\")\n", - " print(f\"Best Parameters: {grid_search.best_params_}\")\n", - " print(f\"Accuracy: {accuracy}\")\n", - " print()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "1. Прогнозирование посещаемости::\n", - "Задача: Регрессия\n", - "\n", - "Выбор метрик:\n", - "\n", - "MAE (Mean Absolute Error): Средняя абсолютная ошибка. Показывает среднее отклонение предсказанных значений от фактических. 
Эта метрика легко интерпретируется, так как она измеряется в тех же единицах, что и целевая переменная \n", - "\n", - "MSE (Mean Squared Error): Среднеквадратичная ошибка. Показывает среднее квадратичное отклонение предсказанных значений от фактических. Эта метрика чувствительна к выбросам, так как ошибки возводятся в квадрат.\n", - "\n", - "RMSE (Root Mean Squared Error): Квадратный корень из среднеквадратичной ошибки. Показывает среднее отклонение предсказанных значений от фактических в тех же единицах, что и целевая переменная. Эта метрика также чувствительна к выбросам, но легче интерпретируется, чем MSE.\n", - "\n", - "R² (R-squared): Коэффициент детерминации. Показывает, какую долю дисперсии целевой переменной объясняет модель. Значение R² близкое к 1 указывает на хорошее качество модели.\n", - "\n", - "Обоснование:\n", - "\n", - "MAE: Хорошо подходит для задач, где важно понимать среднее отклонение предсказаний от фактических значений.\n", - "\n", - "MSE и RMSE: Полезны для задач, где важно минимизировать влияние выбросов, так как они возводят ошибки в квадрат.\n", - "\n", - "R²: Позволяет оценить, насколько хорошо модель объясняет вариацию целевой переменной.\n", - "\n", - "2. Оптимизация характеристик:\n", - "Задача: Классификация\n", - "\n", - "Выбор метрик:\n", - "\n", - "Accuracy: Доля правильных предсказаний среди всех предсказаний. Эта метрика показывает общую точность модели.\n", - "\n", - "Precision: Доля правильных положительных предсказаний среди всех положительных предсказаний. Эта метрика важна, если важно минимизировать количество ложноположительных результатов.\n", - "\n", - "Recall (Sensitivity): Доля правильных положительных предсказаний среди всех фактических положительных случаев. Эта метрика важна, если важно минимизировать количество ложноотрицательных результатов.\n", - "\n", - "F1-score: Гармоническое среднее между precision и recall. 
Эта метрика показывает баланс между precision и recall.\n", - "\n", - "Обоснование:\n", - "\n", - "Accuracy: Хорошо подходит для задач, где классы сбалансированы.\n", - "\n", - "Precision и Recall: Важны для задач, где важно минимизировать ошибки определенного типа (ложноположительные или ложноотрицательные).\n", - "\n", - "F1-score: Позволяет оценить баланс между precision и recall." - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Результаты для задачи регрессии:\n", - "Model: Linear Regression\n", - "Best Parameters: {}\n", - "MAE: 240.99246411452697\n", - "MSE: 82771.10925011222\n", - "RMSE: 287.6996858707222\n", - "R²: -0.0066830595689202354\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n", - " warnings.warn(\n", - "d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: Random Forest Regression\n", - "Best Parameters: {'model__max_depth': 10, 'model__n_estimators': 200}\n", - "MAE: 244.5229418633195\n", - "MSE: 87788.51054250356\n", - "RMSE: 296.29125964581465\n", - "R²: -0.06770595668688673\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. 
To calculate the root mean squared error, use the function'root_mean_squared_error'.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: Gradient Boosting Regression\n", - "Best Parameters: {'model__learning_rate': 0.01, 'model__max_depth': 3, 'model__n_estimators': 100}\n", - "MAE: 240.99176421026533\n", - "MSE: 82412.10586641222\n", - "RMSE: 287.075087505712\n", - "R²: -0.002316770075243779\n", - "\n", - "Результаты для задачи классификации:\n", - "Model: Logistic Regression\n", - "Best Parameters: {'model__C': 10, 'model__solver': 'lbfgs'}\n", - "Accuracy: 0.46111111111111114\n", - "Precision: 0.475\n", - "Recall: 0.2\n", - "F1-score: 0.2814814814814815\n", - "\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: Random Forest Classification\n", - "Best Parameters: {'model__max_depth': 10, 'model__n_estimators': 200}\n", - "Accuracy: 0.4888888888888889\n", - "Precision: 0.5211267605633803\n", - "Recall: 0.3894736842105263\n", - "F1-score: 0.4457831325301205\n", - "\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: Gradient Boosting Classification\n", - "Best Parameters: {'model__learning_rate': 0.1, 'model__max_depth': 3, 'model__n_estimators': 100}\n", - "Accuracy: 0.4722222222222222\n", - "Precision: 0.5\n", - "Recall: 0.42105263157894735\n", - "F1-score: 0.45714285714285713\n", - "\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
import seaborn as sns


def regression_metrics(y_true, y_hat):
    """Return ``(rmse, r2, mae)`` for one set of predictions.

    RMSE is computed as the square root of ``mean_squared_error`` instead of
    passing ``squared=False``: that keyword was deprecated in scikit-learn 1.4
    and removed in 1.6, so the explicit root works on every version.
    For a single-column target (as used below) the two forms are equivalent.
    """
    rmse_value = mean_squared_error(y_true, y_hat) ** 0.5
    return rmse_value, r2_score(y_true, y_hat), mean_absolute_error(y_true, y_hat)


# Drop rows that contain NaN in any column.
# NOTE(review): the three feature matrices are defined in earlier cells (not shown here).
feature_matrix = feature_matrix.dropna()
val_feature_matrix = val_feature_matrix.dropna()
test_feature_matrix = test_feature_matrix.dropna()

# Separate the target column ("Store_Sales") from the features in each subset.
X_train = feature_matrix.drop("Store_Sales", axis=1)
y_train = feature_matrix["Store_Sales"]
X_val = val_feature_matrix.drop("Store_Sales", axis=1)
y_val = val_feature_matrix["Store_Sales"]
X_test = test_feature_matrix.drop("Store_Sales", axis=1)
y_test = test_feature_matrix["Store_Sales"]

# Model choice: random forest with a fixed seed so results are reproducible.
model = RandomForestRegressor(random_state=42)

# Fit on the training subset only.
model.fit(X_train, y_train)

# Predict on the held-out test subset and score the predictions.
y_pred = model.predict(X_test)
rmse, r2, mae = regression_metrics(y_test, y_pred)

print(f"RMSE: {rmse}")
print(f"R²: {r2}")
print(f"MAE: {mae}")

# 5-fold cross-validation on the training subset.
# The scorer returns negated MSE per fold; the value printed is the square
# root of the mean fold MSE (not the mean of per-fold RMSEs).
scores = cross_val_score(model, X_train, y_train, cv=5, scoring='neg_mean_squared_error')
rmse_cv = (-scores.mean())**0.5
print(f"Cross-validated RMSE: {rmse_cv}")

# Feature importances of the fitted forest, aligned with the training columns.
feature_importances = model.feature_importances_
feature_names = X_train.columns

# Overfitting check: score the model on the data it was trained on and
# compare these numbers with the test metrics printed above.
y_train_pred = model.predict(X_train)
rmse_train, r2_train, mae_train = regression_metrics(y_train, y_train_pred)

print(f"Train RMSE: {rmse_train}")
print(f"Train R²: {r2_train}")
print(f"Train MAE: {mae_train}")
df['Daily_Customer_Count']\n", - "\n", - "# Разделяем данные на обучающую и тестовую выборки для задачи регрессии\n", - "X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n", - "\n", - "# Обучаем и оцениваем модели для задачи регрессии\n", - "print(\"Результаты для задачи регрессии:\")\n", - "for name, (model, params) in models_reg.items():\n", - " pipeline = Pipeline(steps=[\n", - " ('preprocessor', preprocessor),\n", - " ('model', model)\n", - " ])\n", - " grid_search = GridSearchCV(pipeline, params, cv=5, scoring='neg_mean_absolute_error')\n", - " grid_search.fit(X_train_reg, y_train_reg)\n", - " best_model = grid_search.best_estimator_\n", - " y_pred_reg = best_model.predict(X_test_reg)\n", - " mae = mean_absolute_error(y_test_reg, y_pred_reg)\n", - " mse = mean_squared_error(y_test_reg, y_pred_reg)\n", - " rmse = mean_squared_error(y_test_reg, y_pred_reg, squared=False)\n", - " r2 = r2_score(y_test_reg, y_pred_reg)\n", - " print(f\"Model: {name}\")\n", - " print(f\"Best Parameters: {grid_search.best_params_}\")\n", - " print(f\"MAE: {mae}\")\n", - " print(f\"MSE: {mse}\")\n", - " print(f\"RMSE: {rmse}\")\n", - " print(f\"R²: {r2}\")\n", - " print()\n", - "\n", - "# Список моделей и их гиперпараметров для задачи классификации\n", - "models_class = {\n", - " \"Logistic Regression\": (LogisticRegression(), {\n", - " 'model__C': [0.1, 1, 10],\n", - " 'model__solver': ['liblinear', 'lbfgs']\n", - " }),\n", - " \"Random Forest Classification\": (RandomForestClassifier(), {\n", - " 'model__n_estimators': [100, 200],\n", - " 'model__max_depth': [None, 10, 20]\n", - " }),\n", - " \"Gradient Boosting Classification\": (GradientBoostingClassifier(), {\n", - " 'model__n_estimators': [100, 200],\n", - " 'model__learning_rate': [0.01, 0.1],\n", - " 'model__max_depth': [3, 5]\n", - " })\n", - "}\n", - "\n", - "# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n", - "X_class = 
df[numerical_cols]\n", - "y_class = (df['Daily_Customer_Count'] > df['Daily_Customer_Count'].mean()).astype(int)\n", - "\n", - "# Разделяем данные на обучающую и тестовую выборки для задачи классификации\n", - "X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)\n", - "\n", - "# Обучаем и оцениваем модели для задачи классификации\n", - "print(\"Результаты для задачи классификации:\")\n", - "for name, (model, params) in models_class.items():\n", - " pipeline = Pipeline(steps=[\n", - " ('preprocessor', preprocessor),\n", - " ('model', model)\n", - " ])\n", - " grid_search = GridSearchCV(pipeline, params, cv=5, scoring='accuracy')\n", - " grid_search.fit(X_train_class, y_train_class)\n", - " best_model = grid_search.best_estimator_\n", - " y_pred_class = best_model.predict(X_test_class)\n", - " accuracy = accuracy_score(y_test_class, y_pred_class)\n", - " precision = precision_score(y_test_class, y_pred_class)\n", - " recall = recall_score(y_test_class, y_pred_class)\n", - " f1 = f1_score(y_test_class, y_pred_class)\n", - " print(f\"Model: {name}\")\n", - " print(f\"Best Parameters: {grid_search.best_params_}\")\n", - " print(f\"Accuracy: {accuracy}\")\n", - " print(f\"Precision: {precision}\")\n", - " print(f\"Recall: {recall}\")\n", - " print(f\"F1-score: {f1}\")\n", - " print()\n", - "\n", - " # Визуализация матрицы ошибок\n", - " cm = confusion_matrix(y_test_class, y_pred_class)\n", - " disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Less', 'More'])\n", - " disp.plot(cmap=plt.cm.Blues)\n", - " plt.title(f'Confusion Matrix for {name}')\n", - " plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Давайте проанализируем полученные значения метрик и определим, являются ли они нормальными или их можно улучшить.\n", - "\n", - "### Оценка смещения и дисперсии для задачи регрессии:\n", - "\n", - "### Вывод для задачи регрессии:\n", - "\n", - "- 
**Random Forest Regression** демонстрирует наилучшие результаты по метрикам MAE и R², что указывает на высокую точность и стабильность модели.\n", - "- **Linear Regression** и **Gradient Boosting Regression** также показывают хорошие результаты, но уступают случайному лесу.\n", - "\n", - "### Вывод для задачи классификации:\n", - "\n", - "- **Random Forest Classification** демонстрирует наилучшие результаты по всем метрикам (Accuracy, Precision, Recall, F1-score), что указывает на высокую точность и стабильность модели.\n", - "- **Logistic Regression** и **Gradient Boosting Classification** также показывают хорошие результаты, но уступают случайному лесу.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Для оценки смещения (bias) и дисперсии (variance) моделей можно использовать метод перекрестной проверки (cross-validation). Этот метод позволяет оценить, насколько хорошо модель обобщается на новых данных.\n", - "\n", - "Оценка смещения и дисперсии для задачи регрессии:\n", - "Для задачи регрессии мы будем использовать метрики MAE (Mean Absolute Error) и R² (R-squared) для оценки смещения и дисперсии.\n", - "\n", - "Оценка смещения и дисперсии для задачи классификации:\n", - "Для задачи классификации мы будем использовать метрики Accuracy, Precision, Recall и F1-score для оценки смещения и дисперсии.\n", - "\n", - "Пример кода для оценки смещения и дисперсии:" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Оценка смещения и дисперсии для задачи регрессии:\n", - "Model: Linear Regression\n", - "MAE (Cross-Validation): Mean = 214.80552977981765, Std = 10.606512171542404\n", - "R² (Cross-Validation): Mean = -0.013983192308878256, Std = 0.013712813782736416\n", - "\n", - "Model: Random Forest Regression\n", - "MAE (Cross-Validation): Mean = 228.04118684047177, Std = 8.752812633688961\n", - "R² (Cross-Validation): Mean = 
-0.16773777274246124, Std = 0.07089525362334798\n", - "\n", - "Model: Gradient Boosting Regression\n", - "MAE (Cross-Validation): Mean = 223.01691070195233, Std = 7.525579341977898\n", - "R² (Cross-Validation): Mean = -0.1007213971850566, Std = 0.039722456407795335\n", - "\n", - "Оценка смещения и дисперсии для задачи классификации:\n", - "Model: Logistic Regression\n", - "Accuracy (Cross-Validation): Mean = 0.5055307262569833, Std = 0.03499561917769727\n", - "Precision (Cross-Validation): Mean = 0.5065468552510806, Std = 0.054654647753909255\n", - "Recall (Cross-Validation): Mean = 0.36069969356486214, Std = 0.041986149284426406\n", - "F1-score (Cross-Validation): Mean = 0.41699563277139867, Std = 0.022647838103859376\n", - "\n", - "Model: Random Forest Classification\n", - "Accuracy (Cross-Validation): Mean = 0.47995654872749843, Std = 0.02347679112801281\n", - "Precision (Cross-Validation): Mean = 0.4767585025913199, Std = 0.027370716762614142\n", - "Recall (Cross-Validation): Mean = 0.44468845760980596, Std = 0.05499181588361489\n", - "F1-score (Cross-Validation): Mean = 0.47024453023626417, Std = 0.04502303274822186\n", - "\n", - "Model: Gradient Boosting Classification\n", - "Accuracy (Cross-Validation): Mean = 0.5178895096213532, Std = 0.027332603426564073\n", - "Precision (Cross-Validation): Mean = 0.5084858601973745, Std = 0.022621295137266188\n", - "Recall (Cross-Validation): Mean = 0.49668028600612874, Std = 0.04700469023993552\n", - "F1-score (Cross-Validation): Mean = 0.5055891495803455, Std = 0.03687694960165771\n", - "\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "from sklearn.model_selection import cross_val_score\n", - "from sklearn.preprocessing import StandardScaler\n", - "from sklearn.linear_model import LinearRegression, LogisticRegression\n", - "from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n", - "from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n", - "from 
sklearn.pipeline import Pipeline\n", - "from sklearn.compose import ColumnTransformer\n", - "from sklearn.preprocessing import OneHotEncoder\n", - "\n", - "# Загружаем набор данных\n", - "df = pd.read_csv(\".//static//csv//Stores.csv\")\n", - "\n", - "# Определяем категориальные и числовые столбцы\n", - "\n", - "numerical_cols = [\"Store_Area\", \"Items_Available\", \"Store_Sales\"]\n", - "\n", - "# Создаем преобразователь для категориальных и числовых столбцов\n", - "preprocessor = ColumnTransformer(\n", - " transformers=[\n", - " ('num', StandardScaler(), numerical_cols)\n", - " ])\n", - "\n", - "# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n", - "X_reg = df[numerical_cols]\n", - "y_reg = df['Daily_Customer_Count']\n", - "\n", - "# Список моделей для задачи регрессии\n", - "models_reg = {\n", - " \"Linear Regression\": LinearRegression(),\n", - " \"Random Forest Regression\": RandomForestRegressor(),\n", - " \"Gradient Boosting Regression\": GradientBoostingRegressor()\n", - "}\n", - "\n", - "# Оценка смещения и дисперсии для задачи регрессии\n", - "print(\"Оценка смещения и дисперсии для задачи регрессии:\")\n", - "for name, model in models_reg.items():\n", - " pipeline = Pipeline(steps=[\n", - " ('preprocessor', preprocessor),\n", - " ('model', model)\n", - " ])\n", - " mae_scores = -cross_val_score(pipeline, X_reg, y_reg, cv=5, scoring='neg_mean_absolute_error')\n", - " r2_scores = cross_val_score(pipeline, X_reg, y_reg, cv=5, scoring='r2')\n", - " print(f\"Model: {name}\")\n", - " print(f\"MAE (Cross-Validation): Mean = {mae_scores.mean()}, Std = {mae_scores.std()}\")\n", - " print(f\"R² (Cross-Validation): Mean = {r2_scores.mean()}, Std = {r2_scores.std()}\")\n", - " print()\n", - "\n", - "# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n", - "X_class = df[numerical_cols]\n", - "y_class = (df['Daily_Customer_Count'] > df['Daily_Customer_Count'].mean()).astype(int)\n", - "\n", - "# 
Список моделей для задачи классификации\n", - "models_class = {\n", - " \"Logistic Regression\": LogisticRegression(),\n", - " \"Random Forest Classification\": RandomForestClassifier(),\n", - " \"Gradient Boosting Classification\": GradientBoostingClassifier()\n", - "}\n", - "\n", - "# Оценка смещения и дисперсии для задачи классификации\n", - "print(\"Оценка смещения и дисперсии для задачи классификации:\")\n", - "for name, model in models_class.items():\n", - " pipeline = Pipeline(steps=[\n", - " ('preprocessor', preprocessor),\n", - " ('model', model)\n", - " ])\n", - " accuracy_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='accuracy')\n", - " precision_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='precision')\n", - " recall_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='recall')\n", - " f1_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='f1')\n", - " print(f\"Model: {name}\")\n", - " print(f\"Accuracy (Cross-Validation): Mean = {accuracy_scores.mean()}, Std = {accuracy_scores.std()}\")\n", - " print(f\"Precision (Cross-Validation): Mean = {precision_scores.mean()}, Std = {precision_scores.std()}\")\n", - " print(f\"Recall (Cross-Validation): Mean = {recall_scores.mean()}, Std = {recall_scores.std()}\")\n", - " print(f\"F1-score (Cross-Validation): Mean = {f1_scores.mean()}, Std = {f1_scores.std()}\")\n", - " print()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "from sklearn.model_selection import cross_val_score\n", - "from sklearn.preprocessing import StandardScaler\n", - "from sklearn.linear_model import LinearRegression, LogisticRegression\n", - "from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n", - "from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.compose import ColumnTransformer\n", - "from sklearn.preprocessing import OneHotEncoder\n", - "\n", - "# Загружаем набор данных\n", - "df = pd.read_csv(\".//static//csv//Stores.csv\")\n", - "\n", - "# Определяем категориальные и числовые столбцы\n", - "numerical_cols = [\"Store_Area\", \"Items_Available\", \"Store_Sales\"]\n", - "\n", - "# Создаем преобразователь для категориальных и числовых столбцов\n", - "preprocessor = ColumnTransformer(\n", - " transformers=[\n", - " ('num', StandardScaler(), numerical_cols)\n", - " ])\n", - "\n", - "# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n", - "X_reg = df[numerical_cols]\n", - "y_reg = df['Daily_Customer_Count']\n", - "\n", - "# Список моделей для задачи регрессии\n", - "models_reg = {\n", - " \"Linear Regression\": LinearRegression(),\n", - " \"Random Forest Regression\": RandomForestRegressor(),\n", - " \"Gradient Boosting Regression\": GradientBoostingRegressor()\n", - "}\n", - "\n", - "# Оценка смещения и дисперсии для задачи регрессии\n", - "mae_means = []\n", - "mae_stds = []\n", - "r2_means = []\n", - "r2_stds = []\n", - "\n", - "for name, model in models_reg.items():\n", - " pipeline = Pipeline(steps=[\n", - " ('preprocessor', preprocessor),\n", - " ('model', model)\n", - " ])\n", - " mae_scores = -cross_val_score(pipeline, X_reg, y_reg, cv=5, scoring='neg_mean_absolute_error')\n", - " r2_scores = 
cross_val_score(pipeline, X_reg, y_reg, cv=5, scoring='r2')\n", - " mae_means.append(mae_scores.mean())\n", - " mae_stds.append(mae_scores.std())\n", - " r2_means.append(r2_scores.mean())\n", - " r2_stds.append(r2_scores.std())\n", - "\n", - "# Визуализация результатов для задачи регрессии\n", - "fig, ax = plt.subplots(1, 2, figsize=(12, 6))\n", - "\n", - "ax[0].bar(models_reg.keys(), mae_means, yerr=mae_stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n", - "ax[0].set_ylabel('MAE')\n", - "ax[0].set_title('Mean Absolute Error (MAE) for Regression Models')\n", - "ax[0].yaxis.grid(True)\n", - "\n", - "ax[1].bar(models_reg.keys(), r2_means, yerr=r2_stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n", - "ax[1].set_ylabel('R²')\n", - "ax[1].set_title('R-squared (R²) for Regression Models')\n", - "ax[1].yaxis.grid(True)\n", - "\n", - "plt.tight_layout()\n", - "plt.show()\n", - "\n", - "# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n", - "X_class = df[numerical_cols]\n", - "y_class = (df['Daily_Customer_Count'] > df['Daily_Customer_Count'].mean()).astype(int)\n", - "\n", - "# Список моделей для задачи классификации\n", - "models_class = {\n", - " \"Logistic Regression\": LogisticRegression(),\n", - " \"Random Forest Classification\": RandomForestClassifier(),\n", - " \"Gradient Boosting Classification\": GradientBoostingClassifier()\n", - "}\n", - "\n", - "# Оценка смещения и дисперсии для задачи классификации\n", - "accuracy_means = []\n", - "accuracy_stds = []\n", - "precision_means = []\n", - "precision_stds = []\n", - "recall_means = []\n", - "recall_stds = []\n", - "f1_means = []\n", - "f1_stds = []\n", - "\n", - "for name, model in models_class.items():\n", - " pipeline = Pipeline(steps=[\n", - " ('preprocessor', preprocessor),\n", - " ('model', model)\n", - " ])\n", - " accuracy_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='accuracy')\n", - " precision_scores = 
cross_val_score(pipeline, X_class, y_class, cv=5, scoring='precision')\n", - " recall_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='recall')\n", - " f1_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='f1')\n", - " accuracy_means.append(accuracy_scores.mean())\n", - " accuracy_stds.append(accuracy_scores.std())\n", - " precision_means.append(precision_scores.mean())\n", - " precision_stds.append(precision_scores.std())\n", - " recall_means.append(recall_scores.mean())\n", - " recall_stds.append(recall_scores.std())\n", - " f1_means.append(f1_scores.mean())\n", - " f1_stds.append(f1_scores.std())\n", - "\n", - "# Визуализация результатов для задачи классификации\n", - "fig, ax = plt.subplots(2, 2, figsize=(12, 12))\n", - "\n", - "ax[0, 0].bar(models_class.keys(), accuracy_means, yerr=accuracy_stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n", - "ax[0, 0].set_ylabel('Accuracy')\n", - "ax[0, 0].set_title('Accuracy for Classification Models')\n", - "ax[0, 0].yaxis.grid(True)\n", - "\n", - "ax[0, 1].bar(models_class.keys(), precision_means, yerr=precision_stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n", - "ax[0, 1].set_ylabel('Precision')\n", - "ax[0, 1].set_title('Precision for Classification Models')\n", - "ax[0, 1].yaxis.grid(True)\n", - "\n", - "ax[1, 0].bar(models_class.keys(), recall_means, yerr=recall_stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n", - "ax[1, 0].set_ylabel('Recall')\n", - "ax[1, 0].set_title('Recall for Classification Models')\n", - "ax[1, 0].yaxis.grid(True)\n", - "\n", - "ax[1, 1].bar(models_class.keys(), f1_means, yerr=f1_stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n", - "ax[1, 1].set_ylabel('F1-score')\n", - "ax[1, 1].set_title('F1-score for Classification Models')\n", - "ax[1, 1].yaxis.grid(True)\n", - "\n", - "plt.tight_layout()\n", + "# Визуализация результатов\n", + "plt.figure(figsize=(10, 6))\n", + "plt.scatter(y_test, y_pred, 
alpha=0.5)\n", + "plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=2)\n", + "plt.xlabel('Actual Sales')\n", + "plt.ylabel('Predicted Sales')\n", + "plt.title(\"Actual vs Predicted Sales\")\n", "plt.show()" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Точность предсказаний: Модель показывает довольно высокий R² (0.9975), что указывает на хорошее объяснение вариации распродаж. Значения RMSE и MAE довольно низки, что говорит о том, что модель достаточно точно предсказывает продажи.\n", + "\n", + "Переобучение: Разница между RMSE на обучающей и тестовой выборках не очень большая, что указывает на то, что переобучение не является критическим. Однако стоит быть осторожным и продолжать мониторинг этого показателя.\n" + ] } ], "metadata": {