2024-11-01 21:16:02 +04:00
{
"cells": [
{
"cell_type": "code",
2024-11-30 13:19:01 +04:00
"execution_count": 1,
2024-11-01 21:16:02 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y',\n",
" 'z'],\n",
" dtype='object') \n",
"\n",
"<bound method DataFrame.info of carat cut color clarity depth table price x y z\n",
"1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43\n",
"2 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31\n",
"3 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31\n",
"4 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63\n",
"5 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75\n",
"... ... ... ... ... ... ... ... ... ... ...\n",
"53939 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 3.74\n",
"53940 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 3.64\n",
"53941 0.71 Premium E SI1 60.5 55.0 2756 5.79 5.74 3.49\n",
"53942 0.71 Premium F SI1 59.8 62.0 2756 5.74 5.73 3.43\n",
"53943 0.70 Very Good E VS2 60.5 59.0 2757 5.71 5.76 3.47\n",
"\n",
"[53943 rows x 10 columns]> \n",
"\n"
]
}
],
"source": [
"import numpy as np\n",
"import pandas as pd \n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"\n",
"df = pd.read_csv(\"..//static//csv//DiamondsPrices2022.csv\", index_col=\"Unnamed: 0\")\n",
"\n",
"print(df.columns, \"\\n\")\n",
"\n",
"print(df.info, \"\\n\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Бизнес цели\n",
"1. Оптимизация ценообразования. Анализ между характиристик и цен. Это поможет опеделять цену камня в зависимости от е г о качеств.\n",
"2. Разделение камней для разного сигмента рынка. В зависимости от характиристик камня делать е г о более доступным или премиальным."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Цели технического проекта\n",
"\n",
"Для первой БЦ\n",
"1. Разработка модели предсказания стоимости бриллианта\n",
"2. Анализ факторов, влияющих на стоимость\n",
"\n",
"Для второй БЦ\n",
"1. Создание системы кластеризации бриллиантов"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Нужно выявить какие проблемы есть в данных.\n",
"Начнем с поиском зашумленности"
]
},
{
"cell_type": "code",
2024-11-30 13:19:01 +04:00
"execution_count": 2,
2024-11-01 21:16:02 +04:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAiUAAAGJCAYAAABVW0PjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABQDUlEQVR4nO3deXhU9aH/8ffMJJmsk5CQBUyAsIRVRAExblVAUkWvVNqrlgrWpdUHaQWrllul1C609iJaRW1dwF8rRb1WW0FBBMEFkEWiLLKFYIBsQEgm+zIzvz8mMyRkIcskc5J8Xs8zD5lzvnPOdybCfPyuJpfL5UJERETEz8z+roCIiIgIKJSIiIiIQSiUiIiIiCEolIiIiIghKJSIiIiIISiUiIiIiCEolIiIiIghKJSIiIiIISiUiIiIiCEolIiIiIghKJSI9GDLly/HZDKxY8eOBucGDBjAjTfe2KrXSOu9//77LFy40N/VEDEEhRIRET96//33+c1vfuPvaogYgkKJiIgPlZaW+rsKIl2WQomItMuyZcswmUzs2rWrwbk//OEPWCwWTpw4AcA111yDyWRi2rRpDcr+9Kc/xWQyMWrUqPPec8CAAZhMJkwmE2azmYSEBG699VaysrJaVOcPPviA73znO0RERGCz2Rg/fjwrVqzwnv/000/5wQ9+QL9+/bBarSQlJTF37lzKy8vrXefOO+8kPDycjIwMbrjhBiIiIpgxY0aLr3HnnXeydOlSAO/7MZlMLXoPIt1RgL8rICJd2/e//31mz57N66+/zsUXX1zv3Ouvv84111zDBRdc4D0WHBzM6tWryc/PJy4uDoDy8nLeeOMNgoODW3zfq666ip/85Cc4nU727NnD008/TXZ2Np9++mmzr1u+fDl33XUXI0eOZP78+URFRbFr1y7WrFnDD3/4QwDeeustysrKuP/++4mJiWHbtm08++yzHD9+nLfeeqve9WpqakhLS+PKK6/kf//3fwkNDW3xNX7605+SnZ3NunXr+Pvf/97i9y7SXSmUiEi7REREMG3aNP75z3/y5JNPYja7G2B37drFvn37ePjhh+uVHzRoEBaLhb///e889NBDALz99ttEREQwevRoCgoKWnTfgQMH8qMf/cj7/MSJE7z77rvNvqaoqIif/exnXHrppWzcuLFeCHK5XN6f//SnPxESEuJ9/pOf/ITBgwfzP//zP2RlZdGvXz/vucrKSn7wgx+waNGievdqyTVSU1NJSUlh3bp19d6LSE+l7hsRabeZM2eSnZ3Nxx9/7D32+uuvExISwvTp0xuU//GPf8yyZcu8z5ctW8asWbO8gaYlKisrOXXqFPn5+axbt44NGzYwadKkZl+zbt06iouL+eUvf9mgVaZut0ndMFFaWsqpU6e4/PLLcblcjXZT3X///Q2OtfYaIqJQIiI+cN1119GnTx9ef/11AJxOJ//85z+5+eabiYiIaFB+xowZHDx4kG3btnH06FE2btzInXfe2ap7rly5ktjYWOLj45kyZQpJSUm8/PLLzb4mIyMD4LzjVrKysrjzzjuJjo4mPDyc2NhYvvOd7wDu1pa6AgICSExMbNc1RMRN3Tci0m4Wi4Uf/vCHvPTSSzz//PN8/vnnZGdnN9klERsby0033cSyZcuIj4/niiuuYPDgwa2655QpU7xdQ8ePH+dPf/oT1157LTt27KjXStFaDoeD6667joKCAh599FGGDRtGWFgYJ06c4M4778TpdNYrb7VaG7TwtPYaIuKmUCIiPjFz5kwWL17Me++9xwcffEBsbCxpaWlNlr/rrruYMWMGkZGRbVo8rE+fPkyePNn7fOjQoVx++eW8++673H777Y2+ZtCgQQDs2bOnyRC0e/duDh48yGuvvcbMmTO9x9etW9fiurXmGpptI3KWum9ExCdGjx7N6NGjefnll3n77be57bbbCAho+v97vvvd7xIWFkZBQQH//d//3e77e6baVlZWNl
lmypQpREREsGjRIioqKuqd8wx0tVgs9Z57fn7mmWdaXJfWXCMsLAyAwsLCFl9fpLtSS4mI8Oqrr7JmzZp6x4qKijh8+DC/+93v6h1vbpDmzJkz+cUvfgFw3tkkFouFb775BpfL5f1ibo0jR47wj3/8A3DPvHnuueew2WzNDna12WwsWbKEe+65h/Hjx/PDH/6QXr168dVXX1FWVsZrr73GsGHDGDRoEL/4xS84ceIENpuNt99+mzNnzrS4bq25xtixYwH42c9+RlpaGhaLhdtuu62Vn4ZIN+ESkR5r2bJlLqBNj+3btze4Xk5OjstisbhSUlIavd93vvMd18iRI5usz/nOe/Tv379eXXr37u2aMmWKa8uWLS163//5z39cl19+uSskJMRls9lcl156qeuf//yn9/y+fftckydPdoWHh7t69+7tuvfee11fffWVC3AtW7bMW27WrFmusLCwRu/R0mvU1NS45syZ44qNjXWZTCaX/lmWnszkctVpXxQRaYdTp07Rp08fFixYwOOPP+7v6ohIF6MxJSLiM8uXL8fhcHDHHXf4uyoi0gVpTImItNuGDRvYt28fv//975k2bRoDBgzwd5VEpAtS942ItNs111zD5s2bueKKK/jHP/5Rb68bEZGWUigRERERQ9CYEhERETEEhRIRERExBA10bQGn00l2djYRERFaElpERKQVXC4XxcXF9O3b97w7gSuUtEB2djZJSUn+roaIiEiXdezYsUZ31K5LoaQFPFuvHzt2DJvN5ufaiIiIdB12u52kpCTvd2lzFEpawNNlY7PZFEpERETaoCXDHzTQVURERAxBoUREREQMQaFEREREDEGhRERERAxBoUREREQMQaFEREREDEGhRERERAxBoUREREQMQaFEREREDEGhRERERAxBy8wbmMPhICMjw/t80KBBWCwWP9ZIRESk4yiUGFhGRgaL3/6U6IRECnKP89B0SElJ8Xe1REREOoRCicFFJyQSl5js72qIiIh0OI0pEREREUNQKBERERFDUCgRERERQ1AoEREREUNQKBERERFDUCgRERERQ1AoEREREUNQKBERERFDUCgRERERQ1AoEREREUPwayhZuHAhJpOp3mPYsGHe8xUVFcyePZuYmBjCw8OZPn06eXl59a6RlZXF1KlTCQ0NJS4ujocffpiampp6ZTZu3Mgll1yC1Wpl8ODBLF++vDPenoiIiLSC31tKRo4cSU5Ojvfx2Wefec/NnTuX9957j7feeotNmzaRnZ3NLbfc4j3vcDiYOnUqVVVVbN68mddee43ly5ezYMECb5nMzEymTp3KtddeS3p6Og8++CD33HMPa9eu7dT3KSIiIs3z+4Z8AQEBJCQkNDheVFTEK6+8wooVK5g4cSIAy5YtY/jw4WzdupXLLruMDz/8kH379vHRRx8RHx/PmDFj+O1vf8ujjz7KwoULCQoK4sUXXyQ5OZnFixcDMHz4cD777DOWLFlCWlpap75XERERaZrfW0oOHTpE3759GThwIDNmzCArKwuAnTt3Ul1dzeTJk71lhw0bRr9+/diyZQsAW7Zs4cILLyQ+Pt5bJi0tDbvdzt69e71l6l7DU8ZzjcZUVlZit9vrPURERKRj+TWUTJgwgeXLl7NmzRpeeOEFMjMzueqqqyguLiY3N5egoCCioqLqvSY+Pp7c3FwAcnNz6wUSz3nPuebK2O12ysvLG63XokWLiIyM9D6SkpJ88XZFRESkGX7tvrn++uu9P48ePZoJEybQv39/3nzzTUJCQvxWr/nz5zNv3jzvc7vdrmAiIiLSwfzefVNXVFQUKSkpHD58mISEBKqqqigsLKxXJi8vzzsGJSEhocFsHM/z85Wx2WxNBh+r1YrNZqv3EBERkY5lqFBSUlJCRkYGffr0YezYsQQGBrJ+/Xrv+QMHDpCVlUVqaioAqamp7N69m/z8fG+ZdevWYbPZGDFihLdM3Wt4yniuISIiIsbg11Dyi1/8gk2bNn
H06FE2b97M9773PSwWC7fffjuRkZHcfffdzJs3j48//pidO3fy4x//mNTUVC677DIApkyZwogRI7jjjjv46quvWLt
"text/plain": [
"<Figure size 600x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAiUAAAGJCAYAAABVW0PjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABGqElEQVR4nO3deXhTVcI/8O9Nmq0tTSl0ZV9kF0UU6CgKQ2XVV4VRURRQRh1/gCKKiKMIjMqIGy6MjuMrjCOKOvqCgqIsAg4UKGhlkWGzCFLaQkubJm2znt8faS5NW7omuTfp9/M8eWjuPbn3HGPSb88591xJCCFAREREpDCN0hUgIiIiAhhKiIiISCUYSoiIiEgVGEqIiIhIFRhKiIiISBUYSoiIiEgVGEqIiIhIFRhKiIiISBUYSoiIiEgVGEqIqMXq3Lkzpk6dqsi5T5w4AUmS8NJLLylyfiI1YighamFWrFgBSZKwZ8+eGvs6d+6MG264oVGvobp99dVXWLBggdLVIAoLDCVEREH01VdfYeHChUpXgygsMJQQERGRKjCUEFGjLV++HJIk4ccff6yx7/nnn4dWq8Xp06cBAMOGDYMkSbj55ptrlH3ggQcgSRL69etX7zk7d+4MSZIgSRI0Gg1SUlJw++234+TJk/W+VgiBZ599Fu3bt0d0dDSGDx+OgwcP1lq2uLgYs2bNQocOHWAwGNC9e3e88MIL8Hg8cpmq80FeffVVdOrUCSaTCddddx0OHDggl5s6dSqWLVsGAHLdJUmqcc533nkH3bp1g8FgwFVXXYWsrKx620QUiaKUrgARhZ8//OEPmD59OlauXIkBAwb47Vu5ciWGDRuGdu3ayduMRiPWrVuHgoICJCUlAQDKy8vx8ccfw2g0Nvi8Q4cOxf333w+Px4MDBw5g6dKlyM3Nxffff1/n6+bPn49nn30WY8eOxdixY/HDDz9g5MiRcDgcfuXKyspw3XXX4fTp03jggQfQsWNH7NixA/PmzcOZM2ewdOlSv/Lvv/8+SktLMX36dFRUVOC1117D73//e+zfvx/Jycl44IEHkJubiw0bNuBf//pXrXX78MMPUVpaKge0JUuWYPz48fjll1+g0+ka/N+GKCIIImpRli9fLgCIrKysGvs6deokxo0b16DX3HHHHSItLU243W552w8//CAAiOXLl8vbrrvuOtG3b1/Rv39/8dJLL8nb//Wvf4n27duLoUOHir59+9Zb706dOokpU6b4bbvzzjtFdHR0na8rKCgQer1ejBs3Tng8Hnn7k08+KQD4HfMvf/mLiImJEUeOHPE7xhNPPCG0Wq04efKkEEKInJwcAUCYTCbx22+/yeV27dolAIhHHnlE3jZ9+nRR21et7xht2rQRRUVF8vY1a9YIAOLLL7+ss11EkYjDN0TUJJMnT0Zubi6+++47edvKlSthMpkwYcKEGuXvueceLF++XH6+fPlyTJkyBRpNw7+G7HY7zp07h4KCAmzYsAGbN2/GiBEj6nzNxo0b4XA4MHPmTL+hk1mzZtUo++mnn2Lo0KFo3bo1zp07Jz8yMjLgdruxbds2v/I333yzX4/QoEGDMHjwYHz11VcNbtPtt9+O1q1by8+HDh0KAPjll18afAyiSMFQQkRNcv311yM1NRUrV64EAHg8Hnz00Ue46aab0KpVqxrlJ02ahCNHjmD37t04ceIEtmzZ0ug1QlatWoXExEQkJydj5MiR6NChA9599906X/Prr78CAC655BK/7YmJiX5hAACOHj2K9evXIzEx0e+RkZEBACgoKPArX/2YANCjRw+cOHGiwW3q2LGj33Nfnc6fP9/gYxBFCs4pIaIm0Wq1uPPOO/GPf/wDf/vb37B9+3bk5ubirrvuqrV8YmIibrzxRixfvhzJycm4+uqr0b1790adc+TIkZgzZw4A4LfffsMLL7yA4cOHY8+ePTCZTM1uk8fjwfXXX4/HH3+81v09evRo9jmq02q1tW
4XQgT8XERqx1BCRE02efJkvPzyy/jyyy/x9ddfIzExEaNGjbpo+XvvvReTJk2C2Wxu0oJiqampcq8FAPTs2RO/+93vsHr1atxxxx21vqZTp04AvL0gXbt2lbefPXu2Rm9Et27dYLVa/c5Rl6NHj9bYduTIEXTu3Fl+XtvVNkRUOw7fEFGT9e/fH/3798e7776Lzz77DBMnTkRU1MX/1hk9ejRiYmJQVFSE2267rdnnLy8vB+Cda3IxGRkZ0Ol0eOONN/x6H6pfSQMAt912GzIzM/HNN9/U2FdcXAyXy+W3bfXq1fKlzwCwe/du7Nq1C2PGjJG3xcTEyK8norqxp4SohXrvvfewfv16v20lJSU4duwYnn32Wb/tta1H4jN58mQ89thjAHDRoRsfrVaLQ4cOQQgh/7JujF9++QUffPABAOD06dN48803ERcXV+dk18TERDz22GNYvHgxbrjhBowdOxY//vgjvv76a7Rt29av7Jw5c/DFF1/ghhtuwNSpUzFw4EDYbDbs378f//73v3HixAm/13Tv3h3XXHMNHnzwQdjtdixduhRt2rTxG/4ZOHAgAOChhx7CqFGjoNVqMXHixEa3naglYCghaqHeeuutWrcXFxfj6aefbvBxJk2ahLlz56Jbt24YNGhQveXj4uIafOzqvv/+e3lNkrZt2+KKK67AwoUL0aFDhzpf9+yzz8JoNOLtt9/Gd999h8GDB+Pbb7/FuHHj/MpFR0dj69ateP755/Hpp5/i/fffR1xcHHr06IGFCxfCbDb7lZ88eTI0Gg2WLl2KgoICDBo0CG+++SZSU1PlMuPHj8fMmTOxatUqfPDBBxBCMJQQXYQkOJuKiJrh3LlzSE1Nxfz58xsVZsLZiRMn0KVLF7z44otyLxERNR/nlBBRs6xYsQJutxt333230lUhojDH4RsiapLNmzfj559/xnPPPYebb77Z74oTIqKmYCghoiZZtGgRduzYgauvvhpvvPGG0tUhogjAOSVERESkCpxTQkRERKrAUEJERESqwDklDeDxeJCbm4tWrVpxyWgiIqJGEEKgtLQUaWlp9d4VnKGkAXJzc+tdnImIiIgu7tSpU2jfvn2dZRhKGsB3G/ZTp041azVKIiKilsZisaBDhw7y79K6MJQ0gG/IJi4ujqGEiIioCRoy/YETXYmIiEgVGEqIiIhIFRhKiIiISBUYSoiIiEgVGEqIiIhIFRhKiIiISBUYSoiIiEgVGEqIiIhIFRhKiIiISBUYSoiIiEgVGEqIqEURQsBqtUIIoXRViKgahhIialFsNhsWf7YTNptN6aoQUTUMJUTU4uiNJqWrQES1YCghIiIiVWAoISIiIlVgKCEiIiJVYCghIiIiVWAoISIiIlVgKCEiIiJVYCghIiIiVWAoIaIWw7eaK8DVXInUiKGEiFoMm82Gl9dkwel0KV0VIqoFQwkRtSg6o1HpKhDRRTCUEBERkSowlBAREZEqMJQQERGRKjCUEBERkSowlBAREZEqMJQQERGRKjCUEBERkSowlBAREZEqMJQQERGRKjCUEFGLwPveEKkfQwkRtQi++964nG6lq0JEF8FQQkQtBu97Q6RuDCVERESkCgwlREREpAoMJUTUYvkmvwrBya9EasBQQkQtls1mw+LPdsJmsyldFSKCwqFk8eLFuOqqq9CqVSskJSXh5ptvxuHDh/3KVFRUYPr06WjTpg1iY2MxYcIE5Ofn+5U5efIkxo0bh+joaCQlJWHOnDlwuVx+ZbZs2YIrrrgCBoMB3bt3x4oVK4LdPCJSqao9JHqjSenqEFElRUPJ1q1bMX36dOzcuRMbNmyA0+nEyJEj/f5qeeSRR/Dll1/i008/xdatW5Gbm4vx48fL+91uN8aNGweHw4EdO3bgn//8J1asWIH58+fLZXJycjBu3DgMHz4c2dnZmDVrFv74xz/im2++CWl7iUgdnPYKvLL2R/aQEKmMJFQ0mHr27FkkJSVh69atuPbaa1FSUo
LExER8+OGH+MMf/gAA+O9//4vevXsjMzMTQ4YMwddff40bbrgBubm5SE5OBgC8/fbbmDt3Ls6ePQu9Xo+5c+di3bp
"text/plain": [
"<Figure size 600x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi4AAAGJCAYAAACtu7gUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABFIElEQVR4nO3deVxU5eI/8M+ZgRmGZUDZUUTcl9wtQtM0SSzr5s0Wi3LJ1LpYmWXlr1zSzLKraWV5bdG6qand8mtamuFWSqjkvpsaJAIqzgwoMDDz/P4Y5jAjAyjbcPDzfr3mFXOeZ2aecxrl47MdSQghQERERKQAKnc3gIiIiOh6MbgQERGRYjC4EBERkWIwuBAREZFiMLgQERGRYjC4EBERkWIwuBAREZFiMLgQERGRYjC4EBERkWIwuBAR1bCtW7dCkiR8++23ldYdOXIkmjdvXvuNImogGFyIqEJLly6FJEnYs2dPmbLmzZvjvvvuu6HXKMHHH3+MpUuXursZROQCgwsR0TUYXIjqLwYXIiIiUgwGFyKqdUuWLIEkSdi7d2+ZsrfffhtqtRrnzp0DAPTr1w+SJGHIkCFl6o4bNw6SJOGWW26p9DObN28OSZIgSRJUKhXCwsLw6KOPIi0trdLXHT58GNu2bZNf369fPwBATk4OXn75ZXTq1Am+vr7Q6/W45557sH//fpfvZbFY8P/+3/9DWFgYfHx88I9//APp6emVtt1qtWL+/Pno2LEjvLy8EBoainHjxuHy5cuVvpaoofNwdwOIqOF76KGHkJiYiGXLlqFbt25OZcuWLUO/fv3QpEkT+ZiXlxfWr1+P7OxshISEAADy8/OxcuVKeHl5Xffn9unTB2PHjoXVasWhQ4cwf/58ZGRk4Ndffy33NfPnz8dzzz0HX19fvP766wCA0NBQAMDp06exZs0aPPzww4iOjkZWVhb+85//4M4778SRI0cQERHh9F6zZs2CJEl49dVXkZ2djfnz5yMuLg779u2DTqcrtw3jxo3D0qVLMWrUKDz//PM4c+YMPvroI+zduxc7duyAp6fndV8DooaGwYWIap2fnx+GDBmCFStWYM6cOVCpbJ29e/fuxZEjRzBp0iSn+i1btoRarcZ///tfvPTSSwCA//3vf/Dz80Pnzp2Rk5NzXZ/bokULPPHEE/Lzc+fOYc2aNRW+ZsiQIXjjjTcQFBTk9FoA6NSpE06cOCG3HwCefPJJtGvXDp9//jmmTJniVD8nJwdHjx6Fn58fAKB79+545JFH8Omnn+L55593+fm//fYbPvvsMyxbtgyPP/64fLx///4YNGgQVq9e7XSc6GbDoSIiqhPDhw9HRkYGtmzZIh9btmwZdDodhg4dWqb+qFGjsGTJEvn5kiVLMGLECKfQUJnCwkJcvHgR2dnZ2LRpEzZv3owBAwZU+Ry0Wq38+RaLBZcuXYKvry/atm2LP/74o0z94cOHy6EFsPU8hYeH48cffyz3M1avXg1/f3/cfffduHjxovzo0aMHfH19na4f0c2IwYWI6sTdd9+N8PBwLFu2DIBtHseKFSvwwAMPOP1yt0tISMCJEyewa9cunD17Flu3bsXIkSNv6DO/+eYbBAcHIzQ0FAMHDkRkZCQ+++yzKp+D1WrF+++/j9atW0Or1SIoKAjBwcE4cOAAjEZjmfqtW7d2ei5JElq1aoWzZ8+W+xknT56E0WhESEgIgoODnR55eXnIzs6ucvuJGgIOFRFRnVCr1Xj88cfx6aef4uOPP8aOHTuQkZFRZjjGLjg4GPfffz+WLFmC0NBQ9O7dG61atbqhzxw4cKA8DPX333/j3XffRf/+/bFnz54K55iU5+2338aUKVPw1FNPYebMmWjcuDFUKhUmTJgAq9V6w+/nitVqRUhIiBzwrhUcHFwjn0OkVAwuRFRnhg8fjrlz5+KHH37ATz/9hODgYMTHx5db/6mnnkJCQgL8/f0xffr0G/688PBwxMXFyc/btm
2LXr16Yc2aNXjsscfKfZ0kSS6Pf/vtt+jfvz8+//xzp+MGgwFBQUFl6p88edLpuRACp06dQufOncv97JYtW+KXX35B7969qxSuiBo6DhURUZ3p3LkzOnfujM8++wz/+9//MGzYMHh4lP/vp0GDBsHHxwc5OTl45JFHqv35+fn5AGxzXyri4+MDg8FQ5rharYYQwunY6tWr5aXc1/rqq6+Qm5srP//2229x/vx53HPPPeV+9iOPPAKLxYKZM2eWKSsuLnbZLqKbCXtciOi6fPHFF9iwYYPTMaPRiFOnTuGtt95yOu5qvxa74cOH4+WXXwaAcoeJ7NRqNY4ePQohBHx8fG64zadPn8bXX38NwLai6KOPPoJer690gm6PHj3wySef4K233kKrVq0QEhKCu+66C/fddx9mzJiBUaNGoVevXjh48CCWLVuGFi1auHyfxo0b44477sCoUaOQlZWF+fPno1WrVhgzZky5n33nnXdi3LhxmD17Nvbt24eBAwfC09MTJ0+exOrVq7FgwQI89NBDN3wtiBoMQURUgSVLlggAVXrs3r27zPudP39eqNVq0aZNG5efd+edd4qOHTuW257Kyu2ioqKc2hIUFCQGDhwokpOTK31tZmamGDx4sPDz8xMAxJ133imEEKKgoEC89NJLIjw8XOh0OtG7d2+RnJws7rzzTrmOEEJs2bJFABArVqwQkydPFiEhIUKn04nBgweLv/76y+mzRowYIaKiosq0YfHixaJHjx5Cp9MJPz8/0alTJ/HKK6+IjIyMSttP1JBJQlzT70lEVIsuXryI8PBwTJ06tcy+J0REleEcFyKqU0uXLoXFYsGTTz7p7qYQkQJxjgsR1YnNmzfjyJEjmDVrFoYMGYLmzZu7u0lEpEAcKiKiOtGvXz/s3LkTvXv3xtdff+10byIiouvF4EJERESKwTkuREREpBgMLkRERKQYnJxbQ6xWKzIyMuDn51fuduFERERUlhACubm5iIiIqPQO8AwuNSQjIwORkZHubgYREZFipaeno2nTphXWYXCpIX5+fgBsF12v17u5NURERMphMpkQGRkp/y6tCINLDbEPD+n1egYXIiKiKrieqRacnEtERESKweBCREREisHgQkRERIrB4EJERESK4dbgsn37dtx///2IiIiAJElYs2aNU7kQAlOnTkV4eDh0Oh3i4uJw8uRJpzo5OTlISEiAXq9HQEAARo8ejby8PKc6Bw4cQJ8+feDl5YXIyEjMmTOnTFtWr16Ndu3awcvLC506dcKPP/5Y4+dLRERE1ePW4HLlyhV06dIFCxcudFk+Z84cfPDBB1i0aBFSUlLg4+OD+Ph4FBQUyHUSEhJw+PBhbNq0CevWrcP27dsxduxYudxkMmHgwIGIiopCamoq3nvvPUyfPh2LFy+W6+zcuROPPfYYRo8ejb1792LIkCEYMmQIDh06VHsnT0RERDdO1BMAxPfffy8/t1qtIiwsTLz33nvyMYPBILRarVixYoUQQogjR44IAGL37t1ynZ9++klIkiTOnTsnhBDi448/Fo0aNRKFhYVynVdffVW0bdtWfv7II4+IwYMHO7UnJiZGjBs37rrbbzQaBQBhNBqv+zVERER0Y79D6+0clzNnziAzMxNxcXHyMX9/f8TExCA5ORkAkJycjICAAPTs2VOuExcXB5VKhZSUFLlO3759odFo5Drx8fE4fvw4Ll++LNdx/Bx7HfvnuFJYWAiTyeT0ICIiotpVb4NLZmYmACA0NNTpeGhoqFyWmZmJkJAQp3IPDw80btzYqY6r93D8jPLq2MtdmT17Nvz9/eUHt/snIiKqffU2uNR3kydPhtFolB/p6enubhIREVGDV2+DS1hYGAAgKyvL6XhWVpZcFhYWhuzsbKfy4uJi5OTkONVx9R6On1FeHXu5K1qtVt7en9v839yEEDAYDBBCuLspREQNXr0NLtHR0QgLC0NSUpJ8zGQyISUlBbGxsQCA2NhYGAwGpKamyn
U2b94Mq9WKmJgYuc727dtRVFQk19m0aRPatm2LRo0ayXUcP8dex/45RBUxGo2YtXI7jEaju5tCRNTguTW45OXlYd+
"text/plain": [
"<Figure size 600x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAiUAAAGJCAYAAABVW0PjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABUMElEQVR4nO3deXhU5d3G8e/MJDNZJxtkky3su+wYV9SUgNSKYutCgQqCIliBFpEWUdFXqAu4gbQioBVFbautiiCyuRAQAmE3shoEkrAlQyDrzHn/CBkZE7aQZCbk/lzXXGSe85xzfk8GkpvznMVkGIaBiIiIiJeZvV2AiIiICCiUiIiIiI9QKBERERGfoFAiIiIiPkGhRERERHyCQomIiIj4BIUSERER8QkKJSIiIuITFEpERETEJyiUiIhUwvz58zGZTOzbt8/bpYhcNhRKROq4sl+u69evL7esSZMm/PrXv76odUREKkuhRESkEgYNGkR+fj6NGzf2dikilw2FEhGRi3Dy5EkALBYLAQEBmEwmL1ckcvlQKBGRSzZv3jxMJhMbN24st+zZZ5/FYrFw4MABAHr16oXJZKJ///7l+j7wwAOYTCbat29/3n02adIEk8mEyWTCbDYTGxvLXXfdRUZGxgWt++tf/5ovvviCTp06ERAQQNu2bfnPf/7j0a9smmrVqlU89NBDREdH06BBA49lvzyn5PPPP+eGG24gNDQUu91O9+7deffddz36rF27lj59+hAWFkZQUBA33HAD33777XnrFrncKZSIyCW78847CQwMZMGCBeWWLViwgF69enHFFVe42wICAvjss8/Izs52t+Xn5/P+++8TEBBwwfu97rrr+Oc//8n8+fMZPHgwH330EQMHDrygdXfu3Mldd91F3759mTp1Kn5+fvz2t79l6dKl5fo+9NBDbN++ncmTJ/PYY4+ddZvz58+nX79+HDt2jIkTJzJt2jQ6derE4sWL3X2WL1/O9ddfj8Ph4IknnuDZZ58lJyeHm266ie++++6Cxy5yOfLzdgEiUvuFhobSv39/3nvvPZ577jnM5tL/72zcuJHt27czfvx4j/7NmjXDYrHwz3/+kz/96U8A/Pvf/yY0NJSOHTty7NixC9pv06ZN+f3vf+9+f+DAAT7++OMLWveHH37g3//+N3fccQcAw4YNo3Xr1kyYMIFf/epXHn0jIyNZtmwZFovlrNvLzc3lj3/8Iz169GDlypUe4cowDPefDz74IDfeeCOff/65e+rngQceoF27dkyaNIkvvvjiguoXuRzpSImIVInBgwdz8OBBVqxY4W5bsGABgYGBDBgwoFz/++67j3nz5rnfz5s3jyFDhrgDzYUoLCzkyJEjZGdns3TpUpYvX87NN998QevGx8dz++23u9/b7XYGDx7Mxo0byczM9Og7fPjwcwYSgKVLl3LixAkee+yxckd7ysJHWloaO3fu5N577+Xo0aMcOXKEI0eOcPLkSW6++Wa++uorXC7XBdUvcjlSKBGRKvGrX/2KuLg49xSOy+Xivffe47bbbiM0NLRc/4EDB/LDDz/w3XffsW/fPlauXMkf/vCHi9rnwoULqV+/PjExMfTu3ZuGDRsyZ86cC1q3efPm5U5SbdmyJUC580QSEhLOu73du3cDnPN8mJ07dwIwZMgQ6tev7/GaM2cOhYWF5ObmXlD9IpcjTd+ISJWwWCzce++9vPHGG8yaNYtvv/2WgwcPekyvnKl+/frceuutzJs3j5iYGK655hqaN29+Ufvs3bu3e2rop59+4m9/+xs33ngj69evJzAw8JLHVKaqtlV2FOT555+nU6dOFfYJCQmpkn2J1EYKJSJSZQYPHsyLL77IJ598wueff079+vVJTk4+a/+hQ4cycOBAwsLCePLJJy96f3FxcSQlJbnft2rViquvvpqPP/6Ye+6555zr7tq1C8MwPI6W/PDDD0Dp1TkXq1mzZgBs3br1rOGqrI/dbveoW0
RKafpGRKpMx44d6dixI3PmzOHf//43d999N35+Z/+/T58+fQgODubYsWP87ne/u+T95+fnA6XnmpzPwYMH+eijj9zvHQ4Hb7/9Np06dSI2Nvai9927d29CQ0OZOnUqBQUFHsvKTnTt2rUrzZo144UXXiAvL6/cNg4fPnzR+xW5nOhIiYgAMHfuXI9LV6H0ipJdu3bxzDPPeLRXdD+SMoMHD+bPf/4zwFmnbspYLBZ27NiBYRgEBwdfdM179uzhnXfeAUqvvHnttdew2+0XdLJry5YtGTZsGOvWrSMmJoa5c+eSlZXlcfLtxbDb7cyYMYP777+f7t27c++99xIREcGmTZs4deoUb731FmazmTlz5tC3b1/atWvHfffdxxVXXMGBAwdYsWIFdrudTz75pFL7F7kcKJSICACvv/56he05OTk8/vjjF7ydgQMHMmHCBJo1a0aPHj3O299ut1/wtn/p66+/5uuvvwagXr16dOnShaeeeoqGDRued90WLVrw6quvMn78eNLT00lISOD9998/53TT+QwbNozo6GimTZvG008/jb+/P61bt2bs2LHuPr169SIlJYWnn36a1157jby8PGJjY+nZsycPPPBApfctcjkwGWXHFUVEqsCRI0eIi4tj8uTJFxVmalKTJk1o3749n376qbdLEZEz6JwSEalS8+fPx+l0MmjQIG+XIiK1jKZvRKRKLF++nO3bt/N///d/9O/fv1JXsIhI3aZQIiJVYsqUKaxevZprrrmGV1991dvliEgtpHNKRERExCfonBIRERHxCQolIiIi4hN0TskFcLlcHDx4kNDQ0HIP8BIREZGzMwyDEydOEB8ff96ngCuUXICDBw9e0M2YREREpGL79++nQYMG5+yjUHIByh67vn///ku6+6SIiEhd43A4aNiwoft36bkolFyAsikbu92uUCIiIlIJF3L6g050FREREZ+gUCIiIiI+QaFEREREfIJCiYiIiPgEhRIRERHxCQolIiIi4hMUSkRERMQnKJSIiIiIT1AoEREREZ+gUCIiIiI+QaFEREREfIKefeODxj02icPHT3i01Y8IZfq0Z7xUkYiISPVTKPFBh4+foMudozzaNvxrppeqERERqRmavhERERGfoFAiIiIiPkGhRERERHyCQomIiIj4BIUSERER8QkKJSIiIuITFEpERETEJyiUiIiIiE9QKBERERGfoFAiIiIiPkGhRERERHyCQomIiIj4BIUSERER8QkKJSIiIuITFEpERETEJ3g9lBw4cIDf//73REVFERgYSIcOHVi/fr17uWEYTJ48mbi4OAIDA0lKSmLnzp0e2zh27BgDBw7EbrcTHh7OsGHDyMvL8+izefNmrrvuOgICAmjYsCHPPfdcjYxPRERELoxXQ8nx48e55ppr8Pf35/PPP2f79u28+OKLREREuPs899xzvPLKK8yePZu1a9cSHBxMcnIyBQUF7j4DBw5k27ZtLF26lE8//ZSvvvqKESNGuJc7HA569+5N48aNSU1N5fnnn+fJJ5/kH//4R42OV0RERM7Oz5s7/9vf/kbDhg2ZN2+euy0hIcH9tWEYvPTSS0yaNInbbrsNgLfffpuYmBg+/vhj7r77bnbs2MHixYtZt24d3bp1A+DVV1/llltu4YUXXiA+Pp4FCxZQVFTE3LlzsVqttGvXjrS0NKZPn+4RXsoUFhZSWFjofu9wOKrrWyAiIiKnefVIyf/+9z+6devGb3/7W6Kjo+ncuTNvvPGGe/nevXvJzMwkKSnJ3RYWFkbPnj1JSUkBICUlhfDwcHcgAUhKSsJsNrN27Vp3n+uvvx6r1eruk5ycTHp6OsePHy9X19SpUwkLC3O/GjZsWOVjFxEREU9eDSV79uzh9ddfp0WLFixZsoSRI0fyxz/+kbfeeguAzMxMAGJiYjzWi4mJcS/LzMwkOjraY7mfnx+RkZEefSraxpn7ONPEiRPJzc11v/bv318FoxUREZFz8er0jcvlol
u3bjz77LMAdO7cma1btzJ79myGDBnitbpsNhs2m81r+xcREamLvHqkJC4ujrZt23q0tWnThoyMDABiY2MByMrK8ui
"text/plain": [
"<Figure size 600x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAiUAAAGJCAYAAABVW0PjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABbQElEQVR4nO3deXiU9b3//+c9k2SykElIIBtJICwCEVBEwbgVFYlKrf6kp/XUCtbt6Bc8Bc6xlnPcqlVabFGrKMdjhZ4Kbm21Cm6RzQUQRCKEVTYDZIOEZJKQdeb+/TGZgYHsCzNJXo/rmqvMzGfueU+Kkxef1TBN00RERETEzyz+LkBEREQEFEpEREQkQCiUiIiISEBQKBEREZGAoFAiIiIiAUGhRERERAKCQomIiIgEBIUSERERCQgKJSIiIhIQFEpEREQkICiUiEiHLFmyBMMw+Prrr894btCgQfzwhz9s02tEpPdSKBEREZGAoFAiIiIiAUGhRET8bvHixRiGwZYtW8547qmnnsJqtXLkyBEAJk6ciGEY3HTTTWe0/bd/+zcMw2DUqFEtvuegQYMwDAPDMLBYLCQkJPDTn/6U3NzcZl+3atUqLBYLjzzyiM/jy5YtwzAMXnrppRbfW0Qap1AiIn734x//mLCwMJYuXXrGc0uXLmXixIkMGDDA+1hoaCgrVqygqKjI+1hVVRVvvvkmoaGhrX7fyy+/nL/+9a8sWbKEadOm8c4773Drrbc2+5qrrrqK//f//h/z5s3jm2++ASA/P5/777+fSZMmce+997b6/UXEl0KJiPhdZGQkN910E6+//joul8v7+JYtW9ixYwe33XabT/shQ4aQnp7OX//6V+9jf//734mMjOSiiy5q9fsOHjyYn//850ybNo358+fzL//yL96g0Zz58+czaNAgpk2bRk1NDXfffTf19fX8+c9/xjCMVr+/iPhSKBGRgDBt2jTy8vJYvXq197GlS5cSFhbG1KlTz2j/i1/8gsWLF3vvL168mOnTp2OxtP5rraamhmPHjlFUVERWVharVq3i6quvbvF14eHhLFmyhJ07d3LFFVewYsUKnnnmGVJTU1v93iJyJoUSEQkI11xzDYmJid4hHJfLxeuvv86NN95IZGTkGe1vvfVW9uzZw8aNGzl48CBr1qzh9ttvb9N7vvHGG/Tv35/4+HgmT55MSkoKr7zySqtee+mll3LfffexceNGMjMzueOOO9r03iJyJoUSEQkIVquVn/3sZ/z973+nurqa1atXk5eXx89//vNG2/fv358bbriBxYsXs2TJEi699FKGDh3apvecPHkyWVlZZGVlsXjxYsrLy7nyyiupqqpq8bU1NTWsWbMGgH379nHixIk2vbeInEmhREQCxrRp03A4HLz//vssXbqU/v37k5mZ2WT7O+64g9dff53Fixfzi1/8os3vl5iYyKRJk5g0aRK33347r776Kjt27ODdd99t8bWPPvooO3fu5A9/+AMHDhzg17/+dZvfX0R8Bfm7ABERjzFjxjBmzBheeeUVNmzYwPTp0wkKavpr6tprryUiIoKSkhJ+8pOfdPj9PT0kNTU1zbb76quv+MMf/sCsWbP4j//4D44dO8bvf/97pk6dyg9+8IMO1yHSWymUiEinePXVV/noo498HisrK2Pv3r389re/9Xm8sf1IPKZNm8Z//ud/AjQ5dONhtVrZuXMnpmkSERHR5pr379/Pa6+9BsCRI0d44YUXsNvtzU52ra6uZvr06QwbNownn3wSgN/85je8//77/OIXv2Dbtm3tqkVEFEpEpJM0tWlYaWkpDz/8cKuvc+utt/Lggw8yZMgQxo8f32J7u93e6muf7vPPP+fzzz8HoF+/flxwwQX85je/ISUlpcnX/Nd//Rd79+5l3bp13j1RQkJC+Mtf/sLFF1/MAw88wIsvvtjumkR6M8M0TdPfRYiIeBw7dozExEQeeeSRNoUZEen+NNFVRALKki
VLcDqdZ2yYJiI9n4ZvRCQgrFq1ih07dvDkk09y0003MWjQIH+XJCJnmYZvRCQgTJw4kXXr1nHppZfy2muv+Zx1IyK9g0KJiIiIBATNKREREZGAoFAiIiIiAUETXVvB5XKRl5dHZGSkjiUXERFpA9M0KS8vJykpqcVTvBVKWiEvL6/ZzZRERESkeYcOHSI5ObnZNgolreA5Nv3QoUMd2j1SRESkt3E4HKSkpHh/lzZHoaQVPEM2drtdoURERKQdWjP9QRNdRUREJCAolIiIiEhAUCgRERGRgKBQIiIiIgFBoUREREQCgkKJiIiIBASFEhEREQkICiUiIiISEBRKREREJCAolIiIiEhA0DbzItKiuro6cnJyfB4bNWoUwcHBfqpIRHoihRIRaVFOTg4L3l5NXMpgAIoO7WcOMHbsWP8WJiI9ikKJiLRKXMpgkoem+7sMEenBNKdEREREAoJ6SkTkDKfPIdm1axemK8SPFYlIb+DXnpKXXnqJMWPGYLfbsdvtZGRk8OGHH3qfnzhxIoZh+Nzuvfden2vk5uYyZcoUwsPDiYuL44EHHqC+vt6nzZo1a7jggguw2WwMHTqUJUuWnI2PJ9JteeaQvLbhe/ctaxMlJcf9XZaI9HB+7SlJTk7md7/7HcOGDcM0Tf7yl79w4403smXLFs4991wA7r77bh5//HHva8LDw71/djqdTJkyhYSEBNatW0d+fj7Tpk0jODiYp556CoADBw4wZcoU7r33XpYuXcrKlSu56667SExMJDMz8+x+YJFu5NQ5JEWH9vu5GhHpDfwaSm644Qaf+08++SQvvfQSGzZs8IaS8PBwEhISGn39J598wo4dO/j000+Jj4/n/PPP54knnuDBBx/kscceIyQkhEWLFpGWlsYf//hHAEaOHMkXX3zBM888o1AiIiISQAJmoqvT6eSNN96gsrKSjIwM7+NLly6lX79+jBo1irlz53LixAnvc+vXr2f06NHEx8d7H8vMzMThcLB9+3Zvm0mTJvm8V2ZmJuvXr2+ylpqaGhwOh89NREREupbfJ7pu27aNjIwMqqur6dOnD++88w7p6e4u45/97GcMHDiQpKQktm7dyoMPPsju3bv5xz/+AUBBQYFPIAG89wsKCppt43A4qKqqIiws7Iya5s2bx29+85tO/6wiIiLSNL+HkuHDh5OdnU1ZWRl/+9vfmD59OmvXriU9PZ177rnH22706NEkJiZy9dVXs2/fPoYMGdJlNc2dO5c5c+Z47zscDlJSUrrs/URERCQAhm9CQkIYOnQo48aNY968eZx33nk899xzjbadMGECAHv37gUgISGBwsJCnzae+555KE21sdvtjfaSANhsNu+KIM9NREREupbfQ8npXC4XNTU1jT6XnZ0NQGJiIgAZGRls27aNoqIib5usrCzsdrt3CCgjI4OVK1f6XCcrK8tn3oqIiIj4n1+Hb+bOnct1111Hamoq5eXlLFu2jDVr1vDxxx+zb98+li1bxvXXX09sbCxbt25l9uzZXHHFFYwZMwaAyZMnk56ezm233cb8+fMpKCjgoYceYsaMGdhsNgDuvfdeXnjhBX71q19xxx13sGrVKt566y1WrFjhz48uIiIip/FrKCkqKmLatGnk5+cTFRXFmDFj+Pjjj7nmmms4dOgQn376Kc8++yyVlZWkpKQwdepUHnroIe/rrVYry5cv57777iMjI4OIiAimT5/us69JWloaK1asYPbs2Tz33HMkJyfzyiuvaDmwiIhIgPFrKPnzn//c5HMpKSmsXbu2xWsMHDiQDz74oNk2EydOZMuWLW2uT0RERM6egJtTIiIiIr2TQomIiIgEBIUSERERCQgKJSIiIhIQFEpEREQkICiUiIiISEBQKBEREZGAoFAiIiIiAUGhRERERAKCQomIiIgEBIUSERERCQgKJSIiIhIQFEpEREQkICiUiIiISEBQKBEREZGAoFAiIiIiAUGhRERERAKCQomIiIgEBIUSER
ERCQgKJSIiIhIQFEpEREQkICiUiIiISEBQKBEREZGAoFAiIiIiAUGhRERERAKCQomIiIgEBIUSERERCQgKJSIiIhI
"text/plain": [
"<Figure size 600x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAiUAAAGJCAYAAABVW0PjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABCxUlEQVR4nO3de1xUZcIH8N8MzAx4mcEbt0RELRUVL2Q0a/VqEmjUq2+2q2VCq9bqi27Kbhnva2a2G63dtDJ9WzepTdPa1jYlRQTRStREWW9larS44kCpMIjM/bx/wBxnuF9mmKPn9/185iNzzjPnPOdA8Ou5HYUgCAKIiIiIfEzp6woQERERAQwlREREJBEMJURERCQJDCVEREQkCQwlREREJAkMJURERCQJDCVEREQkCQwlREREJAkMJURERCQJDCVEREQkCQwlRNQhmZmZUCgUOHz4cIN9/fv3xwMPPNCmzxCRfDGUEBERkSQwlBAREZEkMJQQkc9t2LABCoUCR48ebbDvpZdegp+fHy5cuAAAGD9+PBQKBaZOndqg7G9+8xsoFAoMHz68xXP2798fCoUCCoUCSqUSoaGhmD59OkpKSpr9XEpKCnr37g2r1dpgX0JCAgYPHtziuYmocQwlRORzDz/8MAIDA7Fx48YG+zZu3Ijx48fjlltuEbcFBAQgKysL5eXl4raamhps2bIFAQEBrT7v3Xffjb/+9a/IzMxEcnIytm7dipkzZzb7mVmzZuHSpUvIzs52224wGJCXl4fHHnus1ecnIncMJUTkc927d8fUqVPx0UcfweFwiNuPHj2KU6dOYdasWW7lBw4ciOjoaPz1r38Vt3366afo3r07xo4d2+rzDhgwAI899hiSk5OxcuVK/PKXv8SRI0ea/cy9996Lvn374sMPP3Tb7qw7QwlR+zGUEJEkJCcno7S0FHv27BG3bdy4EYGBgZg2bVqD8r/+9a+xYcMG8f2GDRuQkpICpbL1v9bMZjN+/vlnlJeXIycnB3l5eZg4cWKzn1EqlZg5cyY+//xzVFVVudX1F7/4BaKiolp9fiJyx1BCRJJw3333ISwsTOzCcTgc+OijjzBlyhR07969QfmZM2fi+++/x6FDh/Djjz8iPz8fjz/+eJvOuXnzZvTp0wchISFISEhAREQE1q9f3+LnkpOTUVNTg61btwIATp8+jcLCwgYtOkTUNgwlRCQJfn5+ePTRR/Hpp5/CZDJhz549KC0tbbI7pE+fPnjwwQexYcMGZGZmYty4cRg0aFCbzpmQkICcnBzk5ORgw4YNqKqqwoQJE1BTU9Ps56KjoxEbGyt24Xz44YdQq9X41a9+1abzE5E7f19XgIjIKTk5Ga+99hq2bduGHTt2oE+fPkhMTGyy/OzZszFz5kzodDosX768zecLCwtDfHy8+H7w4MH4xS9+gc8++wyPPPJIi3VNS0vDxYsXsWnTJiQlJaFHjx5trgMRXceWEiKSjJiYGMTExGD9+vX49NNPMWPGDPj7N/3/TpMmTULXrl1x+fJlj7RSOFtIzGZzi2UfeeQRKBQKPPXUU/jhhx84wJXIA9hSQkQe8d5772Hnzp1u2yorK3H27Fn84Q9/cNve2HokTsnJyfj9738PAC3+offz88O3334LQRDQtWvXNtf5hx9+ELtgLly4gLfffhtarbbFwa5AbffRpEmT8MknnyAoKAhJSUltPj8RuWMoISKPWLt2baPbKyoq8Nxzz7X6ODNnzsSSJUswcOBA3HHHHS2W12q1rT52fV9++SW+/PJLAEDv3r0xZswYvPDCC4iIiGjV55OTk7F9+3b86le/gkajaXc9iKgWQwkRdcjjjz/e5lkvzfH394dCoWiylSQ/P7/Zz7e03+nHH39sW8UaoVarAbTcokNErcMxJUQkKZmZmbDb7TfE9No///nPGDBgAO666y5fV4XopsCWEiKShLy8PJw6dQp//OMfMXXqVPTv39/XVWrS5s2bce
zYMWRlZWH16tVQKBS+rhLRTUEhCILg60oQEY0fPx779+/HuHHj8OGHH7o960ZqFAoFunXrhunTp2PdunXNzhAiotZjKCEiIiJJ4JgSIiIikgSGEiIiIpIEdoS2gsPhQGlpKbp3784BbURERG0gCAKqqqoQHh7e4lO8GUpaobS0tNWLKREREVFD58+fR9++fZstw1DSCs7Hpp8/f75Dq0cSERHJjdFoREREhPi3tDkMJa3g7LLRarUMJURERO3QmuEPHOhKREREksBQQkRERJLAUEJERESSwFBCREREksBQQkRERJLAUEJERESSwFBCREREksBQQkRERJLAUEJERESSwFBCREREksBQQkRERJLAUCJTNTU1qKmp8XU1iIiIRAwlREREJAkMJURERCQJDCVEREQkCQwlREREJAkMJURERCQJDCVEREQkCT4NJWvXrkVMTAy0Wi20Wi30ej127Ngh7h8/fjwUCoXba968eW7HKCkpQVJSErp06YLg4GA8/fTTsNlsbmXy8/MxZswYaDQaDBo0CJmZmZ1xeURERNQG/r48ed++ffHyyy/j1ltvhSAIeP/99zFlyhQcPXoUw4YNAwA88cQTWLFihfiZLl26iF/b7XYkJSUhNDQU+/fvx8WLF5GcnAyVSoWXXnoJAFBcXIykpCTMmzcPGzduRG5uLubOnYuwsDAkJiZ27gUTERFRkxSCIAi+roSrnj174pVXXsGcOXMwfvx4jBo1CqtWrWq07I4dO/DAAw+gtLQUISEhAIB169ZhyZIl+Omnn6BWq7FkyRJkZWXhxIkT4udmzJiBiooK7Ny5s1V1MhqN0Ol0qKyshFar7fA1SoFz4bTAwEAf14SIiG5mbfkbKpkxJXa7HZs3b0Z1dTX0er24fePGjejduzeGDx+O9PR0XLt2TdxXUFCAESNGiIEEABITE2E0GnHy5EmxTHx8vNu5EhMTUVBQ0GRdzGYzjEaj24uIiIi8y6fdNwBw/Phx6PV6mEwmdOvWDVu3bkV0dDQA4NFHH0VkZCTCw8Nx7NgxLFmyBKdPn8bf//53AIDBYHALJADE9waDodkyRqMRNTU1jbYUZGRk4IUXXvD4tRIREVHTfB5KBg8ejKKiIlRWVuJvf/sbUlJSsHfvXkRHR+PJJ58Uy40YMQJhYWGYOHEizp07h4EDB3qtTunp6UhLSxPfG41GREREeO18REREJIHuG7VajUGDBiE2NhYZGRkYOXIkVq9e3WjZuLg4AMDZs2cBAKGhoSgrK3Mr43wfGhrabBmtVtvkeAqNRiPOCHK+iIiIyLt8HkrqczgcMJvNje4rKioCAISFhQEA9Ho9jh8/jvLycrFMTk4OtFqt2AWk1+uRm5vrdpycnBy3cStERETkez7tvklPT8fkyZPRr18/VFVVYdOmTcjPz0d2djbOnTuHTZs24f7770evXr1w7NgxLF68GPfccw9iYmIAAAkJCYiOjsasWbOwcuVKGAwGLF26FKmpqdBoNACAefPm4e2338YzzzyD2bNnIy8vDx9//DGysrJ8eelERERUj09DSXl5OZKTk3Hx4kXodDrExMQgOzsb9913H86fP4/du3dj1apVqK6uRkREBKZNm4alS5eKn/fz88P27dsxf/586PV6dO3aFSkpKW7rmkRFRSErKwuLFy/G6tWr0bdvX6xfv55rlBAREUmM5NYpkSKuU0JERNQ+N+Q6JURERCRvDCVEREQkCQwlREREJAkMJURERCQJDCVEREQkCQwlREREJAkMJURERCQJDCVEREQkCQwlREREJAkMJURERCQJDCVEREQkCQwlREREJAkMJURERCQJDCVEREQkCQwlREREJAkMJURERCQJDCVEREQkCQwlREREJAkMJURERCQJDCVEREQkCQwlREREJAkMJURERCQJDCVEREQkCQwlREREJAkMJURERCQJDCVEREQkCQwlREREJAkMJURERCQJDCVEREQkCT4NJWvXrkVMTAy0Wi
20Wi30ej127Ngh7jeZTEhNTUWvXr3QrVs3TJs2DWVlZW7HKCkpQVJSErp06YLg4GA8/fTTsNlsbmXy8/MxZswYaDQ
"text/plain": [
"<Figure size 600x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAiUAAAGJCAYAAABVW0PjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABHDElEQVR4nO3dfVhUdaIH8O/MMDOAMoNvMJCoqKWiomlFs22uJoHGdnX1PtfKlC3T1YvelFZdds23dpfWXTMrtdvjJu1N82U3rbRUBEFN1CTJ1yiNFgsH8gUGYRhg5tw/hjnM8D4InCPz/TzP3Muc85szvxnPs/Pt96oQBEEAERERkcSUUleAiIiICGAoISIiIplgKCEiIiJZYCghIiIiWWAoISIiIllgKCEiIiJZYCghIiIiWWAoISIiIllgKCEiIiJZYCghIiIiWWAoIaI7kpKSAoVCgdOnT9c7169fP/zyl7/06DVE5L0YSoiIiEgWGEqIiIhIFhhKiEhyW7ZsgUKhwJkzZ+qd+/Of/wyVSoUff/wRADB27FgoFApMnjy5Xtnf/OY3UCgUGDZsWLPv2a9fPygUCigUCiiVShgMBkybNg35+flNvm7lypXi6+o+fv3rX7fo8xJRw3ykrgAR0X/+538iISEBW7duxf333+92buvWrRg7dizuuece8Zivry/27duHoqIiBAUFAQAsFgt27NgBX1/fFr/vo48+ijlz5sBut+P8+fN4/fXXUVBQgKNHjzb6milTpmDgwIFux7Kzs/H666+LdSGi1mEoISLJBQQEYPLkyfjggw+wZs0aKJWORtwzZ87g4sWLWLx4sVv5AQMGQKVS4f/+7//w0ksvAQD+9a9/ISAgAJGRkbh582aL3rd///549tlnxec//vgj9uzZ0+RrIiMjERkZKT6/fv06/vCHP2D48OFYsWJFi96XiBrG7hsikoWZM2eioKAAhw8fFo9t3boVfn5+mDp1ar3yzz33HLZs2SI+37JlC+Lj48VA0xJWqxXXr19HUVERUlNTkZ6ejvHjx7f49TabDU8//TRKS0uxe/dudOnSpcWvJaL6GEqISBYef/xxhISEYOvWrQAAu92ODz74AJMmTUJAQEC98tOnT8c333yDU6dO4fvvv0dGRobHYzq2b9+OXr16ITg4GDExMQgLC8PmzZtb/Pply5YhPT0d27Ztw4ABAzx6byKqj6GEiGRBpVLhmWeewb/+9S9UVFTg8OHDKCgocOtecdWrVy88+eST2LJlC1JSUvDII4/UG+vRnJiYGKSmpiI1NRVbtmxBaWkpxo0bB4vF0uxr9+zZg7/85S9YvXo1JkyY4NH7ElHDOKaEiGRj5syZWLt2LT755BN89tln6NWrF2JjYxst//zzz2P69OnQ6/VYuXKlx+8XEhKC6Oho8fmgQYPws5/9DHv27MHTTz/d6Ou++eYbxMfHY/Lkyfj973/v8fsSUcMYSohINpyDSDdv3owTJ04gPj4ePj6N/8/UhAkT0KVLF9y8eRP/9V//dcfv72whsVqtjZa5ffs2fvWrX+Gee+7Be++9B4VCccfvS0QODCVE1Cbeffdd7N+/3+1YSUkJLl++jD/+8Y9uxxtaj8Rp5syZ+O1vfwsAjXbdOKlUKly6dAmCILRqkOl3332H999/H4Bj5s1bb70FnU7X5GDXVatW4eLFi1i2bBk++ugjt3MDBgyA0Wj0uB5E5MBQQkRtYtOmTQ0eLy4uxssvv9zi60yfPh1Lly7FgAED8NBDDzVbXqfTtfjadR09elRck6Rnz54YNWoUVq1ahbCwsEZf89NPPwFAvaAFAPHx8QwlRHdAIQiCIHUliIicrl+/jpCQECxfvtyjMENEdz/OviEiWUlJSYHNZsOMGTOkrgoRdTB23xCRLKSnp+PixYv405/+hMmTJ6Nfv35SV4mIOhi7b4hIFsaOHYvjx4/jkUcewfvvv++21w0ReQeGEiIiIpIFjikhIiIiWW
AoISIiIlngQNcWsNvtKCgoQEBAAFdvJCIi8oAgCCgtLUVoaGizu3gzlLRAQUFBk4spERERUdOuXr2K3r17N1mGoaQFnNumX7169Y5WjyQiIvI2ZrMZYWFh4m9pUxhKWsDZZaPT6RhKiIiIWqElwx840JWIiIhkgaGEiIiIZIGhhIiIiGSBoYSIiIhkgaGEiIiIZIGhhIiIiGSBoYSIiIhkgaGEiIiIZIGhhIiIiGSBoYSIiIhkgaGEADh2cSwvL4cgCFJXhYiIvBRDCQEALBYLXt19ChaLReqqEBGRl2IoIZFa6yt1FYiIyIsxlBAREZEsMJQQERGRLMgmlLz66qtQKBRYuHCheKyiogIJCQno0aMHunbtiqlTp6KwsNDtdfn5+YiLi4O/vz+CgoKwePFiVFdXu5XJyMjAqFGjoNVqMXDgQKSkpHTAJyIiIiJPyCKUfPHFF/jf//1fREZGuh1ftGgRPvnkE+zatQuZmZkoKCjAlClTxPM2mw1xcXGorKzE8ePH8d577yElJQXLly8Xy+Tl5SEuLg7jxo1DTk4OFi5ciBdeeAEHDhzosM9HRERELSBIrLS0VLj33nuF1NRU4Re/+IXw4osvCoIgCMXFxYJarRZ27dollr106ZIAQMjKyhIEQRA+/fRTQalUCiaTSSyzadMmQafTCVarVRAEQViyZIkwdOhQt/ecNm2aEBsb2+I6lpSUCACEkpKS1n5M2SsrKxNW78oSysrKpK4KERF1Ip78hkreUpKQkIC4uDhER0e7Hc/OzkZVVZXb8cGDB6NPnz7IysoCAGRlZWH48OEIDg4Wy8TGxsJsNuPChQtimbrXjo2NFa/REKvVCrPZ7PYgIiKi9uUj5Ztv374dX375Jb744ot650wmEzQaDQIDA92OBwcHw2QyiWVcA4nzvPNcU2XMZjMsFgv8/PzqvXdycjJWrVrV6s9FREREnpOspeTq1at48cUXsXXrVvj6ymt9jKSkJJSUlIiPq1evSl0lIiKiTk+yUJKdnY2ioiKMGjUKPj4+8PHxQWZmJt544w34+PggODgYlZWVKC4udntdYWEhDAYDAMBgMNSbjeN83lwZnU7XYCsJAGi1Wuh0OrcHERERtS/JQsn48eNx7tw55OTkiI8HHngA06dPF/9Wq9VIS0sTX5Obm4v8/HwYjUYAgNFoxLlz51BUVCSWSU1NhU6nQ0REhFjG9RrOMs5rEBERkTxINqYkICAAw4YNczvWpUsX9OjRQzw+a9YsJCYmonv37tDpdFiwYAGMRiMefvhhAEBMTAwiIiIwY8YMrFmzBiaTCcuWLUNCQgK0Wi0AYO7cuXjrrbewZMkSPP/880hPT8fOnTuxb9++jv3ARERE1CRJB7o2Z926dVAqlZg6dSqsVitiY2OxceNG8bxKpcLevXsxb948GI1GdOnSBfHx8Vi9erVYJjw8HPv27cOiRYuwfv169O7dG5s3b0ZsbKwUH4mIiIgaoRAE7lXfHLPZDL1ej5KSkk47vqS8vBxrPz2Ll56IhL+/v9TVISKiTsKT31DJ1ykhIiIiAhhKiIiISCYYSoiIiEgWGEqIiIhIFhhKiIiISBYYSoiIiEgWGEqIiIhIFhhKiIiISBYYSoiIiEgWGEqIiIhIFhhKiIiISBYYSoiIiEgWGEqIiIhIFhhKiIiISBYYSoiIiEgWGEqIiIhIFhhKiIiISBYYSoiIiEgWGEqIiIhIFhhKiIiISBYYSoiIiEgWGEqIiIhIFhhKiIiISBYYSoiIiEgWGEqIiIhIFiQNJZs2bUJkZCR0Oh10Oh2MRiM+++wz8fzYsWOhUCjcHnPnznW7Rn5+PuLi4uDv74+goCAsXrwY1dXVbmUyMjIwatQoaLVaDBw4ECkpKR3x8YiIiMgDPlK+ee/evfHqq6/i3nvvhSAIeO+99zBp0iScOXMGQ4cOBQDMnj0bq1evFl/j7+8v/m2z2RAXFweDwYDjx4/j2rVrmDlzJtRqNf785z8DAP
Ly8hAXF4e5c+di69atSEtLwwsvvICQkBDExsZ27AcmIiKiRkkaSp588km353/605+wadMmnDhxQgwl/v7+MBgMDb7
"text/plain": [
"<Figure size 600x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Draw one histogram with a KDE overlay for every numeric column of df.\n",
"for column in df.select_dtypes(include=['float64', 'int64']).columns:\n",
"    plt.figure(figsize=(6, 4))\n",
"    sns.histplot(df[column], kde=True)\n",
"    plt.title(f'Шум в {column}')\n",
"    # plt.show must be called; the bare attribute reference was a no-op.\n",
"    plt.show()"
]
},
{
"cell_type": "code",
2024-11-30 13:19:01 +04:00
"execution_count": 3,
2024-11-01 21:16:02 +04:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi4AAAGJCAYAAACtu7gUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA7G0lEQVR4nO3deXgO9/7/8dedkEUisWURgpDag4rS2Eva2FoOraVOLY2lStHUcpxj66JOKaUop5tUi6ItTlGkQWwpitiKonGsQRGxVBLJ/P7oN/NzS4RGJIbn47ru6zIz7/nMeyZxe5nlvm2GYRgCAACwAIf8bgAAAOBuEVwAAIBlEFwAAIBlEFwAAIBlEFwAAIBlEFwAAIBlEFwAAIBlEFwAAIBlEFwAAIBlEFwAAIBlEFyAB1xkZKRsNpv5cnFxUcWKFTVgwACdOXMmv9vDQ+7atWsaO3as1q1bl9+tAJKkAvndAIC789ZbbykgIEDXr1/Xxo0bNXPmTK1YsUJ79+5VoUKF8rs9PKSuXbumN998U5LUtGnT/G0GEMEFsIyWLVuqTp06kqRevXqpePHimjx5spYuXaouXbrkc3cAkDe4VARYVLNmzSRJ8fHxkqQLFy5oyJAhCgoKkru7uzw8PNSyZUvt2rUr07rXr1/X2LFjVbFiRbm4uKhkyZJq3769jhw5Ikk6evSo3eWpW183/8973bp1stlsWrBggf75z3/K19dXbm5ueu6553T8+PFM296yZYtatGghT09PFSpUSE2aNNGmTZuy3MemTZtmuf2xY8dmqv3qq68UHBwsV1dXFStWTJ07d85y+9nt283S09M1ZcoUVatWTS4uLvLx8VHfvn118eJFu7py5cqpTZs2mbYzYMCATGNm1fvEiRMzHVNJSk5O1pgxYxQYGChnZ2f5+/tr2LBhSk5OzvJY3WrLli1q1aqVihYtKjc3N9WoUUNTp041lzdt2jTLMyg9evRQuXLlJP15rLy8vCRJb775ZrbHH8grnHEBLCojZBQvXlyS9Ntvv2nJkiV64YUXFBAQoDNnzug///mPmjRpol9++UV+fn6SpLS0NLVp00bR0dHq3LmzBg0apMuXLysqKkp79+5VhQoVzG106dJFrVq1stvuiBEjsuxn3LhxstlsGj58uM6ePaspU6YoNDRUcXFxcnV1lSStWbNGLVu2VHBwsMaMGSMHBwfNnj1bzZo104YNG1S3bt1M45YuXVrjx4+XJF25ckX9+vXLctujRo1Sx44d1atXL507d07Tpk1T48aNtXPnThUpUiTTOn369FGjRo0kSd99950WL15st7xv376KjIxUz549NXDgQMXHx2v69OnauXOnNm3apIIFC2Z5HP6KxMREc99ulp6erueee04bN25Unz59VKVKFe3Zs0cffPCBfv31Vy1ZsiTbcaOiotSmTRuVLFlSgwYNkq+vr/bv369ly5Zp0KBBd92fl5eXZs6cqX79+ulvf/ub2rdvL0mqUaPGX9pPIFcZAB5os2fPNiQZP/74o3Hu3Dnj+PHjxtdff20UL17ccHV1NU6cOGEYhmFcv37dSEtLs1s3Pj7ecHZ2Nt566y1z3ueff25IMiZPnpxpW+np6eZ6koyJEydmqqlWrZrRpEkTc3rt2rWGJKNUqVJGUlKSOX/hwoWGJGPq1Knm2I899pgRFhZmbscwDOPatWtGQECA8fTTT2faVv369Y3q1aub0+fOnTMkGWPGjDHnHT161HB0dDTGjRtnt+6ePXuMAgUKZJp/6NAhQ5LxxRdfmPPGjBlj3Px2uGHDBkOSMXfuXLt1V65cmWl+2bJljdatW2fqvX///satb7G39j5s2DDD29vbCA4OtjumX375peHg4GBs2LDBbv1Zs2YZkoxNmzZl2l6GGzduGAEBAUbZsmWNixcv2i27+bg3adLEbpsZunfvbpQtW9aczuqYA/mJS0WARYSGhsrLy0v+/v7q3Lmz3N3dtXjxYp
UqVUqS5OzsLAeHP/9Kp6Wl6fz583J3d1elSpW0Y8cOc5xvv/1WJUqU0GuvvZZpG7de2vgrunXrpsKFC5vTzz//vEqWLKkVK1ZIkuLi4nTo0CG9+OKLOn/+vH7//Xf9/vvvunr1qpo3b67169crPT3dbszr16/LxcUl2+1+9913Sk9PV8eOHc0xf//9d/n6+uqxxx7T2rVr7epTUlIk/Xm8bmfRokXy9PTU008/bTdmcHCw3N3dM42ZmppqV/f777/r+vXr2fZ98uRJTZs2TaNGjZK7u3um7VepUkWVK1e2GzPj8uCt27/Zzp07FR8fr8GDB2c603QvP1/gQcGlIsAiZsyYoYoVK6pAgQLy8fFRpUqVzKAi/Xl5YerUqfroo48UHx+vtLQ0c1nG5STpz0tMlSpVUoECufvX/7HHHrObttlsCgwM1NGjRyVJhw4dkiR17979tmNcunRJRYsWNad///33TOPe6tChQzIM47Z1t17SSUxMlKRMYeHWMS9duiRvb+8sl589e9ZuevXq1ea9IHdrzJgx8vPzU9++ffXNN99k2v7+/ftvO+at279ZxiXE6tWr/6V+AKsguAAWUbduXfOpoqy8++67GjVqlF5++WW9/fbbKlasmBwcHDR48OBMZzLyQ0YPEydOVK1atbKsuTlMpKSk6PTp03r66afvOK7NZtMPP/wgR0fHbMeUpISEBEmSr69vtmN6e3tr7ty5WS6/NVDUq1dP77zzjt286dOna+nSpVmuv3//fkVGRuqrr77K8l6Z9PR0BQUFafLkyVmu7+/vf9ve75bNZpNhGJnm3xx4gQcRwQV4SHzzzTd66qmn9Nlnn9nNT0xMVIkSJczpChUqaMuWLUpNTc2VG0wzZJxRyWAYhg4fPmzeyJlx06+Hh4dCQ0PvON6uXbuUmpqabVjLGNcwDAUEBKhixYp3HPeXX36RzWZTpUqVsh3zxx9/VIMGDcwbi7NTokSJTPuU3Q20I0aMUK1atdSpU6fbbn/Xrl1q3rz5X768k3Gc9+7dm+1xLlq0qH777bdM8//3v//ZTXN5CQ8a7nEBHhKOjo6Z/ge9aNEinTx50m5ehw4d9Pvvv2v69OmZxsjqf+B3a86cObp8+bI5/c033+j06dNq2bKlJCk4OFgVKlTQ+++/rytXrmRa/9y5c5l6d3R0zPJR45u1b99ejo6OevPNNzP1bxiGzp8/b07fuHFD3377rerWrZvtpaKOHTsqLS1Nb7/9dqZlN27cMC835URsbKyWLl2qf//737cNBR07dtTJkyf1ySefZFr2xx9/6OrVq7cdv3bt2goICNCUKVMy9Xnz8alQoYIOHDhgd9x37dqV6dH0jA83vJd9BnITZ1yAh0SbNm301ltvqWfPnqpfv7727NmjuXPnqnz58nZ13bp105w5cxQREaGtW7eqUaNGunr1qn788Ue9+uqratu2bY62X6xYMTVs2FA9e/bUmTNnNGXKFAUGBqp3796SJAcHB3366adq2bKlqlWrpp49e6pUqVI6efKk1q5dKw8PD33//fe6evWqZsyYoQ8//FAVK1a0+6j5jMCze/duxcbGKiQkRBUqVNA777yjESNG6OjRo2rXrp0KFy6s+Ph4LV68WH369NGQIUP0448/atSoUdq9e7e+//77bPelSZMm6tu3r8aPH6+4uDg988wzKliwoA4dOqRFixZp6tSpev7553N0nFavXq2nn34627MhL730khYuXKhXXnlFa9euVYMGDZSWlqYDBw5o4cKFWrVq1W3PRDk4OGjmzJl69tlnVatWLfXs2VMlS5bUgQMHtG/fPq1atUqS9PLLL2vy5MkKCwtTeHi4zp49q1mzZqlatWpKSkoyx3N1dVXVqlW1YMECVaxYUcWKFVP16tW5hwb5J9+eZwJwVzIeh962bVu2ddevXzfeeOMNo2TJkoarq6vRoEEDIzY2NsvHXq9du2b861//MgICAoyCBQsavr6+xvPPP28cOXLEMI
ycPQ49f/58Y8SIEYa3t7fh6upqtG7d2vjf//6Xaf2dO3ca7du3N4oXL244OzsbZcuWNTp27GhER0fbbftOr+7du9u
"text/plain": [
"<Figure size 600x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi4AAAGJCAYAAACtu7gUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA0HklEQVR4nO3deVhUdd/H8c8gMiAKiMoARkZabrncYRlRriialj7ZYlGampZBaZYa3blmWVrmmmZl2pPelZVWWrgvZaREaqhIVnhnC2gqkJiAcJ4/fDgxsqSIDkffr+s619X8znd+53uOCp/OMmMzDMMQAACABbi5ugEAAIAzRXABAACWQXABAACWQXABAACWQXABAACWQXABAACWQXABAACWQXABAACWQXABAACWQXABgEvcwoULZbPZtH//fle3AvwjggtQBRT94ihaPD09dfXVVys2NlYZGRmubg8Aqgx3VzcA4G8TJ05UaGioTpw4oS+//FJz587VZ599pl27dqlGjRqubg8AXI7gAlQh3bt3V5s2bSRJDz74oOrUqaNp06bp448/1j333OPi7oAzc/z4cYI2zhsuFQFVWKdOnSRJaWlpkqQjR47oySefVIsWLVSzZk35+Pioe/fu2rlzZ4n3njhxQuPHj9fVV18tT09PBQUF6fbbb9ePP/4oSdq/f7/T5anTlw4dOphzbdy4UTabTe+9956efvppBQYGytvbW7fddpsOHDhQYttbt25Vt27d5Ovrqxo1aqh9+/basmVLqfvYoUOHUrc/fvz4ErXvvPOOwsLC5OXlJX9/f/Xt27fU7Ze3b8UVFhZq+vTpat68uTw9PeVwOPTQQw/p6NGjTnVXXHGFevbsWWI7sbGxJeYsrfepU6eWOKaSlJubq3HjxqlRo0ay2+0KCQnRqFGjlJubW+qxOt3WrVt1yy23qHbt2vL29lbLli01Y8YMp5r169fr5ptvlre3t/z8/NSrVy+lpKSc0fyvvvqqmjdvLrvdruDgYMXExCgzM9OppkOHDrrmmmuUlJSkdu3aqUaNGnr66afPaH6gIjjjAlRhRSGjTp06kqSffvpJy5cv15133qnQ0FBlZGTotddeU/v27bVnzx4FBwdLkgoKCtSzZ0+tW7dOffv21bBhw/Tnn39qzZo12rVrlxo2bGhu45577tEtt9zitN24uLhS+3nuuedks9k0evRoHTx4UNOnT1dkZKR27NghLy8vSad+UXbv3l1hYWEaN26c3Nzc9NZbb6lTp0764osvdP3115eY97LLLtPkyZMlSceOHdPQoUNL3faYMWN011136cEHH9ShQ4c0a9YstWvXTtu3b5efn1+J9wwZMkQ333yzJOmjjz7SsmXLnNY/9NBDWrhwoQYMGKDHHntMaWlpmj17trZv364tW7aoevXqpR6Hs5GZmWnuW3GFhYW67bbb9OWXX2rIkCFq2rSpkpOT9corr+j777/X8uXLy513zZo16tmzp4KCgjRs2DAFBgYqJSVFK1as0LBhwyRJa9euVffu3XXllVdq/Pjx+uuvvzRr1ixFRETo22+/1RVXXFHm/OPHj9eECRMUGRmpoUOHKjU1VXPnzlViYmKJY3P48GF1795dffv21X333SeHw1GhYwWcEQOAy7311luGJGPt2rXGoUOHjAMHDhjvvvuuUadOHcPLy8v45ZdfDMMwjBMnThgFBQVO701LSzPsdrsxceJEc2zBggWGJGPatGkltlVYWGi+T5IxderUEjXNmzc32rdvb77esGGDIcmoX7++kZ2dbY6///77hiRjxowZ5txXXXWVERUVZW7HMAzj+PHjRmhoqNGlS5cS27rxxhuNa665xnx96NAhQ5Ixbtw4c2z//v1GtWrVjOeee87pvcnJyYa7u3uJ8X379hmSjEWLFplj48aNM4r/yPviiy8MScbixYud3hsfH19ivEGDBkaPHj1K9B4TE2Oc/mP09N5HjRplBAQEGGFhYU
7H9H//938NNzc344svvnB6/7x58wxJxpYtW0psr8jJkyeN0NBQo0GDBsbRo0ed1hU/7q1btzYCAgKMw4cPm2M7d+403NzcjH79+pljRX//0tLSDMMwjIMHDxoeHh5G165dnf6+zZ4925BkLFiwwBxr3769IcmYN29emf0ClYlLRUAVEhkZqXr16ikkJER9+/ZVzZo1tWzZMtWvX1+SZLfb5eZ26p9tQUGBDh8+rJo1a6px48b69ttvzXk+/PBD1a1bV48++miJbZx+aeNs9OvXT7Vq1TJf33HHHQoKCtJnn30mSdqxY4f27dune++9V4cPH9Yff/yhP/74Qzk5OercubM2b96swsJCpzlPnDghT0/Pcrf70UcfqbCwUHfddZc55x9//KHAwEBdddVV2rBhg1N9Xl6epFPHqyxLly6Vr6+vunTp4jRnWFiYatasWWLO/Px8p7o//vhDJ06cKLfvX3/9VbNmzdKYMWNUs2bNEttv2rSpmjRp4jRn0eXB07df3Pbt25WWlqbhw4eXONNU9Of7+++/a8eOHXrggQfk7+9vrm/ZsqW6dOli/pmVZu3atcrLy9Pw4cPNv2+SNHjwYPn4+GjlypVO9Xa7XQMGDCj3WACVhUtFQBUyZ84cXX311XJ3d5fD4VDjxo2dfnEUFhZqxowZevXVV5WWlqaCggJzXdHlJOnUJabGjRvL3b1y/4lfddVVTq9tNpsaNWpkfv7Hvn37JEn9+/cvc46srCzVrl3bfP3HH3+UmPd0+/btk2EYZdadfkmn6D6M08PC6XNmZWUpICCg1PUHDx50er169WrVq1ev3D5PN27cOAUHB+uhhx7SBx98UGL7KSkpZc55+vaLK7qEeM0115RZ89///leS1Lhx4xLrmjZtqlWrViknJ0fe3t5n/F4PDw9deeWV5voi9evXl4eHR5m9AJWJ4AJUIddff735VFFpnn/+eY0ZM0YDBw7Us88+K39/f7m5uWn48OElzmS4QlEPU6dOVevWrUutKR4m8vLy9Pvvv6tLly7/OK/NZtPnn3+uatWqlTunJKWnp0uSAgMDy50zICBAixcvLnX96YGibdu2mjRpktPY7Nmz9fHHH5f6/pSUFC1cuFDvvPNOqffKFBYWqkWLFpo2bVqp7w8JCSmz96qm6P4m4EIguAAW8sEHH6hjx4568803ncYzMzNVt25d83XDhg21detW5efnV8oNpkWKzqgUMQxDP/zwg1q2bGluV5J8fHwUGRn5j/Pt3LlT+fn55Ya1onkNw1BoaKiuvvrqf5x3z549stlspZ5tKD7n2rVrFRERcUa/eOvWrVtin8q7gTYuLk6tW7fW3XffXeb2d+7cqc6dO5/15bui47xr164yj3ODBg0kSampqSXW7d27V3Xr1i31bMvp773yyivN8by8PKWlpZ3Rny1wvnCPC2Ah1apVk2EYTmNLly7Vr7/+6jTWp08f/fHHH5o9e3aJOU5//9l4++239eeff5qvP/jgA/3+++/q3r27JCksLEwNGzbUSy+9pGPHjpV4/6FDh0r0Xq1atVIfNS7u9ttvV7Vq1TRhwoQS/RuGocOHD5uvT548qQ8//FDXX399uZeK7rrrLhUUFOjZZ58tse7kyZMlHvs9GwkJCfr444/1wgsvlBlK7rrrLv366696/fXXS6z766+/lJOTU+b81157rUJDQzV9+vQSfRYdn6CgILVu3VqLFi1yqtm1a5dWr15d4kmy4iIjI+Xh4aGZM2c6He8333xTWVlZ6tGjR5nvBc43zrgAFtKzZ09NnDhRAwYM0I033qjk5GQtXrzY6f+KpVM30b799tsaMWKEtm3bpptvvlk5OTlau3atHnnkEfXq1atC2/f399dNN92kAQMGKCMjQ9OnT1ejRo00ePBgSZKbm5veeOMNde/eXc2bN9eAAQNUv359/frrr9qwYYN8fHz06aefKicnR3PmzNHMmTN19dVXa+PGjeY2igLPd999p4SEBIWHh6
thw4aaNGmS4uLitH//fvXu3Vu1atVSWlqali1bpiFDhujJJ5/U2rVrNWbMGH333Xf69NNPy92X9u3b66GHHtLkyZO
"text/plain": [
"<Figure size 600x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi4AAAGJCAYAAACtu7gUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA+QklEQVR4nO3deVhV9d7+8XsDMigCToAkIpk5D4VlqDkkimmWR3PKJydSKzTNTpoe5/J41HI2TcvpUSutNDWPSc4pOZDkbFaYloKaAokJCOv3Rw/rxxZERWSz6v26rnVd7fX97LU+3w1b7tawt80wDEMAAAAW4OToBgAAAG4XwQUAAFgGwQUAAFgGwQUAAFgGwQUAAFgGwQUAAFgGwQUAAFgGwQUAAFgGwQUAAFgGwQUALG7s2LGy2WwFvt1KlSqpV69eBb5d4G4QXIBCtHjxYtlsNnNxd3fXgw8+qAEDBighIcHR7QF5Onr0qMaOHatTp045uhX8jbk4ugHg72j8+PEKDg7WtWvX9PXXX2vu3LnasGGDDh8+rOLFizu6PUCSdOLECTk5/f//vz169KjGjRunZs2aqVKlSo5rDH9rBBfAAZ588knVr19fkvTCCy+oTJkymjp1qj7//HN169bNwd3h78wwDF27dk0eHh5yc3NzdDtADpwqAoqAJ554QpIUFxcnSbp06ZL++c9/qnbt2vL09JSXl5eefPJJfffddzmee+3aNY0dO1YPPvig3N3dVb58eXXo0EE//vijJOnUqVN2p6duXJo1a2Zua9u2bbLZbPr44481YsQI+fv7q0SJEnr66ad15syZHPves2ePWrduLW9vbxUvXlxNmzbVrl27cp1js2bNct3/2LFjc9QuW7ZMISEh8vDwUOnSpdW1a9dc95/X3LLLzMzU9OnTVbNmTbm7u8vPz0/9+/fX5cuX7eoqVaqkp556Ksd+BgwYkGObufU+ZcqUHK+pJKWmpmrMmDF64IEH5ObmpsDAQA0dOlSpqam5vlY32rNnj9q0aaNSpUqpRIkSqlOnjmbMmJHncxYtWqQnnnhCvr6+cnNzU40aNTR37twcdVlz/vLLL1W/fn15eHjovffeM8eyrnFZvHixOnXqJElq3ry5+Tpv27ZNPXv2VNmyZZWenp5j+61atVLVqlVva57A7eCIC1AEZIWMMmXKSJJ++uknrVmzRp06dVJwcLASEhL03nvvqWnTpjp69KgCAgIkSRkZGXrqqae0efNmde3aVYMGDdLvv/+uqKgoHT58WJUrVzb30a1bN7Vp08Zuv8OHD8+1nwkTJshms2nYsGE6f/68pk+frrCwMMXGxsrDw0OStGXLFj355JMKCQnRmDFj5OTkZP6x3Llzpx599NEc261QoYImTpwoSbpy5YpeeumlXPc9atQode7cWS+88IIuXLigWbNmqUmTJjpw4IB8fHxyPKdfv356/PHHJUmfffaZVq9ebTfev39/LV68WL1799Yrr7yiuLg4zZ49WwcOHNCuXbtUrFixXF+HO5GYmGjOLbvMzEw9/fTT+vrrr9WvXz9Vr15dhw4d0rRp0/T9999rzZo1eW43KipKTz31lMqXL69BgwbJ399fx44d0/r16zVo0KCbPm/u3LmqWbOmnn76abm4uGjdunV6+eWXlZmZqcjISLvaEydOqFu3burfv7/69u2ba9Bo0qSJXnnlFc2cOVMjRoxQ9erVJUnVq1fX888/r6VLl+rLL7+0C37x8fHasmWLxowZk+ccgTtiACg0ixYtMiQZX331lXHhwgXjzJkzxkcffWSUKVPG8PDwMH755RfDMAzj2rVrRkZGht1z4+LiDDc3N2P8+PHmuoULFxqSjKlTp+bYV2Zmpvk8ScaUKVNy1NSsWdNo2rSp+Xjr1q2GJOO+++4zkpOTzfUrV640JBkzZswwt12lShUjPDzc3I9hGMbVq1eN4OBgo2XLljn21bBhQ6NWrVrm4wsXLhiSjDFjxp
jrTp06ZTg7OxsTJkywe+6hQ4cMFxeXHOtPnjxpSDKWLFlirhszZoyR/Z+2nTt3GpKM5cuX2z1348aNOdYHBQUZbdu2zdF7ZGSkceM/lzf2PnToUMPX19cICQmxe03/93//13BycjJ27txp9/x58+YZkoxdu3bl2F+W69evG8HBwUZQUJBx+fJlu7Hsr/uNczaMP38WNwoPDzfuv/9+u3VBQUGGJGPjxo056oOCgoyePXuaj1etWmVIMrZu3WpXl5GRYVSoUMHo0qWL3fqpU6caNpvN+Omnn246R+BOcaoIcICwsDCVK1dOgYGB6tq1qzw9PbV69Wrdd999kiQ3NzfzosiMjAz99ttv8vT0VNWqVfXtt9+a2/n0009VtmxZDRw4MMc+7ub22B49eqhkyZLm42effVbly5fXhg0bJEmxsbE6efKknnvuOf3222+6ePGiLl68qJSUFLVo0UI7duxQZmam3TavXbsmd3f3PPf72WefKTMzU507dza3efHiRfn7+6tKlSraunWrXX1aWpok5XktxqpVq+Tt7a2WLVvabTMkJESenp45tpmenm5Xd/HiRV27di3Pvn/99VfNmjVLo0aNkqenZ479V69eXdWqVbPbZtbpwRv3n92BAwcUFxenwYMH5zjSdKufb9aRMUlKSkrSxYsX1bRpU/30009KSkqyqw0ODlZ4eHie28uLk5OTunfvrrVr1+r333831y9fvlwNGzZUcHBwvrcN3IhTRYADzJkzRw8++KBcXFzk5+enqlWr2t29kZmZqRkzZujdd99VXFycMjIyzLGs00nSn6eYqlatKheXgn0rV6lSxe6xzWbTAw88YN4Ge/LkSUlSz549b7qNpKQklSpVynx88eLFHNu90cmTJ2UYxk3rbjylk5iYKEk5wsKN20xKSpKvr2+u4+fPn7d7vGnTJpUrVy7PPm80ZswYBQQEqH///vrkk09y7P/YsWM33eaN+88u6xRirVq17qgfSdq1a5fGjBmj6OhoXb161W4sKSlJ3t7e5uOCCBY9evTQpEmTtHr1avXo0UMnTpxQTEyM5s2bd9fbBrIjuAAO8Oijj5p3FeXm3//+t0aNGqU+ffrozTffVOnSpeXk5KTBgwfnOJLhCFk9TJkyRfXq1cu1JnuYSEtL07lz59SyZctbbtdms+m///2vnJ2d89ym9Oc1FJLk7++f5zZ9fX21fPnyXMdvDBQNGjTQW2+9Zbdu9uzZ+vzzz3N9/rFjx7R48WItW7Ys12tlMjMzVbt2bU2dOjXX5wcGBt609/z68ccf1aJFC1WrVk1Tp05VYGCgXF1dtWHDBk2bNi3H71D2ozP5VaNGDYWEhGjZsmXq0aOHli1bJldXV3Xu3Pmutw1kR3ABiqBPPvlEzZs31wcffGC3PjExUWXLljUfV65cWXv27FF6enqBXGCaJeuIShbDMPTDDz+oTp065n4lycvLS2FhYbfc3nfffaf09PQ8w1rWdg3DUHBwsB588MFbbvfo0aOy2Wx53rVSuXJlffXVV2rUqNFt/YEuW7ZsjjnldQHt8OHDVa9ePXXp0uWm+//uu+/UokWLOz59l/U6Hz58+LZe5yzr1q1Tamqq1q5dq4oVK5rr8zotdTtu1X+PHj00ZMgQnTt3TitWrFDbtm3tjroBBYFrXIAiyNnZWYZh2K1btWqVfv31V7t1HTt21MWLFzV79uwc27jx+Xdi6dKldtcqfPLJJzp37pyefPJJSVJISIgqV66st99+W1euXMnx/AsXLuTo3dnZOddbjbPr0KGDnJ2dNW7cuBz9G4ah3377zXx8/fp1ffrpp3r00UfzPFXUuXNnZWRk6M0338wxdv36dfN0U35ER0fr888/13/+85+b/lHv3Lmzfv31Vy1YsCDH2B9//KGUlJSbbv/hhx9WcHCwpk+fnqPPvH6+WUerstckJSVp0aJFeU3nlkqUKCFJN33NunXrJpvNpkGDBumnn37S//zP/9zV/oDccMQFKI
KeeuopjR8/Xr1791bDhg116NAhLV++XPfff79dXY8ePbR06VINGTJEe/fu1eOPP66UlBR99dVXevnll/XMM8/ka/+
"text/plain": [
"<Figure size 600x400 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Draw one bar chart of category counts for each categorical feature.\n",
"for column in ['cut', 'color', 'clarity']:\n",
"    plt.figure(figsize=(6, 4))\n",
"    sns.countplot(data=df, x=column)\n",
"    plt.title(f'Распределение {column}')\n",
"    # plt.show must be called; the bare attribute reference was a no-op.\n",
"    plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ищем выбросы"
]
},
{
"cell_type": "code",
2024-11-30 13:19:01 +04:00
"execution_count": 4,
2024-11-01 21:16:02 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Количество выбросов (z-score): 2077\n",
"Количество выбросов после замены на медиану: 1532\n"
]
}
],
"source": [
"from scipy.stats import zscore\n",
"\n",
"# Select the numeric columns once and reuse them for every z-score computation below.\n",
"numeric_columns = df.select_dtypes(include=['float64', 'int64']).columns\n",
"\n",
"# A row is flagged when any numeric value has a z-score above 3.\n",
"# NOTE(review): the check is one-sided, so unusually low values are not flagged - confirm this is intended.\n",
"outliers = df[(zscore(df[numeric_columns]) > 3).any(axis=1)]\n",
"print(f\"Количество выбросов (z-score): {len(outliers)}\")\n",
"\n",
"# Replace flagged values with the column median on a copy; df itself is not modified.\n",
"df_copy = df.copy()\n",
"for column in numeric_columns:\n",
"    median = df[column].median()\n",
"    df_copy[column] = np.where(zscore(df[column]) > 3, median, df[column])\n",
"\n",
"# Z-scores are recomputed from the cleaned copy's own mean/std, so this count need not drop to zero.\n",
"outliers_after = df_copy[(zscore(df_copy[numeric_columns]) > 3).any(axis=1)]\n",
"print(f\"Количество выбросов после замены на медиану: {len(outliers_after)}\")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Скорее всего, тут реальные данные, поэтому, убрав самые большие выбросы, остальные оставим."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ищем корреляции"
]
},
{
"cell_type": "code",
2024-11-30 13:19:01 +04:00
"execution_count": 5,
2024-11-01 21:16:02 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Корреляция признаков с целевой переменной 'price':\n",
"price 1.000000\n",
"carat 0.921591\n",
"x 0.884433\n",
"y 0.865419\n",
"z 0.861249\n",
"table 0.127118\n",
"depth -0.010630\n",
"Name: price, dtype: float64\n"
]
}
],
"source": [
"# Correlation of every numeric column with 'price', sorted in descending order.\n",
"numeric_corr = df.select_dtypes(include=['float64', 'int64']).corr()\n",
"correlations = numeric_corr['price'].sort_values(ascending=False)\n",
"\n",
"print(\"Корреляция признаков с целевой переменной 'price':\")\n",
"print(correlations)"
]
},
{
"cell_type": "code",
2024-11-30 13:19:01 +04:00
"execution_count": 6,
2024-11-01 21:16:02 +04:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAwAAAAIQCAYAAAA2IAmhAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAADB0UlEQVR4nOzdd3hT1RvA8W/Ske49KaWljJa99xRBRGWJyFAZAg6GSnFQRRAHdfJDEUVBFHCgshwgG2TvvcpqGYWOdO+R3N8flZTQMFoaut7P89xHcnLuzXtO480994yrUhRFQQghhBBCCFElqMs6ACGEEEIIIcT9Iw0AIYQQQgghqhBpAAghhBBCCFGFSANACCGEEEKIKkQaAEIIIYQQQlQh0gAQQgghhBCiCpEGgBBCCCGEEFWINACEEEIIIYSoQqQBIIQQQgghRBUiDQAhhBBCCCGqEGkACFFO/fDDD6hUKvbv31/kvXnz5qFSqejXrx86na4MohNCCCFERSUNACEqmBUrVvDiiy/SqVMnlixZgoWFRVmHJIQQQogKRBoAQlQgW7ZsYciQIdSvX5+//voLGxubsg5JCCGEEBWMNACEqCAOHz5M37598fX1Ze3atTg7OxfJ8/vvv9OiRQtsbW3x8PDg6aefJjo62ijPiBEjcHBw4MKFC/Ts2RN7e3uqVavGu+++i6IohnxRUVGoVCo+/fRT/ve//xEQEICtrS1dunTh+PHjRT779OnTPPHEE7i5uWFjY0PLli35888/TZala9euqFSqItsPP/xglO/rr7+mYcOG2NnZGeVbunSp0bEaNmxY5DM+/fRTVCoVUVFRhrTrw6puTNPr9TRu3Njk52/atIlOnTphb2+Pi4sLffv25dSpU0Z53nnnHVQqFVqt1ih9//79RY55ve5vtnTpUlQqFVu2bDGkbdu2jYEDB1KjRg00Gg3+/v5MnDiRrKwsk/u3bNkSR0dHo3r69NNPi+S90fX6sLa2Jj4+3ui9Xbt2GY5z4zC0u4lrxIgRJv++N27X/waBgYE89thjrFu3jqZNm2JjY0P9+vVZvny5yVjv5m9XnHrOzc1l6tSptGjRAmdnZ+zt7enUqRObN2++bd0JIURFZlnWAQgh7uz8+fM8/PDDaDQa1q5di6+vb5E8P/zwAyNHjqRVq1aEh4cTGxvL559/zo4dOzh06BAuLi6GvDqdjocffpi2bdvy8ccfs2bNGqZNm0Z+fj7vvvuu0XEXLVpEWloa48aNIzs7m88//5xu3bpx7NgxvL29AThx4gQdOnTAz8+PyZMnY29vz2+//Ua/fv1YtmwZ/fv3LxJvSEgIb731FgBarZaJEycavf/rr78yduxYunbtyoQJE7C3t+fUqVPMmDHjXqvTyOLFizl27FiR9A0bNtCrVy+CgoJ45513yMrKYvbs2XTo0IGDBw8SGBhYqnHc7PfffyczM5MXX3wRd3d39u7dy+zZs7ly5Qq///67Id+uXbt48sknadKkCR9++CHOzs4m6/N2LCws+PHHH432+f7777GxsSE7O7vYcT3//PN0797dsM8zzzxD//79efzxxw1pnp6ehn+fPXuWQYMG8cILLzB8+HC+//57Bg4cyJo1a+jRo8ct477V3644UlNTmT9/PkOGDGHMmDGkpaXx3Xff0bNnT/bu3UvTpk3v6fhCCFEuKUKIcun7779XAOXvv/9WatWqpQDKQw89ZDJvbm6u4uXlpTRs2FDJysoypP/9998KoEydOtWQNnz4cAVQJkyYYEjT6/XKo48+qlhbWyvx8fGKoihKZGSkAii2trbKlStXDHn37NmjAMrEiRMNaQ8++KDSqFEjJTs72+iY7du3V+rUqVMk3g4dOigPPPCA4fX1z/r+++8NaUOGDFFcXFyMyrN582YFUH7//XdDWpcuXZQGDRoU+YxPPvlEAZTIyEhD2vU6vZ6WnZ2t1KhRQ+nVq1eRz2/atKni5eWlJCQkGNKOHD
miqNVqZdiwYYa0adOmKYCh3q7bt29fkWMOHz5csbe3LxLr77//rgDK5s2bDWmZmZlF8oWHhysqlUq5ePGiIS0sLEwBlGvXrhnSrtfnJ598UuQYN7peH0OGDFEaNWpkSM/IyFCcnJyUoUOHKoCyb9++Ysd1I0CZNm2ayfcCAgIUQFm2bJkhLSUlRfH19VWaNWtWJNa7+dsVp57z8/OVnJwco3xJSUmKt7e38uyzz5qMWQghKjoZAiREOTdixAguX77M0KFDWbdundHd3+v2799PXFwcY8eONZoX8OijjxISEsKqVauK7DN+/HjDv1UqFePHjyc3N5cNGzYY5evXrx9+fn6G161bt6ZNmzasXr0agMTERDZt2sSTTz5JWloaWq0WrVZLQkICPXv25OzZs0WGIeXm5qLRaG5b7rS0NOzs7Mw6z2HOnDkkJCQwbdo0o/Rr165x+PBhRowYgZubmyG9cePG9OjRw1D2GyUmJhrKrtVqSUlJueXn3phPq9WSlpZWJI+tra3h3xkZGWi1Wtq3b4+iKBw6dMjwXlpaGmq12qiHp7ieeeYZTp8+bRjqs2zZMpydnXnwwQdLHFdxVKtWzaiXyMnJiWHDhnHo0CFiYmJM7nOrv11xWVhYYG1tDRQMKUpMTCQ/P5+WLVty8ODBezq2EEKUV9IAEKKcS0xM5Mcff2ThwoU0bdqUl19+ucjF5cWLFwEIDg4usn9ISIjh/evUajVBQUFGaXXr1gUwGmMNUKdOnSLHrFu3riHfuXPnUBSFt99+G09PT6Pt+sVZXFyc0f7Jyckmx2jfqF27dly9epV33nmHS5cu3fGiurhSUlKYMWMGoaGhhqFM192uPuvVq4dWqyUjI8MoPTg42KjsNw6BuVFGRkaRenr22WeL5Lt06ZKhAeLg4ICnpyddunQxxH5du3bt0Ov1vPzyy5w/fx6tVktSUlKx6sLT05NHH32UBQsWALBgwQKGDx+OWl30J+Ju4yqO2rVro1KpjNJu9X28/jm3+tuVxMKFC2ncuDE2Nja4u7vj6enJqlWrSvX7JoQQ5YnMARCinPvkk08YOHAgAN9++y1t27YlLCyMr776qowjK6DX6wF49dVX6dmzp8k8tWvXNnodExNzy7zXTZw4kYiICN577z2mT59eOsHe4KOPPkKtVvPaa6+RkJBwz8dbtmwZTk5Ohtdnzpxh3LhxRfLZ2Njw119/GaVt27bNaO6FTqejR48eJCYm8sYbbxASEoK9vT3R0dGMGDHCUOcAgwcP5uDBg8yePZtvv/22xPE/++yzDBs2jAkTJrB161bmz5/Ptm3bjPIUJy5zKs2/3Y8//siIESPo168fr732Gl5eXlhYWBAeHs758+dLKWIhhChfpAEgRDnXuXNnw79btWrFuHHjmDNnDsOGDaNt27YABAQEABAREUG3bt2M9o+IiDC8f51er+fChQuGu6xQcMEKFJncevbs2SIxnTlzxpDvek+ClZXVLe963+jKlSukpaVRr1692+aztbVl3rx5HDp0CGdnZ6ZNm8aRI0d49dVX7/gZd3L16lU+//xzwsPDcXR0LHIReWN93uz06dN4eHhgb29vlN65c2c8PDwMr281JMfCwqJIPSUnJxu9PnbsGGfOnGHhwoUMGzbMkL5+/foix1Or1Xz66accO3aMyMhIvvrqK2JjY3n66adNfv6t9OrVCxsbGwYPHkzHjh2pVatWkQZAceIqjuu9SDf2Atzq+3inv11xLV26lKCgIJYvX270+fc6tEgIIcozGQIkRAXzwQcf4Ovry3PPPUd+fj4ALVu2xMvLi7lz55KTk2PI+88//3Dq1CkeffTRIsf58ssvDf9WFIUvv/wSKyurIuO+V65caTSGf+/evezZs4devXoB4OXlRdeuXfnmm2+4du1akc+5eXnJJUuWABRpqJgSFhbGpUuX+PHHH+nevTstWrS44z53Y/r06Xh7e/PCCy+YfN/X15emTZuycOFCo4vz48ePs2
7dOh555JFSieNWrj/cTblhWVZFUfj8889N5p89ezabNm3ip59+onv37nTo0KHYn2lpacmwYcM4evSoySFJJYnrbl2
"text/plain": [
"<Figure size 1000x600 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Annotated heatmap of the pairwise correlations between the numeric columns.\n",
"numeric_corr_matrix = df.select_dtypes(include=['float64', 'int64']).corr()\n",
"\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"sns.heatmap(numeric_corr_matrix, annot=True, fmt=\".2f\", cmap=\"coolwarm\", ax=ax)\n",
"ax.set_title(\"Корреляционная матрица\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-11-30 13:19:01 +04:00
"execution_count": 7,
2024-11-01 21:16:02 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Корреляция признаков с целевой переменной 'price' для 'cut', 'color', 'clarity':\n",
"price 1.000000\n",
"clarity_SI2 0.128427\n",
"color_I 0.097130\n",
"cut_Premium 0.095685\n",
"color_J 0.081714\n",
"color_H 0.059229\n",
"clarity_SI1 0.008940\n",
"color_G 0.008564\n",
"cut_Very Good 0.006589\n",
"cut_Good -0.000307\n",
"clarity_VS2 -0.001066\n",
"clarity_VS1 -0.009879\n",
"color_F -0.024166\n",
"clarity_IF -0.049593\n",
"clarity_VVS2 -0.052375\n",
"clarity_VVS1 -0.095261\n",
"cut_Ideal -0.097160\n",
"color_E -0.101101\n",
"Name: price, dtype: float64\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1cAAAJmCAYAAACwk3pMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd1gUx//HX0fvIL13BAEFe8Hea2zRWCL2khijMRpjElvyTYxRY4umaey9xt57wY4FsYIFQZpSjg63vz8O7ji4Q7BEkt++nmefB/ZmPvvemc/M7OyUlQiCICAiIiIiIiIiIiIiIiLyWmi9awEiIiIiIiIiIiIiIiL/BcTOlYiIiIiIiIiIiIiIyBtA7FyJiIiIiIiIiIiIiIi8AcTOlYiIiIiIiIiIiIiIyBtA7FyJiIiIiIiIiIiIiIi8AcTOlYiIiIiIiIiIiIiIyBtA7FyJiIiIiIiIiIiIiIi8AcTOlYiIiIiIiIiIiIiIyBtA7FyJiIiIiIiIiIiIiIi8AcTOlYiIiIiIiAgA06dPRyKRvGsZbwR3d3cGDRr0Rm0eP34ciUTC8ePH36jdfzMrVqxAIpHw8OHDdy1FRKRSIHauRP7VFFXqly5dKvXbn3/+iUQioVu3bhQUFLwDdSIiIpWVorrjbREbG8v06dMJDw8v9dugQYNo3rz5K9l927rfFu7u7kyfPv2V4r5Oev0TrFu3jvnz579y/KIO29vunPxbfeffqlvk/y9i50rkP8n27dv56KOPaNKkCRs2bEBbW/tdSxIREfl/RGxsLDNmzFDbuRL599K0aVOysrJo2rSp4tzrdq7+7QwYMICsrCzc3NzetRQRkUqB2LkS+c9x/Phx+vbti7+/P7t27cLAwOBdSxIRERH5f4cgCGRlZb1rGW+E7OxsZDIZWlpaGBgYoKUlPj5lZGQAoK2tjYGBgTi6JCJSiFg7iPynCA8Pp2vXrjg4OHDgwAHMzc1Lhdm8eTO1a9fG0NAQa2trPvzwQ54+faoSZtCgQZiYmBAVFUW7du0wNjbG0dGRb7/9FkEQFOEePnyIRCJhzpw5zJs3Dzc3NwwNDWnWrBk3b94sde3bt2/z/vvvY2lpiYGBAXXq1GHnzp1q76V58+ZIJJJSx4oVK1TC/frrrwQGBmJkZKQSbsuWLSq2AgMDS11jzpw5paajqJs/L5PJqFGjhtrrHz16lCZNmmBsbIyFhQVdu3YlMjJSJUzROo6kpCSV85cuXSplsyjtS7Jly5ZSax1OnTpFr169cHV1RV9fHxcXFz777DO1D3RbtmyhTp06mJqaqqTTnDlzSoUtTlF66OnpkZiYqPLbuXPnFHaKT00tj65Bgwapzd/iR1EeuLu707lzZw4ePEhwcDAGBgb4+/uzbds2tVrLk3cVSefc3FymTp1K7dq1MTc3x9jYmCZNmnDs2LEy0644RWWlrPsszpv0/4qwb98+mjVrhqmpKWZmZtStW5d169Ypfte0jqd58+aKqWvHjx+nbt26AAwePFij9jfJy3RD+eo+deTn5/Pdd9/h5eWFvr4+7u7ufPXVV+Tk5KiEK/LTAwcOUKdOHQwNDfn999/f6H0WIZPJWLBgAdWrV8fAwAAbGxvat2+vdop4Ec+fP2fChAlUr14dExMTzMzM6NChA9euXVMJVzRNb8OGDXzzzTc4OTlhZGREWlpaqTVXzZs3Z8+ePTx69EiRz+7u7kilUoyNjRk7dmwpHTExMWhrazNz5swK3bO69XBSqRR7e/tXXgdW0brlxIkTfPzxx9ja2uLs7KzyW8lyXB6fPH/+PO3bt8fc3BwjIyOaNWvGmTNnKnwfIiKVCZ13LUBE5E3x4MED2rdvj76+PgcOHMDBwaFUmBUrVjB48GDq1q3LzJkziY+PZ8GCBZw5c4arV69iYWGhCFtQUED79u1p0KABP/30E/v372fatGnk5+fz7bffqthdtWoV6enpjB49muzsbBYsWEDLli25ce
MGdnZ2AERERBASEoKTkxNffvklxsbGbNq0iW7durF161a6d+9eSq+fnx9ff/01AElJSXz22Wcqv2/cuJGPP/6Y5s2bM2bMGIyNjYmMjOSHH3543eRUYfXq1dy4caPU+cOHD9OhQwc8PT2ZPn06WVlZLFq0iJCQEK5cuYK7u/sb1VGSzZs3k5mZyUcffYSVlRUXLlxg0aJFxMTEsHnzZkW4c+fO0bt3b4KCgvjxxx8xNzdXm55loa2tzZo1a1TiLF++HAMDA7Kzsyusa+TIkbRu3VoRZ8CAAXTv3p0ePXooztnY2Cj+vnfvHh988AGjRo1i4MCBLF++nF69erF//37atGmjUbemvKsIaWlpLF26lL59+zJ8+HDS09NZtmwZ7dq148KFCwQHB5fbVt++fenYsSMAe/fuZf369RrD/tP+v2LFCoYMGUJAQACTJ0/GwsKCq1evsn//fvr161duO9WqVePbb79l6tSpjBgxgiZNmgDQqFGjV9L1JnRXpO4rybBhw1i5ciXvv/8+n3/+OefPn2fmzJlERkayfft2lbB37tyhb9++jBw5kuHDh+Pr6/tW7nno0KGsWLGCDh06MGzYMPLz8zl16hRhYWHUqVNHbZyoqCh27NhBr1698PDwID4+nt9//51mzZpx69YtHB0dVcJ/99136OnpMWHCBHJyctDT0ytl8+uvvyY1NZWYmBjmzZsHgImJCSYmJnTv3p2NGzfy888/q0xNX79+PYIg0L9//9dOh7lz5xIfH/9aNipSt3z88cfY2NgwdepUxciVOsrjk0ePHqVDhw7Url2badOmoaWlxfLly2nZsiWnTp2iXr16r3VfIiLvDEFE5F/M8uXLBUDYvXu34OXlJQBC27Zt1YbNzc0VbG1thcDAQCErK0txfvfu3QIgTJ06VXFu4MCBAiCMGTNGcU4mkwmdOnUS9PT0hMTEREEQBCE6OloABENDQyEmJkYR9vz58wIgfPbZZ4pzrVq1EqpXry5kZ2er2GzUqJHg4+NTSm9ISIjQokULxf9F11q+fLniXN++fQULCwuV+zl27JgACJs3b1aca9asmRAQEFDqGrNnzxYAITo6WnGuKE2LzmVnZwuurq5Chw4dSl0/ODhYsLW1FZKTkxXnrl27JmhpaQmhoaGKc9OmTRMARboVcfHixVI2Bw4cKBgbG5fSunnzZgEQjh07pjiXmZlZKtzMmTMFiUQiPHr0SHFu8uTJAiDExcUpzhWl5+zZs0vZKE5RevTt21eoXr264nxGRoZgZmYm9OvXTwCEixcvVlhXcQBh2rRpan9zc3MTAGHr1q2Kc6mpqYKDg4NQs2bNUlrLk3cVSef8/HwhJydHJdyLFy8EOzs7YciQIWo1l+Tu3bsCIMyZM0dxTp3/FfEm/b88pKSkCKampkL9+vVV7AmCvJwW4ebmJgwcOLBU/GbNmgnNmjVT/K/Ot98G5dFdkbqvqKwWER4eLgDCsGHDVGxPmDBBAISjR48qzhX56f79+9/oPZbk6NGjAiB8+umnpX4rK6+ys7OFgoIClfDR0dGCvr6+8O233yrOFfmQp6dnqbJc9Fvx8tGpUyfBzc2tlJYDBw4IgLBv3z6V8zVq1FDxlfJSMm8SEhIEU1NTRfkurqm8VLRuady4sZCfn69io2S9Ux6flMlkgo+Pj9CuXTuVPMvMzBQ8PDyENm3aVPheREQqC+K0QJH/BIMGDeLJkyf069ePgwcPqoxaFHHp0iUSEhL4+OOPVdZhderUCT8/P/bs2VMqzieffKL4WyKR8Mknn5Cbm8vhw4dVwnXr1g0nJyfF//Xq1aN+/frs3bsXkE9HOXr0KL179yY9PZ2kpCSSkpJITk6mXbt23Lt3r9T0nNzcXPT19cu87/T0dIyMjN7qurLFixeTnJzMtGnTVM7HxcURHh7OoEGDsLS0VJyvUaMGbdq0Udx7cZ4/f66496SkJFJTUzVet3i4pKQk0tPTS4UxNDRU/J2RkUFSUhKNGj
VCEASuXr2q+C09PR0tLa0y386/jAEDBnD79m3FtKOtW7dibm5Oq1atXllXRXB0dFQZ3TQzMyM0NJSrV6/y7NkztXE
"text/plain": [
"<Figure size 1000x600 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# One-hot encode the categorical features; drop_first=True omits the first level of each feature.\n",
"df_dummies = pd.get_dummies(df, columns=['cut', 'color', 'clarity'], drop_first=True)\n",
"\n",
"# Keep the target plus only the generated dummy columns, matched by their prefix.\n",
"dummy_prefixes = ('cut_', 'color_', 'clarity_')\n",
"selected_columns = ['price'] + [col for col in df_dummies.columns if col.startswith(dummy_prefixes)]\n",
"\n",
"# Compute the correlation matrix once and reuse it for both the ranking and the heatmap.\n",
"correlation_matrix = df_dummies[selected_columns].corr()\n",
"correlations = correlation_matrix['price'].sort_values(ascending=False)\n",
"\n",
"print(\"Корреляция признаков с целевой переменной 'price' для 'cut', 'color', 'clarity':\")\n",
"print(correlations)\n",
"\n",
"plt.figure(figsize=(10, 6))\n",
"sns.heatmap(correlation_matrix, annot=True, fmt=\".2f\", cmap=\"coolwarm\")\n",
"plt.title(\"Корреляционная матрица для 'cut', 'color', 'clarity' и 'price'\")\n",
"plt.show()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Вывод такой:<br>\n",
"Сильное влияние на цену оказывают характеристики 'carat', 'x', 'y', 'z'.<br>\n",
"Низкое влияние оказывают 'table', 'cut', 'color', 'clarity'.<br>\n",
"А признак 'depth' не оказывает влияния вовсе."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Выполним разбиение каждого набора данных на обучающую, контрольную и тестовую выборки для устранения проблемы утечки данных."
]
},
{
"cell_type": "code",
2024-11-30 13:19:01 +04:00
"execution_count": 8,
2024-11-01 21:16:02 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(37760, 8091, 8092)"
]
},
2024-11-30 13:19:01 +04:00
"execution_count": 8,
2024-11-01 21:16:02 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Hold out 30% of the rows, then halve that hold-out into validation and test parts.\n",
"train_data, temp_data = train_test_split(df, test_size=0.3, random_state=42)\n",
"validation_data, test_data = train_test_split(temp_data, test_size=0.5, random_state=42)\n",
"\n",
"# Row counts of the three parts, in train / validation / test order.\n",
"train_size, validation_size, test_size = (\n",
"    len(part) for part in (train_data, validation_data, test_data)\n",
")\n",
"\n",
"train_size, validation_size, test_size"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Оценим сбалансированность выборки"
]
},
{
"cell_type": "code",
2024-11-30 13:19:01 +04:00
"execution_count": 9,
2024-11-01 21:16:02 +04:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABckAAAIDCAYAAADffZa8AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3wU1f7/8fduym56gJBCS2hSQxcMCKFp6GJBCChFxIqiKF7xKiCo6FUURATlIlhAEeSiIhdUFL8qsdBsiILSFENTQm/J+f3hb+fuJhtIQsgm2dfz8ZhHNrNnzpwpO5+Zz5ydtRljjAAAAAAAAAAA8EN2XzcAAAAAAAAAAABfIUkOAAAAAAAAAPBbJMkBAAAAAAAAAH6LJDkAAAAAAAAAwG+RJAcAAAAAAAAA+C2S5AAAAAAAAAAAv0WSHAAAAAAAAADgt0iSAwAAAAAAAAD8FklyAAAAAAAAAIDfIkkOAAAAlKCkpCQNHTrU1804b0eOHNGNN96o+Ph42Ww23XXXXb5uEgAAAFAkJMkBAADKsV9++UU333yzatWqJafTqcjISLVr107Tpk3T8ePHC13f888/r3nz5hV/Q0uhefPmyWazeQyxsbHq1KmT/vvf//q6eT732GOPad68ebr11lv16quv6vrrr7/g88zOztbcuXPVsWNHVaxYUQ6HQ0lJSRo2bJjWrl17wefvzaZNmzRhwgRt377dJ/MHAADA+Qv0dQMAAABwYbz33nvq16+fHA6HBg8erMaNG+vUqVP67LPPNGbMGP3www968cUXC1Xn888/r5iYmHLRE7qgJk6cqJo1a8oYoz179mjevHnq0aOH3n33XfXq1cvXzfOZjz76SJdcconGjx9fIvM7fvy4rrrqKq1YsUIdOnTQAw88oIoVK2r79u1688039fLLL2vnzp2qVq1aibTHZdOmTXr44YfVsWNHJSUllei8AQAAUDxIkgMAAJRD27Zt04ABA5SYmKiPPvpICQkJ1nu33367tm7dqvfee8+HLbywjh49qrCwsGKpq3v37mrVqpX1//DhwxUXF6fXX3/dr5Pke/fuVcOGDYutvjNnzignJ0fBwcFe3x8zZoxWrFihZ555Js+jXcaPH69nnnmmWNpRnPsOAAAAygYetwIAAFAO/etf/9KRI0c0Z84cjwS5S506dTRq1Cjr/7lz56pz586KjY2Vw+FQw4YNNXPmTI9pkpKS9MMPP+iTTz6xHj/SsWNH6/2DBw/qrrvuUvXq1eVwOFSnTh098cQTysnJ8ajnwIEDuv766xUZGano6GgNGTJE33zzjWw2W55HuXz00Udq3769wsLCFB0drSuuuEI//vijR5kJEybIZrNp06ZNGjhwoCpUqKBLL71Uc+fOlc1m04YNG/Is/2OPPaaAgAD9/vvvBV2llujoaIWEhCgw0LO/yVNPPaW2bduqUqVKCgkJUcuWLbV48eJz1vfnn3/q3nvvVXJyssLDwxUZGanu3bvrm2++8Si3evVq2Ww2vfnmm3r00UdVrVo1OZ1OdenSRVu3bs1T75dffqkePXqoQoUKCgsLU5MmTTRt2jSPMps3b9Y111yjihUryul0qlWrVnrnnXfO2l5XO7Zt26b33nvP2hdcjxvZu3evdSPB6XSqadOmevnllz3q2L59u2w2m5566ilNnTpVtWvXlsPh0KZNm7zO87ffftMLL7ygyy67zOuzzwMCAnTvvfdavch37Nih2267TfXq1VNISIgqVaqkfv365XkkiuuROp988oluu+02xcbGFqqOefPmqV+/fpKkTp06Weti9erVZ12HAAAAKF3oSQ4AAFAOvfvuu6pVq5batm1boPIzZ85Uo0aN1KdPHwUGBurdd9/VbbfdppycHN1+++2SpKlTp+qOO+5QeHi4/vnPf0qS4uLiJEnHjh1Tamqqfv/9d918882qUaOG1qxZo7Fjx+qPP/7Q1KlTJUk5OTnq3bu3vvrqK91666
2qX7++3n77bQ0ZMiRPmz788EN1795dtWrV0oQJE3T8+HFNnz5d7dq10/r16/M82qJfv36qW7euHnvsMRljdM011+j222/X/Pnz1bx5c4+y8+fPV8eOHVW1atVzrpusrCzt379fxhjt3btX06dP15EjR3Tdddd5lJs2bZr69OmjQYMG6dSpU3rjjTfUr18/LVu2TD179sy3/l9//VVLly5Vv379VLNmTe3Zs0cvvPCCUlNTtWnTJlWpUsWj/OOPPy673a57771XWVlZ+te//qVBgwbpyy+/tMp88MEH6tWrlxISEjRq1CjFx8frxx9/1LJly6ybIz/88IPatWunqlWr6v7771dYWJjefPNN9e3bV2+99ZauvPJKr+1t0KCBXn31Vd19992qVq2a7rnnHklS5cqVdfz4cXXs2FFbt27VyJEjVbNmTS1atEhDhw7VwYMHPW7MSH/fnDlx4oRuuukmORwOVaxY0es8//vf/+rMmTMFfu75119/rTVr1mjAgAGqVq2atm/frpkzZ6pjx47atGmTQkNDPcrfdtttqly5ssaNG6ejR48WuI4OHTrozjvv1LPPPqsHHnhADRo0sNYRAAAAyhADAACAciUrK8tIMldccUWBpzl27FiecWlpaaZWrVoe4xo1amRSU1PzlJ00aZIJCwszP//8s8f4+++/3wQEBJidO3caY4x56623jCQzdepUq0x2drbp3LmzkWTmzp1rjW/WrJmJjY01Bw4csMZ98803xm63m8GDB1vjxo8fbySZ9PT0PO1KT083VapUMdnZ2da49evX55mXN3PnzjWS8gwOh8PMmzcvT/nc6/DUqVOmcePGpnPnzh7jExMTzZAhQ6z/T5w44dE+Y4zZtm2bcTgcZuLEida4jz/+2EgyDRo0MCdPnrTGT5s2zUgy3333nTHGmDNnzpiaNWuaxMRE89dff3nUm5OTY73u0qWLSU5ONidOnPB4v23btqZu3bpnXTeu5ejZs6fHuKlTpxpJ5rXXXvNYDykpKSY8PNwcOnTIWj5JJjIy0uzdu/ec87r77ruNJLNhw4ZzljXG+/6ckZFhJJlXXnnFGufaxpdeeqk5c+ZMkepYtGiRkWQ+/vjjArUNAAAApQ+PWwEAAChnDh06JEmKiIgo8DQhISHWa1fP6dTUVP3666/Kyso65/SLFi1S+/btVaFCBe3fv98aunbtquzsbP3f//2fJGnFihUKCgrSiBEjrGntdrvVW93ljz/+0MaNGzV06FCP3sVNmjTRZZddpuXLl+dpwy233JJn3ODBg7V79259/PHH1rj58+crJCREV1999TmXS5JmzJihDz74QB988IFee+01derUSTfeeKOWLFniUc59Hf7111/KyspS+/bttX79+rPW73A4ZLf/fVqenZ2tAwcOKDw8XPXq1fM67bBhwzye292+fXtJf/dIl6QNGzZo27ZtuuuuuxQdHe0xrc1mk/T3I14++ugjXXvttTp8+LC1vQ4cOKC0tDRt2bKlSI+iWb58ueLj45Wenm6NCwoK0p133qkjR47ok08+8Sh/9dVXq3Llyuest7D7tPu2OH36tA4cOKA6deooOjra6zodMWKEAgICzqsOAAAAlF08bgUAAKCciYyMlCQdPny4wNN8/vnnGj9+vDIyMnTs2DGP97KyshQVFXXW6bds2aJvv/0234Tn3r17Jf39nOeEhIQ8j7uoU6eOx/87duyQJNWrVy9PXQ0aNNDKlSvz/MBizZo185S97LLLlJCQoPnz56tLly7KycnR66+/riuuuKLACdfWrVt7/HBnenq6mjdvrpEjR6pXr15WwnrZsmV65JFHtHHjRp08edIq70pM5ycnJ0fTpk3T888/r23btik7O9t6r1KlSnnK16hRw+P/ChUqSPo7MS9Jv/zyiySpcePG+c5z69atMsbooYce0kMPPeS1zN69ewv0OBp3O3bsUN26da2kv4vr8SOu7eribZt5U9h9+vjx45o8ebLmzp
2r33//XcYY6z1vN328taOwdQAAAKDsIkkOAABQzkRGRqpKlSr6/vvvC1T+l19+UZcuXVS/fn09/fTTql69uoKDg7V
"text/plain": [
"<Figure size 1800x500 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABcMAAAIsCAYAAAA+pbGWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB2rUlEQVR4nOzde3zP9f//8ft7Y2eGZgeHzClnVhMfSlTLnHOokD5Ykm+1pCWlTwwRlTQhShmJiKR0QBYd9yk5lSSHCGlzasOwsb1+f/Tz/vRuww6v7bX3a7fr5fK+5P18PV+v9+P1Qvd57LXny2EYhiEAAAAAAAAAAGzMw+oCAAAAAAAAAAAobjTDAQAAAAAAAAC2RzMcAAAAAAAAAGB7NMMBAAAAAAAAALZHMxwAAAAAAAAAYHs0wwEAAAAAAAAAtkczHAAAAAAAAABgezTDAQAAAAAAAAC2RzMcAAAAAAAAAGB7NMMBAACAUiY8PFyDBw+2uowiO336tO677z6FhobK4XBoxIgRVpcEAACAMoxmOAAAQBm3d+9eDRs2THXq1JGPj48qVqyoG264QdOnT9fZs2cLfLxXXnlF8+fPN7/QUmj+/PlyOBwur+DgYN1888365JNPrC7Pcs8++6zmz5+vBx54QAsXLtS///3vYv/M7OxsJSYmqkOHDqpSpYq8vb0VHh6umJgYff/994U65rPPPquVK1eaWygAAABKnMMwDMPqIgAAAGCNjz76SHfeeae8vb01cOBANW3aVFlZWfrqq6/07rvvavDgwXrttdcKdMymTZsqKChIGzZsKJ6iS5H58+crJiZGEyZMUO3atWUYhlJTUzV//nz99NNPWrVqlbp161bg44aHh6tDhw5u/02Ff/3rXypXrpy++uqrEvm8s2fPqnfv3lq9erVuuukmde/eXVWqVNH+/fv1zjvvaNeuXTpw4IBq1KhRoOMGBATojjvucPvfDwAAgLKunNUFAAAAwBr79u1Tv379VKtWLX322WcKCwtzbnvooYe0Z88effTRRxZWWLwyMjLk7+9vyrE6d+6sli1bOt8PGTJEISEhevvttwvVDLeLI0eOqHHjxqYd78KFC8rJyZGXl1ee2x9//HGtXr1aL730Uq4lWeLj4/XSSy+ZVgsAAADcD8ukAAAAlFHPP/+8Tp8+rTfeeMOlEX5RvXr19MgjjzjfJyYm6pZbblFwcLC8vb3VuHFjzZ4922Wf8PBw/fTTT/r888+dy4Z06NDBuT0tLU0jRoxQzZo15e3trXr16um5555TTk6Oy3GOHz+uf//736pYsaIqVaqkQYMGadu2bXI4HLnuzv3ss8/Url07+fv7q1KlSrr99tv1888/u8wZN26cHA6HduzYobvvvluVK1fWjTfeqMTERDkcDm3ZsiXX+T/77LPy9PTU77//nt9L6lSpUiX5+vqqXDnXe0+mTp2qtm3b6qqrrpKvr68iIyO1fPnyKx7vxIkTGjlypJo1a6aAgABVrFhRnTt31rZt21zmbdiwQQ6HQ++8844mTZqkGjVqyMfHR7feeqv27NmT67jffvutunTposqVK8vf31/NmzfX9OnTXebs3LlTd9xxh6pUqSIfHx+1bNlSH3zwwWXrvVjHvn379NFHHzn/LOzfv1/SX03yi98w8PHxUYsWLbRgwQKXY+zfv18Oh0NTp05VQkKC6tatK29vb+3YsSPPzzx06JBeffVV3XbbbXmuTe7p6amRI0c67wofPHiwwsPDc827+GflIofDoYyMDC1YsMB5HnZYzx0AAKAs4s5wAACAMmrVqlWqU6eO2rZtm6/5s2fPVpMmTdSjRw+VK1dOq1at0oMPPqicnBw99NBDkqSEhAQ9/PDDCggI0H/+8x9JUkhIiCTpzJkzat++vX7//XcNGzZMV199tb755huNHj1af/zxhxISEiRJOTk56t69u7777js98MADatiwod5//30NGjQoV03r1q1T586dVadOHY
0bN05nz57VjBkzdMMNN2jz5s25mp133nmn6tevr2effVaGYeiOO+7QQw89pEWLFunaa691mbto0SJ16NBB1atXv+K1SU9P17Fjx2QYho4cOaIZM2bo9OnTuueee1zmTZ8+XT169NCAAQOUlZWlJUuW6M4779SHH36orl27XvL4v/76q1auXKk777xTtWvXVmpqql599VW1b99eO3bsULVq1VzmT5kyRR4eHho5cqTS09P1/PPPa8CAAfr222+dcz799FN169ZNYWFheuSRRxQaGqqff/5ZH374ofObID/99JNuuOEGVa9eXU8++aT8/f31zjvvqGfPnnr33XfVq1evPOtt1KiRFi5cqEcffVQ1atTQY489JkmqWrWqzp49qw4dOmjPnj2KjY1V7dq1tWzZMg0ePFhpaWku34CR/vomzLlz53T//ffL29tbVapUyfMzP/nkE124cMH0dckXLlyo++67T61atdL9998vSapbt66pnwEAAIASYgAAAKDMSU9PNyQZt99+e773OXPmTK6x6Ohoo06dOi5jTZo0Mdq3b59r7jPPPGP4+/sbu3btchl/8sknDU9PT+PAgQOGYRjGu+++a0gyEhISnHOys7ONW265xZBkJCYmOscjIiKM4OBg4/jx486xbdu2GR4eHsbAgQOdY/Hx8YYko3///rnq6t+/v1GtWjUjOzvbObZ58+Zcn5WXxMREQ1Kul7e3tzF//vxc8/95DbOysoymTZsat9xyi8t4rVq1jEGDBjnfnzt3zqU+wzCMffv2Gd7e3saECROcY+vXrzckGY0aNTIyMzOd49OnTzckGT/++KNhGIZx4cIFo3bt2katWrWMP//80+W4OTk5zl/feuutRrNmzYxz5865bG/btq1Rv379y16bi+fRtWtXl7GEhARDkvHWW2+5XIc2bdoYAQEBxsmTJ53nJ8moWLGiceTIkSt+1qOPPmpIMrZs2XLFuYZhGIMGDTJq1aqVa/zin5W/8/f3d/n9AAAAgHtimRQAAIAy6OTJk5KkChUq5HsfX19f568v3gndvn17/frrr0pPT7/i/suWLVO7du1UuXJlHTt2zPmKiopSdna2vvjiC0nS6tWrVb58eQ0dOtS5r4eHh/Pu84v++OMPbd26VYMHD3a5W7h58+a67bbb9PHHH+eq4f/+7/9yjQ0cOFCHDx/W+vXrnWOLFi2Sr6+v+vTpc8XzkqRZs2bp008/1aeffqq33npLN998s+677z6tWLHCZd7fr+Gff/6p9PR0tWvXTps3b77s8b29veXh8deX7tnZ2Tp+/LgCAgLUoEGDPPeNiYlxWVe7Xbt2kv66w1yStmzZon379mnEiBGqVKmSy74Xlwg5ceKEPvvsM9111106deqU8/fr+PHjio6O1u7duwu1hMzHH3+s0NBQ9e/f3zlWvnx5DR8+XKdPn9bnn3/uMr9Pnz6qWrXqFY9bmD/TAAAAKFtYJgUAAKAMqlixoiTp1KlT+d7n66+/Vnx8vJKTk3XmzBmXbenp6QoMDLzs/rt379YPP/xwycbmkSNHJEm//fabwsLC5Ofn57K9Xr16Lu9/++03SVKDBg1yHatRo0Zas2ZNrodk1q5dO9fc2267TWFhYVq0aJFuvfVW5eTk6O2339btt9+e78Zqq1atXB6g2b9/f1177bWKjY1Vt27dnI3pDz/8UBMnTtTWrVuVmZnpnP/3NarzkpOTo+nTp+uVV17Rvn37lJ2d7dx21VVX5Zp/9dVXu7yvXLmypL8a8JK0d+9eSVLTpk0v+Zl79uyRYRgaM2aMxowZk+ecI0eO5GsZmb/77bffVL9+fWdz/6JGjRo5t/9dXr9neSnMn2kAAACULTTDAQAAyqCKFSuqWrVq2r59e77m7927V7feeqsaNmyoadOmqWbNmvLy8tLHH3+sl156KdcDMPOSk5Oj2267TaNGjcpz+zXXXFOgcyiMv9+ZfZGnp6fuvvtuzZ07V6+88oq+/vprHT58ON
d63wXh4eGhm2++WdOnT9fu3bvVpEkTffnll+rRo4duuukmvfLKKwoLC1P58uWVmJioxYsXX/Z4zz77rMaMGaN7771
"text/plain": [
"<Figure size 1800x500 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABcMAAAHvCAYAAACYMccRAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABY60lEQVR4nO3deZiWVf0/8PewDTsu7GqAYuJOYRpuuKAIuFBmipZIZqaSmdtX/SW4prmFmVuWqCVpLrmHCy59VSpNXHL7qkEuBCgGKKsw9+8PL54aBxTGgYFnXq/rei55zn3uc58zDH5m3nPPuSuKoigCAAAAAABlrFF9TwAAAAAAAFY2YTgAAAAAAGVPGA4AAAAAQNkThgMAAAAAUPaE4QAAAAAAlD1hOAAAAAAAZU8YDgAAAABA2ROGAwAAAABQ9oThAAAAAACUPWE4AACsYt27d89hhx1W39P43D788MN897vfTefOnVNRUZHjjjuuvqe0wh599NFUVFTk0Ucfre+pAACwkgnDAQDK3BtvvJEjjzwyG264YZo3b562bdtmhx12yKWXXpp58+at8HhXXHFFrrvuurqf6GrouuuuS0VFRbVXx44ds+uuu+aPf/xjfU+v3v3kJz/Jddddl6OOOiq/+c1v8u1vf3ulX3Px4sUZM2ZMdtlll6yzzjqprKxM9+7dM3z48Dz99NMr/foAAKy5mtT3BAAAWHnuvffeHHDAAamsrMyhhx6aLbbYIgsXLszjjz+ek046KS+++GJ++ctfrtCYV1xxRdq3b18WdzYvr7POOis9evRIURSZNm1arrvuugwaNCh333139t577/qeXr15+OGH89WvfjWjRo1aJdebN29evv71r2fcuHHZeeedc9ppp2WdddbJ5MmT8/vf/z7XX3993nzzzay//vqrZD4AAKxZhOEAAGVq0qRJOeigg9KtW7c8/PDD6dKlS+nYMccck9dffz333ntvPc5w5ZozZ05atWpVJ2MNHDgw22yzTen94Ycfnk6dOuV3v/tdgw7Dp0+fns0226zOxlu0aFGqqqrSrFmzpR4/6aSTMm7cuPzsZz+rsSXLqFGj8rOf/azO5lJbdfl5BwBA3bJNCgBAmbrgggvy4Ycf5te//nW1IHyJnj175oc//GHp/ZgxY7LbbrulY8eOqayszGabbZYrr7yy2jndu3fPiy++mMcee6y0bcguu+xSOj5z5swcd9xx2WCDDVJZWZmePXvmpz/9aaqqqqqNM2PGjHz7299O27Zts9Zaa2XYsGF57rnnUlFRUWMLlocffjg77bRTWrVqlbXWWiv77bdfXn755Wp9zjjjjFRUVOSll17KwQcfnLXXXjs77rhjxowZk4qKikycOLHG+n/yk5+kcePGeeedd5b3Q1qy1lprpUWLFmnSpPq9JRdddFG23377rLvuumnRokX69OmTW2+99TPHe//993PiiSdmyy23TOvWrdO2bdsMHDgwzz33XLV+S/a3/v3vf59zzz0366+/fpo3b57dd989r7/+eo1x//KXv2TQoEFZe+2106pVq2y11Va59NJLq/V55ZVX8o1vfCPrrLNOmjdvnm222SZ33XXXp853yTwmTZqUe++9t/S5MHny5CQfh+RLfmDQvHnzbL311rn++uurjTF58uRUVFTkoosuyujRo7PRRhulsrIyL7300lKv+fbbb+fqq6/OHnvssdS9yRs3bpwTTzyx2l3hEydOzMCBA9O2bdu0bt06u+++e/785z9/6tqWuOWWW9KnT5+0aNEi7du3z7e+9a0anyuHHXZYWrdunTfeeCODBg1KmzZtcsghhyzX+AAArHruDAcAKFN33313Ntxww2y//fbL1f/KK6/M5ptvnn333TdNmjTJ3XffnaOPPjpVVVU55phjkiSjR4/OD37wg7Ru3Tr/7//9vyRJp06dkiRz585Nv3798s477+TII4/MF77whTz55JM59dRT869//SujR49Okl
RVVWWfffbJX//61xx11FHp1atX7rzzzgwbNqzGnB566KEMHDgwG264Yc4444zMmzcvl112WXbYYYc888wz6d69e7X+BxxwQDbeeOP85Cc/SVEU+cY3vpFjjjkmN954Y770pS9V63vjjTdml112yXrrrfeZH5tZs2blvffeS1EUmT59ei677LJ8+OGH+da3vlWt36WXXpp99903hxxySBYuXJibbropBxxwQO65554MHjx4meP/4x//yB133JEDDjggPXr0yLRp03L11VenX79+eemll9K1a9dq/c8///w0atQoJ554YmbNmpULLrgghxxySP7yl7+U+jz44IPZe++906VLl/zwhz9M586d8/LLL+eee+4p/RDkxRdfzA477JD11lsvp5xySlq1apXf//73GTJkSG677bZ87WtfW+p8N9100/zmN7/Jj370o6y//vo54YQTkiQdOnTIvHnzsssuu+T111/PiBEj0qNHj9xyyy057LDDMnPmzGo/gEk+/iHM/Pnz873vfS+VlZVZZ511lnrNP/7xj1m0aNFy70v+4osvZqeddkrbtm1z8sknp2nTprn66quzyy675LHHHst22223zHOvu+66DB8+PF/5yldy3nnnZdq0abn00kvzxBNPZOLEiVlrrbVKfRctWpQBAwZkxx13zEUXXZSWLVsu1/wAAKgHBQAAZWfWrFlFkmK//fZb7nPmzp1bo23AgAHFhhtuWK1t8803L/r161ej79lnn120atWq+L//+79q7aecckrRuHHj4s033yyKoihuu+22IkkxevToUp/FixcXu+22W5GkGDNmTKm9d+/eRceOHYsZM2aU2p577rmiUaNGxaGHHlpqGzVqVJGkGDp0aI15DR06tOjatWuxePHiUtszzzxT41pLM2bMmCJJjVdlZWVx3XXX1ej/yY/hwoULiy222KLYbbfdqrV369atGDZsWOn9/Pnzq82vKIpi0qRJRWVlZXHWWWeV2h555JEiSbHpppsWCxYsKLVfeumlRZLihRdeKIqiKBYtWlT06NGj6NatW/Hvf/+72rhVVVWlP+++++7FlltuWcyfP7/a8e23377YeOONP/Vjs2QdgwcPrtY2evToIknx29/+ttrHoW/fvkXr1q2L2bNnl9aXpGjbtm0xffr0z7zWj370oyJJMXHixM/sWxRFMWTIkKJZs2bFG2+8UWqbMmVK0aZNm2LnnXcutS35mD7yyCOluXbs2LHYYostinnz5pX63XPPPUWSYuTIkaW2YcOGFUmKU045ZbnmBABA/bJNCgBAGZo9e3aSpE2bNst9TosWLUp/XnIndL9+/fKPf/wjs2bN+szzb7nlluy0005Ze+21895775Ve/fv3z+LFi/OnP/0pSTJu3Lg0bdo0RxxxROncRo0ale4+X+Jf//pXnn322Rx22GHV7hbeaqutsscee+S+++6rMYfvf//7NdoOPfTQTJkyJY888kip7cYbb0yLFi2y//77f+a6kuTyyy/Pgw8+mAcffDC//e1vs+uuu+a73/1ubr/99mr9/vtj+O9//zuzZs3KTjvtlGeeeeZTx6+srEyjRh9/ab548eLMmDEjrVu3ziabbLLUc4cPH15tX+2ddtopycd3mCcfbw8yadKkHHfccdXuYk6SioqKJB9vzfLwww/nm9/8Zj744IPS39eMGTMyYMCAvPbaa7XaQua+++5L586dM3To0FJb06ZNc+yxx+bDDz/MY489Vq3//vvvnw4dOnzmuCvyOb148eI88MADGTJkSDbccMNSe5cuXXLwwQfn8ccfL433SU8//XSmT5+eo48+Os2bNy+1Dx48OL169VrqPvtHHXXUZ84JAID6Z5sUAIAy1LZt2yTJBx98sNznPPHEExk1alQmTJiQuXPnVjs2a9astGvX7lPPf+211/L8888vM9icPn16kuSf//xnunTpUmM7iZ49e1Z7/89//jNJsskmm9QYa9NNN839999f42GFPXr0qN
F3jz32SJcuXXLjjTdm9913T1VVVX73u99lv/32W+4fFmy77bbVHqA5dOjQfOlLX8qIESOy9957l4Lpe+65J+ecc06
"text/plain": [
"<Figure size 1800x500 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABcMAAAIICAYAAACimPa2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABqoUlEQVR4nO3dd3gUVfv/8c8mkEJCaAlJ0Eho0iEKigEhKEhoIkUp4hMSiliwISr4FUIRQVQEkaIoPQiCCCJIi4ANK01BHgFBUAw9hJaEJPP7w1/2YU2A9N0c3q/rmkv3zJmZ++zs7h3unT1jsyzLEgAAAAAAAAAABnNzdgAAAAAAAAAAABQ2iuEAAAAAAAAAAONRDAcAAAAAAAAAGI9iOAAAAAAAAADAeBTDAQAAAAAAAADGoxgOAAAAAAAAADAexXAAAAAAAAAAgPEohgMAAAAAAAAAjEcxHAAAAAAAAABgPIrhAAAAQCEKDQ1VdHS0s8PIt3Pnzql///4KCgqSzWbT008/7eyQ7ObMmSObzaaDBw8W6H5btmypli1bFug+AQAA4DwUwwEAAAyyf/9+DRw4UFWrVpWXl5f8/PzUrFkzTZ48WRcvXsz1/qZNm6Y5c+YUfKAuKLOgevlSsWJF3XXXXfrss8+cHZ7TvfLKK5ozZ44effRRzZ8/X//5z38K/Zjp6emaPXu2WrZsqfLly8vT01OhoaGKiYnRjz/+WOjH/7cjR45o5MiR2r59e5EfGwAAAPlXwtkBAAAAoGCsWrVKDzzwgDw9PRUVFaV69eopNTVVX331lZ577jnt2rVL7777bq72OW3aNPn7+xtxZXNOjR49WlWqVJFlWTp69KjmzJmj9u3ba+XKlerYsaOzw3Oazz//XHfccYdiY2OL5HgXL15U165dtWbNGrVo0UIvvviiypcvr4MHD+rDDz/U3LlzdejQId14442FFsO6descHh85ckSjRo1SaGiowsLCCu24AAAAKBwUwwEAAAxw4MAB9ezZU5UrV9bnn3+u4OBg+7rHH39c+/bt06pVq5wYYeE6f/68fHx8CmRf7dq1U+PGje2P+/Xrp8DAQH3wwQfXdTH82LFjqlOnToHtLy0tTRkZGfLw8Mh2/XPPPac1a9bozTffzDIlS2xsrN58880Ci+XfLly4oFKlSl0xNgAAABRPTJMCAABggAkTJujcuXN6//33HQrhmapXr66nnnrK/nj27Nm6++67VbFiRXl6eqpOnTqaPn26wzahoaHatWuXNm/ebJ825PL5kxMTE/X0008rJCREnp6eql69ul599VVlZGQ47OfkyZP6z3/+Iz8/P5UtW1Z9+vTRjh07ZLPZskzB8vnnn6t58+by8fFR2bJldd999+nXX3916DNy5EjZbDbt3r1bDz74oMqVK6c777xTs2fPls1m07Zt27KM/5VXXpG7u7v++uuvnD6ldmXLlpW3t7dKlHC8juT1119X06ZNVaFCBXl7e6tRo0ZaunTpNfd36tQpDRkyRPXr15evr6/8/PzUrl077dixw6Hfpk2bZLPZ9OGHH2rs2LG68cYb5eXlpVatWmnfvn1Z9vvdd9+pffv2KleunHx8fNSgQQNNnjzZoc+ePXt0//33q3z58vLy8lLjxo31ySefXDXezDgOHDigVatW2V8LmfNzHzt2zP6FgZeXlxo2bKi5c+c67OPgwYOy2Wx6/fXXNWnSJFWrVk2enp7avXt3tsf8888/9c477+iee+7Jdm5yd3d3DRky5KpXha9YsUIdOnRQpUqV5OnpqWrVqmnMmDFKT0936NeyZUvVq1dPP/30k1q0aKFSpUrpxRdftK/LfM1v2rRJt912myQpJibG/jzMmTNHsbGxKlmypI4fP54ljocfflhly5ZVcnLyFWMFAABA0eDKcAAAAAOsXLlSVatWVdOmTXPUf/r06apbt646deqkEiVKaOXKlXrssceUkZGhxx9/XJI0adIkPfHEE/L19d
X//d//SZICAwMl/XPlbEREhP766y8NHDhQN910k7755hsNGzZMf//9tyZNmiRJysjI0L333qvvv/9ejz76qGrVqqUVK1aoT58+WWLasGGD2rVrp6pVq2rkyJG6ePGipkyZombNmmnr1q0KDQ116P/AAw+oRo0aeuWVV2RZlu6//349/vjjiouL0y233OLQNy4uTi1bttQNN9xwzefmzJkzOnHihCzL0rFjxzRlyhSdO3dODz30kEO/yZMnq1OnTurdu7dSU1O1aNEiPfDAA/r000/VoUOHK+7/999/1/Lly/XAAw+oSpUqOnr0qN555x1FRERo9+7dqlSpkkP/8ePHy83NTUOGDNGZM2c0YcIE9e7dW9999529z/r169WxY0cFBwfrqaeeUlBQkH799Vd9+umn9i9Bdu3apWbNmumGG27Q0KFD5ePjow8//FCdO3fWRx99pC5dumQbb+3atTV//nw988wzuvHGG/Xss89KkgICAnTx4kW1bNlS+/bt06BBg1SlShUtWbJE0dHRSkxMdPgCRvrnS5jk5GQ9/PDD8vT0VPny5bM95meffaa0tLR8zUs+Z84c+fr6avDgwfL19dXnn3+uESNGKCkpSa+99ppD35MnT6pdu3bq2bOnHnroIfvr/N/Pw+jRozVixAg9/PDDat68uSSpadOmuvPOOzV69GgtXrxYgwYNsm+TmpqqpUuXqlu3bvLy8srzWAAAAFBALAAAABRrZ86csSRZ9913X463uXDhQpa2yMhIq2rVqg5tdevWtSIiIrL0HTNmjOXj42P99ttvDu1Dhw613N3drUOHDlmWZVkfffSRJcmaNGmSvU96erp19913W5Ks2bNn29vDwsKsihUrWidPnrS37dixw3Jzc7OioqLsbbGxsZYkq1evXlni6tWrl1WpUiUrPT3d3rZ169Ysx8rO7NmzLUlZFk9PT2vOnDlZ+v/7OUxNTbXq1atn3X333Q7tlStXtvr06WN/nJyc7BCfZVnWgQMHLE9PT2v06NH2to0bN1qSrNq1a1spKSn29smTJ1uSrJ9//tmyLMtKS0uzqlSpYlWuXNk6ffq0w34zMjLs/9+qVSurfv36VnJyssP6pk2bWjVq1Ljqc5M5jg4dOji0TZo0yZJkLViwwOF5CA8Pt3x9fa2kpCT7+CRZfn5+1rFjx655rGeeecaSZG3btu2afS3rf+fuwIED9rbsXuMDBw60SpUq5fAcREREWJKsGTNmZOkfERHh8Pr/4YcfrvhaCg8Pt5o0aeLQtmzZMkuStXHjxhyNAwAAAIWLaVIAAACKuaSkJElS6dKlc7yNt7e3/f8zr4SOiIjQ77//rjNnzlxz+yVLlqh58+YqV66cTpw4YV9at26t9PR0ffHFF5KkNWvWqGTJkhowYIB9Wzc3N/vV55n+/vtvbd++XdHR0Q5XCzdo0ED33HOPVq9enSWGRx55JEtbVFSUjhw5oo0bN9rb4uLi5O3trW7dul1zXJI0depUrV+/XuvXr9eCBQt01113qX///lq2bJlDv8ufw9OnT+vMmTNq3ry5tm7detX9e3p6ys3tnz/D09PTdfLkSfn6+qpmzZrZbhsTE+Mwd3XmFcm///67JGnbtm06cOCAnn76aZUtW9ZhW5vNJumfqVk+//xzde/eXWfPnrWfr5MnTyoyMlJ79+7N0xQyq1evVlBQkHr16mVvK1mypJ588kmdO3dOmzdvdujfrVs3BQQEXHO/eXlN/9vl5ydzzM2bN9eFCxe0Z88eh76enp6KiYnJ87Gkf1573333nfbv329vi4uLU0hIiCIiIvK1bwAAABQMiuEAAADFnJ+fn6R/Cn459fXXX6t169b2ubkDAgLs8yTnpBi+d+9erVmzRgEBAQ5L69atJf0zj7Qk/fHHHwoODlapUqUctq9evbrD4z/++EOSVLNmzSzHql27tk6cOKHz5887tFepUiVL33vuuUfBwcGKi4uT9M80LR988IHuu+++HBdWb7
/9drVu3VqtW7dW7969tWrVKtWpU0eDBg1Samqqvd+nn36qO+64Q15eXipfvrwCAgI0ffr0az5/GRkZevPNN1WjRg1
"text/plain": [
"<Figure size 1800x500 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"def plot_category_balance(train_data, validation_data, test_data, column, title):\n",
"    \"\"\"Plot the normalized value counts of `column` for the three data splits side by side.\n",
"\n",
"    Draws one bar chart per split (training, validation, test) on a shared y-axis so the\n",
"    proportions can be compared directly. All three panels use the same x-axis order:\n",
"    the training split's values ranked by frequency, followed by any values that occur\n",
"    only in the validation or test split (drawn with zero height where absent).\n",
"\n",
"    Parameters:\n",
"        train_data, validation_data, test_data: DataFrames that contain `column`.\n",
"        column: name of the column whose value distribution is plotted.\n",
"        title: human-readable label used in the panel titles and the figure title.\n",
"    \"\"\"\n",
"    splits = [('Training', train_data), ('Validation', validation_data), ('Test', test_data)]\n",
"    proportions = [frame[column].value_counts(normalize=True) for _, frame in splits]\n",
"\n",
"    # value_counts() orders each split by its own frequencies, so without a shared order the\n",
"    # same bar position could show different categories in different panels.\n",
"    category_order = list(proportions[0].index)\n",
"    seen = set(category_order)\n",
"    for split_proportions in proportions[1:]:\n",
"        for category in split_proportions.index:\n",
"            if category not in seen:\n",
"                seen.add(category)\n",
"                category_order.append(category)\n",
"\n",
"    fig, ax = plt.subplots(1, 3, figsize=(18, 5), sharey=True)\n",
"    for axis, (label, _), split_proportions in zip(ax, splits, proportions):\n",
"        split_proportions.reindex(category_order, fill_value=0).plot(kind='bar', ax=axis)\n",
"        axis.set_title(f'{label} {title}')\n",
"    ax[0].set_ylabel('Proportion')\n",
"\n",
"    plt.suptitle(f'Category Balance for {title}')\n",
"    plt.show()\n",
"\n",
"# Compare the split proportions for each column of interest.\n",
"# NOTE(review): 'carat' is numeric, so its chart gets one bar per distinct value.\n",
"for column_name, display_title in [('carat', 'Carat'), ('cut', 'Cut'), ('color', 'Color'), ('clarity', 'Clarity')]:\n",
"    plot_category_balance(train_data, validation_data, test_data, column_name, display_title)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Буду использовать увеличение выборки (Oversampling) для \"Clarity\""
]
},
{
"cell_type": "code",
2024-11-30 13:19:01 +04:00
"execution_count": 10,
2024-11-01 21:16:02 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"До Oversampling: clarity\n",
"SI1 9256\n",
"VS2 8590\n",
"SI2 6355\n",
"VS1 5694\n",
"VVS2 3551\n",
"VVS1 2547\n",
"IF 1254\n",
"I1 513\n",
"Name: count, dtype: int64\n",
"После Oversampling: clarity\n",
"VVS1 9256\n",
"SI1 9256\n",
"SI2 9256\n",
"VVS2 9256\n",
"VS2 9256\n",
"VS1 9256\n",
"IF 9256\n",
"I1 9256\n",
"Name: count, dtype: int64\n"
]
}
],
"source": [
"from imblearn.over_sampling import RandomOverSampler\n",
"\n",
"# Class sizes of 'clarity' in the training split before balancing.\n",
"print(\"До Oversampling: \", train_data['clarity'].value_counts())\n",
"\n",
"# Features are every column except 'clarity'; 'clarity' holds the classes to balance.\n",
"y_train = train_data['clarity']\n",
"X_train = train_data.drop(columns=['clarity'])\n",
"\n",
"# Random oversampling with a fixed seed so the resampled set is reproducible.\n",
"ros = RandomOverSampler(sampling_strategy='auto', random_state=42)\n",
"X_resampled, y_resampled = ros.fit_resample(X_train, y_train)\n",
"\n",
"# Re-attach the balanced target as the last column of a new frame.\n",
"train_data_resampled = X_resampled.assign(clarity=y_resampled)\n",
"\n",
"print(\"После Oversampling: \", train_data_resampled['clarity'].value_counts())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Используем Undersampling для \"Cut\""
]
},
{
"cell_type": "code",
2024-11-30 13:19:01 +04:00
"execution_count": 11,
2024-11-01 21:16:02 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"До Undersampling: cut\n",
"Ideal 31497\n",
"Premium 17342\n",
"Very Good 14994\n",
"Good 6338\n",
"Fair 3877\n",
"Name: count, dtype: int64\n",
"После Undersampling: cut\n",
"Fair 3877\n",
"Good 3877\n",
"Ideal 3877\n",
"Premium 3877\n",
"Very Good 3877\n",
"Name: count, dtype: int64\n",
"Столбцы после Undersampling: Index(['carat', 'color', 'depth', 'table', 'price', 'x', 'y', 'z', 'clarity',\n",
" 'cut'],\n",
" dtype='object')\n"
]
}
],
"source": [
"from imblearn.under_sampling import RandomUnderSampler\n",
"\n",
"# Class sizes of 'cut' after the 'clarity' oversampling step.\n",
"print(\"До Undersampling: \", train_data_resampled['cut'].value_counts())\n",
"\n",
"# Split off 'cut' as the target to balance; all remaining columns act as features.\n",
"# NOTE(review): dropping rows here presumably disturbs the 'clarity' balance reached in the\n",
"# oversampling step — confirm this is acceptable.\n",
"y_train_cut = train_data_resampled['cut']\n",
"X_train_cut = train_data_resampled.drop(columns=['cut'])\n",
"\n",
"undersampler = RandomUnderSampler(sampling_strategy='auto', random_state=42)\n",
"X_resampled_cut, y_resampled_cut = undersampler.fit_resample(X_train_cut, y_train_cut)\n",
"\n",
"# New frame with 'cut' appended as the last column.\n",
"train_data_resampled_cut = X_resampled_cut.assign(cut=y_resampled_cut)\n",
"\n",
"print(\"После Undersampling: \", train_data_resampled_cut['cut'].value_counts())\n",
"print(\"Столбцы после Undersampling:\", train_data_resampled_cut.columns)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"И увеличиваем количество наблюдений в малочисленных интервалах 'carat' с помощью Oversampling"
]
},
{
"cell_type": "code",
2024-11-30 13:19:01 +04:00
"execution_count": 12,
2024-11-01 21:16:02 +04:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAHCCAYAAADy9P3IAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABOMUlEQVR4nO3deVgVdf//8ddBNkUBUdmU0Nz39c7IJRcSFU1LKw3XSEulNM3KuzLT0lvNfc3KJW/NLTNvywWX1BQ3klxyLRU3oHLBFRHm90df5ucRxBFRUJ+P6zpXzXzeZ+Y95xzwxefMmWMzDMMQAAAAMuWQ0w0AAAA8CAhNAAAAFhCaAAAALCA0AQAAWEBoAgAAsIDQBAAAYAGhCQAAwAJCEwAAgAWEJgAAAAsITcgRgwYNks1muy/7atCggRo0aGAu//TTT7LZbFq0aNF92X+XLl1UvHjx+7KvrLp48aJeffVV+fr6ymazqU+fPjndUq5y9OhR2Ww2zZw5M6dbydD169f1zjvvKCAgQA4ODmrdunVOt4RsdPPvsNz+enyYEZpw12bOnCmbzWbeXF1d5e/vr5CQEI0fP14XLlzIlv2cOnVKgwYNUkxMTLZsLzvl5t6sGDp0qGbOnKkePXpo9uzZ6tixY6b1KSkpmjFjhho0aCAvLy+5uLioePHi6tq1q3bs2HGfurb322+/adCgQTp69Kil+rTgnnZzcHCQn5+fWrRooS1bttzbZrPZ9OnTNXLkSLVt21azZs3SW2+9dcePR3bau3evOnTooKJFi8rFxUX+/v4KCwvT3r1773svQLYygLs0Y8YMQ5IxePBgY/bs2cb06dONoUOHGk2aNDFsNpsRGBho/Prrr3b3SU5ONq5cuXJH+9m+fbshyZgxY8Yd3S8pKclISkoyl9etW2dIMhYuXHhH28lqb9euXTOuXr2abfu6F2rXrm3UqVPHUu3ly5eNpk2bGpKM+vXrGyNHjjS++uor48MPPzTKli1r2Gw24/jx4/e44/QWLlxoSDLWrVtnqf6jjz4yJBlTpkwxZs+ebcyaNcv45JNPjMDAQMPJycnYuXOnWZuammpcuXLFuH79+r1p/i699NJLRtGiRe3W3enjkV2+/fZbw9nZ2fD19TXef/9948svvzQ++OADw8/Pz3B2djYWL158X/t5GDz99NPG008/bS7n9tfjw8wx5+IaHjbNmjVTrVq1zOUBAwZo7dq1atGihZ599lnt27dPefPmlSQ5OjrK0fHevvwuX76sfPnyydnZ+Z7u53acnJxydP9WJCQkqEKFCpZq+/fvrxUrVmjMmDHp3sb76KOPNGbMmGzp6dKlS3Jzc8uWbWWmbdu2Kly4sLncunVrVapUSQsXLlS1atUkyZxBza0SEhLk6el5X/aV2fPy+++/q2PHjnr88ce1YcMGFSlSxBzr3bu36tWrp44dO2rXrl16/PHH70u/0v17Ld0vuf31+FDL6dSGB1/aTNP27dszHB86dKghyZg2bZq5Lu2v/ButWrXKqFOnjuHh4WG4ubkZZcqUMQYMGGAYxv+fHbr5ljaz8/TTTxsVK1Y0duzYYdSrV8/Imzev0bt3b3Psxr/S0rY1b948Y8CAAYaPj4+RL18+o2XLlkZsbKxdT4GBgUbnzp3THdON27xdb507dzYCAwPt7n/x4kWjb9++RrFixQxnZ2ejTJkyxsiRI43U1FS7OklGr169jO+++86oWLGi4ezsbFSoUMFYvnx5ho/1zeLj441XXnnF8Pb2NlxcXIwqVaoYM2fOTPdY3Hw7cuRIhts7fvy44ejoaDzzzDOW9n/06FGjR48eRpkyZQxXV1fDy8vLaNu2bbrtp72GfvrpJ6NHjx5GkSJFDE9PT8vbSLv/zbfMZlnSXoN//vmn3fq//vrLkGQMHDjQXHfkyJF0M4mdO3c23NzcjBMnThitWrUy3NzcjMKFCxv9+vVLNwPwzTffGDVq1DDy589vFChQwK
hUqZIxduzY2z5+I0eONIKCggwvLy/D1dXVqFGjht0MaVpfGb32bvd4/Pjjj0bdunWNfPnyGfnz5zeaN29u7Nmzx27/acd4+PBho1mzZkb+/PmNVq1a3bLf1157zZBkbNiwIcPx9evXG5KM1157zTCM/z8b9tNPP6WrnTp1qiHJ2L17t7lu3759Rps2bYyCBQsaLi4uRs2aNY3vv//e7n6ZvZYSExON3r17G4GBgYazs7NRpEgRIzg42IiOjjbvv2HDBqNt27ZGQECA4ezsbBQrVszo06ePcfny5Qwfm2PHjhmhoaGGm5ub4e/vb0ycONEwDMPYtWuX0bBhQyNfvnzGY489ZsyZMyfDPtevX290797d8PLyMgoUKGB07NjROHPmjF3tzb/D7vb1+NdffxkdOnQwChQoYHh4eBidOnUyYmJisjST/6hhpgn3XMeOHfXvf/9bq1atUrdu3TKs2bt3r1q0aKEqVapo8ODBcnFx0eHDh7Vp0yZJUvny5TV48GANHDhQ3bt3V7169SRJTz31lLmNv//+W82aNVO7du3UoUMH+fj4ZNrXp59+KpvNpnfffVcJCQkaO3asgoODFRMTY86IWWGltxsZhqFnn31W69atU3h4uKpVq6aVK1eqf//+OnnyZLqZmp9//lmLFy9Wz549VaBAAY0fP15t2rRRbGysChUqdMu+rly5ogYNGujw4cOKiIhQiRIltHDhQnXp0kXnzp1T7969Vb58ec2ePVtvvfWWihUrpn79+kmS3QzBjZYvX67r16/f9pynNNu3b9fmzZvVrl07FStWTEePHtWUKVPUoEED/fbbb8qXL59dfc+ePVWkSBENHDhQly5dsryN+vXr680339T48eP173//W+XLl5ck87+ZOXPmjCQpNTVVJ0+e1JAhQ+Tq6qoXX3zxtvdNSUlRSEiIateurc8++0yrV6/WqFGjVLJkSfXo0UOSFBkZqfbt26tx48YaPny4JGnfvn3atGmTevfunen2x40bp2effVZhYWG6du2a5s2bpxdeeEHLli1TaGioihQpotmzZ+vTTz/VxYsXNWzYMElS6dKlM308Zs+erc6dOyskJETDhw/X5cuXNWXKFNWtW1c7d+60++DC9evXFRISorp16+qzzz5L95zd6H//+5+KFy9u/gzcrH79+ipevLh++OEHSVJoaKjy58+vBQsW6Omnn7arnT9/vipWrKhKlSpJ+ud3RJ06dVS0aFG99957cnNz04IFC9S6dWt9++23eu655+zun9Fr6fXXX9eiRYsUERGhChUq6O+//9bPP/+sffv2qUaNGpKkhQsX6vLly+rRo4cKFSqkbdu2acKECTpx4oQWLlxot4+UlBQ1a9ZM9evX14gRIzRnzhxFRETIzc1N77//vsLCwvT8889r6tSp6tSpk4KCglSiRAm7bURERMjT01ODBg3SgQMHNGXKFB07dsz8wMqdsPJ6TE1NVcuWLbVt2zb16NFD5cqV0/fff6/OnTvf0b4eWTmd2vDgu91Mk2EYhoeHh1G9enVz+eaZpjFjxmT4V/+NMjtv6OmnnzYkGVOnTs1wLKOZpqJFixqJiYnm+gULFhiSjHHjxpnrrMw03a63m2ealixZYkgyPvnkE7u6tm3bGjabzTh8+LC5TpLh7Oxst+7XX381JBkTJkxIt68bjR071pBk/Pe//zXXXbt2zQgKCjLy589vd+yBgYFGaGhoptszDMN46623DEl25/tk5ua/zg3DMKKiogxJxtdff22uS3sN1a1bN91fxVa3kdVzmm6+eXp6GitWrLCrvdVf9vq/c/luVL16daNmzZrmcu/evQ13d/csnX9y87Ffu3bNqFSpktGoUSO79WkzrTe61eNx4cIFw9PT0+jWrZvd+ri4OMPDw8Nufdoxvvfee7ft9dy5c4akTGeiDMMwnn32WUOS+fpr37694e3tbff4nD592nBwcLB7bBs3bmxUrlzZ7vzA1NRU46mnnjJKly5trs
vsteTh4WH06tUr0/4yer0NGzbMsNlsxrFjx8x1aY/N0KFDzXVnz5418ubNa9hsNmPevHnm+v379xuSjI8++ihdnzV
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Столбцы после Oversampling: Index(['carat', 'color', 'depth', 'table', 'price', 'x', 'y', 'z', 'clarity',\n",
" 'cut', 'carat_binned'],\n",
" dtype='object')\n"
]
}
],
"source": [
"# Bucket the continuous 'carat' values into 5 equal-width bins; labels=False yields integer bin codes.\n",
"train_data_resampled_cut['carat_binned'] = pd.cut(train_data_resampled_cut['carat'], bins=5, labels=False)\n",
"\n",
"# The bin code is the target to balance; every other column (including 'carat') is a feature.\n",
"y_train_carat = train_data_resampled_cut['carat_binned']\n",
"X_train_carat = train_data_resampled_cut.drop(columns=['carat_binned'])\n",
"\n",
"oversampler = RandomOverSampler(sampling_strategy='auto', random_state=42)\n",
"X_resampled_carat, y_resampled_carat = oversampler.fit_resample(X_train_carat, y_train_carat)\n",
"\n",
"# New frame with the balanced bin code appended as the last column.\n",
"train_data_resampled_carat = X_resampled_carat.assign(carat_binned=y_resampled_carat)\n",
"\n",
"# Bar chart of the number of rows per carat bin after balancing.\n",
"carat_bin_axes = train_data_resampled_carat['carat_binned'].value_counts().plot(kind='bar')\n",
"carat_bin_axes.set_title(\"Distribution of Carat Bins after Oversampling\")\n",
"carat_bin_axes.set_xlabel(\"Carat Bin\")\n",
"carat_bin_axes.set_ylabel(\"Frequency\")\n",
"plt.show()\n",
"\n",
"print(\"Столбцы после Oversampling:\", train_data_resampled_carat.columns)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Конструирование признаков"
]
},
{
"cell_type": "code",
2024-11-30 13:19:01 +04:00
"execution_count": 13,
2024-11-01 21:16:02 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" carat color depth table price x y z clarity cut ... \\\n",
"0 1.50 G 64.5 57.0 10352 7.15 7.09 4.59 SI1 Fair ... \n",
"1 0.60 G 65.7 55.0 1197 5.31 5.23 3.46 SI1 Fair ... \n",
"2 1.83 J 70.0 58.0 5083 7.34 7.28 5.12 I1 Fair ... \n",
"3 0.90 D 63.8 61.0 4252 6.07 5.99 3.85 SI1 Fair ... \n",
"4 0.71 G 65.7 56.0 2274 5.51 5.54 3.63 VS2 Fair ... \n",
"\n",
" color_H color_I color_J clarity_IF clarity_SI1 clarity_SI2 \\\n",
"0 0.0 0.0 0.0 0.0 1.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 1.0 0.0 \n",
"2 0.0 0.0 1.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 1.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"\n",
" clarity_VS1 clarity_VS2 clarity_VVS1 clarity_VVS2 \n",
"0 0.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 \n",
"4 0.0 1.0 0.0 0.0 \n",
"\n",
"[5 rows x 28 columns]\n"
]
}
],
"source": [
"from sklearn.preprocessing import OneHotEncoder, StandardScaler, MinMaxScaler\n",
"\n",
"# Nominal columns to expand into indicator (one-hot) columns.\n",
"categorical_features = ['cut', 'color', 'clarity']\n",
"\n",
"# drop='first' omits the first category of each feature; sparse_output=False returns a dense array.\n",
"encoder = OneHotEncoder(sparse_output=False, drop='first')\n",
"encoded_data = pd.DataFrame(\n",
"    encoder.fit_transform(train_data_resampled_carat[categorical_features]),\n",
"    columns=encoder.get_feature_names_out(categorical_features),\n",
")\n",
"\n",
"# Reset the left-hand index so both frames line up row by row; the original categorical columns are kept.\n",
"train_data_encoded = pd.concat([train_data_resampled_carat.reset_index(drop=True), encoded_data], axis=1)\n",
"\n",
"print(train_data_encoded.head())"
]
},
{
"cell_type": "code",
2024-11-30 13:19:01 +04:00
"execution_count": 14,
2024-11-01 21:16:02 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" carat carat_binned depth depth_binned table table_binned price \\\n",
"0 1.50 1 64.5 2 57.0 1 10352 \n",
"1 0.60 0 65.7 3 55.0 1 1197 \n",
"2 1.83 1 70.0 3 58.0 2 5083 \n",
"3 0.90 0 63.8 2 61.0 2 4252 \n",
"4 0.71 0 65.7 3 56.0 1 2274 \n",
"\n",
" price_binned \n",
"0 2 \n",
"1 0 \n",
"2 1 \n",
"3 1 \n",
"4 0 \n"
]
}
],
"source": [
"num_bins = 5\n",
"\n",
"# Discretize each numeric column into `num_bins` equal-width intervals; labels=False stores the\n",
"# integer bin code. A '<name>_binned' column that already exists is overwritten in place.\n",
"for source_column in ['carat', 'depth', 'table', 'price']:\n",
"    train_data_encoded[f'{source_column}_binned'] = pd.cut(\n",
"        train_data_encoded[source_column], bins=num_bins, labels=False\n",
"    )\n",
"\n",
"binned_preview_columns = ['carat', 'carat_binned', 'depth', 'depth_binned',\n",
"                          'table', 'table_binned', 'price', 'price_binned']\n",
"print(train_data_encoded[binned_preview_columns].head())\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ручной синтез"
]
},
{
"cell_type": "code",
2024-11-30 13:19:01 +04:00
"execution_count": 15,
2024-11-01 21:16:02 +04:00
"metadata": {},
"outputs": [],
"source": [
"# Work on a copy so the encoded training frame stays untouched.\n",
"data = train_data_encoded.copy()\n",
"\n",
"# Ratio and product features built from the raw measurements.\n",
"# NOTE(review): 'price_per_carat' is computed from 'price'; if 'price' is the prediction target\n",
"# this leaks it into the features — confirm.\n",
"data['price_per_carat'] = data['price'] / data['carat']\n",
"data['volume'] = data['x'] * data['y'] * data['z']\n",
"data['surface_area'] = data['table'] * data['depth'] / 100\n",
"\n",
"# Ordinal ranks for the three grade columns (1 is the lowest rank in each mapping).\n",
"grade_ranks = {\n",
"    'cut': {'Fair': 1, 'Good': 2, 'Very Good': 3, 'Premium': 4, 'Ideal': 5},\n",
"    'color': {'J': 1, 'I': 2, 'H': 3, 'G': 4, 'F': 5, 'E': 6, 'D': 7},\n",
"    'clarity': {'I1': 1, 'SI2': 2, 'SI1': 3, 'VS2': 4, 'VS1': 5, 'VVS2': 6, 'VVS1': 7, 'IF': 8},\n",
"}\n",
"for grade_column, rank_by_grade in grade_ranks.items():\n",
"    data[f'{grade_column}_score'] = data[grade_column].map(rank_by_grade)\n",
"\n",
"# Combined score: plain sum of the three ordinal ranks.\n",
"data['quality_score'] = data['cut_score'] + data['color_score'] + data['clarity_score']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Масштабирование признаков"
]
},
{
"cell_type": "code",
2024-11-30 13:19:01 +04:00
"execution_count": 16,
2024-11-01 21:16:02 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" carat color depth table price x y z clarity cut ... \\\n",
"0 1.50 G 64.5 57.0 10352 7.15 7.09 4.59 SI1 Fair ... \n",
"1 0.60 G 65.7 55.0 1197 5.31 5.23 3.46 SI1 Fair ... \n",
"2 1.83 J 70.0 58.0 5083 7.34 7.28 5.12 I1 Fair ... \n",
"3 0.90 D 63.8 61.0 4252 6.07 5.99 3.85 SI1 Fair ... \n",
"4 0.71 G 65.7 56.0 2274 5.51 5.54 3.63 VS2 Fair ... \n",
"\n",
" price_per_carat_standard volume_standard surface_area_standard \\\n",
"0 1.486375 -0.636594 0.011271 \n",
"1 -1.179201 -1.340819 -0.335758 \n",
"2 -0.754022 -0.425704 2.123745 \n",
"3 0.303687 -1.114517 1.197231 \n",
"4 -0.523003 -1.264937 0.026144 \n",
"\n",
" quality_score_standard carat_norm price_norm price_per_carat_norm \\\n",
"0 0.046561 0.302326 0.542508 0.348688 \n",
"1 0.046561 0.093023 0.047080 0.056256 \n",
"2 -1.288747 0.379070 0.257373 0.102901 \n",
"3 0.847745 0.162791 0.212403 0.218939 \n",
"4 0.313622 0.118605 0.105363 0.128245 \n",
"\n",
" volume_norm surface_area_norm quality_score_norm \n",
"0 0.333140 0.391412 0.294118 \n",
"1 0.137573 0.373071 0.294118 \n",
"2 0.391705 0.503057 0.000000 \n",
"3 0.200418 0.454090 0.470588 \n",
"4 0.158646 0.392198 0.352941 \n",
"\n",
"[5 rows x 50 columns]\n"
]
}
],
"source": [
"features_to_scale = ['carat', 'price', 'price_per_carat', 'volume', 'surface_area', 'quality_score']\n",
"\n",
"# Standardization: scaled copies of the selected columns, named with a '_standard' suffix.\n",
"scaler_standard = StandardScaler()\n",
"data_standardized = pd.DataFrame(\n",
"    scaler_standard.fit_transform(data[features_to_scale]),\n",
"    columns=features_to_scale,\n",
").add_suffix('_standard')\n",
"\n",
"# Min-max normalization: scaled copies of the same columns, named with a '_norm' suffix.\n",
"scaler_minmax = MinMaxScaler()\n",
"data_normalized = pd.DataFrame(\n",
"    scaler_minmax.fit_transform(data[features_to_scale]),\n",
"    columns=features_to_scale,\n",
").add_suffix('_norm')\n",
"\n",
"# Append both scaled variants to the frame; the index is reset so the rows align by position.\n",
"# NOTE(review): `data` is rebuilt from itself, so re-running this cell appends the scaled columns again.\n",
"data = pd.concat([data.reset_index(drop=True), data_standardized, data_normalized], axis=1)\n",
"\n",
"print(data.head())"
]
},
{
"cell_type": "code",
2024-11-30 13:19:01 +04:00
"execution_count": 17,
2024-11-01 21:16:02 +04:00
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\featuretools\\entityset\\entityset.py:1733: UserWarning: index index not found in dataframe, creating new integer column\n",
" warnings.warn(\n",
"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\featuretools\\synthesis\\deep_feature_synthesis.py:169: UserWarning: Only one dataframe in entityset, changing max_depth to 1 since deeper features cannot be created\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" carat color depth table price x y z clarity cut ... \\\n",
"index ... \n",
"0 1.50 G 64.5 57.0 10352 7.15 7.09 4.59 SI1 Fair ... \n",
"1 0.60 G 65.7 55.0 1197 5.31 5.23 3.46 SI1 Fair ... \n",
"2 1.83 J 70.0 58.0 5083 7.34 7.28 5.12 I1 Fair ... \n",
"3 0.90 D 63.8 61.0 4252 6.07 5.99 3.85 SI1 Fair ... \n",
"4 0.71 G 65.7 56.0 2274 5.51 5.54 3.63 VS2 Fair ... \n",
"\n",
" clarity_IF clarity_SI1 clarity_SI2 clarity_VS1 clarity_VS2 \\\n",
"index \n",
"0 0.0 1.0 0.0 0.0 0.0 \n",
"1 0.0 1.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 0.0 \n",
"3 0.0 1.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 1.0 \n",
"\n",
" clarity_VVS1 clarity_VVS2 depth_binned table_binned price_binned \n",
"index \n",
"0 0.0 0.0 2 1 2 \n",
"1 0.0 0.0 3 1 0 \n",
"2 0.0 0.0 3 2 1 \n",
"3 0.0 0.0 2 2 1 \n",
"4 0.0 0.0 3 1 0 \n",
"\n",
"[5 rows x 31 columns]\n"
]
}
],
"source": [
"import featuretools as ft\n",
"\n",
"# Start again from the encoded training frame.\n",
"# NOTE(review): this rebinds `data`, so columns added to an earlier `data` frame (hand-made and\n",
"# scaled features) are not part of the featuretools input — confirm this is intended.\n",
"data = train_data_encoded.copy()\n",
"\n",
"es = ft.EntitySet(id=\"diamonds\")\n",
"\n",
"# The frame has no 'index' column, so ask featuretools to create it explicitly (make_index=True)\n",
"# instead of relying on the implicit fallback, which emits a UserWarning.\n",
"es = es.add_dataframe(dataframe_name=\"diamonds_data\", dataframe=data, index=\"index\", make_index=True)\n",
"\n",
"# With a single dataframe in the EntitySet featuretools lowers max_depth to 1 and warns when a\n",
"# larger value is requested, so request depth 1 directly.\n",
"feature_matrix, feature_defs = ft.dfs(\n",
"    entityset=es,\n",
"    target_dataframe_name=\"diamonds_data\",\n",
"    max_depth=1\n",
")\n",
"\n",
"print(feature_matrix.head())"
]
},
2024-11-30 13:19:01 +04:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Оцениваем качество каждого набора<br>\n",
"В коде есть комментарии, указывающие, что именно мы сейчас оцениваем."
]
},
2024-11-01 21:16:02 +04:00
{
"cell_type": "code",
2024-11-30 13:19:01 +04:00
"execution_count": 18,
2024-11-01 21:16:02 +04:00
"metadata": {},
"outputs": [
{
2024-11-30 13:19:01 +04:00
"name": "stdout",
"output_type": "stream",
"text": [
"Предсказательная способность (MSE): 769447.425412744\n",
"Скорость обучения: 0.9312052726745605 секунд\n",
"Скорость предсказания: 0.009320497512817383 секунд\n",
"Надежность (стабильность MSE): 9415.05041335384\n",
"Корреляционная матрица признаков:\n",
" index carat depth table x y \\\n",
"index 1.000000 0.918976 0.256560 -0.012994 0.881745 0.881997 \n",
"carat 0.918976 1.000000 0.317029 0.011338 0.965007 0.964633 \n",
"depth 0.256560 0.317029 1.000000 -0.523388 0.214227 0.212123 \n",
"table -0.012994 0.011338 -0.523388 1.000000 0.068165 0.061429 \n",
"x 0.881745 0.965007 0.214227 0.068165 1.000000 0.998661 \n",
"y 0.881997 0.964633 0.212123 0.061429 0.998661 1.000000 \n",
"z 0.874093 0.962961 0.422732 -0.067003 0.966991 0.966578 \n",
"carat_binned 0.936744 0.980069 0.299586 -0.006563 0.954800 0.954348 \n",
"cut_Good -0.197076 -0.178975 -0.244465 0.324476 -0.140993 -0.137115 \n",
"cut_Ideal -0.266915 -0.293529 -0.159223 -0.225245 -0.308059 -0.303008 \n",
"cut_Premium -0.106465 -0.150300 -0.286254 0.108195 -0.104058 -0.105738 \n",
"cut_Very Good -0.086479 -0.163226 -0.164405 -0.010754 -0.162451 -0.152861 \n",
"color_E -0.174812 -0.199907 -0.120239 0.132333 -0.204134 -0.202557 \n",
"color_F -0.253824 -0.269185 -0.048395 -0.001821 -0.268046 -0.268242 \n",
"color_G -0.264394 -0.282227 -0.076425 -0.032431 -0.262968 -0.263331 \n",
"color_H 0.260031 0.235182 0.080885 -0.015329 0.229047 0.223693 \n",
"color_I 0.117366 0.102229 -0.113178 0.080852 0.144121 0.146060 \n",
"color_J 0.202485 0.292015 0.215944 -0.024922 0.241304 0.244249 \n",
"clarity_IF -0.181797 -0.220384 -0.112810 -0.049651 -0.255666 -0.252669 \n",
"clarity_SI1 -0.212170 -0.222121 -0.108779 0.010751 -0.204318 -0.201694 \n",
"clarity_SI2 -0.162937 -0.168896 -0.141779 0.073969 -0.108325 -0.104168 \n",
"clarity_VS1 -0.202684 -0.220126 -0.116339 0.007504 -0.220583 -0.218858 \n",
"clarity_VS2 -0.190586 -0.206457 -0.112282 0.012328 -0.194913 -0.192109 \n",
"clarity_VVS1 -0.187562 -0.229651 -0.101446 -0.032463 -0.271730 -0.270343 \n",
"clarity_VVS2 -0.193781 -0.224968 -0.091511 -0.030425 -0.254022 -0.251780 \n",
"depth_binned 0.245417 0.295187 0.870892 -0.566471 0.205631 0.206883 \n",
"table_binned 0.074231 0.114544 -0.377101 0.817051 0.134186 0.131247 \n",
"price_binned 0.747669 0.815253 0.160678 0.042235 0.811691 0.817555 \n",
"cut_Good -0.197076 -0.178975 -0.244465 0.324476 -0.140993 -0.137115 \n",
"cut_Ideal -0.266915 -0.293529 -0.159223 -0.225245 -0.308059 -0.303008 \n",
"cut_Premium -0.106465 -0.150300 -0.286254 0.108195 -0.104058 -0.105738 \n",
"cut_Very Good -0.086479 -0.163226 -0.164405 -0.010754 -0.162451 -0.152861 \n",
"color_E -0.174812 -0.199907 -0.120239 0.132333 -0.204134 -0.202557 \n",
"color_F -0.253824 -0.269185 -0.048395 -0.001821 -0.268046 -0.268242 \n",
"color_G -0.264394 -0.282227 -0.076425 -0.032431 -0.262968 -0.263331 \n",
"color_H 0.260031 0.235182 0.080885 -0.015329 0.229047 0.223693 \n",
"color_I 0.117366 0.102229 -0.113178 0.080852 0.144121 0.146060 \n",
"color_J 0.202485 0.292015 0.215944 -0.024922 0.241304 0.244249 \n",
"clarity_IF -0.181797 -0.220384 -0.112810 -0.049651 -0.255666 -0.252669 \n",
"clarity_SI1 -0.212170 -0.222121 -0.108779 0.010751 -0.204318 -0.201694 \n",
"clarity_SI2 -0.162937 -0.168896 -0.141779 0.073969 -0.108325 -0.104168 \n",
"clarity_VS1 -0.202684 -0.220126 -0.116339 0.007504 -0.220583 -0.218858 \n",
"clarity_VS2 -0.190586 -0.206457 -0.112282 0.012328 -0.194913 -0.192109 \n",
"clarity_VVS1 -0.187562 -0.229651 -0.101446 -0.032463 -0.271730 -0.270343 \n",
"clarity_VVS2 -0.193781 -0.224968 -0.091511 -0.030425 -0.254022 -0.251780 \n",
"\n",
" z carat_binned cut_Good cut_Ideal ... color_H \\\n",
"index 0.874093 0.936744 -0.197076 -0.266915 ... 0.260031 \n",
"carat 0.962961 0.980069 -0.178975 -0.293529 ... 0.235182 \n",
"depth 0.422732 0.299586 -0.244465 -0.159223 ... 0.080885 \n",
"table -0.067003 -0.006563 0.324476 -0.225245 ... -0.015329 \n",
"x 0.966991 0.954800 -0.140993 -0.308059 ... 0.229047 \n",
"y 0.966578 0.954348 -0.137115 -0.303008 ... 0.223693 \n",
"z 1.000000 0.948784 -0.191932 -0.311613 ... 0.226114 \n",
"carat_binned 0.948784 1.000000 -0.175908 -0.285435 ... 0.273119 \n",
"cut_Good -0.191932 -0.175908 1.000000 -0.131819 ... -0.135046 \n",
"cut_Ideal -0.311613 -0.285435 -0.131819 1.000000 ... -0.071273 \n",
"cut_Premium -0.161545 -0.138476 -0.180437 -0.141673 ... -0.101136 \n",
"cut_Very Good -0.178681 -0.160568 -0.158199 -0.124212 ... -0.066590 \n",
"color_E -0.210177 -0.199353 0.180753 0.043481 ... -0.192503 \n",
"color_F -0.256080 -0.267207 0.020500 0.079422 ... -0.199995 \n",
"color_G -0.268521 -0.273979 0.029838 0.102871 ... -0.255060 \n",
"color_H 0.226114 0.273119 -0.135046 -0.071273 ... 1.000000 \n",
"color_I 0.102372 0.129941 0.064618 -0.038568 ... -0.309138 \n",
"color_J 0.279789 0.219450 -0.098352 -0.072643 ... -0.264988 \n",
"clarity_IF -0.253221 -0.213292 -0.008462 0.203602 ... -0.054606 \n",
"clarity_SI1 -0.210001 -0.227317 0.081039 0.051568 ... -0.024638 \n",
"clarity_SI2 -0.130004 -0.162642 0.110280 0.019205 ... -0.037742 \n",
"clarity_VS1 -0.225411 -0.224820 0.045321 0.096886 ... -0.065186 \n",
"clarity_VS2 -0.200809 -0.208082 0.030034 0.081414 ... -0.071251 \n",
"clarity_VVS1 -0.266684 -0.221868 0.002891 0.158572 ... -0.052581 \n",
"clarity_VVS2 -0.248962 -0.224522 0.018263 0.129101 ... -0.068533 \n",
"depth_binned 0.393501 0.276906 -0.352815 -0.126332 ... -0.037413 \n",
"table_binned 0.034246 0.076420 0.218950 -0.309130 ... -0.059176 \n",
"price_binned 0.792153 0.798011 -0.109808 -0.163178 ... 0.151799 \n",
"cut_Good -0.191932 -0.175908 1.000000 -0.131819 ... -0.135046 \n",
"cut_Ideal -0.311613 -0.285435 -0.131819 1.000000 ... -0.071273 \n",
"cut_Premium -0.161545 -0.138476 -0.180437 -0.141673 ... -0.101136 \n",
"cut_Very Good -0.178681 -0.160568 -0.158199 -0.124212 ... -0.066590 \n",
"color_E -0.210177 -0.199353 0.180753 0.043481 ... -0.192503 \n",
"color_F -0.256080 -0.267207 0.020500 0.079422 ... -0.199995 \n",
"color_G -0.268521 -0.273979 0.029838 0.102871 ... -0.255060 \n",
"color_H 0.226114 0.273119 -0.135046 -0.071273 ... 1.000000 \n",
"color_I 0.102372 0.129941 0.064618 -0.038568 ... -0.309138 \n",
"color_J 0.279789 0.219450 -0.098352 -0.072643 ... -0.264988 \n",
"clarity_IF -0.253221 -0.213292 -0.008462 0.203602 ... -0.054606 \n",
"clarity_SI1 -0.210001 -0.227317 0.081039 0.051568 ... -0.024638 \n",
"clarity_SI2 -0.130004 -0.162642 0.110280 0.019205 ... -0.037742 \n",
"clarity_VS1 -0.225411 -0.224820 0.045321 0.096886 ... -0.065186 \n",
"clarity_VS2 -0.200809 -0.208082 0.030034 0.081414 ... -0.071251 \n",
"clarity_VVS1 -0.266684 -0.221868 0.002891 0.158572 ... -0.052581 \n",
"clarity_VVS2 -0.248962 -0.224522 0.018263 0.129101 ... -0.068533 \n",
"\n",
" color_I color_J clarity_IF clarity_SI1 clarity_SI2 \\\n",
"index 0.117366 0.202485 -0.181797 -0.212170 -0.162937 \n",
"carat 0.102229 0.292015 -0.220384 -0.222121 -0.168896 \n",
"depth -0.113178 0.215944 -0.112810 -0.108779 -0.141779 \n",
"table 0.080852 -0.024922 -0.049651 0.010751 0.073969 \n",
"x 0.144121 0.241304 -0.255666 -0.204318 -0.108325 \n",
"y 0.146060 0.244249 -0.252669 -0.201694 -0.104168 \n",
"z 0.102372 0.279789 -0.253221 -0.210001 -0.130004 \n",
"carat_binned 0.129941 0.219450 -0.213292 -0.227317 -0.162642 \n",
"cut_Good 0.064618 -0.098352 -0.008462 0.081039 0.110280 \n",
"cut_Ideal -0.038568 -0.072643 0.203602 0.051568 0.019205 \n",
"cut_Premium 0.105213 -0.084217 0.012019 0.073734 0.076342 \n",
"cut_Very Good 0.062781 -0.077472 0.062992 0.077184 0.071627 \n",
"color_E -0.146245 -0.125359 0.006989 0.048760 0.060037 \n",
"color_F -0.151937 -0.130238 0.067814 0.026771 0.056315 \n",
"color_G -0.193770 -0.166096 0.118431 0.009543 0.012232 \n",
"color_H -0.309138 -0.264988 -0.054606 -0.024638 -0.037742 \n",
"color_I 1.000000 -0.201312 -0.056505 -0.029603 -0.010066 \n",
"color_J -0.201312 1.000000 -0.042570 -0.032981 -0.082478 \n",
"clarity_IF -0.056505 -0.042570 1.000000 -0.049155 -0.065849 \n",
"clarity_SI1 -0.029603 -0.032981 -0.049155 1.000000 -0.099840 \n",
"clarity_SI2 -0.010066 -0.082478 -0.065849 -0.099840 1.000000 \n",
"clarity_VS1 -0.008767 -0.024548 -0.041445 -0.062839 -0.084180 \n",
"clarity_VS2 -0.001448 0.005467 -0.044484 -0.067447 -0.090353 \n",
"clarity_VVS1 -0.038316 -0.052719 -0.033043 -0.050101 -0.067116 \n",
"clarity_VVS2 -0.051227 -0.036439 -0.034892 -0.052904 -0.070871 \n",
"depth_binned -0.075038 0.318499 -0.079937 -0.107482 -0.137301 \n",
"table_binned 0.017798 0.061547 -0.052251 0.019997 0.046550 \n",
"price_binned 0.080443 0.194458 -0.116621 -0.094520 0.030333 \n",
"cut_Good 0.064618 -0.098352 -0.008462 0.081039 0.110280 \n",
"cut_Ideal -0.038568 -0.072643 0.203602 0.051568 0.019205 \n",
"cut_Premium 0.105213 -0.084217 0.012019 0.073734 0.076342 \n",
"cut_Very Good 0.062781 -0.077472 0.062992 0.077184 0.071627 \n",
"color_E -0.146245 -0.125359 0.006989 0.048760 0.060037 \n",
"color_F -0.151937 -0.130238 0.067814 0.026771 0.056315 \n",
"color_G -0.193770 -0.166096 0.118431 0.009543 0.012232 \n",
"color_H -0.309138 -0.264988 -0.054606 -0.024638 -0.037742 \n",
"color_I 1.000000 -0.201312 -0.056505 -0.029603 -0.010066 \n",
"color_J -0.201312 1.000000 -0.042570 -0.032981 -0.082478 \n",
"clarity_IF -0.056505 -0.042570 1.000000 -0.049155 -0.065849 \n",
"clarity_SI1 -0.029603 -0.032981 -0.049155 1.000000 -0.099840 \n",
"clarity_SI2 -0.010066 -0.082478 -0.065849 -0.099840 1.000000 \n",
"clarity_VS1 -0.008767 -0.024548 -0.041445 -0.062839 -0.084180 \n",
"clarity_VS2 -0.001448 0.005467 -0.044484 -0.067447 -0.090353 \n",
"clarity_VVS1 -0.038316 -0.052719 -0.033043 -0.050101 -0.067116 \n",
"clarity_VVS2 -0.051227 -0.036439 -0.034892 -0.052904 -0.070871 \n",
"\n",
" clarity_VS1 clarity_VS2 clarity_VVS1 clarity_VVS2 \n",
"index -0.202684 -0.190586 -0.187562 -0.193781 \n",
"carat -0.220126 -0.206457 -0.229651 -0.224968 \n",
"depth -0.116339 -0.112282 -0.101446 -0.091511 \n",
"table 0.007504 0.012328 -0.032463 -0.030425 \n",
"x -0.220583 -0.194913 -0.271730 -0.254022 \n",
"y -0.218858 -0.192109 -0.270343 -0.251780 \n",
"z -0.225411 -0.200809 -0.266684 -0.248962 \n",
"carat_binned -0.224820 -0.208082 -0.221868 -0.224522 \n",
"cut_Good 0.045321 0.030034 0.002891 0.018263 \n",
"cut_Ideal 0.096886 0.081414 0.158572 0.129101 \n",
"cut_Premium 0.058620 0.089749 0.029293 0.012928 \n",
"cut_Very Good 0.059445 0.073481 0.070278 0.087436 \n",
"color_E 0.028625 0.023490 0.045116 0.055769 \n",
"color_F 0.046575 0.032305 0.065653 0.059278 \n",
"color_G 0.064229 0.036578 0.072317 0.078458 \n",
"color_H -0.065186 -0.071251 -0.052581 -0.068533 \n",
"color_I -0.008767 -0.001448 -0.038316 -0.051227 \n",
"color_J -0.024548 0.005467 -0.052719 -0.036439 \n",
"clarity_IF -0.041445 -0.044484 -0.033043 -0.034892 \n",
"clarity_SI1 -0.062839 -0.067447 -0.050101 -0.052904 \n",
"clarity_SI2 -0.084180 -0.090353 -0.067116 -0.070871 \n",
"clarity_VS1 1.000000 -0.056868 -0.042242 -0.044606 \n",
"clarity_VS2 -0.056868 1.000000 -0.045340 -0.047877 \n",
"clarity_VVS1 -0.042242 -0.045340 1.000000 -0.035564 \n",
"clarity_VVS2 -0.044606 -0.047877 -0.035564 1.000000 \n",
"depth_binned -0.094953 -0.096364 -0.082692 -0.080568 \n",
"table_binned 0.006478 0.018598 -0.041154 -0.040427 \n",
"price_binned -0.097141 -0.070453 -0.143954 -0.134063 \n",
"cut_Good 0.045321 0.030034 0.002891 0.018263 \n",
"cut_Ideal 0.096886 0.081414 0.158572 0.129101 \n",
"cut_Premium 0.058620 0.089749 0.029293 0.012928 \n",
"cut_Very Good 0.059445 0.073481 0.070278 0.087436 \n",
"color_E 0.028625 0.023490 0.045116 0.055769 \n",
"color_F 0.046575 0.032305 0.065653 0.059278 \n",
"color_G 0.064229 0.036578 0.072317 0.078458 \n",
"color_H -0.065186 -0.071251 -0.052581 -0.068533 \n",
"color_I -0.008767 -0.001448 -0.038316 -0.051227 \n",
"color_J -0.024548 0.005467 -0.052719 -0.036439 \n",
"clarity_IF -0.041445 -0.044484 -0.033043 -0.034892 \n",
"clarity_SI1 -0.062839 -0.067447 -0.050101 -0.052904 \n",
"clarity_SI2 -0.084180 -0.090353 -0.067116 -0.070871 \n",
"clarity_VS1 1.000000 -0.056868 -0.042242 -0.044606 \n",
"clarity_VS2 -0.056868 1.000000 -0.045340 -0.047877 \n",
"clarity_VVS1 -0.042242 -0.045340 1.000000 -0.035564 \n",
"clarity_VVS2 -0.044606 -0.047877 -0.035564 1.000000 \n",
"\n",
"[45 rows x 45 columns]\n",
"Пропуски в данных:\n",
" index 0\n",
"carat 0\n",
"color 0\n",
"depth 0\n",
"table 0\n",
"price 0\n",
"x 0\n",
"y 0\n",
"z 0\n",
"clarity 0\n",
"cut 0\n",
"carat_binned 0\n",
"cut_Good 0\n",
"cut_Ideal 0\n",
"cut_Premium 0\n",
"cut_Very Good 0\n",
"color_E 0\n",
"color_F 0\n",
"color_G 0\n",
"color_H 0\n",
"color_I 0\n",
"color_J 0\n",
"clarity_IF 0\n",
"clarity_SI1 0\n",
"clarity_SI2 0\n",
"clarity_VS1 0\n",
"clarity_VS2 0\n",
"clarity_VVS1 0\n",
"clarity_VVS2 0\n",
"depth_binned 0\n",
"table_binned 0\n",
"price_binned 0\n",
"dtype: int64\n",
"Сводка по данным:\n",
" index carat depth table price \\\n",
"count 70315.000000 70315.000000 70315.000000 70315.000000 70315.000000 \n",
"mean 35157.000000 2.257318 63.299607 58.124889 9377.150124 \n",
"std 20298.336426 1.249989 3.131217 2.946560 5572.610635 \n",
"min 0.000000 0.200000 44.000000 43.000000 327.000000 \n",
"25% 17578.500000 1.200000 61.400000 56.000000 4916.000000 \n",
"50% 35157.000000 2.040000 63.300000 58.000000 9664.000000 \n",
"75% 52735.500000 3.110000 65.800000 60.000000 13945.000000 \n",
"max 70314.000000 4.500000 79.000000 79.000000 18806.000000 \n",
"\n",
" x y z carat_binned cut_Good \\\n",
"count 70315.000000 70315.000000 70315.000000 70315.000000 70315.000000 \n",
"mean 7.939428 7.885353 5.016576 2.000000 0.143753 \n",
"std 1.696162 1.661164 1.150922 1.414224 0.350842 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 6.760000 6.740000 4.190000 1.000000 0.000000 \n",
"50% 8.050000 8.010000 5.140000 2.000000 0.000000 \n",
"75% 9.420000 9.340000 5.970000 3.000000 0.000000 \n",
"max 10.230000 10.160000 6.720000 4.000000 1.000000 \n",
"\n",
" ... clarity_IF clarity_SI1 clarity_SI2 clarity_VS1 \\\n",
"count ... 70315.000000 70315.000000 70315.000000 70315.000000 \n",
"mean ... 0.031402 0.069359 0.117969 0.050316 \n",
"std ... 0.174402 0.254066 0.322574 0.218599 \n",
"min ... 0.000000 0.000000 0.000000 0.000000 \n",
"25% ... 0.000000 0.000000 0.000000 0.000000 \n",
"50% ... 0.000000 0.000000 0.000000 0.000000 \n",
"75% ... 0.000000 0.000000 0.000000 0.000000 \n",
"max ... 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" clarity_VS2 clarity_VVS1 clarity_VVS2 depth_binned table_binned \\\n",
"count 70315.000000 70315.000000 70315.000000 70315.000000 70315.000000 \n",
"mean 0.057527 0.032582 0.036194 2.244059 1.599346 \n",
"std 0.232848 0.177541 0.186775 0.581360 0.537429 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 2.000000 1.000000 \n",
"50% 0.000000 0.000000 0.000000 2.000000 2.000000 \n",
"75% 0.000000 0.000000 0.000000 3.000000 2.000000 \n",
"max 1.000000 1.000000 1.000000 4.000000 4.000000 \n",
"\n",
" price_binned \n",
"count 70315.000000 \n",
"mean 1.960620 \n",
"std 1.454469 \n",
"min 0.000000 \n",
"25% 1.000000 \n",
"50% 2.000000 \n",
"75% 3.000000 \n",
"max 4.000000 \n",
"\n",
"[8 rows x 29 columns]\n"
2024-11-01 21:16:02 +04:00
]
}
],
"source": [
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.model_selection import cross_val_score, train_test_split\n",
"from sklearn.metrics import mean_squared_error, accuracy_score, f1_score\n",
"import time\n",
"\n",
2024-11-30 13:19:01 +04:00
"# Build the design matrix X and the target y for the price regression.\n",
"# One-hot encode the raw categorical columns; drop='first' omits one level per\n",
"# feature so the dummy columns are not perfectly collinear in the linear model.\n",
"categorical_features = ['cut', 'color', 'clarity']\n",
"encoder = OneHotEncoder(sparse_output=False, drop='first')\n",
"encoded_data = pd.DataFrame(\n",
"    encoder.fit_transform(data[categorical_features]),\n",
"    columns=encoder.get_feature_names_out(categorical_features),\n",
"    index=data.index,  # keep row labels aligned with `data` for the concat below\n",
")\n",
"\n",
"# `data` may already carry dummy columns with the same names from an earlier step;\n",
"# drop them before concatenating, otherwise every dummy ends up in X twice.\n",
"already_encoded = [col for col in encoded_data.columns if col in data.columns]\n",
"data_encoded = pd.concat(\n",
"    [data.drop(columns=categorical_features + already_encoded), encoded_data],\n",
"    axis=1,\n",
")\n",
"\n",
"# Columns that must not be used as predictors:\n",
"#   'index'        - row identifier, carries no information about the stone itself;\n",
"#   'price_binned' - presumably a binned copy of the target (TODO confirm), i.e. target leakage.\n",
"leaky_columns = [col for col in ('index', 'price_binned') if col in data_encoded.columns]\n",
"\n",
"X = data_encoded.drop(columns=['price'] + leaky_columns)  # Features\n",
"y = data_encoded['price']  # Target variable\n",
"\n",
2024-11-01 21:16:02 +04:00
"\n",
"# Hold out 20% of the rows; the fixed seed makes the split reproducible.\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"\n",
"\n",
"# 1. Predictive power: mean MSE over 5-fold cross-validation on the training split.\n",
"model = LinearRegression()\n",
"\n",
"# The scorer returns negated MSE, so flip the sign back to get the error itself.\n",
"cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='neg_mean_squared_error')\n",
"mean_mse = -np.mean(cv_scores)\n",
"print(\"Предсказательная способность (MSE):\", mean_mse)\n",
"\n",
"\n",
"# 2. Computation speed: wall-clock time of one fit and one predict call.\n",
"# perf_counter() is the clock intended for measuring short intervals.\n",
"start_time = time.perf_counter()\n",
"model.fit(X_train, y_train)\n",
"train_time = time.perf_counter() - start_time\n",
"\n",
"start_time = time.perf_counter()\n",
"y_pred = model.predict(X_test)\n",
"predict_time = time.perf_counter() - start_time\n",
"\n",
"print(\"Скорость обучения:\", train_time, \"секунд\")\n",
"print(\"Скорость предсказания:\", predict_time, \"секунд\")\n",
"\n",
"# Score the held-out predictions so the test split is actually used for evaluation.\n",
"test_mse = mean_squared_error(y_test, y_pred)\n",
"print(\"MSE на тестовой выборке:\", test_mse)\n",
"\n",
"# 3. Reliability: spread of the per-fold MSE values from the cross-validation above.\n",
"std_mse = np.std(-cv_scores)\n",
"print(\"Надежность (стабильность MSE):\", std_mse)\n",
"\n",
"# 4. Correlation: pairwise correlation between the feature columns of X.\n",
"correlation_matrix = X.corr()\n",
"print(\"Корреляционная матрица признаков:\\n\", correlation_matrix)\n",
"\n",
"# 5. Integrity: missing-value counts and summary statistics of the source frame.\n",
"print(\"Пропуски в данных:\\n\", data.isnull().sum())\n",
"print(\"Сводка по данным:\\n\", data.describe())\n"
]
2024-11-30 13:19:01 +04:00
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"В итоге MSE у меня равен 769447.43, что можно считать относительно высоким значением. В последующих работах я буду уделять больше времени подготовке выборок данных, чтобы повысить точность предсказаний."
]
2024-11-01 21:16:02 +04:00
}
],
"metadata": {
"kernelspec": {
"display_name": "aimenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}