823 lines
389 KiB
Plaintext
823 lines
389 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Лабораторная 3"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Информация о диабете индейцев Пима"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 2,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',\n",
|
|||
|
" 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],\n",
|
|||
|
" dtype='object')\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import numpy as np\n",
|
|||
|
"import pandas as pd\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"\n",
|
|||
|
"df = pd.read_csv(\".//scv//diabetes.csv\")\n",
|
|||
|
"print(df.columns)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Столбцы на русском:\n",
|
|||
|
"'Pregnancies' - количество беременностей\n",
|
|||
|
"'Glucose' - уровень глюкозы\n",
|
|||
|
"'BloodPressure' - кровяное давление\n",
|
|||
|
"'SkinThickness' - толщина кожи\n",
|
|||
|
"'Insulin' - уровень инсулина\n",
|
|||
|
"'BMI' - ИМТ\n",
|
|||
|
"'DiabetesPedigreeFunction' - функция родословной диабета\n",
|
|||
|
"'Age' - возраст\n",
|
|||
|
"'Outcome' - исход"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 3,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|||
|
"RangeIndex: 768 entries, 0 to 767\n",
|
|||
|
"Data columns (total 9 columns):\n",
|
|||
|
" # Column Non-Null Count Dtype \n",
|
|||
|
"--- ------ -------------- ----- \n",
|
|||
|
" 0 Pregnancies 768 non-null int64 \n",
|
|||
|
" 1 Glucose 768 non-null int64 \n",
|
|||
|
" 2 BloodPressure 768 non-null int64 \n",
|
|||
|
" 3 SkinThickness 768 non-null int64 \n",
|
|||
|
" 4 Insulin 768 non-null int64 \n",
|
|||
|
" 5 BMI 768 non-null float64\n",
|
|||
|
" 6 DiabetesPedigreeFunction 768 non-null float64\n",
|
|||
|
" 7 Age 768 non-null int64 \n",
|
|||
|
" 8 Outcome 768 non-null int64 \n",
|
|||
|
"dtypes: float64(2), int64(7)\n",
|
|||
|
"memory usage: 54.1 KB\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Pregnancies</th>\n",
|
|||
|
" <th>Glucose</th>\n",
|
|||
|
" <th>BloodPressure</th>\n",
|
|||
|
" <th>SkinThickness</th>\n",
|
|||
|
" <th>Insulin</th>\n",
|
|||
|
" <th>BMI</th>\n",
|
|||
|
" <th>DiabetesPedigreeFunction</th>\n",
|
|||
|
" <th>Age</th>\n",
|
|||
|
" <th>Outcome</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>148</td>\n",
|
|||
|
" <td>72</td>\n",
|
|||
|
" <td>35</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>33.6</td>\n",
|
|||
|
" <td>0.627</td>\n",
|
|||
|
" <td>50</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>85</td>\n",
|
|||
|
" <td>66</td>\n",
|
|||
|
" <td>29</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>26.6</td>\n",
|
|||
|
" <td>0.351</td>\n",
|
|||
|
" <td>31</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>183</td>\n",
|
|||
|
" <td>64</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>23.3</td>\n",
|
|||
|
" <td>0.672</td>\n",
|
|||
|
" <td>32</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>89</td>\n",
|
|||
|
" <td>66</td>\n",
|
|||
|
" <td>23</td>\n",
|
|||
|
" <td>94</td>\n",
|
|||
|
" <td>28.1</td>\n",
|
|||
|
" <td>0.167</td>\n",
|
|||
|
" <td>21</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>137</td>\n",
|
|||
|
" <td>40</td>\n",
|
|||
|
" <td>35</td>\n",
|
|||
|
" <td>168</td>\n",
|
|||
|
" <td>43.1</td>\n",
|
|||
|
" <td>2.288</td>\n",
|
|||
|
" <td>33</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
|
|||
|
"0 6 148 72 35 0 33.6 \n",
|
|||
|
"1 1 85 66 29 0 26.6 \n",
|
|||
|
"2 8 183 64 0 0 23.3 \n",
|
|||
|
"3 1 89 66 23 94 28.1 \n",
|
|||
|
"4 0 137 40 35 168 43.1 \n",
|
|||
|
"\n",
|
|||
|
" DiabetesPedigreeFunction Age Outcome \n",
|
|||
|
"0 0.627 50 1 \n",
|
|||
|
"1 0.351 31 0 \n",
|
|||
|
"2 0.672 32 1 \n",
|
|||
|
"3 0.167 21 0 \n",
|
|||
|
"4 2.288 33 1 "
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 3,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df.info()\n",
|
|||
|
"df.head()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Объект наблюдения - пациент\n",
|
|||
|
"Атрибуты - содержат набор информации о пациенте, такие как:\n",
|
|||
|
"количество беременностей, глюкоза, кровяное давление, толщина кожи, ИМТ, возраст и другие атрибуты"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 18,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA04AAAIjCAYAAAA0vUuxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hc1Z3/8fed3jTq1ZJlWS64N4wrYNN7SQKEElqSDdkkEMimbTYJJJtlUyDwSw9JgEBIKEvvphqwce82riq2em+j6ff3h0AgJBfJKrb0eT2Pn8dz750735kzku73nnO+xzBN00REREREREQOyjLUAYiIiIiIiBzrlDiJiIiIiIgchhInERERERGRw1DiJCIiIiIichhKnERERERERA5DiZOIiIiIiMhhKHESERERERE5DCVOIiIiIiIih6HESURERERE5DCUOImIiIiIiByGEicRETmsJ554AsMwevw3derUoQ5vxGptbeXHP/4x55xzDikpKRiGwQMPPDDUYYmIDEu2oQ5ARESOH//5n//JpEmTOh//7Gc/G8JopLa2lp/85CeMHj2aGTNm8NZbbw11SCIiw5YSJxEROWJnnnkmS5Ys6Xz8l7/8hdra2qELaITLzs6moqKCrKws1q5dy9y5c4c6JBGRYUtD9URE5LDC4TAAFsvh/2w88MADGIZBcXFx57Z4PM706dO7DSXbvHkz119/PWPHjsXlcpGVlcWNN95IXV1dl3PefvvtPQ4TtNk+vv+3ZMkSpk6dyrp161i4cCFut5uCggL++Mc/dnsvP/rRj5gzZw6JiYl4vV5OPvlk3nzzzS7HFRcXd77O008/3WVfMBgkOTkZwzD41a9+1S3OjIwMIpFIl+f885//7DzfJ5PNZ555hvPPP5+cnBycTieFhYX89Kc/JRaLHfazdjqdZGVlHfY4ERE5eupxEhGRw/oocXI6nX16/kMPPcSWLVu6bV+2bBn79u3jhhtuICsri23btvHnP/+Zbdu28f7772MYRpfj//CHP+Dz+ToffzqRa2ho4LzzzuPyyy/nyiuv5LHHHuOrX/0qDoeDG2+8EYDm5mb+8pe/cOWVV/LlL3+ZlpYW/vrXv3L22WezevVqZs6c2eWcLpeL+++/n0suuaRz25NPPkkwGDzo+21paeH555/n0ksv7dx2//3343K5uj3vgQcewOfzcdttt+Hz+XjjjTf40Y9+RHNzM7/85S8P+hoiIjK4lDiJiMhhNTU1AeB2u3v93FAoxI9+9CPOPfdcXnrppS77/v3f/51vfetbXbbNnz+fK6+8knfffZeTTz65y77Pfe5zpKWlHfS1ysvLueuuu7jtttsA+MpXvsK8efP4/ve/zxe+8AXsdjvJyckUFxfjcDg6n/flL3+ZE044gd/85jf89a9/7XLOSy+9lMcff5yqqioyMzMB+Nvf/sZnPvMZHnnkkR7juPTSS/nb3/7WmTiVlpby+uuvc8UVV/DPf/6zy7GPPPJIl8/1pptu4qabbuL3v/89//3f/93nZFVERPqXhuqJiMhhfTR0Lj09vdfP/d3vfkddXR0//vGPu+37ZMIQDAapra1l/vz5AKxfv77Xr2Wz2fjKV77S+djhcPCVr3yF6upq1q1bB4DVau1MmuLxOPX19USjUU488cQeX3P27NlMmTKFhx56CICSkhLefPNNrr/++oPGceONN/Lyyy9TWVkJwIMPPsiCBQuYMGFCt2M/+Rm0tLRQW1vLySefTCAQ4IMPPuj1ZyAiIgNDiZOIiBxWSUkJNput14lTU1MT//M//8Ntt93W2VvzSfX19dxyyy1kZmbidrtJT0+noKCg87m9lZOTg9fr7bLto2Tlk3OuHnzwQaZPn47L5SI1NZX09HReeOGFg77mDTfcwP333w90DK1buHAh48ePP2gcM2fOZOrUqfz973/HNE0eeOABbrjhhh6P3bZtG5deeimJiYn4/X7S09O55pprgL59Bi
IiMjCUOImIyGHt3LmTsWPHdinGcCR+/vOfY7FY+Pa3v93j/ssvv5z77ruPm266iSeffJJXX32Vl19+GejoDRoIDz/8MNdffz2FhYX89a9/5eWXX2bZsmWcdtppB33Na665hj179vD+++/z4IMPHjQJ+qQbb7yR+++/n7fffpvKykouv/zybsc0NjZy6qmnsmnTJn7yk5/w3HPPsWzZMn7+858DA/cZiIhI72mOk4iIHFIoFGLjxo1diiMcifLycu69917uvPNOEhISulXKa2ho4PXXX+eOO+7gRz/6Uef23bt39znW8vJy2trauvQ67dq1C4AxY8YAHYv5jh07lieffLJL8YmehhJ+JDU1lYsuuqhz2N/ll19+2DLsV199Nd/+9re55ZZb+NznPkdCQkK3Y9566y3q6up48sknOeWUUzq3FxUVHdH7FRGRwaMeJxEROaRHHnmEUCjE6aef3qvn3XHHHWRmZnLTTTf1uN9qtQJgmmaX7ffcc0+f4gSIRqP86U9/6nwcDof505/+RHp6OnPmzDno665atYqVK1ce8tw33ngjmzdv5rLLLutS2e9gUlJSuPjii9m8eXNnRb9P6ymWcDjM73//+8OeX0REBpd6nEREpEdtbW385je/4Sc/+QlWqxXTNHn44Ye7HFNVVUVraysPP/wwZ555Zpd5TK+++ir/+Mc/ulSv+yS/388pp5zCL37xCyKRCKNGjeLVV189qt6WnJwcfv7zn1NcXMyECRN49NFH2bhxI3/+85+x2+0AXHDBBTz55JNceumlnH/++RQVFfHHP/6RyZMn09raetBzn3POOdTU1BxR0vSRBx54gN/97ncHrQS4cOFCkpOTue6667j55psxDIOHHnqoWzJ5KL/97W9pbGykvLwcgOeee44DBw4A8I1vfIPExMQjPpeIiBycEicREelRTU0N3//+9zsff7Ja3ad94Qtf4M033+ySOM2cOZMrr7zykK/xyCOP8I1vfIPf/e53mKbJWWedxUsvvUROTk6fYk5OTubBBx/kG9/4Bvfddx+ZmZn89re/5ctf/nLnMddffz2VlZX86U9/4pVXXmHy5Mk8/PDDPP7447z11lsHPbdhGIcshd4Tt9t9yBLuqampPP/883zrW9/iv/7rv0hOTuaaa67h9NNP5+yzzz6i1/jVr35FSUlJ5+Mnn3ySJ598EuiYm6XESUSkfxhmb25riYjIiFFcXExBQQFvvvkmS5YsOerjBtqSJUuora1l69atQxaDiIgMX5rjJCIiIiIichhKnEREpEc+n4+rr766x/WX+nKciIjI8UxD9UREZFjQUD0RERlISpxEREREREQOQ0P1REREREREDkOJk4iIiIiIyGGMuHWc4vE45eXlJCQkYBjGUIcjIiIiIiJDxDRNWlpayMnJwWI5dJ/SiEucysvLycvLG+owRERERETkGLF//35yc3MPecyIS5wSEhKAjg/H7/f3+/kjkQivvvoqZ511Fna7vd/PL4NPbTo8qV2HH7Xp8KR2HX7UpsPP8dymzc3N5OXldeYIhzKkidOdd97Jk08+yQcffIDb7WbhwoX8/Oc/Z+LEiQd9zgMPPMANN9zQZZvT6SQYDB7Ra340PM/v9w9Y4uTxePD7/cfdF0d6pjYdntSuw4/adHhSuw4/atPhZzi06ZFM4RnS4hBvv/02X/va13j//fdZtmwZkUiEs846i7a2tkM+z+/3U1FR0fmvpKRkkCIWEREREZGRaEh7nF5++eUujx944AEyMjJYt24dp5xyykGfZxgGWVlZAx2eiIiIiIgIcIzNcWpqagIgJSXlkMe1traSn59PPB5n9uzZ/M///A9Tpkzp8dhQKEQoFOp83NzcDHR0KUYikX6K/GMfnXMgzi1DQ206PKldhx+16fCkdh1+1KbDz/Hcpr2J2TBN0xzAWI5YPB7noosuorGxkXffffegx61cuZLdu3czffp0mpqa+NWvfsXy5cvZtm1bj5Uwbr/9du
64445u2x955BE8Hk+/vgcRERERETl+BAIBrrrqKpqamg5b/+CYSZy++tWv8tJLL/Huu+8ethTgJ0UiESZNmsSVV17
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"\n",
|
|||
|
"plt.scatter(df['Age'], df['DiabetesPedigreeFunction'], c=df['Age'], alpha=0.6)\n",
|
|||
|
"\n",
|
|||
|
"plt.title(\"Диаграмма 1\")\n",
|
|||
|
"plt.ylabel(\"Функция родословной диабета\")\n",
|
|||
|
"plt.xlabel(\"Возраст\")\n",
|
|||
|
"plt.grid(visible=True)\n",
|
|||
|
"\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 19,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA+QAAAIjCAYAAACKx9GpAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAC/oUlEQVR4nOzdd3hb9dk38O+RZEnee+9ssvckizBadsIIOwltGKVAC20hPM/bQQuBlj6UUvYKDRDCCGWVGRIge++d2HG8t7w0LOm8f0jneNsaR8v+fq4rF0SWpV88dZ97CaIoiiAiIiIiIiIiv1IF+gBEREREREREAxEDciIiIiIiIqIAYEBOREREREREFAAMyImIiIiIiIgCgAE5ERERERERUQAwICciIiIiIiIKAAbkRERERERERAHAgJyIiIiIiIgoABiQExEREREREQUAA3IiIiIiIiKiAGBATkREpIAPPvgAgiB0+2f06NGBPt6AtXPnTvzyl7/EqFGjEBkZiZycHFx//fU4ceJEoI9GREQETaAPQERE1J888sgjOO+88+S/P/bYYwE8DT355JPYvHkzrrvuOowdOxbl5eX417/+hYkTJ2Lbtm28WEJERAEliKIoBvoQREREoe6DDz7Addddhw0bNmDevHny7fPmzUN1dTUOHToUuMMNYFu2bMHkyZOh1Wrl206ePIkxY8bg2muvxVtvvRXA0xER0UDHknUiIiIFWCwWAIBK1fev1lWrVkEQBBQWFsq32e12jB07FoIgYNWqVfLtBw4cwNKlSzFo0CDo9XqkpaXh9ttvR01NTYfH/OMf/9htubxG01YMN2/ePIwePRq7d+/GzJkzER4ejvz8fLz44otd/i2///3vMWnSJMTGxiIyMhKzZ8/Ghg0bOtyvsLBQfp7//Oc/Hd5mMpkQHx8PQRDw1FNPdTlnSkoKWltbO7zPmjVr5Merrq6Wb//4449x2WWXISMjAzqdDoMHD8af//xn2Gy2Pj/WM2fO7BCMA8DQoUMxatQoHD16tM/3JyIi8iWWrBMRESlACsh1Op1H77969WocPHiwy+3ffPMNzpw5g2XLliEtLQ2HDx/Gyy+/jMOHD2Pbtm0QBKHD/V944QVERUXJf+98gaCurg6XXnoprr/+etx444147733cPfdd0Or1eL2228HADQ0NODVV1/FjTfeiOXLl6OxsRGvvfYaLrnkEuzYsQPjx4/v8Jh6vR5vvPEGrr76avm2devWwWQy9fjvbWxsxGeffYaFCxfKt73xxhvQ6/Vd3m/VqlWIiorCAw88gKioKHz33Xf4/e9/j4aGBvztb3/r8Tl6IooiKioqMGrUKLffl4iISEkMyImIiBRgMBgAAOHh4W6/r9lsxu9//3v89Kc/xRdffNHhbb/4xS/w4IMPdrht+vTpuPHGG7Fp0ybMnj27w9uuvfZaJCUl9fhcpaWl+Pvf/44HHngAAHDnnXdi2rRpWLFiBW699VaEhYUhPj4ehYWFHTLLy5cvx4gRI/Dss8/itdde6/CYCxcuxPvvv4+KigqkpqYCAF5//XUsWrQI77zzTrfnWLhwIV5//XU5IC8qKsL69euxePFirFmzpsN933nnnQ4f17vuugt33XUXnn/+efzlL39x+yLI22+/jZKSEjz66KNuvR8REZHSWLJORESkAKmEPDk52e33fe6551BTU4M//OEPXd7WPhA1mUyorq7G9OnTAQB79uxx+7k0Gg3uvPNO+e9arRZ33nknKisrsXv3bgCAWq2Wg3G73Y7a2lpYrVZMnjy52+ecOHEiRo0ahdWrVwMAzp49iw0bNmDp0qU9nuP222/Hl19+ifLycgDAm2++iRkzZmDYsGFd7tv+Y9DY2Ijq6mrMnj0bLS0tOHbsmFv//mPHjuGee+7BjBkzsGTJErfel4iISGkMyImIiBRw9uxZaDQatwNyg8GAxx9/HA888ICcXW6vtrYW999/P1JTUxEeHo7k5G
Tk5+fL7+uujIwMREZGdrhNCoLb97S/+eabGDt2LPR6PRITE5GcnIzPP/+8x+dctmwZ3njjDQCOEvOZM2di6NChPZ5j/PjxGD16NP79739DFEWsWrUKy5Yt6/a+hw8fxsKFCxEbG4uYmBgkJyfjlltuAeDex6C8vByXXXYZYmNj8cEHH0CtVrv8vkRERL7AgJyIiEgBx48fx6BBgzoMUXPFk08+CZVKhd/+9rfdvv3666/HK6+8grvuugvr1q3D119/jS+//BKAI3vtC2+99RaWLl2KwYMH47XXXsOXX36Jb775BhdccEGPz3nLLbfg1KlT2LZtG958880eg+v2br/9drzxxhv4/vvvUV5ejuuvv77Lferr6zF37lzs378fjz76KD799FN88803ePLJJwG4/jEwGAz46U9/ivr6enz55ZfIyMhw6f2IiIh8iT3kREREXjKbzdi3b1+HoWauKC0txTPPPIOVK1ciOjq6y+T0uro6rF+/Hn/605/w+9//Xr795MmTHp+1tLQUzc3NHbLkJ06cAADk5eUBcKxwGzRoENatW9dhaFx3JfWSxMREXHnllXL5+/XXX99hUnp3br75Zvz2t7/F/fffj2uvvRbR0dFd7rNx40bU1NRg3bp1mDNnjnx7QUGBS/9ewFHqf8UVV+DEiRP49ttvMXLkSJffl4iIyJeYISciIvLSO++8A7PZjAULFrj1fn/605+QmpqKu+66q9u3SyXVoih2uP0f//iHR+cEAKvVipdeekn+u8ViwUsvvYTk5GRMmjSpx+fdvn07tm7d2utj33777Thw4ACuu+66DpPee5KQkICrrroKBw4ckCe8d9bdWSwWC55//vk+Hx8AbDYbFi9ejK1bt+L999/HjBkzXHo/IiIif2CGnIiIyEPNzc149tln8eijj0KtVkMURbz11lsd7lNRUYGmpia89dZbuOiiizr0iX/99dd4++23u+zJlsTExGDOnDn461//itbWVmRmZuLrr792KzvcWUZGBp588kkUFhZi2LBhWLt2Lfbt24eXX34ZYWFhAIDLL78c69atw8KFC3HZZZehoKAAL774IkaOHImmpqYeH/snP/kJqqqqXArGJatWrcJzzz3X42T4mTNnIj4+HkuWLMF9990HQRCwevXqLhcpevLggw/ik08+wRVXXIHa2tounx+pF52IiCgQGJATERF5qKqqCitWrJD/3n56eWe33norNmzY0CEgHz9+PG688cZen+Odd97Bvffei+eeew6iKOLiiy/GF1984XEPdHx8PN58803ce++9eOWVV5Camop//etfWL58uXyfpUuXory8HC+99BK++uorjBw5Em+99Rbef/99bNy4scfHFgSh15Vr3QkPD+91VVxiYiI+++wzPPjgg/jf//1fxMfH45ZbbsGCBQtwySWX9Pn4+/btAwB8+umn+PTTT7u8nQE5EREFkiC6eomZiIiIOigsLER+fj42bNiAefPmeX0/X5s3bx6qq6tx6NChgJ2BiIiI2rCHnIiIiIiIiCgAGJATERF5KCoqCjfffHO3+8M9uR8RERENLCxZJyIiGiBYsk5ERBRcGJATERERERERBQBL1omIiIiIiIgCgAE5ERERERERUQD0+z3kdrsdpaWliI6OhiAIgT4OERERERER9XOiKKKxsREZGRlQqXrOg/f7gLy0tBTZ2dmBPgYRERERERENMOfOnUNWVlaPb+/3AXl0dDQAxwciJiYmwKchIiIiIiKi/q6hoQHZ2dlyPNqTfh+QS2XqMTExDMiJiIiIiIjIb/pqm+ZQNyIiIiIiIqIAYEBOREREREREFAAMyImIiIiIiIgCgAE5ERERERERUQAwICciIiIiIiIKAAbkRERERERERAHAgJyIiIiIiIgoABiQExEREREREQUAA3IiIiIiIiKiAGBATkRERERERBQADMiJiIiIiIiIAoABOREREREREVEAMCAnIiIiIiIiCgAG5EREREREREQBwI
CciIiIiIiIKAAYkBMRkU+UGYzYcroaZQZjoI9CREREFJQ0gT4AERH1P2t3FmHFuoOwi4BKAFYuGoPFU3ICfSwiIiK
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1200x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df_dependence = df.groupby('Age')['BMI'].mean().reset_index()\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(12, 6))\n",
|
|||
|
"\n",
|
|||
|
"plt.plot(df_dependence['Age'], df_dependence['BMI'], marker='.')\n",
|
|||
|
"\n",
|
|||
|
"plt.title(\"Диаграмма 2\")\n",
|
|||
|
"plt.xlabel(\"Возраст\")\n",
|
|||
|
"plt.ylabel(\"ИМТ\")\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Присутствует связь между атрибутами, уровень инфляции влияет и зависит от многих атрибутов.\n",
|
|||
|
"Для примера на первом графике приведена связь между возрастом и функцией родословной диабета. На втором графике показана связь возраста и среднего ИМТ\n",
|
|||
|
"Примеры бизнес целей\n",
|
|||
|
"\n",
|
|||
|
" 1.Прогнозирование уровня инфляции на основе уровня ВВП.\n",
|
|||
|
" 2.Наблюдение за изменениями уровня безработицы с уровнем ВВП.\n",
|
|||
|
" \n",
|
|||
|
"Эффект для бизнеса: влияние на инвестиции индекса акций и цен на нефть, исследование влияния фондового индекса на инвестиции, исследования инфляции и покупательской способности.\n",
|
|||
|
"Цели технического проекта\n",
|
|||
|
"\n",
|
|||
|
"Для первой цели:\n",
|
|||
|
"\n",
|
|||
|
"Вход: Доход на душу населения\n",
|
|||
|
"Целевой признак: Уровень инфляции.\n",
|
|||
|
"\n",
|
|||
|
"Для второй цели:\n",
|
|||
|
"\n",
|
|||
|
"Вход: Уровень безработицы\n",
|
|||
|
"Целевой признак: Уровень ВВП"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Проверка на пропущенные значения, выбросы и дубликаты"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 20,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Пропущенные значения по столбцам:\n",
|
|||
|
"Pregnancies 0\n",
|
|||
|
"Glucose 0\n",
|
|||
|
"BloodPressure 0\n",
|
|||
|
"SkinThickness 0\n",
|
|||
|
"Insulin 0\n",
|
|||
|
"BMI 0\n",
|
|||
|
"DiabetesPedigreeFunction 0\n",
|
|||
|
"Age 0\n",
|
|||
|
"Outcome 0\n",
|
|||
|
"dtype: int64\n",
|
|||
|
"\n",
|
|||
|
"Статистический обзор данных:\n",
|
|||
|
" Pregnancies Glucose BloodPressure SkinThickness Insulin \\\n",
|
|||
|
"count 768.000000 768.000000 768.000000 768.000000 768.000000 \n",
|
|||
|
"mean 3.845052 120.894531 69.105469 20.536458 79.799479 \n",
|
|||
|
"std 3.369578 31.972618 19.355807 15.952218 115.244002 \n",
|
|||
|
"min 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
|
|||
|
"25% 1.000000 99.000000 62.000000 0.000000 0.000000 \n",
|
|||
|
"50% 3.000000 117.000000 72.000000 23.000000 30.500000 \n",
|
|||
|
"75% 6.000000 140.250000 80.000000 32.000000 127.250000 \n",
|
|||
|
"max 17.000000 199.000000 122.000000 99.000000 846.000000 \n",
|
|||
|
"\n",
|
|||
|
" BMI DiabetesPedigreeFunction Age Outcome \n",
|
|||
|
"count 768.000000 768.000000 768.000000 768.000000 \n",
|
|||
|
"mean 31.992578 0.471876 33.240885 0.348958 \n",
|
|||
|
"std 7.884160 0.331329 11.760232 0.476951 \n",
|
|||
|
"min 0.000000 0.078000 21.000000 0.000000 \n",
|
|||
|
"25% 27.300000 0.243750 24.000000 0.000000 \n",
|
|||
|
"50% 32.000000 0.372500 29.000000 0.000000 \n",
|
|||
|
"75% 36.600000 0.626250 41.000000 1.000000 \n",
|
|||
|
"max 67.100000 2.420000 81.000000 1.000000 \n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"null_values = df.isnull().sum()\n",
|
|||
|
"print(\"Пропущенные значения по столбцам:\")\n",
|
|||
|
"print(null_values)\n",
|
|||
|
"\n",
|
|||
|
"stat_summary = df.describe()\n",
|
|||
|
"print(\"\\nСтатистический обзор данных:\")\n",
|
|||
|
"print(stat_summary)\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
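"На основе данных выше можно выделить, что пропущенных (NaN) значений нет; при этом в столбцах Glucose, BloodPressure, SkinThickness, Insulin и BMI минимальное значение равно 0\n",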
|
|||
|
"Также проверим данные на выбросы и дубликаты:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 21,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"\n",
|
|||
|
"Коэффициент асимметрии для столбца 'Pregnancies': 0.9016739791518588\n",
|
|||
|
"\n",
|
|||
|
"Коэффициент асимметрии для столбца 'Glucose': 0.17375350179188992\n",
|
|||
|
"\n",
|
|||
|
"Коэффициент асимметрии для столбца 'BloodPressure': -1.8436079833551302\n",
|
|||
|
"\n",
|
|||
|
"Коэффициент асимметрии для столбца 'SkinThickness': 0.10937249648187608\n",
|
|||
|
"\n",
|
|||
|
"Коэффициент асимметрии для столбца 'Insulin': 2.272250858431574\n",
|
|||
|
"\n",
|
|||
|
"Коэффициент асимметрии для столбца 'BMI': -0.42898158845356543\n",
|
|||
|
"\n",
|
|||
|
"Коэффициент асимметрии для столбца 'DiabetesPedigreeFunction': 1.919911066307204\n",
|
|||
|
"\n",
|
|||
|
"Коэффициент асимметрии для столбца 'Age': 1.1295967011444805\n",
|
|||
|
"\n",
|
|||
|
"Коэффициент асимметрии для столбца 'Outcome': 0.635016643444986\n",
|
|||
|
"\n",
|
|||
|
"Количество дубликатов: 0\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"for column in df.select_dtypes(include=[np.number]).columns:\n",
|
|||
|
" skewness = df[column].skew()\n",
|
|||
|
" print(f\"\\nКоэффициент асимметрии для столбца '{column}': {skewness}\")\n",
|
|||
|
"\n",
|
|||
|
"duplicates = df.duplicated().sum()\n",
|
|||
|
"print(f\"\\nКоличество дубликатов: {duplicates}\")\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"На основе данных выше можно сказать, что для столбцов выбросы незначительны. Дубликатов нет"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Очистка данных от шумов:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 26,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0kAAAIjCAYAAADWYVDIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAC02klEQVR4nOzdeXhU1fkH8O9kX8jKNkGBBFwBhYJbUHAvFgttbUt/LtStiFvrvmC1BK2idW+roFRBwbVWLSjFWleU4IJihahVSHAhQUM2SEhCMvf3R5hxtjtz7r3nrvP9PA9P68y957znPefe5CST+/oURVFAREREREREAIA0uwMgIiIiIiJyEm6SiIiIiIiIwnCTREREREREFIabJCIiIiIiojDcJBEREREREYXhJomIiIiIiCgMN0lERERERERhuEkiIiIiIiIKw00SERERERFRGG6SiIiIiGz09ddfY8mSJaH/rqurw2OPPWZfQETETRIRGfPMM8/A5/PF/TdmzBi7wyMicjyfz4eLLroIL730Eurq6nD11Vdj9erVdodFlNIy7A6AiLzhuuuuw4EHHhj675tvvtnGaIiI3GOvvfbCrFmzcNJJJwEAysrK8Prrr9sbFFGK8ymKotgdBBG51zPPPINf/vKXeO2113DMMceEXj/mmGPQ2NiIDRs22BccEZGLbNq0CY2NjRgzZgzy8/PtDocopfHjdkRkSHd3NwAgLS357WTJkiXw+Xyoq6sLvRYIBHDwwQfD5/NFfCb/v//9L8466yyMGDECOTk58Pv9OOecc7B9+/aINquqquJ+1C8j4/tflB9zzDEYM2YM1q1bh4kTJyI3NxcVFRVYuHBhzFj+8Ic/YMKECSgqKkJ+fj4mTZqE1157LeK4urq6UD/PP/98xHudnZ0oKSmBz+fDHXfcERPnoEGDsHv37ohznnjiiVB7jY2Nodf/+c9/4uSTT8aQIUOQnZ2NkSNH4qabbkJvb2/SXAf7+/TTTzFjxgwUFhaif//+uOSSS9DZ2Rlx7OLFi3Hcccdh0KBByM7OxqhRo7BgwYK47f7rX//C0UcfjYKCAhQWFuLQQw/F448/HnHMO++8g6lTp6KkpAT5+fk4+OCDce+990Yc8+mnn+IXv/gFSktLkZOTg0MOOQTLly+POEbLejnrrLMi5r+kpATHHHNMzEeWRHMaXDPR7rjjjpiYysvLcdZZZ0Uc9/e//x0+nw/l5eURr3/77bc499xzMWzYMKSnp4fi7devX0xf0crLy1U/2urz+WKOX7ZsGSZMmIDc3FyUlpbi//7v//DVV1/FHWeyawMAurq6MHfuXOyzzz7Izs7G0KFDcfXVV6Orqyvm2Ndff104zmjBtRtv/OF51rI+AISuhYEDByI3Nxf7778/fv/730f0mehf8Dc7xxxzTMQPhIC+35ynpaXFXAt///vfQ3MwYMAAnHHGGfjmm28ijjnrrLNC62TkyJE4/PDD0dTUhNzc3JjxEZF1+HE7IjIkuEnKzs7Wdf7SpUvx8ccfx7z+8ssvY/PmzTj77LPh9/uxceNGPPjgg9i4cSPWrl0b803UggULIr7RjN60NTc3Y+rUqZgxYwZOPfVUPP3007jggguQlZWFc845BwDQ1taGv/3tbzj11FMxa9Ys7NixAw899BCmTJmCd999F+PGjYtoMycnB4sXL8ZPf/rT0GvPPvtszCYk3I4dO/DCCy/gZz/7Wei1xYsXIycnJ+a8JUuWoF+/frj88svRr18/vPrqq/jDH/6AtrY23H777ap9hJsxYwbKy8sxf/58rF27Fn/+85/R3NyMRx99NCJ3o0ePxvTp05GRkYEVK1bgwgsvRCAQwEUXXRQRzznnnIPRo0djzpw5KC4uxocffohVq1bhtNNOA9A3bz/+8Y9RVlaGSy65BH6/H5988gleeOEFXHLJJQCAjRs34sgjj8Ree+2Fa6+9Fvn5+Xj66afx05/+FP/4xz8ichNNbb
0AwIABA3D33XcD6PtD+HvvvRdTp07FV199heLiYmk5Taanpyf0zXe0M888E//5z3/w29/+FmPHjkV6ejoefPBBfPDBB0Jtjxs3DldccUXEa48++ihefvnliNduvvlm3HDDDZgxYwZ+85vf4LvvvsNf/vIXTJ48GR9++GEoH4DYtREIBDB9+nS89dZbOO+883DggQfi448/xt13343//e9/MT8sCPrd736HQw89VDVO2dTWx3//+19MmjQJmZmZOO+881BeXo5NmzZhxYoVuPnmm3HKKadgn332CR1/2WWX4cADD8R5550Xei3848ThFi9ejOuvvx533nln6DoA+tba2WefjUMPPRTz58/Htm3bcO+99+Ltt9+OmYNof/jDHxLeR4jIAgoRkQH33HOPAkD56KOPIl4/+uijldGjR0e8tnjxYgWAUltbqyiKonR2dirDhg1TfvSjHykAlMWLF4eO7ejoiOnriSeeUAAob775Zui1uXPnKgCU7777TjXGo48+WgGg3HnnnaHXurq6lHHjximDBg1Suru7FUVRlJ6eHqWrqyvi3ObmZmXw4MHKOeecE3qttrZWAaCceuqpSkZGhtLQ0BB67/jjj1dOO+00BYBy++23x8R56qmnKj/+8Y9Dr2/ZskVJS0tTTj311JhxxMvB7Nmzlby8PKWzs1N1vOH9TZ8+PeL1Cy+8MGa+4vUzZcoUZcSIEaH/bmlpUQoKCpTDDz9c2bVrV8SxgUBAUZS+/FVUVCjDhw9Xmpub4x6jKH05OuiggyLGEAgElIkTJyr77rtv6DUt6+XMM89Uhg8fHtHngw8+qABQ3n333YRjjZfTeOtXURTl9ttvj4hJURRl+PDhyplnnhn67/vvv1/Jzs5Wjj322IiYdu3apaSlpSmzZ8+OaPPMM89U8vPzY/qKNnz4cOXkk0+Oef2iiy5Swr+c19XVKenp6crNN98ccdzHH3+sZGRkRLwuem0sXbpUSUtLU1avXh3R5sKFCxUAyttvvx3x+r///W8FgPLMM8+oxqlm3rx5CoCINRMcf3ietayPyZMnKwUFBcqWLVsi2ozuQ62vcEcffbRy9NFHK4qiKC+++KKSkZGhXHHFFRHHdHd3K4MGDVLGjBkTcb288MILCgDlD3/4Q+i16LW7YcMGJS0tLTSO8LVGRNbhx+2IyJDgx98GDhyo+dz77rsP27dvx9y5c2Pey83NDf3/zs5ONDY24ogjjgAA4Z+6h8vIyMDs2bND/52VlYXZs2fj22+/xbp16wAA6enpyMrKAtD3k/Ompib09PTgkEMOidvn+PHjMXr0aCxduhQAsGXLFrz22msxH70Kd84552DVqlVoaGgAADzyyCOorKzEfvvtF3NseA527NiBxsZGTJo0CR0dHfj000+Fxh3+myAA+O1vfwsAWLlyZdx+Wltb0djYiKOPPhqbN29Ga2srgL7fEO3YsQPXXnstcnJyItoM/lbvww8/RG1tLS699NKYn5IHj2lqasKrr76KGTNmhMbU2NiI7du3Y8qUKfj8889jPo4UlGi9AH1zFmxv/fr1ePTRR1FWVhbxGwAtOe3t7Q21F/zX0dERt++gjo4O3Hjjjbj44osxbNiwiPfa29sRCATQv3//hG0Y9eyzzyIQCGDGjBkRsfv9fuy7774xHx8VuTb+/ve/48ADD8QBBxwQ0eZxxx0HADFtBn8LEr1WRAwaNAhA328DtVBbH9999x3efPNNnHPOOTFzIvLxPzXvvvsuZsyYgZ///Ocxv4V8//338e233+LCCy+MyMHJJ5+MAw44AC+++KJqu3PmzMH48ePxy1/+UndsRGQcP25HRIZs2bIFGRkZmjdJra2tuOWWW3D55Zdj8ODBMe83NTVh3rx5ePLJJ/Htt9/GnKvVkCFDYv4QOrgxqaurC23AHnnkEdx555349NNPI/52qKKiIm67Z599Nh588EFceeWVWLJkCSZOnIh9991XNY5x48ZhzJgxePTRR3HVVV
dhyZIluO6662L+VgTo+1ja9ddfj1dffRVtbW0R74nmIDqWkSNHIi0tLeLvHN5++23MnTsX1dXVMZuA1tZWFBUVYdO
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Выбросы в датасете:\n",
|
|||
|
" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
|
|||
|
"131 9 122 56 0 0.0 33.3 \n",
|
|||
|
"152 9 156 86 28 155.0 34.3 \n",
|
|||
|
"267 2 128 64 42 0.0 40.0 \n",
|
|||
|
"270 10 101 86 37 0.0 45.6 \n",
|
|||
|
"314 7 109 80 31 0.0 35.9 \n",
|
|||
|
"408 8 197 74 0 0.0 25.9 \n",
|
|||
|
"416 1 97 68 21 0.0 27.2 \n",
|
|||
|
"434 1 90 68 8 0.0 24.5 \n",
|
|||
|
"487 0 173 78 32 265.0 46.5 \n",
|
|||
|
"493 4 125 70 18 122.0 28.9 \n",
|
|||
|
"588 3 176 86 27 156.0 33.3 \n",
|
|||
|
"657 1 120 80 48 200.0 38.9 \n",
|
|||
|
"744 13 153 88 37 140.0 40.6 \n",
|
|||
|
"747 1 81 74 41 57.0 46.3 \n",
|
|||
|
"750 4 136 70 0 0.0 31.2 \n",
|
|||
|
"\n",
|
|||
|
" DiabetesPedigreeFunction Age Outcome \n",
|
|||
|
"131 1.114 33 1 \n",
|
|||
|
"152 1.189 42 1 \n",
|
|||
|
"267 1.101 24 0 \n",
|
|||
|
"270 1.136 38 1 \n",
|
|||
|
"314 1.127 43 1 \n",
|
|||
|
"408 1.191 39 1 \n",
|
|||
|
"416 1.095 22 0 \n",
|
|||
|
"434 1.138 36 0 \n",
|
|||
|
"487 1.159 58 0 \n",
|
|||
|
"493 1.144 45 1 \n",
|
|||
|
"588 1.154 52 1 \n",
|
|||
|
"657 1.162 41 0 \n",
|
|||
|
"744 1.174 39 0 \n",
|
|||
|
"747 1.096 32 0 \n",
|
|||
|
"750 1.182 22 1 \n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0kAAAIjCAYAAADWYVDIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAC09UlEQVR4nOzdeXhU1fkH8O9M9oSsbAkCScA1oCAoEiyIVgtFoWotrVWKSxGtWneRuhC0grau/VVBUAHBrXUrKKXuSjG4gFghiggJqCRgCEkgIQnJ3N8fYYaZyZ2Zu5y7zvfzPDyPztw55z3vOfdOTmZyX48kSRKIiIiIiIgIAOC1OgAiIiIiIiI74SaJiIiIiIgoCDdJREREREREQbhJIiIiIiIiCsJNEhERERERURBukoiIiIiIiIJwk0RERERERBSEmyQiIiIiIqIg3CQREREREREF4SaJiIiIiIgoCDdJRGSIl156CR6PR/bf4MGDrQ6PiIiIKKJEqwMgInf705/+hOOOOy7w//fee6+F0RARERHFxk0SERnqrLPOwtixYwP//+STT6K2tta6gIiIiIhi4NftiMgQbW1tAACvN/ZlZvHixfB4PKiqqgo85vP5cMIJJ8Dj8WDx4sWBx//3v//hkksuwYABA5Camor8/Hxcdtll2LNnT0ibZWVlsl/1S0w8/LuhsWPHYvDgwVi3bh1GjRqFtLQ0FBcXY/78+V3Gctddd2H48OHIzs5GRkYGRo8ejffeey/kuKqqqkA/r732WshzLS0tyM3NhcfjwQMPPNAlzl69euHgwYMhr3n++ecD7QVvLP/1r3/h7LPPRp8+fZCSkoKBAwfinnvuQUdHR8xc+/v7+uuvMXnyZGRlZaF79+647rrr0NLSEnLsokWLcMYZZ6BXr15ISUlBSUkJ5s2bJ9vuv//9b5x22mnIzMxEVlYWTj75ZDz33HMhx3z88ceYMGECcnNzkZGRgRNOOAGPPvpoyDFff/01LrjgAuTl5SE1NRUnnXQSli9fHnKMmvVyySWXhMx/bm4uxo4di9WrV4e0qTSn/jUT7oEHHugSU1FRES655JKQ4/75z3/C4/GgqKgo5PHdu3fj8ssvR//+/ZGQkBCIt1u3bl36CldUVBTxq60ejyfk2Pb2dtxzzz0YOHAgUlJSUFRUhD/96U9obW3t0q6SOQ1e89H69fl8eOSRRzBo0CCkpqaid+/emD59Ovbu3atofOF5fP/99+HxePD+++8HHhs7dmzIL2QA4NNPP5WNBwCWLVuGESNGID09Hbm5uRgzZgzefPPNQJ/RcuqfP//4g9fcvn37MHz4cBQXF6O6ujricQBw9dVXw+PxdBkfEVmPnyQRkSH8m6SUlBRNr1+6dCm+/PLLLo+/9dZb2LZtGy699FLk5+dj06ZNWLBgATZt2oS1a9d2+WFo3rx5IT9ohm/a9u7diwkTJmDy5Mm48MIL8Y9//ANXXXUVkpOTcdlllwEAGhsb8eSTT+LCCy/EtGnTsG/fPjz11FMYN24cPvnkEwwdOjSkzdTUVCxatAjnnntu4LFXXnmlyyYk2L59+/D666/jvPPOCzy2aNEipKamdnnd4sWL0a1bN9x4443o1q0b3n33Xdx1111obGzEX//614h9BJs8eTKKioowd+5crF27Fn/729+wd+9ePPPMMyG5GzRoECZNmoTExESsWLECf/jDH+Dz+XD11VeHxHPZZZdh0KBBmDlzJnJycvD5559j1apV+O1vfwugc97OOeccFBQU4LrrrkN+fj6++uorvP7667juuusAAJs2bcKpp56KI444ArfddhsyMjLwj3/8A+eeey5efvnlkNyEi7ReAKBHjx54+OGHAQDff/89Hn30UUyYMAHfffcdcnJyhOU0lvb2dtx+++2yz02dOhVvv/02rr32WgwZMgQJCQlYsGAB1q9fr6jtoUOH4qabbgp57JlnnsFbb70V8tjvf/97LFmyBBdccA
FuuukmfPzxx5g7dy6++uorvPrqq4HjlMxpsCuuuAKjR48G0LnWg9sCgOnTp2Px4sW49NJL8cc//hGVlZX4+9//js8//xxr1qxBUlKSonGqNWPGDNnHZ8+ejbKyMowaNQp33303kpOT8fHHH+Pdd9/Fz372MzzyyCPYv38/AOCrr77CnDlzQr46HGnzevDgQfzyl7/Ejh07sGbNGhQUFESM7dtvv8XChQt1jpCIDCMRERngkUcekQBIX3zxRcjjp512mjRo0KCQxxYtWiQBkCorKyVJkqSWlhapf//+0s9//nMJgLRo0aLAsc3NzV36ev755yUA0ocffhh4bNasWRIA6ccff4wY42mnnSYBkB588MHAY62trdLQoUOlXr16SW1tbZIkSVJ7e7vU2toa8tq9e/dKvXv3li677LLAY5WVlRIA6cILL5QSExOlmpqawHM//elPpd/+9rcSAOmvf/1rlzgvvPBC6Zxzzgk8vn37dsnr9UoXXnhhl3HI5WD69OlSenq61NLSEnG8wf1NmjQp5PE//OEPXeZLrp9x48ZJAwYMCPx/fX29lJmZKZ1yyinSgQMHQo71+XySJHXmr7i4WCosLJT27t0re4wkdebo+OOPDxmDz+eTRo0aJR111FGBx9Ssl6lTp0qFhYUhfS5YsEACIH3yySdRxyqXU7n1K0mS9Ne//jUkJkmSpMLCQmnq1KmB/3/88cellJQU6fTTTw+J6cCBA5LX65WmT58e0ubUqVOljIyMLn2FKywslM4+++wuj1999dVS8Nv8hg0bJADS73//+5Djbr75ZgmA9O6770qSpGxO/bZs2SIBkJYsWRJ4zL/G/FavXi0BkJ599tmQ165atUr28XDFxcXS7373u5DH3nvvPQmA9N577wUeO+2006TTTjst8P8rV66UAEjjx48PiWfLli2S1+uVzjvvPKmjoyPq+CL15ec/5xctWiT5fD7poosuktLT06WPP/444nF+kydPlgYPHiz169cvZJ0QkT3w63ZEZAj/19969uyp+rWPPfYY9uzZg1mzZnV5Li0tLfDfLS0tqK2txciRIwFA8W/dgyUmJmL69OmB/09OTsb06dOxe/durFu3DgCQkJCA5ORkAJ1fG6qrq0N7eztOOukk2T6HDRuGQYMGYenSpQCA7du347333ov6lZrLLrsMq1atQk1NDQBgyZIlKC0txdFHH93l2OAc7Nu3D7W1tRg9ejSam5vx9ddfKxp38CdBAHDttdcCAFauXCnbT0NDA2pra3Haaadh27ZtaGhoAND5CdG+fftw2223ITU1NaRN/6d6n3/+OSorK3H99dcHPrkJP6aurg7vvvsuJk+eHBhTbW0t9uzZg3HjxmHLli344YcfZMcSbb0AnXPmb2/Dhg145plnUFBQEHJDETU57ejoCLTn/9fc3Czbt19zczPuvvtuXHPNNejfv3/Ic01NTfD5fOjevXvUNvTyz+2NN94Y8rj/E6g33ngDgLI59VPyifE///lPZGdn46yzzgrJ2fDhw9GtW7cuX1sN16tXL3z//fcKRniYJEmYOXMmfvnLX+KUU04Jee61116Dz+fDXXfd1eWTZbmv5Sl1yy234Nlnn8U//vEPjBgxIuqx69atwz//+U/MnTtX0VeSich8PDOJyBDbt29HYmKi6k1SQ0MD5syZgxtvvBG9e/fu8nxdXR2uu+469O7dG2lpaejZsyeKi4sDr1WrT58+yMjICHnMvzEJ/vuSJUuW4IQTTkBqaiq6d++Onj174o033ojY56WXXopFixYB6Pzq0qhRo3DUUUdFjGPo0KEYPHgwnnnmGUiSFPhqkpxNmzbhvPPOQ3Z2NrKystCzZ09cfPHFAJTnIDyWgQMHwuv1hox5zZo1OPPMM5GRkYGcnBz07NkTf/rTn0L62bp1KwBEva27kmO+/fZbSJKEO++8Ez179gz559/87N69u8vrYq0XAPjuu+8CbZ144onYunUrXn755ZCvTKnJ6d
dffx0xxkgeeughtLS0BPIXrHv37jjqqKPw5JNP4s0338Tu3btRW1sr+3dCemzfvh1erxdHHnlkyOP5+fnIycnB9u3
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"plt.scatter(df['DiabetesPedigreeFunction'], df['Age'])\n",
|
|||
|
"plt.xlabel('Функция родословной диабета')\n",
|
|||
|
"plt.ylabel('Возраст')\n",
|
|||
|
"plt.title('Диаграмма рассеивания перед чисткой')\n",
|
|||
|
"plt.show()\n",
|
|||
|
"\n",
|
|||
|
"Q1 = df[\"DiabetesPedigreeFunction\"].quantile(0.25)\n",
|
|||
|
"Q3 = df[\"DiabetesPedigreeFunction\"].quantile(0.75)\n",
|
|||
|
"\n",
|
|||
|
"IQR = Q3 - Q1\n",
|
|||
|
"\n",
|
|||
|
"threshold = 1.5 * IQR\n",
|
|||
|
"lower_bound = Q1 - threshold\n",
|
|||
|
"upper_bound = Q3 + threshold\n",
|
|||
|
"\n",
|
|||
|
"outliers = (df[\"DiabetesPedigreeFunction\"] < lower_bound) | (df[\"DiabetesPedigreeFunction\"] > upper_bound)\n",
|
|||
|
"\n",
|
|||
|
"# Вывод выбросов\n",
|
|||
|
"print(\"Выбросы в датасете:\")\n",
|
|||
|
"print(df[outliers])\n",
|
|||
|
"\n",
|
|||
|
"# Заменяем выбросы на медианные значения\n",
|
|||
|
"median_score = df[\"DiabetesPedigreeFunction\"].median()\n",
|
|||
|
"df.loc[outliers, \"DiabetesPedigreeFunction\"] = median_score\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация данных после обработки\n",
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"plt.scatter(df['DiabetesPedigreeFunction'], df['Age'])\n",
|
|||
|
"plt.xlabel('Функция родословной диабета')\n",
|
|||
|
"plt.ylabel('Возраст')\n",
|
|||
|
"plt.title('Диаграмма рассеивания после чистки')\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Разбиение набора данных на обучающую, контрольную и тестовую выборки"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 27,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Размер обучающей выборки: 460\n",
|
|||
|
"Размер контрольной выборки: 154\n",
|
|||
|
"Размер тестовой выборки: 154\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"\n",
|
|||
|
"train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"train_df, val_df = train_test_split(train_df, test_size=0.25, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"print(\"Размер обучающей выборки:\", len(train_df))\n",
|
|||
|
"print(\"Размер контрольной выборки:\", len(val_df))\n",
|
|||
|
"print(\"Размер тестовой выборки:\", len(test_df))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Проверяем распределение значений DiabetesPedigreeFunction в каждой выборке: видим недостаток баланса"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 28,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Распределение функции родословной диабета в обучающей выборке:\n",
|
|||
|
"DiabetesPedigreeFunction\n",
|
|||
|
"0.37250 12\n",
|
|||
|
"0.37175 10\n",
|
|||
|
"0.25800 5\n",
|
|||
|
"0.19700 4\n",
|
|||
|
"0.23800 4\n",
|
|||
|
" ..\n",
|
|||
|
"0.52600 1\n",
|
|||
|
"0.60000 1\n",
|
|||
|
"0.45400 1\n",
|
|||
|
"0.70400 1\n",
|
|||
|
"0.46300 1\n",
|
|||
|
"Name: count, Length: 332, dtype: int64\n",
|
|||
|
"\n",
|
|||
|
"Распределение функции родословной диабета в контрольной выборке:\n",
|
|||
|
"DiabetesPedigreeFunction\n",
|
|||
|
"0.37250 10\n",
|
|||
|
"0.14200 3\n",
|
|||
|
"0.37175 3\n",
|
|||
|
"0.25400 3\n",
|
|||
|
"0.14100 2\n",
|
|||
|
" ..\n",
|
|||
|
"0.24500 1\n",
|
|||
|
"0.69300 1\n",
|
|||
|
"0.12100 1\n",
|
|||
|
"0.68600 1\n",
|
|||
|
"0.12200 1\n",
|
|||
|
"Name: count, Length: 130, dtype: int64\n",
|
|||
|
"\n",
|
|||
|
"Распределение функции родословной диабета в тестовой выборке:\n",
|
|||
|
"DiabetesPedigreeFunction\n",
|
|||
|
"0.3725 7\n",
|
|||
|
"0.1480 2\n",
|
|||
|
"0.4430 2\n",
|
|||
|
"0.2070 2\n",
|
|||
|
"0.5200 2\n",
|
|||
|
" ..\n",
|
|||
|
"0.2480 1\n",
|
|||
|
"0.2360 1\n",
|
|||
|
"0.3020 1\n",
|
|||
|
"0.4850 1\n",
|
|||
|
"0.7050 1\n",
|
|||
|
"Name: count, Length: 134, dtype: int64\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"def check_balance(df, name):\n",
|
|||
|
" counts = df['DiabetesPedigreeFunction'].value_counts()\n",
|
|||
|
" print(f\"Распределение функции родословной диабета в {name}:\")\n",
|
|||
|
" print(counts)\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
"check_balance(train_df, \"обучающей выборке\")\n",
|
|||
|
"check_balance(val_df, \"контрольной выборке\")\n",
|
|||
|
"check_balance(test_df, \"тестовой выборке\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Также используем oversampling и undersampling для выравнивания классов"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 33,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Оверсэмплинг:\n",
|
|||
|
"Распределение функции родословной диабета в обучающей выборке:\n",
|
|||
|
"DiabetesPedigreeFunction\n",
|
|||
|
"0.37250 12\n",
|
|||
|
"0.37175 10\n",
|
|||
|
"0.25800 5\n",
|
|||
|
"0.19700 4\n",
|
|||
|
"0.23800 4\n",
|
|||
|
" ..\n",
|
|||
|
"0.52600 1\n",
|
|||
|
"0.60000 1\n",
|
|||
|
"0.45400 1\n",
|
|||
|
"0.70400 1\n",
|
|||
|
"0.46300 1\n",
|
|||
|
"Name: count, Length: 332, dtype: int64\n",
|
|||
|
"\n",
|
|||
|
"Распределение функции родословной диабета в контрольной выборке:\n",
|
|||
|
"DiabetesPedigreeFunction\n",
|
|||
|
"0.3725 10\n",
|
|||
|
"0.4390 4\n",
|
|||
|
"0.1420 3\n",
|
|||
|
"0.7300 3\n",
|
|||
|
"0.2540 3\n",
|
|||
|
" ..\n",
|
|||
|
"0.2450 1\n",
|
|||
|
"0.6930 1\n",
|
|||
|
"0.1210 1\n",
|
|||
|
"0.6860 1\n",
|
|||
|
"0.1220 1\n",
|
|||
|
"Name: count, Length: 130, dtype: int64\n",
|
|||
|
"\n",
|
|||
|
"Распределение функции родословной диабета в тестовой выборке:\n",
|
|||
|
"DiabetesPedigreeFunction\n",
|
|||
|
"0.3725 7\n",
|
|||
|
"0.1480 2\n",
|
|||
|
"0.4430 2\n",
|
|||
|
"0.2070 2\n",
|
|||
|
"0.5200 2\n",
|
|||
|
" ..\n",
|
|||
|
"0.2480 1\n",
|
|||
|
"0.2360 1\n",
|
|||
|
"0.3020 1\n",
|
|||
|
"0.4850 1\n",
|
|||
|
"0.7050 1\n",
|
|||
|
"Name: count, Length: 134, dtype: int64\n",
|
|||
|
"\n",
|
|||
|
"Андерсэмплинг:\n",
|
|||
|
"Распределение функции родословной диабета в обучающей выборке:\n",
|
|||
|
"DiabetesPedigreeFunction\n",
|
|||
|
"0.37250 12\n",
|
|||
|
"0.37175 10\n",
|
|||
|
"0.25800 5\n",
|
|||
|
"0.29900 4\n",
|
|||
|
"0.19700 4\n",
|
|||
|
" ..\n",
|
|||
|
"0.38100 1\n",
|
|||
|
"0.30000 1\n",
|
|||
|
"0.13300 1\n",
|
|||
|
"0.23400 1\n",
|
|||
|
"0.59800 1\n",
|
|||
|
"Name: count, Length: 332, dtype: int64\n",
|
|||
|
"\n",
|
|||
|
"Распределение функции родословной диабета в контрольной выборке:\n",
|
|||
|
"DiabetesPedigreeFunction\n",
|
|||
|
"0.37250 6\n",
|
|||
|
"0.14200 3\n",
|
|||
|
"0.37175 3\n",
|
|||
|
"0.25400 3\n",
|
|||
|
"0.28400 2\n",
|
|||
|
" ..\n",
|
|||
|
"0.55900 1\n",
|
|||
|
"0.69200 1\n",
|
|||
|
"0.42300 1\n",
|
|||
|
"0.69300 1\n",
|
|||
|
"0.68600 1\n",
|
|||
|
"Name: count, Length: 117, dtype: int64\n",
|
|||
|
"\n",
|
|||
|
"Распределение функции родословной диабета в тестовой выборке:\n",
|
|||
|
"DiabetesPedigreeFunction\n",
|
|||
|
"0.3725 7\n",
|
|||
|
"0.1480 2\n",
|
|||
|
"0.2070 2\n",
|
|||
|
"0.2590 2\n",
|
|||
|
"0.3700 2\n",
|
|||
|
" ..\n",
|
|||
|
"0.8400 1\n",
|
|||
|
"0.5250 1\n",
|
|||
|
"0.5360 1\n",
|
|||
|
"0.8550 1\n",
|
|||
|
"0.6990 1\n",
|
|||
|
"Name: count, Length: 134, dtype: int64\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from imblearn.over_sampling import RandomOverSampler\n",
|
|||
|
"from imblearn.under_sampling import RandomUnderSampler\n",
|
|||
|
"\n",
|
|||
|
"def binning(target, bins):\n",
|
|||
|
" return pd.qcut(target, q=bins, labels=False)\n",
|
|||
|
"\n",
|
|||
|
"train_df['gdppercent_binned'] = binning(train_df['DiabetesPedigreeFunction'], bins=2)\n",
|
|||
|
"val_df['gdppercent_binned'] = binning(val_df['DiabetesPedigreeFunction'], bins=2)\n",
|
|||
|
"test_df['gdppercent_binned'] = binning(test_df['DiabetesPedigreeFunction'], bins=2)\n",
|
|||
|
"\n",
|
|||
|
"def oversample(df, target_column):\n",
|
|||
|
" X = df.drop(target_column, axis=1)\n",
|
|||
|
" y = df[target_column]\n",
|
|||
|
" \n",
|
|||
|
" oversampler = RandomOverSampler(random_state=42)\n",
|
|||
|
" x_resampled, y_resampled = oversampler.fit_resample(X, y) # type: ignore\n",
|
|||
|
" \n",
|
|||
|
" resampled_df = pd.concat([x_resampled, y_resampled], axis=1) \n",
|
|||
|
" return resampled_df\n",
|
|||
|
"\n",
|
|||
|
"def undersample(df, target_column):\n",
|
|||
|
" X = df.drop(target_column, axis=1)\n",
|
|||
|
" y = df[target_column]\n",
|
|||
|
" \n",
|
|||
|
" undersampler = RandomUnderSampler(random_state=42)\n",
|
|||
|
" x_resampled, y_resampled = undersampler.fit_resample(X, y) # type: ignore\n",
|
|||
|
" \n",
|
|||
|
" resampled_df = pd.concat([x_resampled, y_resampled], axis=1)\n",
|
|||
|
" return resampled_df\n",
|
|||
|
"\n",
|
|||
|
"train_df_oversampled = oversample(train_df, 'gdppercent_binned')\n",
|
|||
|
"val_df_oversampled = oversample(val_df, 'gdppercent_binned')\n",
|
|||
|
"test_df_oversampled = oversample(test_df, 'gdppercent_binned')\n",
|
|||
|
"\n",
|
|||
|
"train_df_undersampled = undersample(train_df, 'gdppercent_binned')\n",
|
|||
|
"val_df_undersampled = undersample(val_df, 'gdppercent_binned')\n",
|
|||
|
"test_df_undersampled = undersample(test_df, 'gdppercent_binned')\n",
|
|||
|
"\n",
|
|||
|
"print(\"Оверсэмплинг:\")\n",
|
|||
|
"check_balance(train_df_oversampled, \"обучающей выборке\")\n",
|
|||
|
"check_balance(val_df_oversampled, \"контрольной выборке\")\n",
|
|||
|
"check_balance(test_df_oversampled, \"тестовой выборке\")\n",
|
|||
|
"\n",
|
|||
|
"print(\"Андерсэмплинг:\")\n",
|
|||
|
"check_balance(train_df_undersampled, \"обучающей выборке\")\n",
|
|||
|
"check_balance(val_df_undersampled, \"контрольной выборке\")\n",
|
|||
|
"check_balance(test_df_undersampled, \"тестовой выборке\")"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": ".venv",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.6"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|