1550 lines
246 KiB
Plaintext
1550 lines
246 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Вариант 2. Показатели сердечных заболеваний"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 228,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from typing import Any\n",
|
||
"from math import ceil\n",
|
||
"\n",
|
||
"import pandas as pd\n",
|
||
"from pandas import DataFrame, Series\n",
|
||
"from sklearn.model_selection import train_test_split\n",
|
||
"from imblearn.over_sampling import ADASYN, SMOTE\n",
|
||
"from imblearn.under_sampling import RandomUnderSampler\n",
|
||
"import matplotlib.pyplot as plt"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Загрузим данные из датасета"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 229,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"df = pd.read_csv('csv\\\\heart_2022_no_nans.csv')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Посмотрим общие сведения о датасете"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 230,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"RangeIndex: 246022 entries, 0 to 246021\n",
|
||
"Data columns (total 40 columns):\n",
|
||
" # Column Non-Null Count Dtype \n",
|
||
"--- ------ -------------- ----- \n",
|
||
" 0 State 246022 non-null object \n",
|
||
" 1 Sex 246022 non-null object \n",
|
||
" 2 GeneralHealth 246022 non-null object \n",
|
||
" 3 PhysicalHealthDays 246022 non-null float64\n",
|
||
" 4 MentalHealthDays 246022 non-null float64\n",
|
||
" 5 LastCheckupTime 246022 non-null object \n",
|
||
" 6 PhysicalActivities 246022 non-null object \n",
|
||
" 7 SleepHours 246022 non-null float64\n",
|
||
" 8 RemovedTeeth 246022 non-null object \n",
|
||
" 9 HadHeartAttack 246022 non-null object \n",
|
||
" 10 HadAngina 246022 non-null object \n",
|
||
" 11 HadStroke 246022 non-null object \n",
|
||
" 12 HadAsthma 246022 non-null object \n",
|
||
" 13 HadSkinCancer 246022 non-null object \n",
|
||
" 14 HadCOPD 246022 non-null object \n",
|
||
" 15 HadDepressiveDisorder 246022 non-null object \n",
|
||
" 16 HadKidneyDisease 246022 non-null object \n",
|
||
" 17 HadArthritis 246022 non-null object \n",
|
||
" 18 HadDiabetes 246022 non-null object \n",
|
||
" 19 DeafOrHardOfHearing 246022 non-null object \n",
|
||
" 20 BlindOrVisionDifficulty 246022 non-null object \n",
|
||
" 21 DifficultyConcentrating 246022 non-null object \n",
|
||
" 22 DifficultyWalking 246022 non-null object \n",
|
||
" 23 DifficultyDressingBathing 246022 non-null object \n",
|
||
" 24 DifficultyErrands 246022 non-null object \n",
|
||
" 25 SmokerStatus 246022 non-null object \n",
|
||
" 26 ECigaretteUsage 246022 non-null object \n",
|
||
" 27 ChestScan 246022 non-null object \n",
|
||
" 28 RaceEthnicityCategory 246022 non-null object \n",
|
||
" 29 AgeCategory 246022 non-null object \n",
|
||
" 30 HeightInMeters 246022 non-null float64\n",
|
||
" 31 WeightInKilograms 246022 non-null float64\n",
|
||
" 32 BMI 246022 non-null float64\n",
|
||
" 33 AlcoholDrinkers 246022 non-null object \n",
|
||
" 34 HIVTesting 246022 non-null object \n",
|
||
" 35 FluVaxLast12 246022 non-null object \n",
|
||
" 36 PneumoVaxEver 246022 non-null object \n",
|
||
" 37 TetanusLast10Tdap 246022 non-null object \n",
|
||
" 38 HighRiskLastYear 246022 non-null object \n",
|
||
" 39 CovidPos 246022 non-null object \n",
|
||
"dtypes: float64(6), object(34)\n",
|
||
"memory usage: 75.1+ MB\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>count</th>\n",
|
||
" <th>mean</th>\n",
|
||
" <th>std</th>\n",
|
||
" <th>min</th>\n",
|
||
" <th>25%</th>\n",
|
||
" <th>50%</th>\n",
|
||
" <th>75%</th>\n",
|
||
" <th>max</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>PhysicalHealthDays</th>\n",
|
||
" <td>246022.0</td>\n",
|
||
" <td>4.119026</td>\n",
|
||
" <td>8.405844</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>3.00</td>\n",
|
||
" <td>30.00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>MentalHealthDays</th>\n",
|
||
" <td>246022.0</td>\n",
|
||
" <td>4.167140</td>\n",
|
||
" <td>8.102687</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>4.00</td>\n",
|
||
" <td>30.00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>SleepHours</th>\n",
|
||
" <td>246022.0</td>\n",
|
||
" <td>7.021331</td>\n",
|
||
" <td>1.440681</td>\n",
|
||
" <td>1.00</td>\n",
|
||
" <td>6.00</td>\n",
|
||
" <td>7.00</td>\n",
|
||
" <td>8.00</td>\n",
|
||
" <td>24.00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>HeightInMeters</th>\n",
|
||
" <td>246022.0</td>\n",
|
||
" <td>1.705150</td>\n",
|
||
" <td>0.106654</td>\n",
|
||
" <td>0.91</td>\n",
|
||
" <td>1.63</td>\n",
|
||
" <td>1.70</td>\n",
|
||
" <td>1.78</td>\n",
|
||
" <td>2.41</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>WeightInKilograms</th>\n",
|
||
" <td>246022.0</td>\n",
|
||
" <td>83.615179</td>\n",
|
||
" <td>21.323156</td>\n",
|
||
" <td>28.12</td>\n",
|
||
" <td>68.04</td>\n",
|
||
" <td>81.65</td>\n",
|
||
" <td>95.25</td>\n",
|
||
" <td>292.57</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>BMI</th>\n",
|
||
" <td>246022.0</td>\n",
|
||
" <td>28.668136</td>\n",
|
||
" <td>6.513973</td>\n",
|
||
" <td>12.02</td>\n",
|
||
" <td>24.27</td>\n",
|
||
" <td>27.46</td>\n",
|
||
" <td>31.89</td>\n",
|
||
" <td>97.65</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" count mean std min 25% 50% \\\n",
|
||
"PhysicalHealthDays 246022.0 4.119026 8.405844 0.00 0.00 0.00 \n",
|
||
"MentalHealthDays 246022.0 4.167140 8.102687 0.00 0.00 0.00 \n",
|
||
"SleepHours 246022.0 7.021331 1.440681 1.00 6.00 7.00 \n",
|
||
"HeightInMeters 246022.0 1.705150 0.106654 0.91 1.63 1.70 \n",
|
||
"WeightInKilograms 246022.0 83.615179 21.323156 28.12 68.04 81.65 \n",
|
||
"BMI 246022.0 28.668136 6.513973 12.02 24.27 27.46 \n",
|
||
"\n",
|
||
" 75% max \n",
|
||
"PhysicalHealthDays 3.00 30.00 \n",
|
||
"MentalHealthDays 4.00 30.00 \n",
|
||
"SleepHours 8.00 24.00 \n",
|
||
"HeightInMeters 1.78 2.41 \n",
|
||
"WeightInKilograms 95.25 292.57 \n",
|
||
"BMI 31.89 97.65 "
|
||
]
|
||
},
|
||
"execution_count": 230,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.info()\n",
|
||
"df.describe().transpose()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Получим информацию о пустых значениях в колонках датасета"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 231,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def get_null_columns_info(df: DataFrame) -> DataFrame:\n",
|
||
" \"\"\"\n",
|
||
" Возвращает информацию о пропущенных значениях в колонках датасета\n",
|
||
" \"\"\"\n",
|
||
" w = []\n",
|
||
" df_len = len(df)\n",
|
||
"\n",
|
||
" for column in df.columns:\n",
|
||
" column_nulls = df[column].isnull()\n",
|
||
" w.append([column, column_nulls.any(), column_nulls.sum() / df_len])\n",
|
||
"\n",
|
||
" null_df = DataFrame(w).rename(columns={0: \"Column\", 1: \"Has Null\", 2: \"Null Percent\"})\n",
|
||
"\n",
|
||
" return null_df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 232,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Column</th>\n",
|
||
" <th>Has Null</th>\n",
|
||
" <th>Null Percent</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>State</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>Sex</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>GeneralHealth</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>PhysicalHealthDays</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>MentalHealthDays</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>LastCheckupTime</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>PhysicalActivities</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>SleepHours</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>RemovedTeeth</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>HadHeartAttack</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>10</th>\n",
|
||
" <td>HadAngina</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>11</th>\n",
|
||
" <td>HadStroke</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>12</th>\n",
|
||
" <td>HadAsthma</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>13</th>\n",
|
||
" <td>HadSkinCancer</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>14</th>\n",
|
||
" <td>HadCOPD</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>15</th>\n",
|
||
" <td>HadDepressiveDisorder</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>16</th>\n",
|
||
" <td>HadKidneyDisease</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>17</th>\n",
|
||
" <td>HadArthritis</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>18</th>\n",
|
||
" <td>HadDiabetes</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>19</th>\n",
|
||
" <td>DeafOrHardOfHearing</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>20</th>\n",
|
||
" <td>BlindOrVisionDifficulty</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>21</th>\n",
|
||
" <td>DifficultyConcentrating</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>22</th>\n",
|
||
" <td>DifficultyWalking</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>23</th>\n",
|
||
" <td>DifficultyDressingBathing</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>24</th>\n",
|
||
" <td>DifficultyErrands</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25</th>\n",
|
||
" <td>SmokerStatus</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>26</th>\n",
|
||
" <td>ECigaretteUsage</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>27</th>\n",
|
||
" <td>ChestScan</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>28</th>\n",
|
||
" <td>RaceEthnicityCategory</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>29</th>\n",
|
||
" <td>AgeCategory</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>30</th>\n",
|
||
" <td>HeightInMeters</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>31</th>\n",
|
||
" <td>WeightInKilograms</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>32</th>\n",
|
||
" <td>BMI</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33</th>\n",
|
||
" <td>AlcoholDrinkers</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>34</th>\n",
|
||
" <td>HIVTesting</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>35</th>\n",
|
||
" <td>FluVaxLast12</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>36</th>\n",
|
||
" <td>PneumoVaxEver</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>37</th>\n",
|
||
" <td>TetanusLast10Tdap</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>38</th>\n",
|
||
" <td>HighRiskLastYear</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>39</th>\n",
|
||
" <td>CovidPos</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Column Has Null Null Percent\n",
|
||
"0 State False 0.0\n",
|
||
"1 Sex False 0.0\n",
|
||
"2 GeneralHealth False 0.0\n",
|
||
"3 PhysicalHealthDays False 0.0\n",
|
||
"4 MentalHealthDays False 0.0\n",
|
||
"5 LastCheckupTime False 0.0\n",
|
||
"6 PhysicalActivities False 0.0\n",
|
||
"7 SleepHours False 0.0\n",
|
||
"8 RemovedTeeth False 0.0\n",
|
||
"9 HadHeartAttack False 0.0\n",
|
||
"10 HadAngina False 0.0\n",
|
||
"11 HadStroke False 0.0\n",
|
||
"12 HadAsthma False 0.0\n",
|
||
"13 HadSkinCancer False 0.0\n",
|
||
"14 HadCOPD False 0.0\n",
|
||
"15 HadDepressiveDisorder False 0.0\n",
|
||
"16 HadKidneyDisease False 0.0\n",
|
||
"17 HadArthritis False 0.0\n",
|
||
"18 HadDiabetes False 0.0\n",
|
||
"19 DeafOrHardOfHearing False 0.0\n",
|
||
"20 BlindOrVisionDifficulty False 0.0\n",
|
||
"21 DifficultyConcentrating False 0.0\n",
|
||
"22 DifficultyWalking False 0.0\n",
|
||
"23 DifficultyDressingBathing False 0.0\n",
|
||
"24 DifficultyErrands False 0.0\n",
|
||
"25 SmokerStatus False 0.0\n",
|
||
"26 ECigaretteUsage False 0.0\n",
|
||
"27 ChestScan False 0.0\n",
|
||
"28 RaceEthnicityCategory False 0.0\n",
|
||
"29 AgeCategory False 0.0\n",
|
||
"30 HeightInMeters False 0.0\n",
|
||
"31 WeightInKilograms False 0.0\n",
|
||
"32 BMI False 0.0\n",
|
||
"33 AlcoholDrinkers False 0.0\n",
|
||
"34 HIVTesting False 0.0\n",
|
||
"35 FluVaxLast12 False 0.0\n",
|
||
"36 PneumoVaxEver False 0.0\n",
|
||
"37 TetanusLast10Tdap False 0.0\n",
|
||
"38 HighRiskLastYear False 0.0\n",
|
||
"39 CovidPos False 0.0"
|
||
]
|
||
},
|
||
"execution_count": 232,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"get_null_columns_info(df)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Получим информацию о выбросах"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 233,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def get_numeric_columns(df: DataFrame) -> list[str]:\n",
|
||
" \"\"\"\n",
|
||
" Возвращает список числовых колонок\n",
|
||
" \"\"\"\n",
|
||
" return list(filter(lambda column: pd.api.types.is_numeric_dtype(df[column]), df.columns))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 234,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def get_outliers_info(df: DataFrame) -> DataFrame:\n",
|
||
" \"\"\"\n",
|
||
" Возвращает информацию о выбросах в числовых колонках датасета\n",
|
||
" \"\"\"\n",
|
||
" data = {\n",
|
||
" \"Column\": [],\n",
|
||
" \"Has Outliers\": [],\n",
|
||
" \"Outliers Count\": [],\n",
|
||
" \"Min Value\": [],\n",
|
||
" \"Max Value\": [],\n",
|
||
" \"Q1\": [],\n",
|
||
" \"Q3\": []\n",
|
||
" }\n",
|
||
"\n",
|
||
" info = DataFrame(data)\n",
|
||
"\n",
|
||
" for column in get_numeric_columns(df):\n",
|
||
" Q1: float = df[column].quantile(0.25)\n",
|
||
" Q3: float = df[column].quantile(0.75)\n",
|
||
" IQR: float = Q3 - Q1\n",
|
||
"\n",
|
||
" lower_bound: float = Q1 - 1.5 * IQR\n",
|
||
" upper_bound: float = Q3 + 1.5 * IQR\n",
|
||
"\n",
|
||
" outliers: DataFrame = df[(df[column] < lower_bound) | (df[column] > upper_bound)]\n",
|
||
" outlier_count: int = outliers.shape[0]\n",
|
||
"\n",
|
||
" info.loc[len(info)] = [column, outlier_count > 0, outlier_count, df[column].min(), df[column].max(), Q1, Q3]\n",
|
||
"\n",
|
||
" return info"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Посмотрим данные по выбросам"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 235,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Column</th>\n",
|
||
" <th>Has Outliers</th>\n",
|
||
" <th>Outliers Count</th>\n",
|
||
" <th>Min Value</th>\n",
|
||
" <th>Max Value</th>\n",
|
||
" <th>Q1</th>\n",
|
||
" <th>Q3</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>PhysicalHealthDays</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>38810</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>30.00</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>3.00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>MentalHealthDays</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>32714</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>30.00</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>4.00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>SleepHours</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>3488</td>\n",
|
||
" <td>1.00</td>\n",
|
||
" <td>24.00</td>\n",
|
||
" <td>6.00</td>\n",
|
||
" <td>8.00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>HeightInMeters</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>830</td>\n",
|
||
" <td>0.91</td>\n",
|
||
" <td>2.41</td>\n",
|
||
" <td>1.63</td>\n",
|
||
" <td>1.78</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>WeightInKilograms</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>5940</td>\n",
|
||
" <td>28.12</td>\n",
|
||
" <td>292.57</td>\n",
|
||
" <td>68.04</td>\n",
|
||
" <td>95.25</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>BMI</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>7563</td>\n",
|
||
" <td>12.02</td>\n",
|
||
" <td>97.65</td>\n",
|
||
" <td>24.27</td>\n",
|
||
" <td>31.89</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Column Has Outliers Outliers Count Min Value Max Value \\\n",
|
||
"0 PhysicalHealthDays True 38810 0.00 30.00 \n",
|
||
"1 MentalHealthDays True 32714 0.00 30.00 \n",
|
||
"2 SleepHours True 3488 1.00 24.00 \n",
|
||
"3 HeightInMeters True 830 0.91 2.41 \n",
|
||
"4 WeightInKilograms True 5940 28.12 292.57 \n",
|
||
"5 BMI True 7563 12.02 97.65 \n",
|
||
"\n",
|
||
" Q1 Q3 \n",
|
||
"0 0.00 3.00 \n",
|
||
"1 0.00 4.00 \n",
|
||
"2 6.00 8.00 \n",
|
||
"3 1.63 1.78 \n",
|
||
"4 68.04 95.25 \n",
|
||
"5 24.27 31.89 "
|
||
]
|
||
},
|
||
"execution_count": 235,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"outliers_info = get_outliers_info(df)\n",
|
||
"outliers_info"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 236,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def visualize_outliers(df: DataFrame) -> None:\n",
|
||
" \"\"\"\n",
|
||
" Генерирует диаграммы BoxPlot для числовых колонок датасета\n",
|
||
" \"\"\"\n",
|
||
" columns = get_numeric_columns(df)\n",
|
||
" plt.figure(figsize=(15, 10))\n",
|
||
" rows: int = ceil(len(columns) / 3)\n",
|
||
" for index, column in enumerate(columns, 1):\n",
|
||
" plt.subplot(rows, 3, index)\n",
|
||
" plt.boxplot(df[column], vert=True, patch_artist=True)\n",
|
||
" plt.title(f\"Диаграмма размахов\\n\\\"{column}\\\"\")\n",
|
||
" plt.xlabel(column)\n",
|
||
" \n",
|
||
" plt.tight_layout()\n",
|
||
" plt.show()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Визуализируем выбросы с помощью диаграмм"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 237,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 1500x1000 with 6 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"visualize_outliers(df)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 238,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def remove_outliers(df: DataFrame, columns: list[str]) -> DataFrame:\n",
|
||
" \"\"\"\n",
|
||
" Устраняет выбросы в заданных колонках:\n",
|
||
" значениям выше верхней границы (Q3 + 1.5 * IQR) задает значение верхней границы, значениям ниже нижней границы (Q1 - 1.5 * IQR) - значение нижней границы\n",
|
||
" \"\"\"\n",
|
||
" for column in columns:\n",
|
||
" Q1: float = df[column].quantile(0.25)\n",
|
||
" Q3: float = df[column].quantile(0.75)\n",
|
||
" IQR: float = Q3 - Q1\n",
|
||
"\n",
|
||
" lower_bound: float = Q1 - 1.5 * IQR\n",
|
||
" upper_bound: float = Q3 + 1.5 * IQR\n",
|
||
"\n",
|
||
" df[column] = df[column].apply(lambda x: lower_bound if x < lower_bound else upper_bound if x > upper_bound else x)\n",
|
||
" \n",
|
||
" return df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
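||
"Устраняем выбросы: значения за пределами границ [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] заменяем граничными значениями (строки при этом не удаляются)"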
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 239,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"outliers_columns = list(outliers_info[outliers_info[\"Has Outliers\"] == True][\"Column\"])\n",
|
||
"df = remove_outliers(df, outliers_columns)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Снова получим данные о выбросах"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 240,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Column</th>\n",
|
||
" <th>Has Outliers</th>\n",
|
||
" <th>Outliers Count</th>\n",
|
||
" <th>Min Value</th>\n",
|
||
" <th>Max Value</th>\n",
|
||
" <th>Q1</th>\n",
|
||
" <th>Q3</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>PhysicalHealthDays</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.000</td>\n",
|
||
" <td>7.500</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>3.00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>MentalHealthDays</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.000</td>\n",
|
||
" <td>10.000</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>4.00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>SleepHours</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>3.000</td>\n",
|
||
" <td>11.000</td>\n",
|
||
" <td>6.00</td>\n",
|
||
" <td>8.00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>HeightInMeters</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1.405</td>\n",
|
||
" <td>2.005</td>\n",
|
||
" <td>1.63</td>\n",
|
||
" <td>1.78</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>WeightInKilograms</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>28.120</td>\n",
|
||
" <td>136.065</td>\n",
|
||
" <td>68.04</td>\n",
|
||
" <td>95.25</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>BMI</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>12.840</td>\n",
|
||
" <td>43.320</td>\n",
|
||
" <td>24.27</td>\n",
|
||
" <td>31.89</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Column Has Outliers Outliers Count Min Value Max Value \\\n",
|
||
"0 PhysicalHealthDays False 0 0.000 7.500 \n",
|
||
"1 MentalHealthDays False 0 0.000 10.000 \n",
|
||
"2 SleepHours False 0 3.000 11.000 \n",
|
||
"3 HeightInMeters False 0 1.405 2.005 \n",
|
||
"4 WeightInKilograms False 0 28.120 136.065 \n",
|
||
"5 BMI False 0 12.840 43.320 \n",
|
||
"\n",
|
||
" Q1 Q3 \n",
|
||
"0 0.00 3.00 \n",
|
||
"1 0.00 4.00 \n",
|
||
"2 6.00 8.00 \n",
|
||
"3 1.63 1.78 \n",
|
||
"4 68.04 95.25 \n",
|
||
"5 24.27 31.89 "
|
||
]
|
||
},
|
||
"execution_count": 240,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"get_outliers_info(df)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Видим, что выбросов не осталось — проверим это с помощью диаграмм размаха"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 241,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 1500x1000 with 6 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"visualize_outliers(df)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Нормализация числовых признаков"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 242,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn import preprocessing"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 247,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>count</th>\n",
|
||
" <th>mean</th>\n",
|
||
" <th>std</th>\n",
|
||
" <th>min</th>\n",
|
||
" <th>25%</th>\n",
|
||
" <th>50%</th>\n",
|
||
" <th>75%</th>\n",
|
||
" <th>max</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>PhysicalHealthDaysNorm</th>\n",
|
||
" <td>246022.0</td>\n",
|
||
" <td>0.253306</td>\n",
|
||
" <td>0.385378</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.400000</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>MentalHealthDaysNorm</th>\n",
|
||
" <td>246022.0</td>\n",
|
||
" <td>0.244973</td>\n",
|
||
" <td>0.378598</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.400000</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>SleepHoursNorm</th>\n",
|
||
" <td>246022.0</td>\n",
|
||
" <td>0.501124</td>\n",
|
||
" <td>0.165569</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.375000</td>\n",
|
||
" <td>0.500000</td>\n",
|
||
" <td>0.625000</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>HeightInMetersNorm</th>\n",
|
||
" <td>246022.0</td>\n",
|
||
" <td>0.500401</td>\n",
|
||
" <td>0.176240</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.375000</td>\n",
|
||
" <td>0.491667</td>\n",
|
||
" <td>0.625000</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>WeightInKilogramsNorm</th>\n",
|
||
" <td>246022.0</td>\n",
|
||
" <td>0.510963</td>\n",
|
||
" <td>0.186742</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.369818</td>\n",
|
||
" <td>0.495901</td>\n",
|
||
" <td>0.621891</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>BMINorm</th>\n",
|
||
" <td>246022.0</td>\n",
|
||
" <td>0.513599</td>\n",
|
||
" <td>0.194556</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.375000</td>\n",
|
||
" <td>0.479659</td>\n",
|
||
" <td>0.625000</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" count mean std min 25% 50% \\\n",
|
||
"PhysicalHealthDaysNorm 246022.0 0.253306 0.385378 0.0 0.000000 0.000000 \n",
|
||
"MentalHealthDaysNorm 246022.0 0.244973 0.378598 0.0 0.000000 0.000000 \n",
|
||
"SleepHoursNorm 246022.0 0.501124 0.165569 0.0 0.375000 0.500000 \n",
|
||
"HeightInMetersNorm 246022.0 0.500401 0.176240 0.0 0.375000 0.491667 \n",
|
||
"WeightInKilogramsNorm 246022.0 0.510963 0.186742 0.0 0.369818 0.495901 \n",
|
||
"BMINorm 246022.0 0.513599 0.194556 0.0 0.375000 0.479659 \n",
|
||
"\n",
|
||
" 75% max \n",
|
||
"PhysicalHealthDaysNorm 0.400000 1.0 \n",
|
||
"MentalHealthDaysNorm 0.400000 1.0 \n",
|
||
"SleepHoursNorm 0.625000 1.0 \n",
|
||
"HeightInMetersNorm 0.625000 1.0 \n",
|
||
"WeightInKilogramsNorm 0.621891 1.0 \n",
|
||
"BMINorm 0.625000 1.0 "
|
||
]
|
||
},
|
||
"execution_count": 247,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"min_max_scaler = preprocessing.MinMaxScaler()\n",
|
||
"\n",
|
||
"df_norm = df.copy()\n",
|
||
"\n",
|
||
"numeric_columns = get_numeric_columns(df)\n",
|
||
"\n",
|
||
"for column in numeric_columns:\n",
|
||
" norm_column = column + \"Norm\"\n",
|
||
" df_norm[norm_column] = min_max_scaler.fit_transform(\n",
|
||
" df_norm[column].to_numpy().reshape(-1, 1)\n",
|
||
" ).reshape(df_norm[column].shape)\n",
|
||
"\n",
|
||
"df_norm = df_norm.drop(columns=numeric_columns)\n",
|
||
"\n",
|
||
"\n",
|
||
"df_norm.describe().transpose()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Конструирование признаков"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Автоматическое конструирование признаков с помощью фреймворка FeatureTools"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 221,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import featuretools as ft"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 222,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
||
" pd.to_datetime(\n",
|
||
"d:\\code\\AIM-PIbd-31-Potapov-N-S\\lab_3\\.venv\\Lib\\site-packages\\featuretools\\synthesis\\deep_feature_synthesis.py:169: UserWarning: Only one dataframe in entityset, changing max_depth to 1 since deeper features cannot be created\n",
|
||
" warnings.warn(\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
"# Transform the dataset with the featuretools framework (Deep Feature Synthesis).\n",
"# https://featuretools.alteryx.com/en/stable/getting_started/afe.html\n",
"\n",
"# Register `df` as the only dataframe of the EntitySet; make_index=True asks\n",
"# featuretools to create the \"id\" index column.\n",
"entity_set = ft.EntitySet().add_dataframe(\n",
"    df,\n",
"    \"df\",\n",
"    index=\"id\",\n",
"    make_index=True,\n",
")\n",
"\n",
"# With a single dataframe in the EntitySet featuretools cannot build deeper\n",
"# features: it overrides any larger max_depth to 1 and emits a UserWarning.\n",
"# Requesting depth 1 explicitly yields the same features without the warning.\n",
"feature_matrix, feature_defs = ft.dfs(\n",
"    entityset=entity_set,\n",
"    target_dataframe_name=\"df\",\n",
"    max_depth=1,\n",
")\n",
"\n",
"# Bare annotations: type hints only, for editors and readers.\n",
"feature_matrix: DataFrame\n",
"feature_defs: list[ft.Feature]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Выполняем категориальное и унитарное кодирование признаков с помощью FeatureTools"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 224,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Было признаков: 40\n",
|
||
"Стало признаков: 99\n",
|
||
"<Feature: State = Washington>\n",
|
||
"<Feature: State = Maryland>\n",
|
||
"<Feature: State = Minnesota>\n",
|
||
"<Feature: State = Ohio>\n",
|
||
"<Feature: State = New York>\n",
|
||
"<Feature: State = Texas>\n",
|
||
"<Feature: State = Florida>\n",
|
||
"<Feature: State = Kansas>\n",
|
||
"<Feature: State = Wisconsin>\n",
|
||
"<Feature: State = Maine>\n",
|
||
"<Feature: State is unknown>\n",
|
||
"<Feature: Sex = Female>\n",
|
||
"<Feature: Sex = Male>\n",
|
||
"<Feature: Sex is unknown>\n",
|
||
"<Feature: GeneralHealth = Very good>\n",
|
||
"<Feature: GeneralHealth = Good>\n",
|
||
"<Feature: GeneralHealth = Excellent>\n",
|
||
"<Feature: GeneralHealth = Fair>\n",
|
||
"<Feature: GeneralHealth = Poor>\n",
|
||
"<Feature: GeneralHealth is unknown>\n",
|
||
"<Feature: PhysicalHealthDays>\n",
|
||
"<Feature: MentalHealthDays>\n",
|
||
"<Feature: LastCheckupTime = Within past year (anytime less than 12 months ago)>\n",
|
||
"<Feature: LastCheckupTime = Within past 2 years (1 year but less than 2 years ago)>\n",
|
||
"<Feature: LastCheckupTime = Within past 5 years (2 years but less than 5 years ago)>\n",
|
||
"<Feature: LastCheckupTime = 5 or more years ago>\n",
|
||
"<Feature: LastCheckupTime is unknown>\n",
|
||
"<Feature: PhysicalActivities>\n",
|
||
"<Feature: SleepHours>\n",
|
||
"<Feature: RemovedTeeth = None of them>\n",
|
||
"<Feature: RemovedTeeth = 1 to 5>\n",
|
||
"<Feature: RemovedTeeth = 6 or more, but not all>\n",
|
||
"<Feature: RemovedTeeth = All>\n",
|
||
"<Feature: RemovedTeeth is unknown>\n",
|
||
"<Feature: HadHeartAttack>\n",
|
||
"<Feature: HadAngina>\n",
|
||
"<Feature: HadStroke>\n",
|
||
"<Feature: HadAsthma>\n",
|
||
"<Feature: HadSkinCancer>\n",
|
||
"<Feature: HadCOPD>\n",
|
||
"<Feature: HadDepressiveDisorder>\n",
|
||
"<Feature: HadKidneyDisease>\n",
|
||
"<Feature: HadArthritis>\n",
|
||
"<Feature: HadDiabetes = No>\n",
|
||
"<Feature: HadDiabetes = Yes>\n",
|
||
"<Feature: HadDiabetes = No, pre-diabetes or borderline diabetes>\n",
|
||
"<Feature: HadDiabetes = Yes, but only during pregnancy (female)>\n",
|
||
"<Feature: HadDiabetes is unknown>\n",
|
||
"<Feature: DeafOrHardOfHearing>\n",
|
||
"<Feature: BlindOrVisionDifficulty>\n",
|
||
"<Feature: DifficultyConcentrating>\n",
|
||
"<Feature: DifficultyWalking>\n",
|
||
"<Feature: DifficultyDressingBathing>\n",
|
||
"<Feature: DifficultyErrands>\n",
|
||
"<Feature: SmokerStatus = Never smoked>\n",
|
||
"<Feature: SmokerStatus = Former smoker>\n",
|
||
"<Feature: SmokerStatus = Current smoker - now smokes every day>\n",
|
||
"<Feature: SmokerStatus = Current smoker - now smokes some days>\n",
|
||
"<Feature: SmokerStatus is unknown>\n",
|
||
"<Feature: ECigaretteUsage = Never used e-cigarettes in my entire life>\n",
|
||
"<Feature: ECigaretteUsage = Not at all (right now)>\n",
|
||
"<Feature: ECigaretteUsage = Use them some days>\n",
|
||
"<Feature: ECigaretteUsage = Use them every day>\n",
|
||
"<Feature: ECigaretteUsage is unknown>\n",
|
||
"<Feature: ChestScan>\n",
|
||
"<Feature: RaceEthnicityCategory = White only, Non-Hispanic>\n",
|
||
"<Feature: RaceEthnicityCategory = Hispanic>\n",
|
||
"<Feature: RaceEthnicityCategory = Black only, Non-Hispanic>\n",
|
||
"<Feature: RaceEthnicityCategory = Other race only, Non-Hispanic>\n",
|
||
"<Feature: RaceEthnicityCategory = Multiracial, Non-Hispanic>\n",
|
||
"<Feature: RaceEthnicityCategory is unknown>\n",
|
||
"<Feature: AgeCategory = Age 65 to 69>\n",
|
||
"<Feature: AgeCategory = Age 60 to 64>\n",
|
||
"<Feature: AgeCategory = Age 70 to 74>\n",
|
||
"<Feature: AgeCategory = Age 55 to 59>\n",
|
||
"<Feature: AgeCategory = Age 50 to 54>\n",
|
||
"<Feature: AgeCategory = Age 75 to 79>\n",
|
||
"<Feature: AgeCategory = Age 80 or older>\n",
|
||
"<Feature: AgeCategory = Age 40 to 44>\n",
|
||
"<Feature: AgeCategory = Age 45 to 49>\n",
|
||
"<Feature: AgeCategory = Age 35 to 39>\n",
|
||
"<Feature: AgeCategory is unknown>\n",
|
||
"<Feature: HeightInMeters>\n",
|
||
"<Feature: WeightInKilograms>\n",
|
||
"<Feature: BMI>\n",
|
||
"<Feature: AlcoholDrinkers>\n",
|
||
"<Feature: HIVTesting>\n",
|
||
"<Feature: FluVaxLast12>\n",
|
||
"<Feature: PneumoVaxEver>\n",
|
||
"<Feature: TetanusLast10Tdap = No, did not receive any tetanus shot in the past 10 years>\n",
|
||
"<Feature: TetanusLast10Tdap = Yes, received tetanus shot but not sure what type>\n",
|
||
"<Feature: TetanusLast10Tdap = Yes, received Tdap>\n",
|
||
"<Feature: TetanusLast10Tdap = Yes, received tetanus shot, but not Tdap>\n",
|
||
"<Feature: TetanusLast10Tdap is unknown>\n",
|
||
"<Feature: HighRiskLastYear>\n",
|
||
"<Feature: CovidPos = No>\n",
|
||
"<Feature: CovidPos = Yes>\n",
|
||
"<Feature: CovidPos = Tested positive using home test without a health professional>\n",
|
||
"<Feature: CovidPos is unknown>\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
"# Encode the categorical features of the DFS feature matrix and save the\n",
"# encoded matrix to disk.\n",
"# https://featuretools.alteryx.com/en/stable/guides/tuning_dfs.html\n",
"\n",
"generated_features_path = \"./csv/generated_features.csv\"\n",
"\n",
"feature_matrix_enc, features_enc = ft.encode_features(\n",
"    feature_matrix,\n",
"    feature_defs,\n",
")\n",
"feature_matrix_enc.to_csv(generated_features_path, index=False)\n",
"\n",
"# Feature counts before and after encoding, then one encoded feature per line.\n",
"print(\"Было признаков:\", len(feature_defs))\n",
"print(\"Стало признаков:\", len(features_enc))\n",
"print(\"\\n\".join(str(feature) for feature in features_enc))"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": ".venv",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.12.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|