MAI_ISE-31_Andrikhov-A-S/lab1.ipynb
2024-10-19 13:14:28 +04:00

4555 lines
470 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. Основные возможности работы с библиотекой pandas"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Загрузка и сохранение данных"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(\"./datasets/var2/2022/heart_2022_no_nans.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>State</th>\n",
" <th>Sex</th>\n",
" <th>GeneralHealth</th>\n",
" <th>PhysicalHealthDays</th>\n",
" <th>MentalHealthDays</th>\n",
" <th>LastCheckupTime</th>\n",
" <th>PhysicalActivities</th>\n",
" <th>SleepHours</th>\n",
" <th>RemovedTeeth</th>\n",
" <th>HadHeartAttack</th>\n",
" <th>...</th>\n",
" <th>WeightInKilograms</th>\n",
" <th>BMI</th>\n",
" <th>AlcoholDrinkers</th>\n",
" <th>HIVTesting</th>\n",
" <th>FluVaxLast12</th>\n",
" <th>PneumoVaxEver</th>\n",
" <th>TetanusLast10Tdap</th>\n",
" <th>HighRiskLastYear</th>\n",
" <th>CovidPos</th>\n",
" <th>SleepHours-HeightInMeters</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>246012</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Fair</td>\n",
" <td>7.0</td>\n",
" <td>30.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>No</td>\n",
" <td>4.0</td>\n",
" <td>None of them</td>\n",
" <td>Yes</td>\n",
" <td>...</td>\n",
" <td>117.93</td>\n",
" <td>33.38</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>2.12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246013</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Excellent</td>\n",
" <td>0.0</td>\n",
" <td>7.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>No</td>\n",
" <td>4.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>49.90</td>\n",
" <td>18.30</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>2.35</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246014</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Female</td>\n",
" <td>Good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>12.0</td>\n",
" <td>1 to 5</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>52.16</td>\n",
" <td>19.14</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes, received Tdap</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>10.35</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246015</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Female</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>1 to 5</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>77.11</td>\n",
" <td>28.29</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>5.35</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246016</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>No</td>\n",
" <td>6.0</td>\n",
" <td>1 to 5</td>\n",
" <td>Yes</td>\n",
" <td>...</td>\n",
" <td>118.84</td>\n",
" <td>36.54</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>4.20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246017</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past 2 years (1 year but less than 2 ye...</td>\n",
" <td>Yes</td>\n",
" <td>6.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>102.06</td>\n",
" <td>32.28</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>4.22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246018</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Female</td>\n",
" <td>Fair</td>\n",
" <td>0.0</td>\n",
" <td>7.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>90.72</td>\n",
" <td>24.34</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>5.07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246019</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Good</td>\n",
" <td>0.0</td>\n",
" <td>15.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>1 to 5</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>83.91</td>\n",
" <td>29.86</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>5.32</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246020</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Female</td>\n",
" <td>Excellent</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>83.01</td>\n",
" <td>28.66</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>5.30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246021</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>No</td>\n",
" <td>5.0</td>\n",
" <td>None of them</td>\n",
" <td>Yes</td>\n",
" <td>...</td>\n",
" <td>108.86</td>\n",
" <td>32.55</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>3.17</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10 rows × 41 columns</p>\n",
"</div>"
],
"text/plain": [
" State Sex GeneralHealth PhysicalHealthDays \\\n",
"246012 Virgin Islands Male Fair 7.0 \n",
"246013 Virgin Islands Male Excellent 0.0 \n",
"246014 Virgin Islands Female Good 0.0 \n",
"246015 Virgin Islands Female Very good 0.0 \n",
"246016 Virgin Islands Male Good 0.0 \n",
"246017 Virgin Islands Male Very good 0.0 \n",
"246018 Virgin Islands Female Fair 0.0 \n",
"246019 Virgin Islands Male Good 0.0 \n",
"246020 Virgin Islands Female Excellent 2.0 \n",
"246021 Virgin Islands Male Very good 0.0 \n",
"\n",
" MentalHealthDays LastCheckupTime \\\n",
"246012 30.0 Within past year (anytime less than 12 months ... \n",
"246013 7.0 Within past year (anytime less than 12 months ... \n",
"246014 0.0 Within past year (anytime less than 12 months ... \n",
"246015 0.0 Within past year (anytime less than 12 months ... \n",
"246016 0.0 Within past year (anytime less than 12 months ... \n",
"246017 0.0 Within past 2 years (1 year but less than 2 ye... \n",
"246018 7.0 Within past year (anytime less than 12 months ... \n",
"246019 15.0 Within past year (anytime less than 12 months ... \n",
"246020 2.0 Within past year (anytime less than 12 months ... \n",
"246021 0.0 Within past year (anytime less than 12 months ... \n",
"\n",
" PhysicalActivities SleepHours RemovedTeeth HadHeartAttack ... \\\n",
"246012 No 4.0 None of them Yes ... \n",
"246013 No 4.0 None of them No ... \n",
"246014 Yes 12.0 1 to 5 No ... \n",
"246015 Yes 7.0 1 to 5 No ... \n",
"246016 No 6.0 1 to 5 Yes ... \n",
"246017 Yes 6.0 None of them No ... \n",
"246018 Yes 7.0 None of them No ... \n",
"246019 Yes 7.0 1 to 5 No ... \n",
"246020 Yes 7.0 None of them No ... \n",
"246021 No 5.0 None of them Yes ... \n",
"\n",
" WeightInKilograms BMI AlcoholDrinkers HIVTesting FluVaxLast12 \\\n",
"246012 117.93 33.38 Yes Yes No \n",
"246013 49.90 18.30 Yes No No \n",
"246014 52.16 19.14 No No No \n",
"246015 77.11 28.29 Yes Yes No \n",
"246016 118.84 36.54 Yes Yes Yes \n",
"246017 102.06 32.28 Yes No No \n",
"246018 90.72 24.34 No No No \n",
"246019 83.91 29.86 Yes Yes Yes \n",
"246020 83.01 28.66 No Yes Yes \n",
"246021 108.86 32.55 No Yes Yes \n",
"\n",
" PneumoVaxEver TetanusLast10Tdap \\\n",
"246012 No No, did not receive any tetanus shot in the pa... \n",
"246013 No No, did not receive any tetanus shot in the pa... \n",
"246014 Yes Yes, received Tdap \n",
"246015 No No, did not receive any tetanus shot in the pa... \n",
"246016 No Yes, received tetanus shot but not sure what type \n",
"246017 No Yes, received tetanus shot but not sure what type \n",
"246018 No No, did not receive any tetanus shot in the pa... \n",
"246019 Yes Yes, received tetanus shot but not sure what type \n",
"246020 No Yes, received tetanus shot but not sure what type \n",
"246021 Yes No, did not receive any tetanus shot in the pa... \n",
"\n",
" HighRiskLastYear CovidPos SleepHours-HeightInMeters \n",
"246012 No Yes 2.12 \n",
"246013 No No 2.35 \n",
"246014 No No 10.35 \n",
"246015 No No 5.35 \n",
"246016 No No 4.20 \n",
"246017 No No 4.22 \n",
"246018 No Yes 5.07 \n",
"246019 No Yes 5.32 \n",
"246020 No No 5.30 \n",
"246021 No Yes 3.17 \n",
"\n",
"[10 rows x 41 columns]"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.tail(10)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>State</th>\n",
" <th>Sex</th>\n",
" <th>GeneralHealth</th>\n",
" <th>PhysicalHealthDays</th>\n",
" <th>MentalHealthDays</th>\n",
" <th>LastCheckupTime</th>\n",
" <th>PhysicalActivities</th>\n",
" <th>SleepHours</th>\n",
" <th>RemovedTeeth</th>\n",
" <th>HadHeartAttack</th>\n",
" <th>...</th>\n",
" <th>WeightInKilograms</th>\n",
" <th>BMI</th>\n",
" <th>AlcoholDrinkers</th>\n",
" <th>HIVTesting</th>\n",
" <th>FluVaxLast12</th>\n",
" <th>PneumoVaxEver</th>\n",
" <th>TetanusLast10Tdap</th>\n",
" <th>HighRiskLastYear</th>\n",
" <th>CovidPos</th>\n",
" <th>SleepHours-HeightInMeters</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Alabama</td>\n",
" <td>Female</td>\n",
" <td>Very good</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>9.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>71.67</td>\n",
" <td>27.99</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes, received Tdap</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>7.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Alabama</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>6.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>95.25</td>\n",
" <td>30.13</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>4.22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Alabama</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>No</td>\n",
" <td>8.0</td>\n",
" <td>6 or more, but not all</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>108.86</td>\n",
" <td>31.66</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>6.15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Alabama</td>\n",
" <td>Female</td>\n",
" <td>Fair</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>9.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>90.72</td>\n",
" <td>31.32</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>7.30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Alabama</td>\n",
" <td>Female</td>\n",
" <td>Good</td>\n",
" <td>3.0</td>\n",
" <td>15.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>5.0</td>\n",
" <td>1 to 5</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>79.38</td>\n",
" <td>33.07</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>3.45</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Alabama</td>\n",
" <td>Male</td>\n",
" <td>Good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>120.20</td>\n",
" <td>34.96</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>5.15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Alabama</td>\n",
" <td>Female</td>\n",
" <td>Good</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>8.0</td>\n",
" <td>6 or more, but not all</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>88.00</td>\n",
" <td>33.30</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>6.37</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Alabama</td>\n",
" <td>Male</td>\n",
" <td>Fair</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>8.0</td>\n",
" <td>1 to 5</td>\n",
" <td>Yes</td>\n",
" <td>...</td>\n",
" <td>74.84</td>\n",
" <td>24.37</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>6.25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Alabama</td>\n",
" <td>Male</td>\n",
" <td>Good</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" <td>5 or more years ago</td>\n",
" <td>No</td>\n",
" <td>6.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>78.02</td>\n",
" <td>26.94</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>4.30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Alabama</td>\n",
" <td>Female</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>63.50</td>\n",
" <td>22.60</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>5.32</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10 rows × 41 columns</p>\n",
"</div>"
],
"text/plain": [
" State Sex GeneralHealth PhysicalHealthDays MentalHealthDays \\\n",
"0 Alabama Female Very good 4.0 0.0 \n",
"1 Alabama Male Very good 0.0 0.0 \n",
"2 Alabama Male Very good 0.0 0.0 \n",
"3 Alabama Female Fair 5.0 0.0 \n",
"4 Alabama Female Good 3.0 15.0 \n",
"5 Alabama Male Good 0.0 0.0 \n",
"6 Alabama Female Good 3.0 0.0 \n",
"7 Alabama Male Fair 5.0 0.0 \n",
"8 Alabama Male Good 2.0 0.0 \n",
"9 Alabama Female Very good 0.0 0.0 \n",
"\n",
" LastCheckupTime PhysicalActivities \\\n",
"0 Within past year (anytime less than 12 months ... Yes \n",
"1 Within past year (anytime less than 12 months ... Yes \n",
"2 Within past year (anytime less than 12 months ... No \n",
"3 Within past year (anytime less than 12 months ... Yes \n",
"4 Within past year (anytime less than 12 months ... Yes \n",
"5 Within past year (anytime less than 12 months ... Yes \n",
"6 Within past year (anytime less than 12 months ... Yes \n",
"7 Within past year (anytime less than 12 months ... Yes \n",
"8 5 or more years ago No \n",
"9 Within past year (anytime less than 12 months ... Yes \n",
"\n",
" SleepHours RemovedTeeth HadHeartAttack ... WeightInKilograms \\\n",
"0 9.0 None of them No ... 71.67 \n",
"1 6.0 None of them No ... 95.25 \n",
"2 8.0 6 or more, but not all No ... 108.86 \n",
"3 9.0 None of them No ... 90.72 \n",
"4 5.0 1 to 5 No ... 79.38 \n",
"5 7.0 None of them No ... 120.20 \n",
"6 8.0 6 or more, but not all No ... 88.00 \n",
"7 8.0 1 to 5 Yes ... 74.84 \n",
"8 6.0 None of them No ... 78.02 \n",
"9 7.0 None of them No ... 63.50 \n",
"\n",
" BMI AlcoholDrinkers HIVTesting FluVaxLast12 PneumoVaxEver \\\n",
"0 27.99 No No Yes Yes \n",
"1 30.13 No No Yes Yes \n",
"2 31.66 Yes No No Yes \n",
"3 31.32 No No Yes Yes \n",
"4 33.07 No No Yes Yes \n",
"5 34.96 Yes Yes Yes No \n",
"6 33.30 No No Yes Yes \n",
"7 24.37 No Yes Yes Yes \n",
"8 26.94 No No No No \n",
"9 22.60 No No Yes Yes \n",
"\n",
" TetanusLast10Tdap HighRiskLastYear \\\n",
"0 Yes, received Tdap No \n",
"1 Yes, received tetanus shot but not sure what type No \n",
"2 No, did not receive any tetanus shot in the pa... No \n",
"3 No, did not receive any tetanus shot in the pa... No \n",
"4 No, did not receive any tetanus shot in the pa... No \n",
"5 Yes, received tetanus shot but not sure what type No \n",
"6 No, did not receive any tetanus shot in the pa... No \n",
"7 No, did not receive any tetanus shot in the pa... No \n",
"8 No, did not receive any tetanus shot in the pa... No \n",
"9 No, did not receive any tetanus shot in the pa... No \n",
"\n",
" CovidPos SleepHours-HeightInMeters \n",
"0 No 7.40 \n",
"1 No 4.22 \n",
"2 Yes 6.15 \n",
"3 Yes 7.30 \n",
"4 No 3.45 \n",
"5 No 5.15 \n",
"6 No 6.37 \n",
"7 Yes 6.25 \n",
"8 Yes 4.30 \n",
"9 No 5.32 \n",
"\n",
"[10 rows x 41 columns]"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv(\"new.csv\", index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Получение сведений о датафрейме с данными"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PhysicalHealthDays</th>\n",
" <th>MentalHealthDays</th>\n",
" <th>SleepHours</th>\n",
" <th>HeightInMeters</th>\n",
" <th>WeightInKilograms</th>\n",
" <th>BMI</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>246022.000000</td>\n",
" <td>246022.000000</td>\n",
" <td>246022.000000</td>\n",
" <td>246022.000000</td>\n",
" <td>246022.000000</td>\n",
" <td>246022.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>4.119026</td>\n",
" <td>4.167140</td>\n",
" <td>7.021331</td>\n",
" <td>1.705150</td>\n",
" <td>83.615179</td>\n",
" <td>28.668136</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>8.405844</td>\n",
" <td>8.102687</td>\n",
" <td>1.440681</td>\n",
" <td>0.106654</td>\n",
" <td>21.323156</td>\n",
" <td>6.513973</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.910000</td>\n",
" <td>28.120000</td>\n",
" <td>12.020000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>6.000000</td>\n",
" <td>1.630000</td>\n",
" <td>68.040000</td>\n",
" <td>24.270000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>7.000000</td>\n",
" <td>1.700000</td>\n",
" <td>81.650000</td>\n",
" <td>27.460000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>3.000000</td>\n",
" <td>4.000000</td>\n",
" <td>8.000000</td>\n",
" <td>1.780000</td>\n",
" <td>95.250000</td>\n",
" <td>31.890000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>30.000000</td>\n",
" <td>30.000000</td>\n",
" <td>24.000000</td>\n",
" <td>2.410000</td>\n",
" <td>292.570000</td>\n",
" <td>97.650000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PhysicalHealthDays MentalHealthDays SleepHours HeightInMeters \\\n",
"count 246022.000000 246022.000000 246022.000000 246022.000000 \n",
"mean 4.119026 4.167140 7.021331 1.705150 \n",
"std 8.405844 8.102687 1.440681 0.106654 \n",
"min 0.000000 0.000000 1.000000 0.910000 \n",
"25% 0.000000 0.000000 6.000000 1.630000 \n",
"50% 0.000000 0.000000 7.000000 1.700000 \n",
"75% 3.000000 4.000000 8.000000 1.780000 \n",
"max 30.000000 30.000000 24.000000 2.410000 \n",
"\n",
" WeightInKilograms BMI \n",
"count 246022.000000 246022.000000 \n",
"mean 83.615179 28.668136 \n",
"std 21.323156 6.513973 \n",
"min 28.120000 12.020000 \n",
"25% 68.040000 24.270000 \n",
"50% 81.650000 27.460000 \n",
"75% 95.250000 31.890000 \n",
"max 292.570000 97.650000 "
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 246022 entries, 0 to 246021\n",
"Data columns (total 40 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 State 246022 non-null object \n",
" 1 Sex 246022 non-null object \n",
" 2 GeneralHealth 246022 non-null object \n",
" 3 PhysicalHealthDays 246022 non-null float64\n",
" 4 MentalHealthDays 246022 non-null float64\n",
" 5 LastCheckupTime 246022 non-null object \n",
" 6 PhysicalActivities 246022 non-null object \n",
" 7 SleepHours 246022 non-null float64\n",
" 8 RemovedTeeth 246022 non-null object \n",
" 9 HadHeartAttack 246022 non-null object \n",
" 10 HadAngina 246022 non-null object \n",
" 11 HadStroke 246022 non-null object \n",
" 12 HadAsthma 246022 non-null object \n",
" 13 HadSkinCancer 246022 non-null object \n",
" 14 HadCOPD 246022 non-null object \n",
" 15 HadDepressiveDisorder 246022 non-null object \n",
" 16 HadKidneyDisease 246022 non-null object \n",
" 17 HadArthritis 246022 non-null object \n",
" 18 HadDiabetes 246022 non-null object \n",
" 19 DeafOrHardOfHearing 246022 non-null object \n",
" 20 BlindOrVisionDifficulty 246022 non-null object \n",
" 21 DifficultyConcentrating 246022 non-null object \n",
" 22 DifficultyWalking 246022 non-null object \n",
" 23 DifficultyDressingBathing 246022 non-null object \n",
" 24 DifficultyErrands 246022 non-null object \n",
" 25 SmokerStatus 246022 non-null object \n",
" 26 ECigaretteUsage 246022 non-null object \n",
" 27 ChestScan 246022 non-null object \n",
" 28 RaceEthnicityCategory 246022 non-null object \n",
" 29 AgeCategory 246022 non-null object \n",
" 30 HeightInMeters 246022 non-null float64\n",
" 31 WeightInKilograms 246022 non-null float64\n",
" 32 BMI 246022 non-null float64\n",
" 33 AlcoholDrinkers 246022 non-null object \n",
" 34 HIVTesting 246022 non-null object \n",
" 35 FluVaxLast12 246022 non-null object \n",
" 36 PneumoVaxEver 246022 non-null object \n",
" 37 TetanusLast10Tdap 246022 non-null object \n",
" 38 HighRiskLastYear 246022 non-null object \n",
" 39 CovidPos 246022 non-null object \n",
"dtypes: float64(6), object(34)\n",
"memory usage: 75.1+ MB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Получение сведений о колонках датафрейма"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['State', 'Sex', 'GeneralHealth', 'PhysicalHealthDays',\n",
" 'MentalHealthDays', 'LastCheckupTime', 'PhysicalActivities',\n",
" 'SleepHours', 'RemovedTeeth', 'HadHeartAttack', 'HadAngina',\n",
" 'HadStroke', 'HadAsthma', 'HadSkinCancer', 'HadCOPD',\n",
" 'HadDepressiveDisorder', 'HadKidneyDisease', 'HadArthritis',\n",
" 'HadDiabetes', 'DeafOrHardOfHearing', 'BlindOrVisionDifficulty',\n",
" 'DifficultyConcentrating', 'DifficultyWalking',\n",
" 'DifficultyDressingBathing', 'DifficultyErrands', 'SmokerStatus',\n",
" 'ECigaretteUsage', 'ChestScan', 'RaceEthnicityCategory', 'AgeCategory',\n",
" 'HeightInMeters', 'WeightInKilograms', 'BMI', 'AlcoholDrinkers',\n",
" 'HIVTesting', 'FluVaxLast12', 'PneumoVaxEver', 'TetanusLast10Tdap',\n",
" 'HighRiskLastYear', 'CovidPos'],\n",
" dtype='object')"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Вывод отдельных строк и столбцов из датафрейма"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sex</th>\n",
" <th>HadHeartAttack</th>\n",
" <th>WeightInKilograms</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Female</td>\n",
" <td>No</td>\n",
" <td>71.67</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Male</td>\n",
" <td>No</td>\n",
" <td>95.25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Male</td>\n",
" <td>No</td>\n",
" <td>108.86</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Female</td>\n",
" <td>No</td>\n",
" <td>90.72</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Female</td>\n",
" <td>No</td>\n",
" <td>79.38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246017</th>\n",
" <td>Male</td>\n",
" <td>No</td>\n",
" <td>102.06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246018</th>\n",
" <td>Female</td>\n",
" <td>No</td>\n",
" <td>90.72</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246019</th>\n",
" <td>Male</td>\n",
" <td>No</td>\n",
" <td>83.91</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246020</th>\n",
" <td>Female</td>\n",
" <td>No</td>\n",
" <td>83.01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246021</th>\n",
" <td>Male</td>\n",
" <td>Yes</td>\n",
" <td>108.86</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>246022 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" Sex HadHeartAttack WeightInKilograms\n",
"0 Female No 71.67\n",
"1 Male No 95.25\n",
"2 Male No 108.86\n",
"3 Female No 90.72\n",
"4 Female No 79.38\n",
"... ... ... ...\n",
"246017 Male No 102.06\n",
"246018 Female No 90.72\n",
"246019 Male No 83.91\n",
"246020 Female No 83.01\n",
"246021 Male Yes 108.86\n",
"\n",
"[246022 rows x 3 columns]"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"Sex\", \"HadHeartAttack\", \"WeightInKilograms\"]]"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>State</th>\n",
" <th>Sex</th>\n",
" <th>GeneralHealth</th>\n",
" <th>PhysicalHealthDays</th>\n",
" <th>MentalHealthDays</th>\n",
" <th>LastCheckupTime</th>\n",
" <th>PhysicalActivities</th>\n",
" <th>SleepHours</th>\n",
" <th>RemovedTeeth</th>\n",
" <th>HadHeartAttack</th>\n",
" <th>...</th>\n",
" <th>HeightInMeters</th>\n",
" <th>WeightInKilograms</th>\n",
" <th>BMI</th>\n",
" <th>AlcoholDrinkers</th>\n",
" <th>HIVTesting</th>\n",
" <th>FluVaxLast12</th>\n",
" <th>PneumoVaxEver</th>\n",
" <th>TetanusLast10Tdap</th>\n",
" <th>HighRiskLastYear</th>\n",
" <th>CovidPos</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Alabama</td>\n",
" <td>Female</td>\n",
" <td>Fair</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>9.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.70</td>\n",
" <td>90.72</td>\n",
" <td>31.32</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Alabama</td>\n",
" <td>Female</td>\n",
" <td>Good</td>\n",
" <td>3.0</td>\n",
" <td>15.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>5.0</td>\n",
" <td>1 to 5</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.55</td>\n",
" <td>79.38</td>\n",
" <td>33.07</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Alabama</td>\n",
" <td>Male</td>\n",
" <td>Good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.85</td>\n",
" <td>120.20</td>\n",
" <td>34.96</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3 rows × 40 columns</p>\n",
"</div>"
],
"text/plain": [
" State Sex GeneralHealth PhysicalHealthDays MentalHealthDays \\\n",
"3 Alabama Female Fair 5.0 0.0 \n",
"4 Alabama Female Good 3.0 15.0 \n",
"5 Alabama Male Good 0.0 0.0 \n",
"\n",
" LastCheckupTime PhysicalActivities \\\n",
"3 Within past year (anytime less than 12 months ... Yes \n",
"4 Within past year (anytime less than 12 months ... Yes \n",
"5 Within past year (anytime less than 12 months ... Yes \n",
"\n",
" SleepHours RemovedTeeth HadHeartAttack ... HeightInMeters \\\n",
"3 9.0 None of them No ... 1.70 \n",
"4 5.0 1 to 5 No ... 1.55 \n",
"5 7.0 None of them No ... 1.85 \n",
"\n",
" WeightInKilograms BMI AlcoholDrinkers HIVTesting FluVaxLast12 \\\n",
"3 90.72 31.32 No No Yes \n",
"4 79.38 33.07 No No Yes \n",
"5 120.20 34.96 Yes Yes Yes \n",
"\n",
" PneumoVaxEver TetanusLast10Tdap \\\n",
"3 Yes No, did not receive any tetanus shot in the pa... \n",
"4 Yes No, did not receive any tetanus shot in the pa... \n",
"5 No Yes, received tetanus shot but not sure what type \n",
"\n",
" HighRiskLastYear CovidPos \n",
"3 No Yes \n",
"4 No No \n",
"5 No No \n",
"\n",
"[3 rows x 40 columns]"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[3:6]"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>State</th>\n",
" <th>Sex</th>\n",
" <th>GeneralHealth</th>\n",
" <th>PhysicalHealthDays</th>\n",
" <th>MentalHealthDays</th>\n",
" <th>LastCheckupTime</th>\n",
" <th>PhysicalActivities</th>\n",
" <th>SleepHours</th>\n",
" <th>RemovedTeeth</th>\n",
" <th>HadHeartAttack</th>\n",
" <th>...</th>\n",
" <th>HeightInMeters</th>\n",
" <th>WeightInKilograms</th>\n",
" <th>BMI</th>\n",
" <th>AlcoholDrinkers</th>\n",
" <th>HIVTesting</th>\n",
" <th>FluVaxLast12</th>\n",
" <th>PneumoVaxEver</th>\n",
" <th>TetanusLast10Tdap</th>\n",
" <th>HighRiskLastYear</th>\n",
" <th>CovidPos</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Alabama</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>No</td>\n",
" <td>8.0</td>\n",
" <td>6 or more, but not all</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.85</td>\n",
" <td>108.86</td>\n",
" <td>31.66</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Alabama</td>\n",
" <td>Male</td>\n",
" <td>Good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.85</td>\n",
" <td>120.20</td>\n",
" <td>34.96</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Alabama</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>8.0</td>\n",
" <td>1 to 5</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.83</td>\n",
" <td>122.47</td>\n",
" <td>36.62</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes, received Tdap</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Alabama</td>\n",
" <td>Female</td>\n",
" <td>Good</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>5.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.52</td>\n",
" <td>108.86</td>\n",
" <td>46.87</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot, but not Tdap</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Alabama</td>\n",
" <td>Male</td>\n",
" <td>Good</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>5.0</td>\n",
" <td>6 or more, but not all</td>\n",
" <td>Yes</td>\n",
" <td>...</td>\n",
" <td>1.88</td>\n",
" <td>115.67</td>\n",
" <td>32.74</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246002</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>6.0</td>\n",
" <td>1 to 5</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.88</td>\n",
" <td>106.59</td>\n",
" <td>30.17</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>Tested positive using home test without a heal...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246012</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Fair</td>\n",
" <td>7.0</td>\n",
" <td>30.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>No</td>\n",
" <td>4.0</td>\n",
" <td>None of them</td>\n",
" <td>Yes</td>\n",
" <td>...</td>\n",
" <td>1.88</td>\n",
" <td>117.93</td>\n",
" <td>33.38</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246016</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>No</td>\n",
" <td>6.0</td>\n",
" <td>1 to 5</td>\n",
" <td>Yes</td>\n",
" <td>...</td>\n",
" <td>1.80</td>\n",
" <td>118.84</td>\n",
" <td>36.54</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246017</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past 2 years (1 year but less than 2 ye...</td>\n",
" <td>Yes</td>\n",
" <td>6.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.78</td>\n",
" <td>102.06</td>\n",
" <td>32.28</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246021</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>No</td>\n",
" <td>5.0</td>\n",
" <td>None of them</td>\n",
" <td>Yes</td>\n",
" <td>...</td>\n",
" <td>1.83</td>\n",
" <td>108.86</td>\n",
" <td>32.55</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>44646 rows × 40 columns</p>\n",
"</div>"
],
"text/plain": [
" State Sex GeneralHealth PhysicalHealthDays \\\n",
"2 Alabama Male Very good 0.0 \n",
"5 Alabama Male Good 0.0 \n",
"10 Alabama Male Very good 0.0 \n",
"11 Alabama Female Good 3.0 \n",
"12 Alabama Male Good 5.0 \n",
"... ... ... ... ... \n",
"246002 Virgin Islands Male Good 0.0 \n",
"246012 Virgin Islands Male Fair 7.0 \n",
"246016 Virgin Islands Male Good 0.0 \n",
"246017 Virgin Islands Male Very good 0.0 \n",
"246021 Virgin Islands Male Very good 0.0 \n",
"\n",
" MentalHealthDays LastCheckupTime \\\n",
"2 0.0 Within past year (anytime less than 12 months ... \n",
"5 0.0 Within past year (anytime less than 12 months ... \n",
"10 0.0 Within past year (anytime less than 12 months ... \n",
"11 4.0 Within past year (anytime less than 12 months ... \n",
"12 0.0 Within past year (anytime less than 12 months ... \n",
"... ... ... \n",
"246002 0.0 Within past year (anytime less than 12 months ... \n",
"246012 30.0 Within past year (anytime less than 12 months ... \n",
"246016 0.0 Within past year (anytime less than 12 months ... \n",
"246017 0.0 Within past 2 years (1 year but less than 2 ye... \n",
"246021 0.0 Within past year (anytime less than 12 months ... \n",
"\n",
" PhysicalActivities SleepHours RemovedTeeth HadHeartAttack \\\n",
"2 No 8.0 6 or more, but not all No \n",
"5 Yes 7.0 None of them No \n",
"10 Yes 8.0 1 to 5 No \n",
"11 Yes 5.0 None of them No \n",
"12 Yes 5.0 6 or more, but not all Yes \n",
"... ... ... ... ... \n",
"246002 Yes 6.0 1 to 5 No \n",
"246012 No 4.0 None of them Yes \n",
"246016 No 6.0 1 to 5 Yes \n",
"246017 Yes 6.0 None of them No \n",
"246021 No 5.0 None of them Yes \n",
"\n",
" ... HeightInMeters WeightInKilograms BMI AlcoholDrinkers \\\n",
"2 ... 1.85 108.86 31.66 Yes \n",
"5 ... 1.85 120.20 34.96 Yes \n",
"10 ... 1.83 122.47 36.62 Yes \n",
"11 ... 1.52 108.86 46.87 No \n",
"12 ... 1.88 115.67 32.74 No \n",
"... ... ... ... ... ... \n",
"246002 ... 1.88 106.59 30.17 Yes \n",
"246012 ... 1.88 117.93 33.38 Yes \n",
"246016 ... 1.80 118.84 36.54 Yes \n",
"246017 ... 1.78 102.06 32.28 Yes \n",
"246021 ... 1.83 108.86 32.55 No \n",
"\n",
" HIVTesting FluVaxLast12 PneumoVaxEver \\\n",
"2 No No Yes \n",
"5 Yes Yes No \n",
"10 No Yes Yes \n",
"11 No No No \n",
"12 No Yes Yes \n",
"... ... ... ... \n",
"246002 No No No \n",
"246012 Yes No No \n",
"246016 Yes Yes No \n",
"246017 No No No \n",
"246021 Yes Yes Yes \n",
"\n",
" TetanusLast10Tdap HighRiskLastYear \\\n",
"2 No, did not receive any tetanus shot in the pa... No \n",
"5 Yes, received tetanus shot but not sure what type No \n",
"10 Yes, received Tdap No \n",
"11 Yes, received tetanus shot, but not Tdap No \n",
"12 Yes, received tetanus shot but not sure what type No \n",
"... ... ... \n",
"246002 Yes, received tetanus shot but not sure what type No \n",
"246012 No, did not receive any tetanus shot in the pa... No \n",
"246016 Yes, received tetanus shot but not sure what type No \n",
"246017 Yes, received tetanus shot but not sure what type No \n",
"246021 No, did not receive any tetanus shot in the pa... No \n",
"\n",
" CovidPos \n",
"2 Yes \n",
"5 No \n",
"10 No \n",
"11 Yes \n",
"12 No \n",
"... ... \n",
"246002 Tested positive using home test without a heal... \n",
"246012 Yes \n",
"246016 No \n",
"246017 No \n",
"246021 Yes \n",
"\n",
"[44646 rows x 40 columns]"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['WeightInKilograms'] > 100]\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Группировка и агрегация данных в датафрейме¶"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>WeightInKilograms</th>\n",
" </tr>\n",
" <tr>\n",
" <th>State</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Alabama</th>\n",
" <td>85.225899</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Alaska</th>\n",
" <td>83.937201</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Arizona</th>\n",
" <td>82.626862</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Arkansas</th>\n",
" <td>85.361796</td>\n",
" </tr>\n",
" <tr>\n",
" <th>California</th>\n",
" <td>81.334135</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Colorado</th>\n",
" <td>80.805505</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Connecticut</th>\n",
" <td>82.192881</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Delaware</th>\n",
" <td>84.224436</td>\n",
" </tr>\n",
" <tr>\n",
" <th>District of Columbia</th>\n",
" <td>78.593038</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Florida</th>\n",
" <td>83.155785</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Georgia</th>\n",
" <td>84.332240</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Guam</th>\n",
" <td>77.294261</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Hawaii</th>\n",
" <td>76.419335</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Idaho</th>\n",
" <td>84.648567</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Illinois</th>\n",
" <td>83.459467</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Indiana</th>\n",
" <td>85.703237</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Iowa</th>\n",
" <td>86.970651</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Kansas</th>\n",
" <td>85.864583</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Kentucky</th>\n",
" <td>86.781960</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Louisiana</th>\n",
" <td>85.162787</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Maine</th>\n",
" <td>82.949232</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Maryland</th>\n",
" <td>83.543344</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Massachusetts</th>\n",
" <td>80.591010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Michigan</th>\n",
" <td>83.629868</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Minnesota</th>\n",
" <td>84.954303</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Mississippi</th>\n",
" <td>88.322797</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Missouri</th>\n",
" <td>85.836119</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Montana</th>\n",
" <td>84.231140</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Nebraska</th>\n",
" <td>85.961696</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Nevada</th>\n",
" <td>82.784771</td>\n",
" </tr>\n",
" <tr>\n",
" <th>New Hampshire</th>\n",
" <td>80.702764</td>\n",
" </tr>\n",
" <tr>\n",
" <th>New Jersey</th>\n",
" <td>81.270844</td>\n",
" </tr>\n",
" <tr>\n",
" <th>New Mexico</th>\n",
" <td>80.529087</td>\n",
" </tr>\n",
" <tr>\n",
" <th>New York</th>\n",
" <td>80.960180</td>\n",
" </tr>\n",
" <tr>\n",
" <th>North Carolina</th>\n",
" <td>83.730953</td>\n",
" </tr>\n",
" <tr>\n",
" <th>North Dakota</th>\n",
" <td>85.924972</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Ohio</th>\n",
" <td>86.938279</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Oklahoma</th>\n",
" <td>85.517429</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Oregon</th>\n",
" <td>83.802043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Pennsylvania</th>\n",
" <td>83.831872</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Puerto Rico</th>\n",
" <td>79.152187</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Rhode Island</th>\n",
" <td>80.675832</td>\n",
" </tr>\n",
" <tr>\n",
" <th>South Carolina</th>\n",
" <td>84.046443</td>\n",
" </tr>\n",
" <tr>\n",
" <th>South Dakota</th>\n",
" <td>86.868195</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Tennessee</th>\n",
" <td>86.237325</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Texas</th>\n",
" <td>84.894035</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td>83.888474</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Vermont</th>\n",
" <td>80.557657</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Virgin Islands</th>\n",
" <td>82.131440</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Virginia</th>\n",
" <td>83.822634</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Washington</th>\n",
" <td>83.077369</td>\n",
" </tr>\n",
" <tr>\n",
" <th>West Virginia</th>\n",
" <td>86.697505</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Wisconsin</th>\n",
" <td>86.167571</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Wyoming</th>\n",
" <td>83.844357</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" WeightInKilograms\n",
"State \n",
"Alabama 85.225899\n",
"Alaska 83.937201\n",
"Arizona 82.626862\n",
"Arkansas 85.361796\n",
"California 81.334135\n",
"Colorado 80.805505\n",
"Connecticut 82.192881\n",
"Delaware 84.224436\n",
"District of Columbia 78.593038\n",
"Florida 83.155785\n",
"Georgia 84.332240\n",
"Guam 77.294261\n",
"Hawaii 76.419335\n",
"Idaho 84.648567\n",
"Illinois 83.459467\n",
"Indiana 85.703237\n",
"Iowa 86.970651\n",
"Kansas 85.864583\n",
"Kentucky 86.781960\n",
"Louisiana 85.162787\n",
"Maine 82.949232\n",
"Maryland 83.543344\n",
"Massachusetts 80.591010\n",
"Michigan 83.629868\n",
"Minnesota 84.954303\n",
"Mississippi 88.322797\n",
"Missouri 85.836119\n",
"Montana 84.231140\n",
"Nebraska 85.961696\n",
"Nevada 82.784771\n",
"New Hampshire 80.702764\n",
"New Jersey 81.270844\n",
"New Mexico 80.529087\n",
"New York 80.960180\n",
"North Carolina 83.730953\n",
"North Dakota 85.924972\n",
"Ohio 86.938279\n",
"Oklahoma 85.517429\n",
"Oregon 83.802043\n",
"Pennsylvania 83.831872\n",
"Puerto Rico 79.152187\n",
"Rhode Island 80.675832\n",
"South Carolina 84.046443\n",
"South Dakota 86.868195\n",
"Tennessee 86.237325\n",
"Texas 84.894035\n",
"Utah 83.888474\n",
"Vermont 80.557657\n",
"Virgin Islands 82.131440\n",
"Virginia 83.822634\n",
"Washington 83.077369\n",
"West Virginia 86.697505\n",
"Wisconsin 86.167571\n",
"Wyoming 83.844357"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"group = df.groupby(['State'])['WeightInKilograms'].mean()\n",
"group.to_frame()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Сортировка данных в датафрейме"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>State</th>\n",
" <th>Sex</th>\n",
" <th>GeneralHealth</th>\n",
" <th>PhysicalHealthDays</th>\n",
" <th>MentalHealthDays</th>\n",
" <th>LastCheckupTime</th>\n",
" <th>PhysicalActivities</th>\n",
" <th>SleepHours</th>\n",
" <th>RemovedTeeth</th>\n",
" <th>HadHeartAttack</th>\n",
" <th>...</th>\n",
" <th>HeightInMeters</th>\n",
" <th>WeightInKilograms</th>\n",
" <th>BMI</th>\n",
" <th>AlcoholDrinkers</th>\n",
" <th>HIVTesting</th>\n",
" <th>FluVaxLast12</th>\n",
" <th>PneumoVaxEver</th>\n",
" <th>TetanusLast10Tdap</th>\n",
" <th>HighRiskLastYear</th>\n",
" <th>CovidPos</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9060</th>\n",
" <td>Arizona</td>\n",
" <td>Male</td>\n",
" <td>Fair</td>\n",
" <td>15.0</td>\n",
" <td>15.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>No</td>\n",
" <td>8.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.85</td>\n",
" <td>292.57</td>\n",
" <td>85.10</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes, received Tdap</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48969</th>\n",
" <td>Hawaii</td>\n",
" <td>Male</td>\n",
" <td>Poor</td>\n",
" <td>30.0</td>\n",
" <td>30.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>4.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.93</td>\n",
" <td>276.24</td>\n",
" <td>74.13</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75697</th>\n",
" <td>Kentucky</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>5 or more years ago</td>\n",
" <td>No</td>\n",
" <td>7.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.91</td>\n",
" <td>273.52</td>\n",
" <td>75.37</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>143147</th>\n",
" <td>New York</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>8.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.88</td>\n",
" <td>273.06</td>\n",
" <td>77.29</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76244</th>\n",
" <td>Kentucky</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.83</td>\n",
" <td>272.16</td>\n",
" <td>81.37</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>203695</th>\n",
" <td>Vermont</td>\n",
" <td>Female</td>\n",
" <td>Poor</td>\n",
" <td>30.0</td>\n",
" <td>3.0</td>\n",
" <td>Within past 2 years (1 year but less than 2 ye...</td>\n",
" <td>No</td>\n",
" <td>18.0</td>\n",
" <td>All</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.60</td>\n",
" <td>30.84</td>\n",
" <td>12.05</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>242632</th>\n",
" <td>Puerto Rico</td>\n",
" <td>Female</td>\n",
" <td>Fair</td>\n",
" <td>30.0</td>\n",
" <td>7.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>No</td>\n",
" <td>7.0</td>\n",
" <td>6 or more, but not all</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.35</td>\n",
" <td>30.39</td>\n",
" <td>16.77</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11614</th>\n",
" <td>Arkansas</td>\n",
" <td>Female</td>\n",
" <td>Poor</td>\n",
" <td>30.0</td>\n",
" <td>30.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>No</td>\n",
" <td>8.0</td>\n",
" <td>All</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.52</td>\n",
" <td>29.48</td>\n",
" <td>12.69</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>127404</th>\n",
" <td>Nebraska</td>\n",
" <td>Female</td>\n",
" <td>Poor</td>\n",
" <td>30.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>6.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.52</td>\n",
" <td>29.48</td>\n",
" <td>12.69</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>179326</th>\n",
" <td>South Carolina</td>\n",
" <td>Female</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>5 or more years ago</td>\n",
" <td>No</td>\n",
" <td>8.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.52</td>\n",
" <td>28.12</td>\n",
" <td>12.11</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>246022 rows × 40 columns</p>\n",
"</div>"
],
"text/plain": [
" State Sex GeneralHealth PhysicalHealthDays \\\n",
"9060 Arizona Male Fair 15.0 \n",
"48969 Hawaii Male Poor 30.0 \n",
"75697 Kentucky Male Very good 0.0 \n",
"143147 New York Male Very good 3.0 \n",
"76244 Kentucky Male Very good 0.0 \n",
"... ... ... ... ... \n",
"203695 Vermont Female Poor 30.0 \n",
"242632 Puerto Rico Female Fair 30.0 \n",
"11614 Arkansas Female Poor 30.0 \n",
"127404 Nebraska Female Poor 30.0 \n",
"179326 South Carolina Female Very good 0.0 \n",
"\n",
" MentalHealthDays LastCheckupTime \\\n",
"9060 15.0 Within past year (anytime less than 12 months ... \n",
"48969 30.0 Within past year (anytime less than 12 months ... \n",
"75697 0.0 5 or more years ago \n",
"143147 1.0 Within past year (anytime less than 12 months ... \n",
"76244 0.0 Within past year (anytime less than 12 months ... \n",
"... ... ... \n",
"203695 3.0 Within past 2 years (1 year but less than 2 ye... \n",
"242632 7.0 Within past year (anytime less than 12 months ... \n",
"11614 30.0 Within past year (anytime less than 12 months ... \n",
"127404 0.0 Within past year (anytime less than 12 months ... \n",
"179326 0.0 5 or more years ago \n",
"\n",
" PhysicalActivities SleepHours RemovedTeeth HadHeartAttack \\\n",
"9060 No 8.0 None of them No \n",
"48969 Yes 4.0 None of them No \n",
"75697 No 7.0 None of them No \n",
"143147 Yes 8.0 None of them No \n",
"76244 Yes 7.0 None of them No \n",
"... ... ... ... ... \n",
"203695 No 18.0 All No \n",
"242632 No 7.0 6 or more, but not all No \n",
"11614 No 8.0 All No \n",
"127404 Yes 6.0 None of them No \n",
"179326 No 8.0 None of them No \n",
"\n",
" ... HeightInMeters WeightInKilograms BMI AlcoholDrinkers \\\n",
"9060 ... 1.85 292.57 85.10 No \n",
"48969 ... 1.93 276.24 74.13 No \n",
"75697 ... 1.91 273.52 75.37 No \n",
"143147 ... 1.88 273.06 77.29 Yes \n",
"76244 ... 1.83 272.16 81.37 No \n",
"... ... ... ... ... ... \n",
"203695 ... 1.60 30.84 12.05 No \n",
"242632 ... 1.35 30.39 16.77 No \n",
"11614 ... 1.52 29.48 12.69 No \n",
"127404 ... 1.52 29.48 12.69 No \n",
"179326 ... 1.52 28.12 12.11 No \n",
"\n",
" HIVTesting FluVaxLast12 PneumoVaxEver \\\n",
"9060 No No No \n",
"48969 No No No \n",
"75697 No No No \n",
"143147 No No No \n",
"76244 Yes No No \n",
"... ... ... ... \n",
"203695 No No No \n",
"242632 No Yes Yes \n",
"11614 No Yes Yes \n",
"127404 No No Yes \n",
"179326 No No No \n",
"\n",
" TetanusLast10Tdap HighRiskLastYear \\\n",
"9060 Yes, received Tdap No \n",
"48969 No, did not receive any tetanus shot in the pa... No \n",
"75697 Yes, received tetanus shot but not sure what type No \n",
"143147 Yes, received tetanus shot but not sure what type No \n",
"76244 Yes, received tetanus shot but not sure what type No \n",
"... ... ... \n",
"203695 No, did not receive any tetanus shot in the pa... No \n",
"242632 No, did not receive any tetanus shot in the pa... No \n",
"11614 No, did not receive any tetanus shot in the pa... No \n",
"127404 Yes, received tetanus shot but not sure what type No \n",
"179326 No, did not receive any tetanus shot in the pa... No \n",
"\n",
" CovidPos \n",
"9060 No \n",
"48969 Yes \n",
"75697 No \n",
"143147 No \n",
"76244 Yes \n",
"... ... \n",
"203695 No \n",
"242632 No \n",
"11614 Yes \n",
"127404 No \n",
"179326 No \n",
"\n",
"[246022 rows x 40 columns]"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sorted_df = df.sort_values(by='WeightInKilograms', ascending = False)\n",
"sorted_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Удаление строк/столбцов"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"df_dropped_columns = df.drop(columns=['AlcoholDrinkers', 'BMI']) # Удаление столбцов 'AlcoholDrinkers' и 'BMI'"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>State</th>\n",
" <th>Sex</th>\n",
" <th>GeneralHealth</th>\n",
" <th>PhysicalHealthDays</th>\n",
" <th>MentalHealthDays</th>\n",
" <th>LastCheckupTime</th>\n",
" <th>PhysicalActivities</th>\n",
" <th>SleepHours</th>\n",
" <th>RemovedTeeth</th>\n",
" <th>HadHeartAttack</th>\n",
" <th>...</th>\n",
" <th>RaceEthnicityCategory</th>\n",
" <th>AgeCategory</th>\n",
" <th>HeightInMeters</th>\n",
" <th>WeightInKilograms</th>\n",
" <th>HIVTesting</th>\n",
" <th>FluVaxLast12</th>\n",
" <th>PneumoVaxEver</th>\n",
" <th>TetanusLast10Tdap</th>\n",
" <th>HighRiskLastYear</th>\n",
" <th>CovidPos</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Alabama</td>\n",
" <td>Female</td>\n",
" <td>Very good</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>9.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>White only, Non-Hispanic</td>\n",
" <td>Age 65 to 69</td>\n",
" <td>1.60</td>\n",
" <td>71.67</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes, received Tdap</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Alabama</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>6.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>White only, Non-Hispanic</td>\n",
" <td>Age 70 to 74</td>\n",
" <td>1.78</td>\n",
" <td>95.25</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Alabama</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>No</td>\n",
" <td>8.0</td>\n",
" <td>6 or more, but not all</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>White only, Non-Hispanic</td>\n",
" <td>Age 75 to 79</td>\n",
" <td>1.85</td>\n",
" <td>108.86</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Alabama</td>\n",
" <td>Female</td>\n",
" <td>Fair</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>9.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>White only, Non-Hispanic</td>\n",
" <td>Age 80 or older</td>\n",
" <td>1.70</td>\n",
" <td>90.72</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Alabama</td>\n",
" <td>Female</td>\n",
" <td>Good</td>\n",
" <td>3.0</td>\n",
" <td>15.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>5.0</td>\n",
" <td>1 to 5</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>White only, Non-Hispanic</td>\n",
" <td>Age 80 or older</td>\n",
" <td>1.55</td>\n",
" <td>79.38</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246017</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past 2 years (1 year but less than 2 ye...</td>\n",
" <td>Yes</td>\n",
" <td>6.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>White only, Non-Hispanic</td>\n",
" <td>Age 60 to 64</td>\n",
" <td>1.78</td>\n",
" <td>102.06</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246018</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Female</td>\n",
" <td>Fair</td>\n",
" <td>0.0</td>\n",
" <td>7.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>Black only, Non-Hispanic</td>\n",
" <td>Age 25 to 29</td>\n",
" <td>1.93</td>\n",
" <td>90.72</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246019</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Good</td>\n",
" <td>0.0</td>\n",
" <td>15.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>1 to 5</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>Multiracial, Non-Hispanic</td>\n",
" <td>Age 65 to 69</td>\n",
" <td>1.68</td>\n",
" <td>83.91</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246020</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Female</td>\n",
" <td>Excellent</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>Black only, Non-Hispanic</td>\n",
" <td>Age 50 to 54</td>\n",
" <td>1.70</td>\n",
" <td>83.01</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246021</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>No</td>\n",
" <td>5.0</td>\n",
" <td>None of them</td>\n",
" <td>Yes</td>\n",
" <td>...</td>\n",
" <td>Black only, Non-Hispanic</td>\n",
" <td>Age 70 to 74</td>\n",
" <td>1.83</td>\n",
" <td>108.86</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>246022 rows × 38 columns</p>\n",
"</div>"
],
"text/plain": [
" State Sex GeneralHealth PhysicalHealthDays \\\n",
"0 Alabama Female Very good 4.0 \n",
"1 Alabama Male Very good 0.0 \n",
"2 Alabama Male Very good 0.0 \n",
"3 Alabama Female Fair 5.0 \n",
"4 Alabama Female Good 3.0 \n",
"... ... ... ... ... \n",
"246017 Virgin Islands Male Very good 0.0 \n",
"246018 Virgin Islands Female Fair 0.0 \n",
"246019 Virgin Islands Male Good 0.0 \n",
"246020 Virgin Islands Female Excellent 2.0 \n",
"246021 Virgin Islands Male Very good 0.0 \n",
"\n",
" MentalHealthDays LastCheckupTime \\\n",
"0 0.0 Within past year (anytime less than 12 months ... \n",
"1 0.0 Within past year (anytime less than 12 months ... \n",
"2 0.0 Within past year (anytime less than 12 months ... \n",
"3 0.0 Within past year (anytime less than 12 months ... \n",
"4 15.0 Within past year (anytime less than 12 months ... \n",
"... ... ... \n",
"246017 0.0 Within past 2 years (1 year but less than 2 ye... \n",
"246018 7.0 Within past year (anytime less than 12 months ... \n",
"246019 15.0 Within past year (anytime less than 12 months ... \n",
"246020 2.0 Within past year (anytime less than 12 months ... \n",
"246021 0.0 Within past year (anytime less than 12 months ... \n",
"\n",
" PhysicalActivities SleepHours RemovedTeeth HadHeartAttack \\\n",
"0 Yes 9.0 None of them No \n",
"1 Yes 6.0 None of them No \n",
"2 No 8.0 6 or more, but not all No \n",
"3 Yes 9.0 None of them No \n",
"4 Yes 5.0 1 to 5 No \n",
"... ... ... ... ... \n",
"246017 Yes 6.0 None of them No \n",
"246018 Yes 7.0 None of them No \n",
"246019 Yes 7.0 1 to 5 No \n",
"246020 Yes 7.0 None of them No \n",
"246021 No 5.0 None of them Yes \n",
"\n",
" ... RaceEthnicityCategory AgeCategory HeightInMeters \\\n",
"0 ... White only, Non-Hispanic Age 65 to 69 1.60 \n",
"1 ... White only, Non-Hispanic Age 70 to 74 1.78 \n",
"2 ... White only, Non-Hispanic Age 75 to 79 1.85 \n",
"3 ... White only, Non-Hispanic Age 80 or older 1.70 \n",
"4 ... White only, Non-Hispanic Age 80 or older 1.55 \n",
"... ... ... ... ... \n",
"246017 ... White only, Non-Hispanic Age 60 to 64 1.78 \n",
"246018 ... Black only, Non-Hispanic Age 25 to 29 1.93 \n",
"246019 ... Multiracial, Non-Hispanic Age 65 to 69 1.68 \n",
"246020 ... Black only, Non-Hispanic Age 50 to 54 1.70 \n",
"246021 ... Black only, Non-Hispanic Age 70 to 74 1.83 \n",
"\n",
" WeightInKilograms HIVTesting FluVaxLast12 PneumoVaxEver \\\n",
"0 71.67 No Yes Yes \n",
"1 95.25 No Yes Yes \n",
"2 108.86 No No Yes \n",
"3 90.72 No Yes Yes \n",
"4 79.38 No Yes Yes \n",
"... ... ... ... ... \n",
"246017 102.06 No No No \n",
"246018 90.72 No No No \n",
"246019 83.91 Yes Yes Yes \n",
"246020 83.01 Yes Yes No \n",
"246021 108.86 Yes Yes Yes \n",
"\n",
" TetanusLast10Tdap HighRiskLastYear \\\n",
"0 Yes, received Tdap No \n",
"1 Yes, received tetanus shot but not sure what type No \n",
"2 No, did not receive any tetanus shot in the pa... No \n",
"3 No, did not receive any tetanus shot in the pa... No \n",
"4 No, did not receive any tetanus shot in the pa... No \n",
"... ... ... \n",
"246017 Yes, received tetanus shot but not sure what type No \n",
"246018 No, did not receive any tetanus shot in the pa... No \n",
"246019 Yes, received tetanus shot but not sure what type No \n",
"246020 Yes, received tetanus shot but not sure what type No \n",
"246021 No, did not receive any tetanus shot in the pa... No \n",
"\n",
" CovidPos \n",
"0 No \n",
"1 No \n",
"2 Yes \n",
"3 Yes \n",
"4 No \n",
"... ... \n",
"246017 No \n",
"246018 Yes \n",
"246019 Yes \n",
"246020 No \n",
"246021 Yes \n",
"\n",
"[246022 rows x 38 columns]"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_dropped_columns"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>State</th>\n",
" <th>Sex</th>\n",
" <th>GeneralHealth</th>\n",
" <th>PhysicalHealthDays</th>\n",
" <th>MentalHealthDays</th>\n",
" <th>LastCheckupTime</th>\n",
" <th>PhysicalActivities</th>\n",
" <th>SleepHours</th>\n",
" <th>RemovedTeeth</th>\n",
" <th>HadHeartAttack</th>\n",
" <th>...</th>\n",
" <th>HeightInMeters</th>\n",
" <th>WeightInKilograms</th>\n",
" <th>BMI</th>\n",
" <th>AlcoholDrinkers</th>\n",
" <th>HIVTesting</th>\n",
" <th>FluVaxLast12</th>\n",
" <th>PneumoVaxEver</th>\n",
" <th>TetanusLast10Tdap</th>\n",
" <th>HighRiskLastYear</th>\n",
" <th>CovidPos</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Alabama</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>No</td>\n",
" <td>8.0</td>\n",
" <td>6 or more, but not all</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.85</td>\n",
" <td>108.86</td>\n",
" <td>31.66</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Alabama</td>\n",
" <td>Female</td>\n",
" <td>Fair</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>9.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.70</td>\n",
" <td>90.72</td>\n",
" <td>31.32</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Alabama</td>\n",
" <td>Female</td>\n",
" <td>Good</td>\n",
" <td>3.0</td>\n",
" <td>15.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>5.0</td>\n",
" <td>1 to 5</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.55</td>\n",
" <td>79.38</td>\n",
" <td>33.07</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Alabama</td>\n",
" <td>Male</td>\n",
" <td>Good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.85</td>\n",
" <td>120.20</td>\n",
" <td>34.96</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Alabama</td>\n",
" <td>Female</td>\n",
" <td>Good</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>8.0</td>\n",
" <td>6 or more, but not all</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.63</td>\n",
" <td>88.00</td>\n",
" <td>33.30</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246017</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past 2 years (1 year but less than 2 ye...</td>\n",
" <td>Yes</td>\n",
" <td>6.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.78</td>\n",
" <td>102.06</td>\n",
" <td>32.28</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246018</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Female</td>\n",
" <td>Fair</td>\n",
" <td>0.0</td>\n",
" <td>7.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.93</td>\n",
" <td>90.72</td>\n",
" <td>24.34</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246019</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Good</td>\n",
" <td>0.0</td>\n",
" <td>15.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>1 to 5</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.68</td>\n",
" <td>83.91</td>\n",
" <td>29.86</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246020</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Female</td>\n",
" <td>Excellent</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>1.70</td>\n",
" <td>83.01</td>\n",
" <td>28.66</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246021</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>No</td>\n",
" <td>5.0</td>\n",
" <td>None of them</td>\n",
" <td>Yes</td>\n",
" <td>...</td>\n",
" <td>1.83</td>\n",
" <td>108.86</td>\n",
" <td>32.55</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>246020 rows × 40 columns</p>\n",
"</div>"
],
"text/plain": [
" State Sex GeneralHealth PhysicalHealthDays \\\n",
"2 Alabama Male Very good 0.0 \n",
"3 Alabama Female Fair 5.0 \n",
"4 Alabama Female Good 3.0 \n",
"5 Alabama Male Good 0.0 \n",
"6 Alabama Female Good 3.0 \n",
"... ... ... ... ... \n",
"246017 Virgin Islands Male Very good 0.0 \n",
"246018 Virgin Islands Female Fair 0.0 \n",
"246019 Virgin Islands Male Good 0.0 \n",
"246020 Virgin Islands Female Excellent 2.0 \n",
"246021 Virgin Islands Male Very good 0.0 \n",
"\n",
" MentalHealthDays LastCheckupTime \\\n",
"2 0.0 Within past year (anytime less than 12 months ... \n",
"3 0.0 Within past year (anytime less than 12 months ... \n",
"4 15.0 Within past year (anytime less than 12 months ... \n",
"5 0.0 Within past year (anytime less than 12 months ... \n",
"6 0.0 Within past year (anytime less than 12 months ... \n",
"... ... ... \n",
"246017 0.0 Within past 2 years (1 year but less than 2 ye... \n",
"246018 7.0 Within past year (anytime less than 12 months ... \n",
"246019 15.0 Within past year (anytime less than 12 months ... \n",
"246020 2.0 Within past year (anytime less than 12 months ... \n",
"246021 0.0 Within past year (anytime less than 12 months ... \n",
"\n",
" PhysicalActivities SleepHours RemovedTeeth HadHeartAttack \\\n",
"2 No 8.0 6 or more, but not all No \n",
"3 Yes 9.0 None of them No \n",
"4 Yes 5.0 1 to 5 No \n",
"5 Yes 7.0 None of them No \n",
"6 Yes 8.0 6 or more, but not all No \n",
"... ... ... ... ... \n",
"246017 Yes 6.0 None of them No \n",
"246018 Yes 7.0 None of them No \n",
"246019 Yes 7.0 1 to 5 No \n",
"246020 Yes 7.0 None of them No \n",
"246021 No 5.0 None of them Yes \n",
"\n",
" ... HeightInMeters WeightInKilograms BMI AlcoholDrinkers \\\n",
"2 ... 1.85 108.86 31.66 Yes \n",
"3 ... 1.70 90.72 31.32 No \n",
"4 ... 1.55 79.38 33.07 No \n",
"5 ... 1.85 120.20 34.96 Yes \n",
"6 ... 1.63 88.00 33.30 No \n",
"... ... ... ... ... ... \n",
"246017 ... 1.78 102.06 32.28 Yes \n",
"246018 ... 1.93 90.72 24.34 No \n",
"246019 ... 1.68 83.91 29.86 Yes \n",
"246020 ... 1.70 83.01 28.66 No \n",
"246021 ... 1.83 108.86 32.55 No \n",
"\n",
" HIVTesting FluVaxLast12 PneumoVaxEver \\\n",
"2 No No Yes \n",
"3 No Yes Yes \n",
"4 No Yes Yes \n",
"5 Yes Yes No \n",
"6 No Yes Yes \n",
"... ... ... ... \n",
"246017 No No No \n",
"246018 No No No \n",
"246019 Yes Yes Yes \n",
"246020 Yes Yes No \n",
"246021 Yes Yes Yes \n",
"\n",
" TetanusLast10Tdap HighRiskLastYear \\\n",
"2 No, did not receive any tetanus shot in the pa... No \n",
"3 No, did not receive any tetanus shot in the pa... No \n",
"4 No, did not receive any tetanus shot in the pa... No \n",
"5 Yes, received tetanus shot but not sure what type No \n",
"6 No, did not receive any tetanus shot in the pa... No \n",
"... ... ... \n",
"246017 Yes, received tetanus shot but not sure what type No \n",
"246018 No, did not receive any tetanus shot in the pa... No \n",
"246019 Yes, received tetanus shot but not sure what type No \n",
"246020 Yes, received tetanus shot but not sure what type No \n",
"246021 No, did not receive any tetanus shot in the pa... No \n",
"\n",
" CovidPos \n",
"2 Yes \n",
"3 Yes \n",
"4 No \n",
"5 No \n",
"6 No \n",
"... ... \n",
"246017 No \n",
"246018 Yes \n",
"246019 Yes \n",
"246020 No \n",
"246021 Yes \n",
"\n",
"[246020 rows x 40 columns]"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_dropped_rows = df.drop([0, 1]) # Удаление строк с индексами 0 и 1\n",
"df_dropped_rows"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Создание новых столбцов на основе данных из существующих столбцов датафрейма¶"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"df['SleepHours-HeightInMeters'] = df['SleepHours'] - df['HeightInMeters']"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>State</th>\n",
" <th>Sex</th>\n",
" <th>GeneralHealth</th>\n",
" <th>PhysicalHealthDays</th>\n",
" <th>MentalHealthDays</th>\n",
" <th>LastCheckupTime</th>\n",
" <th>PhysicalActivities</th>\n",
" <th>SleepHours</th>\n",
" <th>RemovedTeeth</th>\n",
" <th>HadHeartAttack</th>\n",
" <th>...</th>\n",
" <th>WeightInKilograms</th>\n",
" <th>BMI</th>\n",
" <th>AlcoholDrinkers</th>\n",
" <th>HIVTesting</th>\n",
" <th>FluVaxLast12</th>\n",
" <th>PneumoVaxEver</th>\n",
" <th>TetanusLast10Tdap</th>\n",
" <th>HighRiskLastYear</th>\n",
" <th>CovidPos</th>\n",
" <th>SleepHours-HeightInMeters</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Alabama</td>\n",
" <td>Female</td>\n",
" <td>Very good</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>9.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>71.67</td>\n",
" <td>27.99</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes, received Tdap</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>7.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Alabama</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>6.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>95.25</td>\n",
" <td>30.13</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>4.22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Alabama</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>No</td>\n",
" <td>8.0</td>\n",
" <td>6 or more, but not all</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>108.86</td>\n",
" <td>31.66</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>6.15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Alabama</td>\n",
" <td>Female</td>\n",
" <td>Fair</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>9.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>90.72</td>\n",
" <td>31.32</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>7.30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Alabama</td>\n",
" <td>Female</td>\n",
" <td>Good</td>\n",
" <td>3.0</td>\n",
" <td>15.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>5.0</td>\n",
" <td>1 to 5</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>79.38</td>\n",
" <td>33.07</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>3.45</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246017</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past 2 years (1 year but less than 2 ye...</td>\n",
" <td>Yes</td>\n",
" <td>6.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>102.06</td>\n",
" <td>32.28</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>4.22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246018</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Female</td>\n",
" <td>Fair</td>\n",
" <td>0.0</td>\n",
" <td>7.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>90.72</td>\n",
" <td>24.34</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>5.07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246019</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Good</td>\n",
" <td>0.0</td>\n",
" <td>15.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>1 to 5</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>83.91</td>\n",
" <td>29.86</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>5.32</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246020</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Female</td>\n",
" <td>Excellent</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>83.01</td>\n",
" <td>28.66</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>5.30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246021</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>No</td>\n",
" <td>5.0</td>\n",
" <td>None of them</td>\n",
" <td>Yes</td>\n",
" <td>...</td>\n",
" <td>108.86</td>\n",
" <td>32.55</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>3.17</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>246022 rows × 41 columns</p>\n",
"</div>"
],
"text/plain": [
" State Sex GeneralHealth PhysicalHealthDays \\\n",
"0 Alabama Female Very good 4.0 \n",
"1 Alabama Male Very good 0.0 \n",
"2 Alabama Male Very good 0.0 \n",
"3 Alabama Female Fair 5.0 \n",
"4 Alabama Female Good 3.0 \n",
"... ... ... ... ... \n",
"246017 Virgin Islands Male Very good 0.0 \n",
"246018 Virgin Islands Female Fair 0.0 \n",
"246019 Virgin Islands Male Good 0.0 \n",
"246020 Virgin Islands Female Excellent 2.0 \n",
"246021 Virgin Islands Male Very good 0.0 \n",
"\n",
" MentalHealthDays LastCheckupTime \\\n",
"0 0.0 Within past year (anytime less than 12 months ... \n",
"1 0.0 Within past year (anytime less than 12 months ... \n",
"2 0.0 Within past year (anytime less than 12 months ... \n",
"3 0.0 Within past year (anytime less than 12 months ... \n",
"4 15.0 Within past year (anytime less than 12 months ... \n",
"... ... ... \n",
"246017 0.0 Within past 2 years (1 year but less than 2 ye... \n",
"246018 7.0 Within past year (anytime less than 12 months ... \n",
"246019 15.0 Within past year (anytime less than 12 months ... \n",
"246020 2.0 Within past year (anytime less than 12 months ... \n",
"246021 0.0 Within past year (anytime less than 12 months ... \n",
"\n",
" PhysicalActivities SleepHours RemovedTeeth HadHeartAttack \\\n",
"0 Yes 9.0 None of them No \n",
"1 Yes 6.0 None of them No \n",
"2 No 8.0 6 or more, but not all No \n",
"3 Yes 9.0 None of them No \n",
"4 Yes 5.0 1 to 5 No \n",
"... ... ... ... ... \n",
"246017 Yes 6.0 None of them No \n",
"246018 Yes 7.0 None of them No \n",
"246019 Yes 7.0 1 to 5 No \n",
"246020 Yes 7.0 None of them No \n",
"246021 No 5.0 None of them Yes \n",
"\n",
" ... WeightInKilograms BMI AlcoholDrinkers HIVTesting FluVaxLast12 \\\n",
"0 ... 71.67 27.99 No No Yes \n",
"1 ... 95.25 30.13 No No Yes \n",
"2 ... 108.86 31.66 Yes No No \n",
"3 ... 90.72 31.32 No No Yes \n",
"4 ... 79.38 33.07 No No Yes \n",
"... ... ... ... ... ... ... \n",
"246017 ... 102.06 32.28 Yes No No \n",
"246018 ... 90.72 24.34 No No No \n",
"246019 ... 83.91 29.86 Yes Yes Yes \n",
"246020 ... 83.01 28.66 No Yes Yes \n",
"246021 ... 108.86 32.55 No Yes Yes \n",
"\n",
" PneumoVaxEver TetanusLast10Tdap \\\n",
"0 Yes Yes, received Tdap \n",
"1 Yes Yes, received tetanus shot but not sure what type \n",
"2 Yes No, did not receive any tetanus shot in the pa... \n",
"3 Yes No, did not receive any tetanus shot in the pa... \n",
"4 Yes No, did not receive any tetanus shot in the pa... \n",
"... ... ... \n",
"246017 No Yes, received tetanus shot but not sure what type \n",
"246018 No No, did not receive any tetanus shot in the pa... \n",
"246019 Yes Yes, received tetanus shot but not sure what type \n",
"246020 No Yes, received tetanus shot but not sure what type \n",
"246021 Yes No, did not receive any tetanus shot in the pa... \n",
"\n",
" HighRiskLastYear CovidPos SleepHours-HeightInMeters \n",
"0 No No 7.40 \n",
"1 No No 4.22 \n",
"2 No Yes 6.15 \n",
"3 No Yes 7.30 \n",
"4 No No 3.45 \n",
"... ... ... ... \n",
"246017 No No 4.22 \n",
"246018 No Yes 5.07 \n",
"246019 No Yes 5.32 \n",
"246020 No No 5.30 \n",
"246021 No Yes 3.17 \n",
"\n",
"[246022 rows x 41 columns]"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Удаление строк с пустыми значениями"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"State 0\n",
"Sex 0\n",
"GeneralHealth 0\n",
"PhysicalHealthDays 0\n",
"MentalHealthDays 0\n",
"LastCheckupTime 0\n",
"PhysicalActivities 0\n",
"SleepHours 0\n",
"RemovedTeeth 0\n",
"HadHeartAttack 0\n",
"HadAngina 0\n",
"HadStroke 0\n",
"HadAsthma 0\n",
"HadSkinCancer 0\n",
"HadCOPD 0\n",
"HadDepressiveDisorder 0\n",
"HadKidneyDisease 0\n",
"HadArthritis 0\n",
"HadDiabetes 0\n",
"DeafOrHardOfHearing 0\n",
"BlindOrVisionDifficulty 0\n",
"DifficultyConcentrating 0\n",
"DifficultyWalking 0\n",
"DifficultyDressingBathing 0\n",
"DifficultyErrands 0\n",
"SmokerStatus 0\n",
"ECigaretteUsage 0\n",
"ChestScan 0\n",
"RaceEthnicityCategory 0\n",
"AgeCategory 0\n",
"HeightInMeters 0\n",
"WeightInKilograms 0\n",
"BMI 0\n",
"AlcoholDrinkers 0\n",
"HIVTesting 0\n",
"FluVaxLast12 0\n",
"PneumoVaxEver 0\n",
"TetanusLast10Tdap 0\n",
"HighRiskLastYear 0\n",
"CovidPos 0\n",
"SleepHours-HeightInMeters 0\n",
"dtype: int64\n"
]
}
],
"source": [
"print(df.isna().sum())"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>State</th>\n",
" <th>Sex</th>\n",
" <th>GeneralHealth</th>\n",
" <th>PhysicalHealthDays</th>\n",
" <th>MentalHealthDays</th>\n",
" <th>LastCheckupTime</th>\n",
" <th>PhysicalActivities</th>\n",
" <th>SleepHours</th>\n",
" <th>RemovedTeeth</th>\n",
" <th>HadHeartAttack</th>\n",
" <th>...</th>\n",
" <th>WeightInKilograms</th>\n",
" <th>BMI</th>\n",
" <th>AlcoholDrinkers</th>\n",
" <th>HIVTesting</th>\n",
" <th>FluVaxLast12</th>\n",
" <th>PneumoVaxEver</th>\n",
" <th>TetanusLast10Tdap</th>\n",
" <th>HighRiskLastYear</th>\n",
" <th>CovidPos</th>\n",
" <th>SleepHours-HeightInMeters</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Alabama</td>\n",
" <td>Female</td>\n",
" <td>Very good</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>9.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>71.67</td>\n",
" <td>27.99</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes, received Tdap</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>7.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Alabama</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>6.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>95.25</td>\n",
" <td>30.13</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>4.22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Alabama</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>No</td>\n",
" <td>8.0</td>\n",
" <td>6 or more, but not all</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>108.86</td>\n",
" <td>31.66</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>6.15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Alabama</td>\n",
" <td>Female</td>\n",
" <td>Fair</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>9.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>90.72</td>\n",
" <td>31.32</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>7.30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Alabama</td>\n",
" <td>Female</td>\n",
" <td>Good</td>\n",
" <td>3.0</td>\n",
" <td>15.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>5.0</td>\n",
" <td>1 to 5</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>79.38</td>\n",
" <td>33.07</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>3.45</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246017</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past 2 years (1 year but less than 2 ye...</td>\n",
" <td>Yes</td>\n",
" <td>6.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>102.06</td>\n",
" <td>32.28</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>4.22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246018</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Female</td>\n",
" <td>Fair</td>\n",
" <td>0.0</td>\n",
" <td>7.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>90.72</td>\n",
" <td>24.34</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>5.07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246019</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Good</td>\n",
" <td>0.0</td>\n",
" <td>15.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>1 to 5</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>83.91</td>\n",
" <td>29.86</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>5.32</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246020</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Female</td>\n",
" <td>Excellent</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>Yes</td>\n",
" <td>7.0</td>\n",
" <td>None of them</td>\n",
" <td>No</td>\n",
" <td>...</td>\n",
" <td>83.01</td>\n",
" <td>28.66</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes, received tetanus shot but not sure what type</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>5.30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246021</th>\n",
" <td>Virgin Islands</td>\n",
" <td>Male</td>\n",
" <td>Very good</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Within past year (anytime less than 12 months ...</td>\n",
" <td>No</td>\n",
" <td>5.0</td>\n",
" <td>None of them</td>\n",
" <td>Yes</td>\n",
" <td>...</td>\n",
" <td>108.86</td>\n",
" <td>32.55</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No, did not receive any tetanus shot in the pa...</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>3.17</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>246022 rows × 41 columns</p>\n",
"</div>"
],
"text/plain": [
" State Sex GeneralHealth PhysicalHealthDays \\\n",
"0 Alabama Female Very good 4.0 \n",
"1 Alabama Male Very good 0.0 \n",
"2 Alabama Male Very good 0.0 \n",
"3 Alabama Female Fair 5.0 \n",
"4 Alabama Female Good 3.0 \n",
"... ... ... ... ... \n",
"246017 Virgin Islands Male Very good 0.0 \n",
"246018 Virgin Islands Female Fair 0.0 \n",
"246019 Virgin Islands Male Good 0.0 \n",
"246020 Virgin Islands Female Excellent 2.0 \n",
"246021 Virgin Islands Male Very good 0.0 \n",
"\n",
" MentalHealthDays LastCheckupTime \\\n",
"0 0.0 Within past year (anytime less than 12 months ... \n",
"1 0.0 Within past year (anytime less than 12 months ... \n",
"2 0.0 Within past year (anytime less than 12 months ... \n",
"3 0.0 Within past year (anytime less than 12 months ... \n",
"4 15.0 Within past year (anytime less than 12 months ... \n",
"... ... ... \n",
"246017 0.0 Within past 2 years (1 year but less than 2 ye... \n",
"246018 7.0 Within past year (anytime less than 12 months ... \n",
"246019 15.0 Within past year (anytime less than 12 months ... \n",
"246020 2.0 Within past year (anytime less than 12 months ... \n",
"246021 0.0 Within past year (anytime less than 12 months ... \n",
"\n",
" PhysicalActivities SleepHours RemovedTeeth HadHeartAttack \\\n",
"0 Yes 9.0 None of them No \n",
"1 Yes 6.0 None of them No \n",
"2 No 8.0 6 or more, but not all No \n",
"3 Yes 9.0 None of them No \n",
"4 Yes 5.0 1 to 5 No \n",
"... ... ... ... ... \n",
"246017 Yes 6.0 None of them No \n",
"246018 Yes 7.0 None of them No \n",
"246019 Yes 7.0 1 to 5 No \n",
"246020 Yes 7.0 None of them No \n",
"246021 No 5.0 None of them Yes \n",
"\n",
" ... WeightInKilograms BMI AlcoholDrinkers HIVTesting FluVaxLast12 \\\n",
"0 ... 71.67 27.99 No No Yes \n",
"1 ... 95.25 30.13 No No Yes \n",
"2 ... 108.86 31.66 Yes No No \n",
"3 ... 90.72 31.32 No No Yes \n",
"4 ... 79.38 33.07 No No Yes \n",
"... ... ... ... ... ... ... \n",
"246017 ... 102.06 32.28 Yes No No \n",
"246018 ... 90.72 24.34 No No No \n",
"246019 ... 83.91 29.86 Yes Yes Yes \n",
"246020 ... 83.01 28.66 No Yes Yes \n",
"246021 ... 108.86 32.55 No Yes Yes \n",
"\n",
" PneumoVaxEver TetanusLast10Tdap \\\n",
"0 Yes Yes, received Tdap \n",
"1 Yes Yes, received tetanus shot but not sure what type \n",
"2 Yes No, did not receive any tetanus shot in the pa... \n",
"3 Yes No, did not receive any tetanus shot in the pa... \n",
"4 Yes No, did not receive any tetanus shot in the pa... \n",
"... ... ... \n",
"246017 No Yes, received tetanus shot but not sure what type \n",
"246018 No No, did not receive any tetanus shot in the pa... \n",
"246019 Yes Yes, received tetanus shot but not sure what type \n",
"246020 No Yes, received tetanus shot but not sure what type \n",
"246021 Yes No, did not receive any tetanus shot in the pa... \n",
"\n",
" HighRiskLastYear CovidPos SleepHours-HeightInMeters \n",
"0 No No 7.40 \n",
"1 No No 4.22 \n",
"2 No Yes 6.15 \n",
"3 No Yes 7.30 \n",
"4 No No 3.45 \n",
"... ... ... ... \n",
"246017 No No 4.22 \n",
"246018 No Yes 5.07 \n",
"246019 No Yes 5.32 \n",
"246020 No No 5.30 \n",
"246021 No Yes 3.17 \n",
"\n",
"[246022 rows x 41 columns]"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.dropna() #Тк.пустых строк нет, мы ничего не удалили"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"#df.fillna(df.mean(), inplace=True)\n",
"#df.fillna(df.median(), inplace=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Мы обрабатываем пустые значения для каждого столбца отдельно\n",
"\n",
"Мы можем заполнить пропуски средним или медианой, если это числовой столбец\n",
"\n",
"Мы заполняем средним, если в колонке нет выбросов\n",
"\n",
"Если столбец категориальный, то мы можем заполнить пропуски модой (самым часто встречающимся значением)\n",
"\n",
"Если пропусков мало, то их можно просто удалить."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"2. Возможности визуализации"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 1000x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#Линейная диаграмма\n",
"plt.figure(figsize=(10, 5))\n",
"df['WeightInKilograms'].plot(title='Line Plot (WeightInKilograms)')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 800x500 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#Гистограмма\n",
"plt.figure(figsize=(8, 5))\n",
"df.plot.hist(column=[\"SleepHours\"], bins=80)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 800x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.figure(figsize=(8, 5))\n",
"df['AgeCategory'].value_counts().plot(kind='bar', title='Bar Plot (AgeCategory)')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAqQAAAHDCAYAAADyXwjWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAt9klEQVR4nO3deXhU5d3/8c8kIYsJmSxmhQB5gBYVFKsWAVGQXAYECyVqqbhAsFoBFRQoUFGxShRFBIvwyEMBFZdHBAStEUWCUcIilVbUB5GdbICQGRKzMZnfH/wYGbYQPcmdmbxf1zWXM/f5zjnf+Mf48T73OcfmdrvdAgAAAAwJMN0AAAAAmjYCKQAAAIwikAIAAMAoAikAAACMIpACAADAKAIpAAAAjCKQAgAAwCgCKQAAAIwikAIAAMAoAikAAACMIpACaPSKi4t1zz33qFWrVmrWrJlsNpvn1aZNG9PtAQB+oSDTDQDAuVRUVOjaa6/Vjh07NGzYMHXp0kWhoaGSpOnTp+vIkSOGOwQA/FIEUgCN2vLly/Xdd99p4sSJmjp1qte2N998k0AKAH6AU/YAGrUdO3ZIkq655przqrfZbBo1apTXWP/+/U87tb9kyRLZbDbl5OR4xhYuXCibzabdu3d7xmpqanTppZfKZrNp4cKFnvGhQ4eets/XXntNAQEBevrpp2vts6SkRGPGjFGbNm0UEhKili1b6s4779ShQ4fO+p3rrrtOl1122Rm3/frXv1Z6erpX3zNnzlSnTp0UGhqquLg49enTR1988YWnZsGCBbr++usVHx+vkJAQXXzxxZozZ85p+27Tpo1sNptGjx592rb09HTZbDb179/fM5aTk3Pav1tJ6tevn2w2mx5//PGz/o0AmiYCKYBGLSYmRpK8QmJDevXVV/XVV1/VWrdq1SplZmZq1KhRmjBhwjlrS0tL1aNHD7344ou64YYbNHPmTP35z3/W//3f/2n//v1n/d4dd9yh//znP9q6davX+KZNm/Tdd9/p9ttv94wNHz5co0ePVkpKip555hlNmDBBoaGhWr9+vadmzpw5at26tSZNmqTp06crJSVFI0aM0OzZs087dmhoqBYvXqzq6mrP2P79+7V69WrPEopz+fTTT/XPf/6z1joATROn7AE0agMGDNCECRP0xBNPKCEhwWsN6cnhqD5UVlbq0UcfVd++ffXBBx+ctW7z5s3KyMjQwIED9cILL9S632effVZbt27V0qVL9fvf/94z/sgjj8jtdp/1e7fccovuv/9+vfbaa16zsK+99prCw8M1aNAgSdKaNWu0cOFCPfDAA5o5c6an7uGHH/ba/9q1axUWFub5PGrUKPXp00fPP/+8Ro4c6XXsHj166Msvv9SKFSuUkZEh6fiMcpcuXZSfn1/r3zx+/Pha/z0CaLqYIQXQqCUnJ2vdunXq1KmTbr75ZqWkpCguLk5xcXFatWpVvR579uzZ+uGHH/TYY4+dtWbnzp3q16+fOnfurFdffVUBAbX/rL7zzju67LLLvMLoCTab7azfs9vtGjBggN544w1PsHS5XHrrrbc0cOBAhYeHe/Zvs9nO2PfJ+z85jDocDh06dEjXXXeddu7cKYfD4fW94OBgDRkyRAsWLPCMLVy4UMOGDav17126dKk2bdp0XksZADRNBFIAjd4ll1yiDz74QG3btlXLli310Ucf6aOPPlKXLl3q7ZgOh0NTp07VQw89pISEhDPWlJWVKT09XcXFxTp8+PA5w+TJduzYoY4dO/6svu68807t3btXubm5kqSPP/5YxcXFuuOOO7z2n5yc7FnucDaff/650tLSFB4erqioKMXFxWnSpEmSdFoglaRhw4YpOztbhYWFWrt2rQoLC3Xrrbee8xgul0uTJk3SkCFDdOmll9b1zwXQRBBIAfiEuXPnaseOHcrKylJaWprS0tJ04YUX1tvxnnnmGQUEBGjcuHFnrTl06JCqq6u1cuVKbdu2TV
lZWfXWzwnp6elKSEjQa6+9Jun46frExESlpaXVaT87duxQ7969dejQIT3//PN6//339dFHH2nMmDGSjl8UdarLLrtMl112mV555RUtWLBAGRkZioyMPOdx5s+fr927d+tvf/tbnfoD0LQQSAE0eg6HQ1OmTNFVV12lIUOG1PvxCgoKNHPmTD3yyCNq3rz5WesuuOACZWdnq3///hozZoymTp2qb7/9ttb9t23b9rQLk85XYGCgbrvtNi1ZskRHjhzR8uXL9cc//lGBgYFe+y8oKNDhw4fPup+VK1eqsrJSK1as0L333qsbb7xRaWlpXqfxzyQzM1Pz5s3TkiVLaj1d/+OPP2rKlCkaMWKEWrduXbc/FECTQiAF0OhNnTpVhw4d0owZM877tPgvMWXKFCUkJOjPf/7zOevi4uLUoUMHSdITTzyhli1b6k9/+tM5L0ySpIyMDP373//WsmXLTttW23el41fbHzlyRPfee69KS0u9rq4/sX+3260pU6acdf8nAuzJx3M4HF5rRM/ktttuU35+vuLj49WzZ89z1s6cOVNlZWX661//WuvfBKBp4yp7AI3anj17NGvWLN16663q3r37eX1n7969ys7O9nw+ePCgysvLvca2bNkiSdq4caPat2+vFi1aeLatWrVKixcvVnBw8Hn3GRYWppdffllpaWmaM2eORowYcdbacePGacmSJbrllluUmZmpK664QocPH9aKFSs0d+7cs95r9ITLL79cHTt21Ntvv62LLrpIv/nNb7y29+rVS3fccYdmzZql7du3q0+fPqqpqVFubq569eqlUaNG6YYbblBwcLBuuukmT7CdN2+e4uPjVVhYeNZjR0dHq7CwUIGBgbX+z8GqVav01FNPKTY29px1AEAgBdCoTZo0SW63W88888x5f2flypVauXLlaeN9+/Y9bewvf/mL4uPjNXToUM9Y586d9cc//rHOvfbu3VvDhg3TxIkTNWDAAK+Qe7KIiAjl5ubqscce07Jly7Ro0SLFx8erd+/eatmy5Xkd684779T48eO9LmY62YIFC3TppZdq/vz5GjdunOx2u6688kp169ZN0vEb6S9ZskSPPPKIxo4dq8TERN13332Ki4tTZmbmOY8dFRV1Xj0mJSWd8Wb6AHAqm/t8zg8BgJ/q2bOnhg4d6hVIfcHMmTM1ZswY7d69W61atTLdDgD8IqwhBQAf43a7NX/+fF133XWEUQB+gVP2AJq03/72t2c9td7YlJWVacWKFVqzZo2++uorvfvuu6ZbAgBLcMoeAHzE7t27lZqaqqioKI0YMUJPPfWU6ZYAwBIEUgAAABjFGlIAAAAYRSAFAACAUT55UVNNTY0KCgrUvHnzBnlqCwAAAOrG7Xbr6NGjSk5OVkDAuedAfTKQFhQUKCUlxXQbAAAAqMW+fftqfeiHTwbS5s2bSzr+B0ZGRhruBgAAAKdyOp1KSUnx5LZz8clAeuI0fWRkJIEUAACgETuf5ZVc1AQAAACj6hxIP/30U910001KTk6WzWbT8uXLvba73W49+uijSkpKUlhYmNLS0rR9+3avmsOHD2vIkCGKjIxUVFSUhg8frtLS0l/0hwAAAMA31TmQlpWV6bLLLtPs2bPPuH3atGmaNWuW5s6dqw0bNig8PFzp6emqqKjw1AwZMkRff/21PvroI7333nv69NNPdc899/z8vwIAAAA+6xc9qclms2nZsmUaOHCgpOOzo8nJyXr44Yc1duxYSZLD4VBCQoIWLlyowYMH69tvv9XFF1+sTZs26corr5QkZWdn68Ybb9T+/fuVnJxc63GdTqfsdrscDgdrSAEAABqhuuQ1S9eQ7tq1S0VFRUpLS/OM2e12denSRXl5eZKkvLw8RUVFecKoJKWlpSkgIEAbNmw4434rKyvldDq9XgAAAPAPlgbSoqIiSVJCQoLXeEJCgmdbUVGR4uPjvbYHBQUpJibGU3OqrKws2e12z4t7kAIAAPgPn7jKfuLEiX
I4HJ7Xvn37TLcEAAAAi1gaSBMTEyVJxcXFXuPFxcWebYmJiTpw4IDX9mPHjunw4cOemlOFhIR47jnKvUcBAAD8i6WBNDU1VYmJiVq9erVnzOl0asOGDerataskqWvXriopKdHmzZs9NZ988olqamrUpUsXK9sBAACAD6jzk5pKS0v1/fffez7v2rVLW7ZsUUxMjFq1aqXRo0frySefVPv27ZWamqrJkycrOTnZcyX+RRddpD59+uhPf/qT5s6dq+rqao0aNUqDBw8+ryvsAQAA4F/qHEi/+OIL9erVy/P5oYcekiTdddddWrhwocaPH6+ysjLdc889Kikp0TXXXKPs7GyFhoZ6vrN48WKNGjVKvXv3VkBAgDIyMjRr1iwL/hwA8D3l5eUaN26ctm/frvbt2+vZZ59VWFiY6bYAoMH8ovuQmsJ9SAH4i4EDB+rdd989bXzAgAGnPQkPAHyJsfuQAgDO39nCqCS9++67nqVOAODvCKQAYEB5ebknjPbr1095eXk6evSo8vLy1K9fP0nHQ2l5ebnJNgGgQRBIAcCAhx9+WJLUrl07rVixQldffbUiIiJ09dVXa8WKFWrbtq1XHQD4MwIpABiwadMmScefRBcQ4P1THBAQoKeeesqrDgD8GYEUAAyIjo6WJOXl5cnlciknJ0dvvPGGcnJy5HK5lJeX51UHAP6Mq+wBwIAPP/xQffr0UWBgoFq2bKk9e/Z4trVu3Vr79++Xy+VSdna20tPTDXYKAD9PXfIagRQADHC5XLrgggtUVVUlSUpLS1OvXr20Zs0affzxx5KOPza5rKxMgYGBJlsFgJ+lLnmtzjfGBwBYw2636+DBg5Kkjz/+2BNET94OAE0Ba0gBwIDc3FxPGD31qUwnPh84cEC5ubkN3hsANDQCKQAYkJ+fL0nq27evHA6H1qxZo9dff11r1qyRw+FQ3759veoAwJ8RSAHAgBOzo4MGDTrjbZ9OPKXpRB0A+DPWkAKAAXFxcZKkl156SU8++eRpV9nHxMR41QGAP2OGFAAMaNGihSTpyy+/VEVFhV5++WUVFBTo5ZdfVkVFhb788kuvOgDwZ9z2CQAMqKqqUnh4uMLDwxUdHa3du3d7tqWmpurw4cMqKytTWVmZgoODzTUKAD8Tt30CgEZu3bp1OnbsmJxOp3r06KGxY8cqLCxM5eXlys7O1vvvvy+3261169apZ8+eptsFgHpFIAUAAwoLCyVJr776qh555BG99957nm2pqal69dVXdfvtt3vqAMCfEUgBwICkpCRJUtu2bfX9998rNzdXhYWFSkpKUo8ePbRx40avOgDwZ6whBQADXC6X2rVrp06dOmn58uVet36qqanRwIEDtXXrVm3fvp1HhwLwSawhBYBGLjAwUNOnT9fNN9+sAQMGqE+fPqetIV2yZAlhFECTwAwpABg0fvx4zZgxQ8eOHfOMBQUFacyYMZo2bZrBzgDgl2GGFAB8wNKlS/Xcc8+pX79+6tu3r2eG9IMPPtBzzz2nq6++WoMGDTLdJgDUO2ZIAcCAk9eQvvPOO/r88889FzV1795dGRkZrCEF4NPqktd4UhMAGJCbm6vdu3erW7du+tWvfqVevXrptttuU69evfSrX/1KXbt21a5du5Sbm2u6VQCod5yyBwADTtxfdNKkSQoNDfXaVlxcrL/+9a9edQDgz5ghBQAD4uPjJUlut1unrpw6eexEHQD4MwIpABhQU1Pjed+7d2/l5eXp6NGjysvLU+/evc9YBwD+ikAKAAbk5OR43ttsNs+sqNvtls1mO2MdAPgrAikAGLB3715J0vDhw7V161Z169ZNkZGR6tatm77++msNGzbMqw4A/BkXNQGAAa1atZIkffPNN/ruu+9Ou+3Ttdde61UHAP6MGVIAMOD666+XJOXl5WnQoEEKCQlR//79FRISokGDBmn9+vVedQDgz7gxPgAY4HK5lJSUpIMHD3qe0HTCBRdcoB9//FHx8fEqKCjgxvgAfB
I3xgeARi4wMFBz586VpDPe9kmS5syZQxgF0CQQSAHAkEGDBumdd95RQkKC13hCQoLeeecdnmMPoMnglD0AGOZyuZSbm+u5qKlHjx7MjALweZyyBwAAgM8gkAKAQUuXLlXbtm3Vq1cv3XbbberVq5fatm2rpUuXmm4NABoMgRQADFm6dKkyMjJOu/n93r17lZGRQSgF0GQQSAHAAJfLpczMTElSXFyc5s2bp8LCQs2bN09xcXGSpMzMTLlcLpNtAkCDIJACgAGffPKJHA6HoqOjlZ+fr7vvvluJiYm6++67lZ+fr+joaDkcDn3yySemWwWAekcgBQADXn31VUnSE088IZvNppycHL3xxhvKycmRzWbT448/7lUHAP6MZ9kDgAFHjx6VJBUUFKhdu3bavXu3Z1ubNm00ePBgrzoA8GfMkAKAAT169JAkZWVlqWPHjsrLy9PRo0eVl5enjh076umnn/aqAwB/RiAFAAPuu+8+z3u3233a60x1AOCvOGUPAAZs2LDB8/6DDz7Q+++/7/l88lOaNmzYoJ49ezZkawDQ4JghBQADCgsLJUkPPvigbDbbadsffPBBrzoA8GcEUgAwICkpSZI0ePBg/fjjj5oxY4ZGjRqlGTNm6Mcff9Qf/vAHrzoA8GcEUgAwoEePHmrTpo2mTp0qm82mzp07q1u3burcubNsNpuysrKUmprKRU0AmgTWkAKAAYGBgZo+fbpuvvlmRUZGqqKiwrMtNDRUlZWVWrJkidd6UgDwV8yQAoBBbrdblZWVXmOVlZVeV9oDgL+zuX3wV8/pdMput8vhcCgyMtJ0OwBQZy6XS0lJSTp48KBuvPFGhYWF6ciRI4qOjlZ5ebn++c9/Kj4+XgUFBcySAvBJdclrnLIHAANycnJ08OBBtWjRQh9++KFcLpdnW2BgoFq0aKH8/Hzl5OSod+/eBjsFgPrHKXsAMCAnJ0eSlJ+frwsvvFDz5s1TYWGh5s2bpwsvvFD5+fledQDgz5ghBQADjh07JkmKjo7W/v37FRR0/Of47rvv1tChQxUXF6eSkhJPHQD4MwIpABhQUlIiSYqJiZHb7VZOTo4KCwuVlJSk7t27KzY2ViUlJZ46APBnBFIAMCAg4PiKqR07dpzxtk8nPp+oAwB/xi8dABjQvn17z/uqqiqvbdXV1WesAwB/RSAFAAPuvfdeSVJQUJBSUlK8tqWkpHjWlJ6oAwB/RiAFAAM2bNgg6fjFTXv37vXatmfPHs/FTCfqAMCfEUgBwIDCwkLP+1OfT3Ly55PrAMBfcVETABgQHx/v9blly5aei5n2799/1joA8EcEUgAwoLy83OvzySH0XHUA4I84ZQ8ABjz++OOW1gGALyOQAoABP/zwg6V1AODLOGUPAAZccMEFnvfx8fG688479V//9V/auXOnXnnlFR04cOC0OgDwV8yQAoABbdq08bzv3LmzqqurtWXLFlVXV6tz585nrAMAf8UMKQAYUFRU5Hm/atUqrVq1qtY6APBXzJACgAEtW7a0tA4AfBmBFAAM6N69u+d9TEyMkpOTFRUVpeTkZMXExJyxDgD8FafsAcCAgICf5gMOHz7seV9SUnLWOgDwV/zSAYABe/bssbQOAHyZ5YHU5XJp8uTJSk1NVVhYmNq2bau//e1vXs9mdrvdevTRR5WUlKSwsDClpaVp+/btVrcCAI1Wq1atJJ19BvTE+Ik6APBnlp+yf+aZZzRnzhwtWrRIl1xyib744gsNGzZMdrtdDzzwgCRp2rRpmjVrlhYtWqTU1FRNnjxZ6enp+uabbxQaGmp1SwDQaNXU1Cg2NlZRUVEqLy9XWFiYSkpKuCE+gCbF8kC6bt06DRgwQP369ZN0/B56b7zxhjZu3Cjp+OzoCy+8oEceeUQDBgyQJL3yyitKSEjQ8uXLNXjwYKtbAoBGZ+fOnZ73P/zww1kD6Ml1AOCvLD9l361bN61evVrfffedJOnf//63PvvsM/Xt21eStGvXLhUVFS
ktLc3zHbvdri5duigvL8/qdgCgUTrf+4tyH1IATYHlM6QTJkyQ0+lUhw4dFBgYKJfLpaeeekpDhgyR9NOPa0JCgtf3EhISzvrDW1lZqcrKSs9np9NpddsA0KDi4uI870NDQ1VRUXHGzyfXAYC/snyG9H//93+1ePFivf766/rXv/6lRYsW6bnnntOiRYt+9j6zsrJkt9s9r5SUFAs7BoCG9/XXX3venxxGT/18ch0A+CvLA+m4ceM0YcIEDR48WJ06ddIdd9yhMWPGKCsrS5KUmJgoSSouLvb6XnFxsWfbqSZOnCiHw+F57du3z+q2AaBBlZeXW1oHAL7M8kD6448/nnYbk8DAQNXU1EiSUlNTlZiYqNWrV3u2O51ObdiwQV27dj3jPkNCQhQZGen1AgAAgH+wfA3pTTfdpKeeekqtWrXSJZdcoi+//FLPP/+8MjMzJUk2m02jR4/Wk08+qfbt23tu+5ScnKyBAwda3Q4ANEp2u93SOgDwZZYH0hdffFGTJ0/WiBEjdODAASUnJ+vee+/Vo48+6qkZP368ysrKdM8996ikpETXXHONsrOzuQcpgCajqqrK0joA8GU298mPUPIRTqdTdrtdDoeD0/cAfFLv3r31ySef1Fp3/fXXey1xAgBfUZe8xrPsAcCA2NhYS+sAwJcRSAHAgPN9NCiPEAXQFBBIAcCAAwcOWFoHAL6MQAoABjRv3tzz3mazeW07+fPJdQDgrwikAGDApZde6nl/6rWlJ38+uQ4A/BWBFAAMONuDQH5uHQD4MgIpABhw5MgRS+sAwJcRSAHAgOjoaEvrAMCXEUgBwIB169Z53gcHB6t9+/aeV3Bw8BnrAMBfWf7oUABA7T777DPP+6qqKm3fvr3WOgDwV8yQAoAB3IcUAH5CIAUAA+Lj4y2tAwBfRiAFAANSUlIsrQMAX0YgBQADvv32W0vrAMCXEUgBwIDDhw9bWgcAvoxACgAGBAWd301OzrcOAHwZgRQADIiIiLC0DgB8GYEUAAAARhFIAcCA0tJSS+sAwJcRSAHAAJfLZWkdAPgyAikAGBAYGGhpHQD4MgIpABhQUlJiaR0A+DICKQAAAIwikAIAAMAoAikAGBAdHW1pHQD4MgIpABhAIAWAnxBIAcCAgIDz+/k93zoA8GX80gGAATabzdI6APBlBFIAMKB9+/aW1gGALyOQAoABhw8ftrQOAHwZgRQADPjhhx8srQMAX0YgBQADmjVrZmkdAPgyAikAGNC6dWtL6wDAlxFIAcAA1pACwE8IpABgwO7duy2tAwBfRiAFAAO4DykA/IRACgAGOBwOS+sAwJcRSAHAgPLyckvrAMCXEUgBAABgFIEUAAxgDSkA/IRACgAGhIaGWloHAL6MQAoABgQHB1taBwC+jEAKAAZwyh4AfkIgBQADSkpKLK0DAF9GIAUAAIBRBFIAAAAYRSAFAAMCAwMtrQMAX0YgBQADmjVrZmkdAPgyAikAGFBVVWVpHQD4MgIpABjgdrstrQMAX0YgBQADWEMKAD8hkAKAAYmJiZbWAYAvI5ACgAE//PCDpXUA4MsIpABgQHl5uaV1AODLCKQAAAAwikAKAAAAowikAAAAMIpACgAAAKMIpAAAADCKQAoAAACjCKQAAAAwikAKAAAAowikAAAAMIpACgAAAKMIpAAAADCKQAoAAACjCKQAAAAwikAKAAAAowikAAAAMIpACgAAAKMIpAAAADCqXgJpfn6+br/9dsXGxiosLEydOnXSF1984dnudrv16KOPKikpSWFhYUpLS9P27dvroxUAAAA0cpYH0iNHjqh79+5q1qyZPvjgA33zzTeaPn26oqOjPTXTpk3TrFmzNHfuXG3YsEHh4eFKT09XRUWF1e0AAACgkbO53W63lTucMGGCPv/8c+Xm5p5xu9vtVnJysh5++GGNHTtWkuRwOJSQkKCFCxdq8ODBtR7D6XTKbrfL4XAoMjLSyvYBoEHYbLbzrrX4ZxoAGkRd8p
rlM6QrVqzQlVdeqVtuuUXx8fG6/PLLNW/ePM/2Xbt2qaioSGlpaZ4xu92uLl26KC8vz+p2AAAA0MhZHkh37typOXPmqH379vrwww9133336YEHHtCiRYskSUVFRZKkhIQEr+8lJCR4tp2qsrJSTqfT6wUAAAD/EGT1DmtqanTllVdq6tSpkqTLL79cW7du1dy5c3XXXXf9rH1mZWVpypQpVrYJAACARsLyGdKkpCRdfPHFXmMXXXSR9u7dK0lKTEyUJBUXF3vVFBcXe7adauLEiXI4HJ7Xvn37rG4bAAAAhlgeSLt3765t27Z5jX333Xdq3bq1JCk1NVWJiYlavXq1Z7vT6dSGDRvUtWvXM+4zJCREkZGRXi8AAAD4B8tP2Y8ZM0bdunXT1KlTdeutt2rjxo16+eWX9fLLL0s6fmXp6NGj9eSTT6p9+/ZKTU3V5MmTlZycrIEDB1rdDgAAABo5ywPpVVddpWXLlmnixIl64oknlJqaqhdeeEFDhgzx1IwfP15lZWW65557VFJSomuuuUbZ2dkKDQ21uh0AAAA0cpbfh7QhcB9SAL6O+5AC8HdG70MKAAAA1AWBFAAAAEYRSAEAAGAUgRQAAABGEUgBAABgFIEUAAAARhFIAQAAYBSBFAAAAEYRSAEAAGAUgRQAAABGEUgBAABgFIEUAAAARhFIAQAAYBSBFAAAAEYRSAEAAGAUgRQAAABGEUgBAABgFIEUAAAARhFIAQAAYBSBFAAAAEYRSAEAAGAUgRQAAABGEUgBAABgFIEUAAAARhFIAQAAYBSBFAAAAEYRSAEAAGAUgRQAAABGEUgBAABgFIEUAAAARhFIAQAAYBSBFAAAAEYRSAEAAGAUgRQAAABGEUgBAABgFIEUAAAARhFIAQAAYBSBFAAAAEYRSAEAAGAUgRQAAABGEUgBAABgFIEUAAAARhFIAQAAYBSBFAAAAEYRSAEAAGAUgRQAAABGEUgBAABgFIEUAAAARhFIAQAAYBSBFAAAAEYRSAEAAGAUgRQAAABGEUgBAABgFIEUAAAARhFIAQAAYBSBFAAAAEYRSAEAAGAUgRQAAABGEUgBAABgFIEUAAAARhFIAQAAYFSQ6QYAwFeUV7m042Bpgx93a77Dkv20jYtQWHCgJfsCACsRSAHgPO04WKr+L37W4Me16pjv3X+NOrawW7IvALCSze12u003UVdOp1N2u10Oh0ORkZGm2wHQRFg5Q9qpZdR51361v8SSYzJDCqAh1SWvMUMKAOcpLDjQshnGFi1aKD8//7zqmNUE4O+4qAkADNi/f7+ldQDgywikAGBIbSumfHBFFQD8LARSADDI7XarRYsWXmMtWrQgjAJoUgikAGDY/v379dX+ErX+y3v6an8Jp+kBNDkEUgAAABhFIAUAAIBR9R5In376adlsNo0ePdozVlFRoZEjRyo2NlYRERHKyMhQcXFxfbcCAACARqheA+mmTZv03//937r00ku9xseMGaOVK1fq7bff1tq1a1VQUKBBgwbVZysAAABopOotkJaWlmrIkCGaN2+eoqOjPeMOh0Pz58/X888/r+uvv15XXHGFFixYoHXr1mn9+vX11Q4AAAAaqXoLpCNHjlS/fv2UlpbmNb5582ZVV1d7jXfo0EGtWrVSXl7eGfdVWVkpp9Pp9QIAAIB/qJdHh7755pv617/+pU2bNp22raioSMHBwYqKivIaT0hIUFFR0Rn3l5WVpSlTptRHqwAAADDM8hnSffv26cEHH9TixYsVGhpqyT4nTpwoh8Phee3bt8+S/QIAAMA8ywPp5s2bdeDAAf3mN79RUFCQgoKCtHbtWs2aNUtBQUFKSEhQVVWVSkpKvL5XXFysxMTEM+4zJCREkZGRXi8AAAD4B8tP2ffu3VtfffWV19iwYcPUoUMH/eUvf1FKSoqaNWum1atXKyMjQ5K0bds27d27V127drW6HQAAADRylgfS5s2bq2PHjl5j4eHhio2N9YwPHz5cDz
30kGJiYhQZGan7779fXbt21dVXX211OwAAAGjk6uWiptrMmDFDAQEBysjIUGVlpdLT0/XSSy+ZaAUAAACGNUggzcnJ8focGhqq2bNna/bs2Q1xeAAAADRiPMseAAAARhFIAQAAYBSBFAAAAEYRSAEAAGAUgRQAAABGEUgBAABgFIEUAAAARhFIAQAAYBSBFAAAAEYRSAEAAGAUgRQAAABGEUgBAABgFIEUAAAARhFIAQAAYBSBFAAAAEYRSAEAAGAUgRQAAABGEUgBAABgFIEUAAAARhFIAQAAYBSBFAAAAEYRSAEAAGAUgRQAAABGEUgBAABgFIEUAAAARhFIAQAAYFSQ6QYAwGq7DpWprPKY6Tbq5PsDpV7/9DXhIUFKvTDcdBsAfBSBFIBf2XWoTL2eyzHdxs82+q0tplv42daM7UkoBfCzEEgB+JUTM6Mv/KGz2sVHGO7m/FVUu7T/SLlaRocptFmg6Xbq5PsDpRr91hafm5UG0HgQSAH4pXbxEerYwm66jTq5so3pDgDADC5qAgAAgFEEUgAAABhFIAUAAIBRBFIAAAAYRSAFAACAUQRSAAAAGEUgBQAAgFEEUgAAABhFIAUAAIBRBFIAAAAYRSAFAACAUQRSAAAAGEUgBQAAgFEEUgAAABgVZLoBALCaLcipXc5tCgiNMN1Kk7DLWSpbkNN0GwB8GIEUgN9pFrVBkzZONd1Gk9IsqrekG023AcBHEUgB+J3qki6a3u82tY1nhrQh7DhQqgcW7zDdBgAfRiAF4HfcxyKVGvlrXRxrN91Kk1BT4ZD72EHTbQDwYVzUBAAAAKMIpAAAADCKQAoAAACjCKQAAAAwikAKAAAAowikAAAAMIpACgAAAKMIpAAAADCKQAoAAACjeFITAL9SXu2SJG3NdxjupG4qql3af6RcLaPDFNos0HQ7dfL9gVLTLQDwcQRSAH5lx/8PRxOWfmW4k6YnPIT/pAD4efj1AOBXbrgkUZLUNj5CYT400/j9gVKNfmuLXvhDZ7WLjzDdTp2FhwQp9cJw020A8FEEUgB+JSY8WIN/28p0Gz9bu/gIdWxhN90GADQoLmoCAACAUQRSAAAAGEUgBQAAgFEEUgAAABhFIAUAAIBRBFIAAAAYRSAFAACAUQRSAAAAGEUgBQAAgFEEUgAAABhleSDNysrSVVddpebNmys+Pl4DBw7Utm3bvGoqKio0cuRIxcbGKiIiQhkZGSouLra6FQAAAPgAywPp2rVrNXLkSK1fv14fffSRqqurdcMNN6isrMxTM2bMGK1cuVJvv/221q5dq4KCAg0aNMjqVgAAAOADgqzeYXZ2ttfnhQsXKj4+Xps3b9a1114rh8Oh+fPn6/XXX9f1118vSVqwYIEuuugirV+/XldffbXVLQEAAKARq/c1pA6HQ5IUExMjSdq8ebOqq6uVlpbmqenQoYNatWqlvLy8M+6jsrJSTqfT6wUAAAD/UK+BtKamRqNHj1b37t3VsWNHSVJRUZGCg4MVFRXlVZuQkKCioqIz7icrK0t2u93zSklJqc+2AQAA0IDqNZCOHDlSW7du1ZtvvvmL9jNx4kQ5HA7Pa9++fRZ1CAAAANMsX0N6wqhRo/Tee+/p008/VcuWLT3jiYmJqqqqUklJidcsaXFxsRITE8+4r5CQEIWEhNRXqwAAADDI8hlSt9utUaNGadmyZfrkk0+Umprqtf2KK65Qs2bNtHr1as/Ytm3btHfvXnXt2tXqdgAAANDIWT5DOnLkSL3++ut699131bx5c8+6ULvdrrCwMNntdg0fPlwPPfSQYmJiFBkZqfvvv19du3blCnsAAIAmyPJAOmfOHElSz549vcYXLFigoUOHSpJmzJihgIAAZWRkqLKyUunp6XrppZesbgUAAAA+wPJA6na7a60JDQ3V7NmzNXv2bKsPDwAAAB/Ds+wBAABgFIEUAAAARhFIAQAAYBSBFAAAAEYRSAEAAGAUgRQAAABGEUgBAABgFIEUAA
AARhFIAQAAYBSBFAAAAEYRSAEAAGAUgRQAAABGEUgBAABgFIEUAAAARhFIAQAAYBSBFAAAAEYRSAEAAGAUgRQADHO5XNq0Lldl36zVpnW5crlcplsCgAZFIAUAg5YuXarWrVsr89abdGjls8q89Sa1bt1aS5cuNd0aADQYAikAGLJ06VJlZGQoPz/fazw/P18ZGRmEUgBNhs3tdrtNN1FXTqdTdrtdDodDkZGRptsBgDpzuVwKCgqqte7YsWMKDAxsgI4AwFp1yWu1/xoCACRJ5VUu7ThYasm+Pnx/xXnVvTD/daX3+50lx2wbF6GwYMItgMaHGVIAOE9b8x3q/+Jnluxrz7O/l2qqay8MaKbW45ZZcsz37r9GHVvYLdkXANSmLnmNQAoA58nKGdJOLaM87+3RMRo1frLaXnGddmxeq79P+5scRw57tn+1v8SSYzJDCqAhEUgBoJGz2Wye95WVlQoODvZ8rqqqUkhIiOezD/5MA0Cd8hpX2QOAYb/73e+Ul5eno0ePKi8vT7/7nTVrRgHAVzBDCgAG2O12OZ3OWusiIyPlcDgaoCMAsBYzpADQyI0bN87SOgDwZcyQAoABp64TPZtT15cCgK9ghhQAGrng4OBaZz/HjRtHGAXQJHBjfAAwZNq0aZKkZ5991mvcZrNp7Nixnu0A4O84ZQ8AhlVVVemll17Sjh071LZtW40YMYKZUQA+j/uQAgAAwCjWkAIAAMBnEEgBAABgFIEUAAAARhFIAQAAYBSBFAAAAEYRSAEAAGAUgRQAAABGEUgBAABgFIEUAAAARvnks+xPPFzK6XQa7gQAAABnciKnnc9DQX0ykB49elSSlJKSYrgTAAAAnMvRo0dlt9vPWeOTz7KvqalRQUGBmjdvLpvNZrodAPjFnE6nUlJStG/fvlqf+QwAvsDtduvo0aNKTk5WQMC5V4n6ZCAFAH/jdDplt9vlcDgIpACaHC5qAgAAgFEEUgAAABhFIAWARiAkJESPPfaYQkJCTLcCAA2ONaQAAAAwihlSAAAAGEUgBQAAgFEEUgAAABhFIAUAAIBRBFIAqEdDhw6VzWbzvGJjY9WnTx/95z//8dSc2LZ+/Xqv71ZWVio2NlY2m005OTle9cuXL2+gvwAA6h+BFADqWZ8+fVRYWKjCwkKtXr1aQUFB6t+/v1dNSkqKFixY4DW2bNkyRURENGSrAGAEgRQA6llISIgSExOVmJiozp07a8KECdq3b58OHjzoqbnrrrv05ptvqry83DP2j3/8Q3fddZeJlgGgQRFIAaABlZaW6rXXXlO7du0UGxvrGb/iiivUpk0bvfPOO5KkvXv36tNPP9Udd9xhqlUAaDAEUgCoZ++9954iIiIUERGh5s2ba8WKFXrrrbcUEOD9E5yZmal//OMfkqSFCxfqxhtvVFxcnImWAaBBEUgBoJ716tVLW7Zs0ZYtW7Rx40alp6erb9++2rNnj1fd7bffrry8PO3cuVMLFy5UZmamoY4BoGERSAGgnoWHh6tdu3Zq166drrrqKv3P//yPysrKNG/ePK+62NhY9e/fX8OHD1dFRYX69u1rqGMAaFgEUgBoYDabTQEBAV4XMJ2QmZmpnJwc3XnnnQoMDDTQHQA0vCDTDQCAv6usrFRRUZEk6ciRI/r73/+u0tJS3XTTTafV9unTRwcPHlRkZGRDtwkAxhBIAaCeZWdnKykpSZLUvHlzdejQQW+//bZ69ux5Wq3NZtOFF17YwB0CgFk2t9vtNt0EAAAAmi7WkAIAAMAoAikAAACMIpACAADAKAIpAAAAjCKQAgAAwCgCKQAAAIwikAIAAMAoAikAAACMIpACAADAKAIpAAAAjCKQAgAAwCgCKQAAAIz6f8MMEl7n2MmdAAAAAElFTkSuQmCC",
"text/plain": [
"<Figure size 800x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.figure(figsize=(8, 5))\n",
"df[\"BMI\"].plot(kind = \"box\", title='Ящик с усами')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 800x500 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.figure(figsize=(8, 5))\n",
"df[['AgeCategory', 'BMI']].plot(kind='area', alpha=0.2, title='Area Plot (AgeCategory, BMI)')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Axes: xlabel='BMI', ylabel='WeightInKilograms'>"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df.plot.scatter(x=\"BMI\", y=\"WeightInKilograms\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 800x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.figure(figsize=(8, 5))\n",
"df['AgeCategory'].value_counts().plot(kind='pie', autopct='%1.1f%%', title='Pie Chart (AgeCategory)')\n",
"plt.show()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}