MAI_PIbd-33_Tikhonenkov_A_E/lab1.ipynb

627 lines
420 KiB
Plaintext
Raw Permalink Normal View History

2024-11-22 22:56:37 +04:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Работа с Pandas DataFrame\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"https://pandas.pydata.org/docs/user_guide/10min.html"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Работа с данными - чтение и запись CSV"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Load the stroke dataset, using the \"id\" column as the row index.\n",
"SOURCE_CSV = \"data/healthcare-dataset-stroke-data.csv\"\n",
"df = pd.read_csv(SOURCE_CSV, index_col=\"id\")\n",
"\n",
"# Write the frame back out to demonstrate CSV export.\n",
"df.to_csv(\"lab1.csv\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Работа с данными - основные команды"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 5110 entries, 9046 to 44679\n",
"Data columns (total 11 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 gender 5110 non-null object \n",
" 1 age 5110 non-null float64\n",
" 2 hypertension 5110 non-null int64 \n",
" 3 heart_disease 5110 non-null int64 \n",
" 4 ever_married 5110 non-null object \n",
" 5 work_type 5110 non-null object \n",
" 6 Residence_type 5110 non-null object \n",
" 7 avg_glucose_level 5110 non-null float64\n",
" 8 bmi 4909 non-null float64\n",
" 9 smoking_status 5110 non-null object \n",
" 10 stroke 5110 non-null int64 \n",
"dtypes: float64(3), int64(3), object(5)\n",
"memory usage: 479.1+ KB\n",
" count mean std min 25% 50% \\\n",
"age 5110.0 43.226614 22.612647 0.08 25.000 45.000 \n",
"hypertension 5110.0 0.097456 0.296607 0.00 0.000 0.000 \n",
"heart_disease 5110.0 0.054012 0.226063 0.00 0.000 0.000 \n",
"avg_glucose_level 5110.0 106.147677 45.283560 55.12 77.245 91.885 \n",
"bmi 4909.0 28.893237 7.854067 10.30 23.500 28.100 \n",
"stroke 5110.0 0.048728 0.215320 0.00 0.000 0.000 \n",
"\n",
" 75% max \n",
"age 61.00 82.00 \n",
"hypertension 0.00 1.00 \n",
"heart_disease 0.00 1.00 \n",
"avg_glucose_level 114.09 271.74 \n",
"bmi 33.10 97.60 \n",
"stroke 0.00 1.00 \n",
" gender age hypertension heart_disease avg_glucose_level bmi \\\n",
"id \n",
"9046 Male 67.0 0 1 228.69 36.6 \n",
"51676 Female 61.0 0 0 202.21 NaN \n",
"31112 Male 80.0 0 1 105.92 32.5 \n",
"60182 Female 49.0 0 0 171.23 34.4 \n",
"1665 Female 79.0 1 0 174.12 24.0 \n",
"\n",
" smoking_status stroke \n",
"id \n",
"9046 formerly smoked 1 \n",
"51676 never smoked 1 \n",
"31112 never smoked 1 \n",
"60182 smokes 1 \n",
"1665 never smoked 1 \n",
" gender age hypertension heart_disease avg_glucose_level bmi \\\n",
"id \n",
"18234 Female 80.0 1 0 83.75 NaN \n",
"44873 Female 81.0 0 0 125.20 40.0 \n",
"19723 Female 35.0 0 0 82.99 30.6 \n",
"37544 Male 51.0 0 0 166.29 25.6 \n",
"44679 Female 44.0 0 0 85.28 26.2 \n",
"\n",
" smoking_status stroke \n",
"id \n",
"18234 never smoked 0 \n",
"44873 never smoked 0 \n",
"19723 never smoked 0 \n",
"37544 formerly smoked 0 \n",
"44679 Unknown 0 \n",
" gender age hypertension heart_disease avg_glucose_level bmi \\\n",
"id \n",
"72369 Female 14.0 0 0 65.41 19.5 \n",
"3135 Female 73.0 0 0 69.35 NaN \n",
"563 Female 41.0 0 0 216.71 36.2 \n",
"19364 Female 7.0 0 0 74.96 18.8 \n",
"55459 Female 60.0 0 0 91.82 28.3 \n",
"\n",
" smoking_status stroke \n",
"id \n",
"72369 Unknown 0 \n",
"3135 never smoked 0 \n",
"563 never smoked 0 \n",
"19364 Unknown 0 \n",
"55459 formerly smoked 0 \n",
" gender age hypertension heart_disease avg_glucose_level bmi \\\n",
"id \n",
"33622 Male 62.0 1 0 211.49 41.1 \n",
"51554 Male 42.0 0 0 177.91 NaN \n",
"2296 Male 78.0 1 0 90.19 NaN \n",
"13602 Male 73.0 1 0 102.06 NaN \n",
"56156 Other 26.0 0 0 143.33 22.4 \n",
"\n",
" smoking_status stroke \n",
"id \n",
"33622 Unknown 0 \n",
"51554 Unknown 0 \n",
"2296 Unknown 0 \n",
"13602 Unknown 0 \n",
"56156 formerly smoked 0 \n"
]
}
],
"source": [
"# Column dtypes, non-null counts and memory footprint.\n",
"df.info()\n",
"\n",
"# Summary statistics of the numeric columns, one row per column.\n",
"print(df.describe().T)\n",
"\n",
"# Drop three categorical columns to get a narrower frame, then preview both ends.\n",
"cleared_df = df.drop(columns=[\"ever_married\", \"work_type\", \"Residence_type\"])\n",
"for preview in (cleared_df.head(), cleared_df.tail()):\n",
"    print(preview)\n",
"\n",
"# Sort the rows by gender and preview both ends of the sorted frame.\n",
"sorted_df = cleared_df.sort_values(by=\"gender\")\n",
"for preview in (sorted_df.head(), sorted_df.tail()):\n",
"    print(preview)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Работа с данными - работа с элементами"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id\n",
"9046 67.0\n",
"51676 61.0\n",
"31112 80.0\n",
"60182 49.0\n",
"1665 79.0\n",
" ... \n",
"18234 80.0\n",
"44873 81.0\n",
"19723 35.0\n",
"37544 51.0\n",
"44679 44.0\n",
"Name: age, Length: 5110, dtype: float64\n",
"gender Male\n",
"age 62.0\n",
"hypertension 0\n",
"heart_disease 0\n",
"ever_married Yes\n",
"work_type Private\n",
"Residence_type Rural\n",
"avg_glucose_level 107.61\n",
"bmi 31.3\n",
"smoking_status Unknown\n",
"stroke 0\n",
"Name: 63864, dtype: object\n",
"Rural\n"
]
},
{
"ename": "KeyError",
"evalue": "\"['Возраст'] not in index\"",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[13], line 7\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(df\u001b[38;5;241m.\u001b[39mloc[\u001b[38;5;241m63864\u001b[39m])\n\u001b[0;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(df\u001b[38;5;241m.\u001b[39mloc[\u001b[38;5;241m63864\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mResidence_type\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m----> 7\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mdf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloc\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m63864\u001b[39;49m\u001b[43m:\u001b[49m\u001b[38;5;241;43m63898\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mВозраст\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mResidence_type\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m]\u001b[49m)\n\u001b[0;32m 9\u001b[0m \u001b[38;5;28mprint\u001b[39m(df[\u001b[38;5;241m0\u001b[39m:\u001b[38;5;241m3\u001b[39m])\n\u001b[0;32m 11\u001b[0m \u001b[38;5;28mprint\u001b[39m(df\u001b[38;5;241m.\u001b[39miloc[\u001b[38;5;241m0\u001b[39m])\n",
"File \u001b[1;32md:\\Users\\Leo\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\mai-S9i2J6c7-py3.12\\Lib\\site-packages\\pandas\\core\\indexing.py:1184\u001b[0m, in \u001b[0;36m_LocationIndexer.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 1182\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_is_scalar_access(key):\n\u001b[0;32m 1183\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39m_get_value(\u001b[38;5;241m*\u001b[39mkey, takeable\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_takeable)\n\u001b[1;32m-> 1184\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_getitem_tuple\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1185\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 1186\u001b[0m \u001b[38;5;66;03m# we by definition only have the 0th axis\u001b[39;00m\n\u001b[0;32m 1187\u001b[0m axis \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxis \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;241m0\u001b[39m\n",
"File \u001b[1;32md:\\Users\\Leo\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\mai-S9i2J6c7-py3.12\\Lib\\site-packages\\pandas\\core\\indexing.py:1377\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_tuple\u001b[1;34m(self, tup)\u001b[0m\n\u001b[0;32m 1374\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_multi_take_opportunity(tup):\n\u001b[0;32m 1375\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_multi_take(tup)\n\u001b[1;32m-> 1377\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_getitem_tuple_same_dim\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtup\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[1;32md:\\Users\\Leo\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\mai-S9i2J6c7-py3.12\\Lib\\site-packages\\pandas\\core\\indexing.py:1020\u001b[0m, in \u001b[0;36m_LocationIndexer._getitem_tuple_same_dim\u001b[1;34m(self, tup)\u001b[0m\n\u001b[0;32m 1017\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m com\u001b[38;5;241m.\u001b[39mis_null_slice(key):\n\u001b[0;32m 1018\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[1;32m-> 1020\u001b[0m retval \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mretval\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_getitem_axis\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mi\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1021\u001b[0m \u001b[38;5;66;03m# We should never have retval.ndim < self.ndim, as that should\u001b[39;00m\n\u001b[0;32m 1022\u001b[0m \u001b[38;5;66;03m# be handled by the _getitem_lowerdim call above.\u001b[39;00m\n\u001b[0;32m 1023\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m retval\u001b[38;5;241m.\u001b[39mndim \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mndim\n",
"File \u001b[1;32md:\\Users\\Leo\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\mai-S9i2J6c7-py3.12\\Lib\\site-packages\\pandas\\core\\indexing.py:1420\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_axis\u001b[1;34m(self, key, axis)\u001b[0m\n\u001b[0;32m 1417\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(key, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mndim\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m key\u001b[38;5;241m.\u001b[39mndim \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m 1418\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot index with multidimensional key\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m-> 1420\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_getitem_iterable\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1422\u001b[0m \u001b[38;5;66;03m# nested tuple slicing\u001b[39;00m\n\u001b[0;32m 1423\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_nested_tuple(key, labels):\n",
"File \u001b[1;32md:\\Users\\Leo\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\mai-S9i2J6c7-py3.12\\Lib\\site-packages\\pandas\\core\\indexing.py:1360\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_iterable\u001b[1;34m(self, key, axis)\u001b[0m\n\u001b[0;32m 1357\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_key(key, axis)\n\u001b[0;32m 1359\u001b[0m \u001b[38;5;66;03m# A collection of keys\u001b[39;00m\n\u001b[1;32m-> 1360\u001b[0m keyarr, indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_listlike_indexer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1361\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39m_reindex_with_indexers(\n\u001b[0;32m 1362\u001b[0m {axis: [keyarr, indexer]}, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, allow_dups\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m 1363\u001b[0m )\n",
"File \u001b[1;32md:\\Users\\Leo\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\mai-S9i2J6c7-py3.12\\Lib\\site-packages\\pandas\\core\\indexing.py:1558\u001b[0m, in \u001b[0;36m_LocIndexer._get_listlike_indexer\u001b[1;34m(self, key, axis)\u001b[0m\n\u001b[0;32m 1555\u001b[0m ax \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39m_get_axis(axis)\n\u001b[0;32m 1556\u001b[0m axis_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39m_get_axis_name(axis)\n\u001b[1;32m-> 1558\u001b[0m keyarr, indexer \u001b[38;5;241m=\u001b[39m \u001b[43max\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_indexer_strict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1560\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m keyarr, indexer\n",
"File \u001b[1;32md:\\Users\\Leo\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\mai-S9i2J6c7-py3.12\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:6200\u001b[0m, in \u001b[0;36mIndex._get_indexer_strict\u001b[1;34m(self, key, axis_name)\u001b[0m\n\u001b[0;32m 6197\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 6198\u001b[0m keyarr, indexer, new_indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reindex_non_unique(keyarr)\n\u001b[1;32m-> 6200\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raise_if_missing\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkeyarr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 6202\u001b[0m keyarr \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtake(indexer)\n\u001b[0;32m 6203\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(key, Index):\n\u001b[0;32m 6204\u001b[0m \u001b[38;5;66;03m# GH 42790 - Preserve name from an Index\u001b[39;00m\n",
"File \u001b[1;32md:\\Users\\Leo\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\mai-S9i2J6c7-py3.12\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:6252\u001b[0m, in \u001b[0;36mIndex._raise_if_missing\u001b[1;34m(self, key, indexer, axis_name)\u001b[0m\n\u001b[0;32m 6249\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNone of [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m] are in the [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00maxis_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m]\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 6251\u001b[0m not_found \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(ensure_index(key)[missing_mask\u001b[38;5;241m.\u001b[39mnonzero()[\u001b[38;5;241m0\u001b[39m]]\u001b[38;5;241m.\u001b[39munique())\n\u001b[1;32m-> 6252\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnot_found\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not in index\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"\u001b[1;31mKeyError\u001b[0m: \"['Возраст'] not in index\""
]
}
],
"source": [
"# A single column selected by name is returned as a Series.\n",
"print(df[\"age\"])\n",
"\n",
"# Label-based access (.loc): the row labels are values of the \"id\" index.\n",
"print(df.loc[63864])\n",
"\n",
"print(df.loc[63864, \"Residence_type\"])\n",
"\n",
"# A label slice includes both endpoints.\n",
"print(df.loc[63864:63898, [\"age\", \"Residence_type\"]])\n",
"\n",
"# Position-based access (.iloc). The first three rows are taken with .iloc\n",
"# instead of df[0:3] so it is explicit that the slice is positional, not by id.\n",
"print(df.iloc[0:3])\n",
"\n",
"print(df.iloc[0])\n",
"\n",
"# Rows 3-4 and columns 0-1, first as slices and then as explicit position lists.\n",
"print(df.iloc[3:5, 0:2])\n",
"\n",
"print(df.iloc[[3, 4], [0, 1]])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Работа с данными - отбор и группировка"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Male' 'Female' 'Other']\n",
"Male count = 2115\n",
"Female count = 2994\n",
"Other count = 1\n",
"Total count = 5110\n",
" bmi smoking_status Count\n",
"0 10.3 Unknown 1\n",
"1 11.3 Unknown 1\n",
"2 11.5 never smoked 1\n",
"3 12.0 Unknown 1\n",
"4 12.3 Unknown 1\n",
"... ... ... ...\n",
"1185 66.8 Unknown 1\n",
"1186 71.9 never smoked 1\n",
"1187 78.0 smokes 1\n",
"1188 92.0 never smoked 1\n",
"1189 97.6 Unknown 1\n",
"\n",
"[1190 rows x 3 columns]\n"
]
}
],
"source": [
"s_values = df[\"gender\"].unique()\n",
"print(s_values)\n",
"\n",
"# Count every gender in a single pass instead of filtering the frame once per\n",
"# value; reindex keeps the counts in the same order as `s_values`.\n",
"gender_counts = df[\"gender\"].value_counts().reindex(s_values, fill_value=0)\n",
"for s_value, count in gender_counts.items():\n",
"    print(s_value, \"count =\", count)\n",
"s_total = int(gender_counts.sum())\n",
"print(\"Total count = \", s_total)\n",
"\n",
"# Number of rows per (bmi, smoking_status) pair. Rows with a missing bmi are\n",
"# left out because groupby drops NaN keys by default.\n",
"print(df.groupby([\"bmi\", \"smoking_status\"]).size().reset_index(name=\"Count\"))  # type: ignore"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Визуализация - Исходные данные\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" age work_type smoking_status\n",
"id \n",
"9046 67.0 Private formerly smoked\n",
"51676 61.0 Self-employed never smoked\n",
"31112 80.0 Private never smoked\n",
"60182 49.0 Private smokes\n",
"1665 79.0 Self-employed never smoked\n",
"... ... ... ...\n",
"18234 80.0 Private never smoked\n",
"44873 81.0 Self-employed never smoked\n",
"19723 35.0 Self-employed never smoked\n",
"37544 51.0 Private formerly smoked\n",
"44679 44.0 Govt_job Unknown\n",
"\n",
"[5110 rows x 3 columns]\n"
]
}
],
"source": [
"# Columns used by the plots below. dropna is chained (it returns a new frame)\n",
"# rather than applied with inplace=True.\n",
"# NOTE(review): df.info() reports 5110 non-null smoking_status values and the\n",
"# unknown ones appear as the string \"Unknown\", so this dropna looks like a\n",
"# no-op on this dataset - confirm whether \"Unknown\" rows should be filtered.\n",
"data = df[[\"age\", \"work_type\", \"smoking_status\"]].dropna(subset=[\"smoking_status\"])\n",
"print(data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Визуализация - Линейная диаграмма"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjYAAAHHCAYAAACskBIUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB5fElEQVR4nO3dd1QU198G8Gd3gV16laYUKwi2iA1R7BI1aiLGXqNGjS2mGVN+aooa38SSaGKNGsXYk9hbYsOKErvYEQuKSgeBZfe+fyAbV1AXBAaW53MOJ9nZmTtfLsPyOHPnjkwIIUBERERkBORSF0BERERUVBhsiIiIyGgw2BAREZHRYLAhIiIio8FgQ0REREaDwYaIiIiMBoMNERERGQ0GGyIiIjIaDDZERERkNBhsiKhEDBo0CFZWVlKXUeKio6Mhk8nw/fffv3C9ffv2QSaTYd++fSVTGJGRYrChMuvnn3+GTCZD48aNpS6l1NJoNHB3d4dMJsP27dulLqdEZGVlYc6cOXjttddgY2MDOzs7+Pv7491330VUVJTU5ZVaZ8+eRffu3eHl5QWVSoWKFSuiXbt2+Omnn/TWmzp1Kv78889C7+fChQuYPHkyoqOjX61goudgsKEyKywsDN7e3jh+/DiuXr0qdTml0j///IPY2Fh4e3sjLCxM6nJKRGhoKD788EPUqlUL06dPx5QpUxAcHIzt27fj6NGjUpf3XMHBwXj8+DGCg4NLfN+HDx9GgwYNcPr0aQwbNgxz587F0KFDIZfLMWfOHL11iyLYTJkyhcGGio2J1AUQFcaNGzdw+PBhbNy4EcOHD0dYWBgmTZpUojVotVpkZWVBpVKV6H4LYuXKlahfvz4GDhyIzz77DGlpabC0tJS6rGITERGBLVu24Ntvv8Vnn32m997cuXORmJgoTWEGkMvlkh1L3377LWxtbREREQE7Ozu99+Li4iSpiaiweMaGyqSwsDDY29ujU6dO6N69u97ZCLVaDQcHBwwePDjPdsnJyVCpVPjoo490yzIzMzFp0iRUq1YNSqUSHh4e+OSTT5CZmam3rUwmw+jRoxEWFgZ/f38olUrs2LEDAPD999+jadOmcHR0hLm5OQICArB+/fo8+3/8+DHGjh0LJycnWFtbo0uXLrhz5w5kMhkmT56st+6dO3fwzjvvwMXFBUqlEv7+/vj1118N7qPHjx/jjz/+QK9evdCjRw88fvwYf/31V77rrlu3Dn5+flCpVKhVqxb++OMPDBo0CN7e3nrrabVazJ49G/7+/lCpVHBxccHw4cORkJBgcF3Xr19HSEgILC0t4e7ujq+++gpCCACAEALe3t7o2rVrnu0yMjJga2uL4cOHP7fta9euAQCCgoLyvKdQKODo6Kh7PXnyZMhkMly+fBn9+vWDra0tKlSogC+//BJCCNy6dQtdu3aFjY0NXF1d8cMPP+RpMy4uDkOGDIGLiwtUKhXq1q2L5cuXv7QPhBB49913YWZmho0bNwLIf4xNy5YtUatWLVy4cAGtWrWChYUFKlasiBkzZuRp8+bNm+jSpQssLS3h7OyM8ePHY+fOnQaN27l27Rr8/f3zhBoAcHZ21v2/TCZDWloali9fDplMBplMhkGDBun2/95778HHxwfm5uZwdHTE22+/rXdmZtmyZXj77bcBAK1atdK1kVtffr8HAODt7a3bD5DzOz5lyhRUr14dKpUKjo6OaNasGXbv3v3C75PKCUFUBvn6+oohQ4YIIYQ4cOCAACCOHz+ue/+dd94RdnZ2IjMzU2+75cuXCwAiIiJCCCGERqMR7du3FxYWFuL9998XCxYsEKNHjxYmJiaia9euetsCEDVr1hQVKlQQU6ZMEfPmzRP//vuvEEKISpUqiffee0/MnTtXzJw5UzRq1EgAEFu2bNFro0ePHgKA6N+/v5g3b57o0aOHqFu3rgAgJk2apFvv3r17olKlSsLDw0N89dVX4pdffhFdunQRAMSsWb
MM6qPVq1cLmUwmYmJihBBCtG7dWnTs2DHPelu2bBEymUzUqVNHzJw5U3z55ZfC3t5e1KpVS3h5eemtO3ToUGFiYiKGDRsm5s+fLyZMmCAsLS1Fw4YNRVZW1gvrGThwoFCpVKJ69eqif//+Yu7cueKNN94QAMSXX36pW+/zzz8Xpqam4tGjR3rbr127VgAQBw4ceO4+Dh8+LACIYcOGCbVa/cJ6Jk2aJACIevXqid69e4uff/5ZdOrUSQAQM2fOFD4+PmLkyJHi559/FkFBQQKA2L9/v2779PR0UbNmTWFqairGjx8vfvzxR9G8eXMBQMyePVu33o0bNwQA8X//939CCCGys7PFgAEDhFKp1Ds+9u7dKwCIvXv36pa1aNFCuLu7Cw8PDzFu3Djx888/i9atWwsAYtu2bbr1UlNTRZUqVYS5ubn49NNPxezZs0WjRo10x9bTbeanffv2wtraWpw9e/aF661YsUIolUrRvHlzsWLFCrFixQpx+PBhIYQQ69atE3Xr1hX/+9//xMKFC8Vnn30m7O3thZeXl0hLSxNCCHHt2jUxduxYAUB89tlnujbu3bsnhBB5fg9yeXl5iYEDB+pef/bZZ0Imk4lhw4aJRYsWiR9++EH07t1bTJ8+/YX1U/nAYENlzokTJwQAsXv3biGEEFqtVlSqVEmMGzdOt87OnTsFALF582a9bTt27CiqVKmie71ixQohl8vFwYMH9dabP3++ACAOHTqkWwZAyOVycf78+Tw1paen673OysoStWrVEq1bt9YtO3nypAAg3n//fb11Bw0alOcDfciQIcLNzU08fPhQb91evXoJW1vbPPvLzxtvvCGCgoJ0rxcuXChMTExEXFyc3nq1a9cWlSpVEikpKbpl+/btEwD0gs3BgwcFABEWFqa3/Y4dO/Jd/qyBAwcKAGLMmDG6ZVqtVnTq1EmYmZmJBw8eCCGEuHTpkgAgfvnlF73tu3TpIry9vYVWq33uPrRarWjRooUAIFxcXETv3r3FvHnzxM2bN/Osmxts3n33Xd2y7OxsUalSJSGTyfT+SCYkJAhzc3O9P66zZ88WAMTKlSt1y7KyskRgYKCwsrISycnJQgj9YKNWq0XPnj2Fubm52Llzp149zws2AMRvv/2mW5aZmSlcXV1FaGiobtkPP/wgAIg///xTt+zx48fC19fXoGCza9cuoVAohEKhEIGBgeKTTz4RO3fuzDesWlpa6vVDrvyOySNHjuSpf926dc+tydBgU7duXdGpU6cXfk9UfvFSFJU5YWFhcHFxQatWrQDknL7u2bMnVq9eDY1GAwBo3bo1nJycsGbNGt12CQkJ2L17N3r27Klbtm7dOtSsWRO+vr54+PCh7qt169YAgL179+rtu0WLFvDz88tTk7m5ud5+kpKS0Lx5c0RGRuqW5162eu+99/S2HTNmjN5rIQQ2bNiAzp07QwihV1dISAiSkpL02s3Po0ePsHPnTvTu3Vu3LDQ0FDKZDGvXrtUtu3v3Ls6ePYsBAwbo3YrdokUL1K5dW6/NdevWwdbWFu3atdOrKSAgAFZWVnn66nlGjx6t+//cy3tZWVnYs2cPAKBGjRpo3Lix3uXF+Ph4bN++HX379oVMJntu2zKZDDt37sQ333wDe3t7/P777xg1ahS8vLzQs2fPfMfYDB06VPf/CoUCDRo0gBACQ4YM0S23s7ODj48Prl+/rlu2bds2uLq66vWxqakpxo4di9TUVOzfv19vP1lZWXj77bexZcsWbNu2De3btzegtwArKyv069dP99rMzAyNGjXSq2XHjh2oWLEiunTpolumUqkwbNgwg/bRrl07HDlyBF26dMHp06cxY8YMhISEoGLFiti0aZNBbTz9O6BWq/Ho0SNUq1YNdnZ2Lz1eC8rOzg7nz5/HlStXirRdMg4MNlSmaDQarF69Gq1atcKNGzdw9epVXL16FY0bN8b9+/fx999/AwBMTEwQGhqKv/76SzdWZuPGjVCr1XrB5s
qVKzh//jwqVKig91WjRg0AeQdOVq5cOd+6tmzZgiZNmkClUsHBwQEVKlTAL7/8gqSkJN06N2/ehFwuz9NGtWrV9F4
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Mean age within each smoking-status group.\n",
"average_age = data.groupby(\"smoking_status\")[\"age\"].mean()\n",
"\n",
"# Draw on an explicit Axes instead of the implicit pyplot state.\n",
"fig, ax = plt.subplots()\n",
"average_age.plot(\n",
"    ax=ax,\n",
"    kind=\"line\",\n",
"    marker=\"o\",\n",
"    title=\"Average Age by Smoking Status\",\n",
"    xlabel=\"Smoking Status\",\n",
"    ylabel=\"Average Age\",\n",
")\n",
"ax.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Визуализация - столбчатая диаграмма"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA90AAAJOCAYAAACqS2TfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACdmklEQVR4nOzdd3xO9///8eeVLUhiZdk7sTWUoGYIoq1aNSqoVRUtSlFao1WqVdVSSmu0pS2Kqr1XxG4IxceOlpiV1MiQnN8ffrm+rsaWKxfJ43675dZe57zP+7zO5WQ8r/f7nGMyDMMQAAAAAABId3a2LgAAAAAAgMyK0A0AAAAAgJUQugEAAAAAsBJCNwAAAAAAVkLoBgAAAADASgjdAAAAAABYCaEbAAAAAAArIXQDAAAAAGAlhG4AAAAAAKyE0A0AyNQ6d+6sHDlyPLBd3bp1VbduXesXlIUVKVJEzZo1s3UZVle3bl2VK1fO1mUAAJ4ShG4AQLqKiopSq1atVLhwYbm4uCh//vxq2LChvvrqK1uX9tRKTEzUxIkTVblyZbm5ucnDw0Nly5ZVjx49dPjwYXO7bdu2acSIEbp69epj7+vrr7/WrFmznrzop9CFCxdkMpn09ttvp1n39ttvy2Qyafjw4WnWhYaGytHRUTdu3MiIMu9q48aNMplMD/UFAHi2ONi6AABA5rFt2zbVq1dPhQoVUvfu3eXt7a0zZ85o+/btmjhxovr06WPrEu9p9erVNtt3y5YttWLFCrVr107du3dXUlKSDh8+rKVLl6pGjRry8/OTdPv9HTlypDp37iwPD4/H2tfXX3+tvHnzqnPnzul3AE8JT09PlSxZUlu3bk2zLjw8XA4ODgoPD7/rusqVK8vV1TUjyrwrf39//fDDDxbLhgwZohw5cmjo0KE2qgoAkB4I3QCAdDN69Gi5u7tr165daULhhQsXbFPUQ3JycrLJfnft2qWlS5dq9OjReu+99yzWTZo06YlGtbOiWrVq6fvvv9e1a9fMlxVcv35d+/btU5s2bbRkyRIlJyfL3t5eknTu3DmdOHFCL7/88hPv+/r168qePftjbevl5aXXXnvNYtnYsWOVN2/eNMsBAM8WppcDANLN8ePHVbZs2buOwnp6elq8NplMCgsL0/z581WmTBlly5ZNgYGBioqKkiR98803KlGihFxcXFS3bl2dOnUqTZ/z589XQECAsmXLZg4nf//99wPrjIyMVL58+VS3bl1du3ZNUtprulOn+86bN0+jR49WgQIF5OLiogYNGujYsWNp+pw8ebKKFSumbNmy6fnnn9eWLVse6jrx48ePS5Jq1qyZZp29vb3y5MkjSRoxYoQGDhwoSSpatKh5qnHq+zJz5kzVr19fnp6ecnZ2VpkyZTRlyhSL/ooUKaKDBw9q06ZN5u1T6xsxYsRdpy7PmjXLYj+StHv3bgUHBytv3rzKli2bihYtqtdff/2+x3mn1atXq1KlSnJxcVGZMmW0cOFC87oTJ07IZDJpwoQJabbbtm2bTCaTfvrpp3v2XatWLSUnJ2v79u3mZTt27NCtW7c0YMAAXbt2TZGRkeZ1qSPftWrVMi97mPMq9V4Bx48fV9OmTZUzZ0516NDhvsfs6uqqdu3a6datW/d+c+7BMAwVKVLkrh8OxMfHy93dXT179pT0f+fuL7/8ovfee0/e3t7Knj27XnrpJZ05cybN9jt27FDjxo3l7u4uV1dX1alT564zAgAAj4fQDQBIN4ULF9aePXt04MCBh2q/ZcsWvfPOO+rUqZNGjBihQ4cOqVmzZpo8ebK+/PJLvfnmmxo4cKAiIiLShLpZs2apTZs2sre315gxY9S9e3ctXLhQtWrVuu/o8K5du1S/fn1VrlxZK1aseOBN1saOHatFixZpwIABGjJkiLZv354mXE2ZMkVhYWEqUKCAxo0bpxdeeEHNmzfXX3/99cD3oHDhwpKkOXPm3DeMtWjRQu3atZMkTZ
gwQT/88IN++OEH5cuXz1xD4cKF9d5772n8+PEqWLCg3nzzTU2ePNncxxdffKECBQrIz8/PvP2jTl2+cOGCGjVqpFOnTmnw4MH66quv1KFDB4uQez9Hjx7Vq6++qiZNmmjMmDFycHBQ69attWbNGklSsWLFVLNmTc2ZMyfNtnPmzFHOnDnvOyqdGp7vnGIeHh6uUqVKqXLlyipQoIBFoPxv6H6U8+rWrVsKDg6Wp6enPvvsM7Vs2fKuNS1dulQvvfSSWrdurR9//FEODo8+0dBkMum1117TihUrdOXKFYt1v//+u+Li4tKMiI8ePVrLli3ToEGD9NZbb2nNmjUKCgrSzZs3zW3Wr1+v2rVrKy4uTsOHD9fHH3+sq1evqn79+tq5c+cj1wkAuAsDAIB0snr1asPe3t6wt7c3AgMDjXfffddYtWqVkZiYmKatJMPZ2dk4efKkedk333xjSDK8vb2NuLg48/IhQ4YYksxtExMTDU9PT6NcuXLGzZs3ze2WLl1qSDI++OAD87JOnToZ2bNnNwzDMLZu3Wq4ubkZISEhRnx8vEU9derUMerUqWN+vWHDBkOS4e/vbyQkJJiXT5w40ZBkREVFGYZhGAkJCUaePHmMqlWrGklJSeZ2s2bNMiRZ9Hk3KSkpRp06dQxJhpeXl9GuXTtj8uTJxunTp9O0/fTTTy3ehzvduHEjzbLg4GCjWLFiFsvKli1715qGDx9u3O3PgpkzZ1rsc9GiRYYkY9euXfc9rrspXLiwIcn49ddfzctiY2MNHx8fo3LlyuZlqefBoUOHzMsSExONvHnzGp06dXrgfjw9PY0GDRqYXwcHBxtdunQxDMMw2rRpY7Ru3dq8rkqVKkbJkiXN+3iU80qSMXjw4DT7r1OnjlG2bFnDMAzj119/NRwdHY3u3bsbycnJD6z9Tv/9tzpy5IghyZgyZYpFu5deeskoUqSIkZKSYhjG/527+fPnt/g+mjdvniHJmDhxomEYt8+9kiVLGsHBweZtDeP2uVS0aFGjYcOGj1QvAODuGOkGAKSbhg0bKiIiQi+99JL27duncePGKTg4WPnz59eSJUvStG/QoIGKFClifl2tWjVJt28sljNnzjTLT5w4Ien29OYLFy7ozTfflIuLi7ldSEiI/Pz8tGzZsjT72rBhg4KDg9WgQQMtXLhQzs7OD3VMXbp0sbje+4UXXkhTy+XLl9W9e3eLEcwOHTooV65cD+zfZDJp1apV+uijj5QrVy799NNP6t27twoXLqxXX331oa/pzpYtm/n/Y2NjdenSJdWpU0cnTpxQbGzsQ/XxMFIvHVi6dKmSkpIeeXtfX1+98sor5tdubm4KDQ3VH3/8oZiYGElSmzZt5OLiYjHavWrVKl26dOmhrm+uWbOmduzYoeTkZKWkpGj79u2qUaOGeV3q6PaNGzcUGRlpHuV+nPOqV69e96zjp59+0quvvqqePXvqm2++kZ3dk/3ZVapUKVWrVs3ifbly5YpWrFihDh06pLk8IDQ01OL7qFWrVvLx8dHy5csl3b7M4ujRo2rfvr0uX76sS5cu6dKlS7p+/boaNGigzZs3KyUl5YlqBgAwvRwAkM6qVq2qhQsX6p9//tHOnTs1ZMgQ/fvvv2rVqpX+/PNPi7aFChWyeO3u7i5JKliw4F2X//PPP5Kk06dPS5JKly6dZv9+fn7m9ani4+MVEhKiypUra968eY9007T/1pgapP9bS4kSJSzaOTg4WHygcD/Ozs4aOnSoDh06pLNnz+qnn35S9erVNW/ePIWFhT1UH+Hh4QoKClL27Nnl4eGhfPnymW/Mlp6hu06dOmrZsqVGjhypvHnz6uWXX9bMmTOVkJDwUNuXKFEiTTgsVaqUJJmvG/fw8NCLL76ouXPnmtvMmTNH+fPnV/369R+4j1q1apmv3T5w4IBiY2PN18zXqFFDZ8+e1alTp8zXeqeG7kc9rxwcHFSgQIG71nDy5Em99tpratmypb766qt0e9RXaG
iowsPDzbXMnz9fSUlJ6tixY5q2JUuWtHhtMplUokQJ8/t89OhRSVKnTp2UL18+i69vv/1WCQkJ6XruAEBWRegGAFi
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Rows: work type, columns: smoking status, values: number of rows in each pair.\n",
"pivot_table = data.groupby([\"work_type\", \"smoking_status\"]).size().unstack()\n",
"\n",
"# pandas returns the Axes it drew on; configure the chart through it.\n",
"ax = pivot_table.plot(kind=\"bar\", stacked=True, figsize=(10, 6))\n",
"ax.set_title(\"Smoking Status by Work Type\")\n",
"ax.set_xlabel(\"Work Type\")\n",
"ax.set_ylabel(\"Count\")\n",
"ax.legend(title=\"Smoking Status\")\n",
"ax.grid(axis=\"y\")\n",
"\n",
"# Tilt the category labels so they do not overlap.\n",
"plt.xticks(rotation=45)\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Визуализация - Гистограмма"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAHHCAYAAABZbpmkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABCK0lEQVR4nO3de1xUdeL/8fcAw1WBRAFdQS0tZNU0r6RZJkFGbSZdTE0yNzfCMrG23C1Ty0xb7baabT/DeiRZbtaWa5qX0krKyxaVGmq6YcolVEDuA3N+f7TOtwktxIFhjq/n48Hj4XzOZ855nzkp786cM2MxDMMQAACASXm5OwAAAEBTouwAAABTo+wAAABTo+wAAABTo+wAAABTo+wAAABTo+wAAABTo+wAAABTo+wAAABTo+wAaNH++9//ymKxaNmyZU2+rWXLlslisei///2vY6xz58669tprm3zbkvTRRx/JYrHoo48+apbtAecKyg5wDli8eLEsFosGDhzo7iiyWCyOHx8fH7Vp00Z9+/bVlClTtHv3bpdtZ/Hixc1SkBqjJWcDzMjCd2MB5jd48GAdOXJE//3vf7Vv3z517drVbVksFouuuuoqjR8/XoZhqKSkRNnZ2Vq5cqXKy8s1b948paenO+YbhqHq6mpZrVZ5e3s3eDs9evRQ27Ztz+gsSV1dnWw2m/z8/GSxWCT9dGanR48eWr16dYPX09hsdrtdNTU18vX1lZcX/y8KuAp/mwCTO3jwoLZu3aqFCxeqXbt2Wr58ubsj6cILL9S4ceN02223afLkyXrppZf03XffqX///po2bZrWrFnjmGuxWOTv739GRedMlZeXS5K8vb3l7+/vKDrNzcvLS/7+/hQdwMX4GwWY3PLly3XeeecpKSlJN95442nLztGjR3XbbbcpODhYoaGhSklJUXZ29imvl/n222914403qk2bNvL391e/fv307rvvnlXOsLAwrVixQj4+PpozZ45j/FTX7OTn52vChAnq2LGj/Pz81L59e11//fWOa206d+6sXbt2afPmzY63zK644gpJ/3ddzubNm3X33XcrPDxcHTt2dFr282t2Tvrggw/Uu3dv+fv7KzY2VqtWrXJaPnPmzFOWpF+u89eyne6anZUrV6pv374KCAhQ27ZtNW7cOB0+fNhpzu23365WrVrp8OHDGjlypFq1aqV27drp/vvvV11d3W+8+oC5+bg7AICmtXz5co0aNUq+vr669dZb9cILL2j79u3q37+/Y47dbtd1112nbdu2KTU1VTExMfrXv/6llJSUeuvbtWuXBg8erN/97nd66KGHFBQUpDfffFMjR47UW2+9pRtuuKHRWaOjo3X55Zfrww8/VGlpqYKDg085Lzk5Wbt27dI999yjzp07q7CwUOvXr1dubq46d+6sZ555Rvfcc49atWqlv/71r5KkiIgIp3XcfffdateunWbMmOE4s3M6+/bt0y233KK77rpLKSkpysjI0E033aS1a9fqqquuOqN9bEi2n1u2bJkmTJig/v37a+7cuSooKNCzzz6rTz/9VF988YVCQ0Mdc+vq6pSYmKiBAwfqb3/7mzZs2KAFCxboggsuUGpq6hnlBEzFAGBaO3bsMCQZ69evNwzDMOx2u9GxY0djypQpTvPeeustQ5LxzDPPOMbq6uqMK6+80pBkZGRkOMaHDx9u9OzZ06iqqnKM2e1249JLLzW6dev2m5kkGWlpaaddPmXKFEOSkZ2dbRiGYRw8eNApw/Hjxw1JxlNPPfWr2/n9739vXH755fXGMzIyDEnGkCFDjNra2lMuO3jwoGOsU6dOhiTjrbfecoyVlJQY7du3N/r06eMYe/TRR41T/ZN6qnWeLtuHH35oSDI+/PBDwzAMo6amxggPDzd69OhhVFZWOuatXr3akGTMmDHDMZaSkmJIMmbPnu20zj59+hh9+/atty3gXMLbWICJLV++XBERERo2bJikn65/ueWWW7
RixQqntzbWrl0rq9WqO++80zHm5eWltLQ0p/UdO3ZMmzZt0s0336wTJ06oqKhIRUVFOnr0qBITE7Vv3756b6+cqVatWkmSTpw4ccrlAQEB8vX11UcffaTjx483ejt33nlng68D6tChg9MZq+DgYI0fP15ffPGF8vPzG53ht+zYsUOFhYW6++675e/v7xhPSkpSTEyM/v3vf9d7zl133eX0+LLLLtOBAweaLCPgCSg7gEnV1dVpxYoVGjZsmA4ePKj9+/dr//79GjhwoAoKCrRx40bH3O+//17t27dXYGCg0zp+edfW/v37ZRiGHnnkEbVr187p59FHH5UkFRYWnlXusrIySVLr1q1PudzPz0/z5s3T+++/r4iICA0dOlTz588/49LRpUuXBs/t2rVrvetxLrzwQkk65fU9rvL9999Lki666KJ6y2JiYhzLT/L391e7du2cxs4777yzKoWAGXDNDmBSmzZtUl5enlasWKEVK1bUW758+XIlJCSc0Trtdrsk6f7771diYuIp55ztbe3ffPONvL29f7WM3Hfffbruuuv0zjvvaN26dXrkkUc0d+5cbdq0SX369GnQdgICAs4q5y+d7g6u5rw4uCnvWAM8GWUHMKnly5crPDxcixYtqrds1apVevvtt7VkyRIFBASoU6dO+vDDD1VRUeF0dmf//v1Ozzv//PMlSVarVfHx8S7PnJubq82bNysuLu60Z3ZOuuCCCzRt2jRNmzZN+/btU+/evbVgwQK99tprkk5fPhrj5Bmtn69z7969kn66u0r66QyKJBUXFztdNPzLsy9nkq1Tp06SpJycHF155ZVOy3JychzLAfw63sYCTKiyslKrVq3StddeqxtvvLHez+TJk3XixAnH7eKJiYmy2Wx66aWXHOuw2+31ilJ4eLiuuOIKvfjii8rLy6u33R9//LHRmY8dO6Zbb71VdXV1jruUTqWiokJVVVVOYxdccIFat26t6upqx1hQUJCKi4sbnefnjhw5orffftvxuLS0VK+++qp69+6tyMhIRwZJ2rJli2NeeXm5XnnllXrra2i2fv36KTw8XEuWLHHat/fff1979uxRUlJSY3cJOKdwZgcwoXfffVcnTpzQH/7wh1MuHzRokOMDBm+55RaNHDlSAwYM0LRp07R//37FxMTo3Xff1bFjxyQ5n4lYtGiRhgwZop49e+rOO+/U+eefr4KCAmVlZemHH35Qdnb2b+bbu3evXnvtNRmGodLSUscnKJeVlWnhwoW6+uqrf/W5w4cP180336zY2Fj5+Pjo7bffVkFBgUaPHu2Y17dvX73wwgt6/PHH1bVrV4WHh9c7O9JQF154oSZOnKjt27crIiJCL7/8sgoKCpSRkeGYk5CQoOjoaE2cOFEPPPCAvL299fLLL6tdu3bKzc11Wl9Ds1mtVs2bN08TJkzQ5ZdfrltvvdVx63nnzp01derURu0PcM5x891gAJrAddddZ/j7+xvl5eWnnXP77bcbVqvVKCoqMgzDMH788UdjzJgxRuvWrY2QkBDj9ttvNz799FNDkrFixQqn53733XfG+PHjjcjISMNqtRq/+93vjGuvvdb45z//+ZvZJDl+vLy8jNDQUKNPnz7GlClTjF27dtWb/8tbz4uKioy0tDQjJibGCAoKMkJCQoyBAwcab775ptPz8vPzjaSkJKN169aGJMet3idvBd++fXu9bZ3u1vOkpCRj3bp1Rq9evQw/Pz8jJibGWLlyZb3n79y50xg4cKDh6+trREdHGwsXLjzlOk+X7Ze3np/0xhtvGH369DH8/PyMNm3aGGPHjjV++OEHpzkpKSlGUFBQvUynuyUeOJfw3VgATuudd97RDTfcoE8++USDBw92dxwAaBTKDgBJP13n8/M7lOrq6pSQkKAdO3YoPz/f5XcvAUBz4ZodAJKke+65R5WVlYqLi1N1dbVWrVqlrVu36oknnqDoAPBonNkBIEnKzMzUggULtH//flVVValr165KTU
3V5MmT3R0NAM4KZQcAAJgan7MDAABMjbIDAABMjQuU9dMnxR45ckStW7d26UfMAwCApmMYhk6cOKEOHTrIy+v0528
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.hist(data[\"age\"], bins=10, edgecolor=\"black\")\n",
"plt.title(\"Age Distribution\")\n",
"plt.xlabel(\"Age\")\n",
"plt.ylabel(\"Frequency\")\n",
"plt.grid(axis=\"y\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Визуализация - Ящик с усами"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0kAAAIjCAYAAADWYVDIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABU0klEQVR4nO3deVhV5f7//9cWZFIQRJlMRQ0FzQmcqUDFyBzLKXM2zY85nLThxNHUBrXMHCrTshI7TlmZxzTNjkOe1JxIswJFj6QnZ1NBxQnu3x/92N+1QxEM24LPx3XtS/e91rrXew83m9dea93YjDFGAAAAAABJUglnFwAAAAAAtxNCEgAAAABYEJIAAAAAwIKQBAAAAAAWhCQAAAAAsCAkAQAAAIAFIQkAAAAALAhJAAAAAGBBSAIAAAAAC0ISAPxF0tLSZLPZlJiY6OxSHKxatUr16tWTh4eHbDabzpw54+yS8i3nOZ08ebKzS/nL9e3bV6VLl77herGxsYqNjb31BQFAMUJIAlDkJCYmymazOdwCAgLUvHlzrVy58i+vZ/369Q61lCxZUlWrVlXv3r313//+t1D2sWnTJo0bN67QA8ypU6fUtWtXeXp6asaMGfrnP/+pUqVK3XC7d955RzabTY0bNy7Uem5nu3fvVufOnVW5cmV5eHioQoUKatWqld566y1nl3bbunz5sqZPn6769evLx8dHvr6+qlWrlp544gmlpKTY1yuM9/c777xz230BAaDocnV2AQBws1566SVVqVJFxhgdO3ZMiYmJeuihh/TFF1+obdu2f3k9w4cPV8OGDXXlyhUlJSXpvffe04oVK7R7926FhIT8qb43bdqkF198UX379pWvr2/hFCxp27ZtysjI0Msvv6y4uLh8bzd//nyFhoZq69at2rdvn+6+++5Cq+l2tGnTJjVv3lyVKlXSwIEDFRQUpEOHDum7777T9OnTNWzYMGeXeF2rV6922r47deqklStXqnv37ho4cKCuXLmilJQULV++XM2aNVN4eLikwnl/v/POOypXrpz69u1beA8AwB2LkASgyGrdurUaNGhgv//4448rMDBQCxcudEpIuu+++9S5c2dJUr9+/VS9enUNHz5cc+fOVUJCwl9eT34cP35ckgr0i+mBAwe0adMmLVmyRIMGDdL8+fM1duzYW1Th7WH8+PEqU6aMtm3bluu5ynkOb1dubm5O2e+2bdu0fPlyjR8/Xv/4xz8clr399ttF6rROAHceTrcDUGz4+vrK09NTrq6O3/+cP39eTz/9tCpWrCh3d3fVqFFDkydPljFGkpSZmanw8HCFh4crMzPTvt1vv/2m4OBgNWvWTFlZWQWup0WLFpJ+DxV5Wbt2re677z6VKlVKvr6+6tChg5KTk+3Lx40bp2effVaSVKVKFftpfWlpaXn2+8knnygqKkqenp4qV66cevbsqV9//dW+PDY2Vn369JEkNWzYUDabLV/fws+fP19+fn5q06aNOnfurPnz519zvVOnTqlXr17206z69OmjXbt2XfO6rJSUFHXu3Flly5aVh4eHGjRooGXLlt2wFqupU6eqcuXK8vT0VExMjH788Uf7sjlz5shms+n777/Ptd2ECRPk4uLi8Nz80f79+1WrVq1rhsmAgACH+zabTUOHDtUnn3yimjVrytPTU02bNtXu3bslSe+++67uvvtueXh4KDY29pqv441eu+vZuXOnypcvr9jYWJ07d05S7muSck4PXbx4scaPH6+77rpLHh4eatmypfbt25erzxkzZqhq1ary9PRUo0aN9J///Cdf1znt379fkhQdHZ1rmYuLi/z9/SXd+P09Z84ctWjRQgEBAXJ3d1fNmjU1c+ZMh/5CQ0P1008/6ZtvvrFvn1PfuHHjZLPZctWQc9qu9fnfvn274uPjVa5cOXl6eqpKlSrq379/no8TQPHEkSQARdbZs2d18uRJGWN0/PhxvfXWWzp37px69uxpX8
cYo/bt22vdunV6/PHHVa9ePX311Vd69tln9euvv2rq1Kny9PTU3LlzFR0drVGjRmnKlCmSpCFDhujs2bNKTEyUi4tLgevL+SUx55fBa/n3v/+t1q1bq2rVqho3bpwyMzP11ltvKTo6WklJSQoNDdUjjzyivXv3auHChZo6darKlSsnSSpfvvx1+01MTFS/fv3UsGFDTZw4UceOHdP06dO1ceNGff/99/L19dWoUaNUo0YNvffee/ZTF6tVq3bDxzV//nw98sgjcnNzU/fu3TVz5kxt27ZNDRs2tK+TnZ2tdu3aaevWrRo8eLDCw8P1r3/9yx7KrH766SdFR0erQoUKev7551WqVCktXrxYHTt21GeffaaHH374hjV99NFHysjI0JAhQ3Tx4kVNnz5dLVq00O7duxUYGKjOnTtryJAhmj9/vurXr5/r8cTGxqpChQrX7b9y5cravHmzfvzxR91zzz03rOc///mPli1bpiFDhkiSJk6cqLZt2+q5557TO++8oyeffFKnT5/WpEmT1L9/f61du9a+bX5eu2vZtm2b4uPj1aBBA/3rX/+Sp6dnnjW++uqrKlGihJ555hmdPXtWkyZNUo8ePbRlyxb7OjNnztTQoUN13333acSIEUpLS1PHjh3l5+enu+66K8/+K1euLOn35zc6OjrXlxc5bvT+njlzpmrVqqX27dvL1dVVX3zxhZ588kllZ2fbn99p06Zp2LBhKl26tEaNGiVJCgwMzLO+Pzp+/LgeeOABlS9fXs8//7x8fX2VlpamJUuWFKgfAMWEAYAiZs6cOUZSrpu7u7tJTEx0WHfp0qVGknnllVcc2jt37mxsNpvZt2+fvS0hIcGUKFHCbNiwwXzyySdGkpk2bdoN61m3bp2RZD788ENz4sQJc/jwYbNixQoTGhpqbDab2bZtmzHGmAMHDhhJZs6cOfZt69WrZwICAsypU6fsbbt27TIlSpQwvXv3tre9/vrrRpI5cODADeu5fPmyCQgIMPfcc4/JzMy0ty9fvtxIMmPGjLG35TyXOTXeyPbt240k8/XXXxtjjMnOzjZ33XWX+dvf/uaw3meffZbr+cvKyjItWrTI9Ry0bNnS1K5d21y8eNHelp2dbZo1a2bCwsLyrCfnOfX09DT/+9//7O1btmwxksyIESPsbd27dzchISEmKyvL3paUlJSrnmtZvXq1cXFxMS4uLqZp06bmueeeM1999ZW5fPlyrnVz3ovW1+rdd981kkxQUJBJT0+3tyckJDi8rgV57fr06WNKlSpljDHm22+/NT4+PqZNmzYOz6MxxsTExJiYmBj7/Zz3a0REhLl06ZK9ffr06UaS2b17tzHGmEuXLhl/f3/TsGFDc+XKFft6iYmJRpJDn9eSnZ1tYmJijCQTGBhounfvbmbMmGF++eWXXOvm9f6+cOFCrrb4+HhTtWpVh7ZatWpds6axY8eaa/26k/Pez9nn559/XqCxAKB443Q7AEXWjBkz9PXXX+vrr7/WvHnz1Lx5cw0YMMDhm98vv/xSLi4uGj58uMO2Tz/9tIwxDrPhjRs3TrVq1VKfPn305JNPKiYmJtd2eenfv7/Kly+vkJAQtWnTRufPn9fcuXMdrpuyOnLkiHbu3Km+ffuqbNmy9vY6deqoVatW+vLLL/O9b6vt27fr+PHjevLJJ+Xh4WFvb9OmjcLDw7VixYqb6lf6/ahAYGCgmjdvLun3U8u6deumRYsWOZySuGrVKpUsWVIDBw60t5UoUcL+zX+O3377TWvXrlXXrl2VkZGhkydP6uTJkzp16pTi4+OVmpqar9PMOnbs6HAkqFGjRmrcuLHDc9i7d28dPnxY69atc3g8np6e6tSpU579t2rVSps3b1b79u21a9cuTZo0SfHx8apQocI1Twts2bKlQkND7fdzZgHs1KmTvL29c7XnzIJ4M6/dunXrFB8fr5YtW2rJkiVyd3fP87Hk6Nevn8P1Svfdd1+uWk6dOqWBAwc6HAXq0a
OH/Pz8bti/zWbTV199pVdeeUV+fn5auHChhgwZosqVK6tbt275vibJekQs5+hxTEyM/vvf/+rs2bP56iM/co7QLV+
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"\n",
"data = df[[\"age\", \"work_type\", \"smoking_status\"]].copy()\n",
"data.dropna(subset=[\"smoking_status\"], inplace=True)\n",
"\n",
"\n",
"plt.figure(figsize=(10, 6))\n",
"\n",
"box_data = [\n",
" data[data[\"smoking_status\"] == status][\"age\"]\n",
" for status in data[\"smoking_status\"].unique()\n",
"]\n",
"plt.boxplot(box_data)\n",
"\n",
"plt.xticks(\n",
" range(1, len(data[\"smoking_status\"].unique()) + 1),\n",
" list(data[\"smoking_status\"].unique()), )\n",
"\n",
"plt.title(\"Box Plot of Age by Smoking Status\")\n",
"plt.xlabel(\"Smoking Status\")\n",
"plt.ylabel(\"Age\")\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Визуализация - диаграммы с областями"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkkAAAHHCAYAAACr0swBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3gUVdvA4d9sS++9E0qA0MEGKL1IUREERQRBsYIKdqzo+4qKBQsq2AARVPTDRo/Ul16U3lMIJT2bnu3z/RGysiRANtklyXLu68qlO3vmzHN2l82TmTPPkWRZlhEEQRAEQRBsKOo7AEEQBEEQhIZIJEmCIAiCIAjVEEmSIAiCIAhCNUSSJAiCIAiCUA2RJAmCIAiCIFRDJEmCIAiCIAjVEEmSIAiCIAhCNUSSJAiCIAiCUA2RJAmCIAiCIFRDJEmCcJHp06cjSRK5ubn1HUqdlJSUMHHiRMLDw5EkiSlTptR3SDWWlpaGJEm8//77l223YcMGJEliw4YNVyewa9D8+fORJIndu3fXdyhOVflZ+uWXX+o7FKEBEUmSUGeff/45kiRx44031ncol2U2m5k3bx69evUiMDAQNzc3mjRpwoQJE+r1F8CMGTP47bffnNLv/Pnzeeyxx1i4cCFjx469ZFuDwcDHH39Mp06d8PX1xd/fnzZt2vDwww9z9OhRh8fmKg4cOMBdd91FXFwc7u7uREVF0b9/fz799FObdnV9jw8fPsz06dNJS0urW8AN1OOPP45CoSA/P99me35+PgqFAjc3N3Q6nc1zKSkpSJLESy+9dDVDraJXr15IknTFn+nTp9drnELtqOo7AKHxW7RoEU2aNGHnzp2cPHmS5s2b13dIVZSXlzN8+HBWrVpFjx49eOmllwgMDCQtLY0lS5awYMEC0tPTiY6OvuqxzZgxg7vuuothw4Y5tN9169Zx00038frrr1+x7YgRI1i5ciWjR4/moYcewmg0cvToUZYtW0a3bt1o1aqVQ2NzlB49elBeXo5Go7nqx966dSu9e/cmNjaWhx56iPDwcE6fPs327dv5+OOPeeKJJ6xt6/oeHz58mDfeeINevXrRpEkTxwygAbn55pv54osv2LJlC7fddpt1+9atW1EoFBiNRnbv3s3NN99sfW7Lli3WfevTyy+/zMSJE62Pd+3axSeffMJLL71E69atrdvbt29fH+EJdSSSJKFOUlNT2bp1K0uXLuWRRx5h0aJFNfqlbDKZsFgsV+2X23PPPceqVauYNWtWlctOr7/+OrNmzboqcVSSZRmdToeHh4fTjpGdnU1iYuIV2+3atYtly5bx1ltvVfmrfPbs2RQUFDgpwrpTKBS4u7vXy7Hfeust/Pz82LVrF/7+/jbPZWdn10tMjVVlorN582abJGnLli20b9+e8vJyNm/ebJMQbd68GYVCQbdu3ep07Mrvotrq37+/zWN3d3c++eQT+vfvT69eveoUm1D/xOU2oU4WLVpEQEAAQ4YM4a677mLRokVV2lw4v+Sjjz6iWbNmuLm5cfjwYQCOHj3KXXfdRWBgIO7u7lx33XX88ccfNn3k5+fz7LPP0q5dO7y9vfH19WXQoEHs27fvijGeOXOGuXPn0r9//2rn5SiVSp599tkqZ5EKCgoYP348/v7++Pn5MWHCBMrKymzazJs3jz59+hAaGoqbmxuJiYl88cUXVY7RpEkThg4dyurVq7nuuuvw8PBg7ty5SJJEaWkpCxYssJ6WHz9+/GXHk52dzYMPPkhYWBju7u506NCBBQsWWJ+vnFuRmprK8uXLrf1e6lJNcnIyAN27d6/2tQkKCrI+rpyvdfz4ce677z78/PwICQnh1VdfRZZlTp8+zR133IGvry/h4eF88MEHdsd/KbIs8/DDD6PRaFi6dKnNWC+ck9SrVy/atm3L4cOH6d27N56enkRFRTFz5swqfZ46dYrbb78dLy8vQkNDmTp1KqtXr67RPKfk5GTatGlTJU
ECCA0Ntf7/5d7jU6dO8fjjj9OyZUs8PDwICgpi5MiRNu/V/PnzGTlyJAC9e/e29lEZ36Uu5TRp0sTms2Q0GnnjjTdo0aIF7u7uBAUFcfPNN5OUlHTZcVYqKyvjkUceISgoCF9fX8aNG4dWq7U+f//99xMcHIzRaKyy74ABA2jZsuUl+46NjSUmJsZ6dqjSli1b6N69O926dav2uQtf/5p8rq70XXQxvV7P0KFD8fPzY+vWrZd9fS5l3rx5SJLEP//8U+W5GTNmoFQqOXv2LPDvZ3fPnj1069YNDw8P4uPjmTNnTrWxvf766zRv3hw3NzdiYmJ4/vnn0ev1tYpTuARZEOqgVatW8oMPPijLsixv2rRJBuSdO3fatElNTZUBOTExUW7atKn8zjvvyLNmzZJPnTolHzx4UPbz85MTExPld999V549e7bco0cPWZIkeenSpdY+du3aJTdr1kx+8cUX5blz58pvvvmmHBUVJfv5+clnz569bIxffvmlDMjfffddjcb0+uuvy4DcqVMnefjw4fLnn38uT5w4UQbk559/3qbt9ddfL48fP16eNWuW/Omnn8oDBgyQAXn27Nk27eLi4uTmzZvLAQEB8osvvijPmTNHXr9+vbxw4ULZzc1NvuWWW+SFCxfKCxculLdu3XrJ2MrKyuTWrVvLarVanjp1qvzJJ5/It9xyiwzIH330kSzLspyZmSkvXLhQDg4Oljt27Gjtt6SkpNo+t27dKgPyQw89JBuNxhq9Nh07dpRHjx4tf/755/KQIUNkQP7www/lli1byo899pj8+eefy927d5cBeePGjXbFL8v/fmbee+89WZZl2WQyyePGjZPd3NzkZcuWWdutX79eBuT169dbt/Xs2VOOjIyUY2Ji5Keeekr+/PPP5T59+siAvGLFCmu7kpISuWnTprKHh4f84osvyh999JF8ww03yB06dKjSZ3UGDBgg+/j4yAcOHLhsu8u9xz///LPcoUMH+bXXXpO//PJL+aWXXpIDAgLkuLg4ubS0VJZlWU5OTpaffPJJGZBfeuklax+ZmZmyLMsyIL/++utVjhsXFyfff//91scvvfSSLEmS/NBDD8lfffWV/MEHH8ijR4+W33nnncvGP2/ePBmQ27VrJ99yyy3yJ598Ik+aNElWKBRyjx49ZIvFIsuyLCclJcmA/Oeff9rsn5GRISuVSvnNN9+87HFGjx4tu7m5yTqdTpZlWdbr9bK7u7u8ePFi+euvv5YDAwOtx8rPz5clSZIfe+wxWZbt/1xV911U+Vn6+eefrX32799fDggIqPKddjk///yzzeenqKhI9vDwkJ955pkqbRMTE+U+ffpYH1d+dkNDQ+XJkyfLn3zyiXzzzTfLgPzNN99Y25nNZnnAgAGyp6enPGXKFHnu3Lny5MmTZZVKJd9xxx01jlW4MpEkCbW2e/duGZCTkpJkWZZli8UiR0dHy0899ZRNu8ovJl9fXzk7O9vmub59+8rt2rWzfjFW9tOtWze5RYsW1m06nU42m81V+nVzc7vil+/UqVNlQP7nn39qNK7KROCBBx6w2X7nnXfKQUFBNtvKysqq7D9w4EC5adOmNtvi4uJkQF61alWV9l5eXja/zC7no48+kgH5+++/t24zGAxy165dZW9vb7moqMjmmEOGDLlinxaLRe7Zs6cMyGFhYfLo0aPlzz77TD516lSVtpWvzcMPP2zdZjKZ5OjoaFmSJJtfuFqtVvbw8LAZW03jvzBJMhqN8t133y17eHjIq1evtonnUknSxUmxXq+Xw8PD5REjRli3ffDBBzIg//bbb9Zt5eXlcqtWrWqUJK1Zs0ZWKpWyUqmUu3btKj///PPy6tWrZYPBUKXtpd7j6j4/27ZtqxL/xb94L1TTJKlDhw41+jxcrDJJ6tKli83YZs6cKQPy77//LstyxS/u6Oho+e6777bZ/8MPP5QlSZJTUlIue5zPPvtMBuT//e9/siz/+zqcOnVKPnz4sAzIhw
4dkmVZlpctWyYD8qJFi2RZtv9zVd130YVJUnFxsdyzZ085ODi4xt8blap7r0aPHi1HRkbafIf9/fffMiDPmzfPuq3
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"data = df[[\"age\", \"work_type\", \"smoking_status\"]].copy()\n",
"data.dropna(subset=[\"smoking_status\"], inplace=True)\n",
"\n",
"grouped_data = (\n",
" data.groupby([\"work_type\", \"smoking_status\"]).size().unstack(fill_value=0)\n",
")\n",
"\n",
"grouped_data.plot(kind=\"area\", alpha=0.5, stacked=True)\n",
"\n",
"plt.title(\"Area Chart of Smoking Status by Work Type\")\n",
"plt.xlabel(\"Work Type\")\n",
"plt.ylabel(\"Number of Observations\")\n",
"plt.legend(title=\"Smoking Status\")\n",
"plt.grid(True)\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Визуализация - диаграммы рассеяния"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAj4AAAHHCAYAAAC/R1LgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOy9d5gkZ3W3fVfuPDnszuagtFoFJESQACW0CAmBAUvAhxH5NYgMBmPAJAPGxmD8YgOvjQEjCUkgEBklhAAlLBTQ7krapE2TU+eu/Hx/PN2tmZ3ZMJtX+9zXNdJOV3V1VXX11K/P+Z1zNCGEQKFQKBQKheI4QD/SO6BQKBQKhUJxuFDCR6FQKBQKxXGDEj4KhUKhUCiOG5TwUSgUCoVCcdyghI9CoVAoFIrjBiV8FAqFQqFQHDco4aNQKBQKheK4QQkfhUKhUCgUxw1K+CgUCoVCoThuUMJHoVAo9pPf/va3aJrGb3/72yO9K8c16n1QzAUlfBTHJd/5znfQNG3aT3d3NxdccAG/+tWvZqzfWOetb33rrNv72Mc+1lxnbGys+fgb3/hGMpnMITuO/eHKK69E0zQ+8pGPHOldOSopFot87nOf4+yzz6alpQXHcVi8eDFXXXUVv/jFL4707h0xjsZrWaHYH5TwURzXfOYzn+F73/se//M//8OHP/xhRkdHeelLX8rPf/7zGesmEgluvvlmfN+fsez73/8+iUTicOzyAVEsFvnZz37GkiVL+P73v48a1TedTZs2ceaZZ/LJT36SpUuX8tnPfpavf/3rvPnNb2br1q1cfvnlfO973zvSu6lQKA4A80jvgEJxJLn00ks5++yzm7+/5S1voaenh+9///tcfvnl09Z9yUtewk9/+lN+9atf8fKXv7z5+L333stTTz3Fq171Km6++ebDtu/7w80330wURfz3f/83F154Ib/73e940YtedFj3wXVdbNtG14+u711hGPIXf/EXDA8Pc/fdd3PuuedOW/7JT36S2267jSiKjtAeKhSKg8HR9ZdHoTjCtLa2kkwmMc2Z3wn6+vp44QtfyPXXXz/t8euuu47Vq1dz6qmn7tdrfulLX0LTNLZt2zZj2Uc/+lFs22ZychKAjRs38qpXvYre3l4SiQQLFizgNa95DYVCYZ9e67rrruPFL34xF1xwASeffDLXXXddc9mDDz6Ipml897vfnfG8W2+9FU3TpkXC+vv7efOb30xPTw+O47Bq1Sr++7//e9rzGt6LG264gY9//OP09fWRSqUoFotMTEzwoQ99iNWrV5PJZMjlclx66aU8+uijM15/27ZtXHHFFaTTabq7u3n/+9/f3KddfR0PPPAAL3nJS2hpaSGVSvGiF72Ie+65Z6/n5gc/+AFr167lE5/4xAzR0+CSSy7h0ksv3eN2lixZwhvf+MYZj59//vmcf/750x5zXZdPfepTnHDCCSQSCebNm8crX/lKNm/e3FynUqnwwQ9+kIULF+I4DieeeCJf+tKXZkTrbr/9ds477zxaW1vJZDKceOKJ/N3f/d20dTzP45Of/CQrVqzAcRwWLlzIhz/8YTzP2+MxzYW9nf8f/vCHaJrG3XffPeO53/zmN9E0jbVr1zYfe+KJJ3j1q19Ne3s7iUSCs88+m5/+9KcHbX8Vxx8q4qM4rikUCoyNjSGEYGRkhP/7f/8v5XKZ17/+9bOu/7rXvY73vve9lMtlMpkMYRjygx/8gA984AO4rrtf+3DllVfy4Q9/mJtuuom/+Zu/mbbspptu4pJLLqGtrQ3f91mzZg2e5/Hud7+b3t5e+vv7+fnPf04+n6elpWWPrzMwMMBdd93VFDavfe1r+cpXvsLXvvY1bNvm7LPPZtmyZdx0001cffXV055744030tbWxpo1awAYHh7muc99Lpqm8a53vYuuri5+9atf8Za3vIViscj73ve+ac//7Gc/i23bfOhDH8LzPGzbZv369dxyyy385V/+JUuXLm
V4eJhvfvObvOhFL2L9+vXMnz8fkDf+Cy+8kMHBQd773vfS29vL9ddfz1133TXjGH/zm99w6aWXctZZZ/HJT34SXdf59re/zYUXXsjvf/97zjnnnN2en5/97GcAu33vDzZRFHH55Zdz55138prXvIb3vve9lEolbr/9dtauXcvy5csRQnDFFVdw11138Za3vIUzzjiDW2+9lb/5m7+hv7+fr3zlKwCsW7eOyy+/nNNOO43PfOYzOI7Dpk2bpgmOOI654oor+MMf/sDb3/52Tj75ZB577DG+8pWvsGHDBm655ZYDPqZ9Of+XXXYZmUyGm266aUa08cYbb2TVqlXNLxHr1q3j3HPPpa+vj7/9278lnU5z00038YpXvIKbb76Zv/iLvzjgfVYchwiF4jjk29/+tgBm/DiOI77zne/MWB8Q11xzjZiYmBC2bYvvfe97QgghfvGLXwhN08TWrVvFJz/5SQGI0dHR5vOuvvpqkU6n97o/z3ve88RZZ5017bE//vGPAhD/8z//I4QQ4uGHHxaA+MEPfrBfx/ylL31JJJNJUSwWhRBCbNiwQQDixz/+cXOdj370o8KyLDExMdF8zPM80draKt785jc3H3vLW94i5s2bJ8bGxqa9xmte8xrR0tIiqtWqEEKIu+66SwBi2bJlzccauK4roiia9thTTz0lHMcRn/nMZ5qP/cu//IsAxC233NJ8rFariZNOOkkA4q677hJCCBHHsVi5cqVYs2aNiOO4uW61WhVLly4VL37xi/d4fs4880zR2to64/FyuSxGR0ebP4VCobmscXyNfRBCiMWLF4urr756xnZe9KIXiRe96EXN3//7v/9bAOLLX/7yjHUb+3/LLbcIQPzDP/zDtOWvfvWrhaZpYtOmTUIIIb7yla/MuPZ25Xvf+57QdV38/ve/n/b4N77xDQGIe+65Z7fPFWLv1/Jczv9rX/ta0d3dLcIwbD42ODgodF2f9t5fdNFFYvXq1cJ13Wmv8/znP1+sXLmy+dhs74NCsTtUqktxXPPv//7v3H777dx+++1ce+21XHDBBbz1rW/lRz/60azrt7W18ZKXvITvf//7AFx//fU8//nPZ/HixQe0H1dddRV/+tOfpqU4brzxRhzHafqJGhGdW2+9lWq1OufXuO6667jsssvIZrMArFy5krPOOmtauuuqq64iCIJpx3/bbbeRz+e56qqrABBCcPPNN/Oyl70MIQRjY2PNnzVr1lAoFHjooYemvfbVV19NMpmc9pjjOE2fTxRFjI+PN1M0U5//61//mr6+Pq644ormY4lEgre97W3TtvfII4+wceNGXve61zE+Pt7cp0qlwkUXXcTvfvc74jje7fkpFouzVi197GMfo6urq/nzute9brfbmAs333wznZ2dvPvd756xTNM0AH75y19iGAbvec97pi3/4Ac/iBCiWYHY2toKwE9+8pPdHuMPfvADTj75ZE466aRp79mFF14IMGsEbS7M5fxfddVVjIyMTEtT/vCHPySO4+Z1NjExwW9+8xuuvPJKSqVSc3vj4+OsWbOGjRs30t/ff0D7rDhOOcLCS6E4IjQiPv/7v/877fEoisRpp50m5s2bJzzPaz5OPeIjhBA33HCDsCxLbNu2TaTTafHv//7vQghxQBGf/v5+oeu6+NznPieEkN9qFy1aJF7xildMW+8DH/iAAEQymRSXXHKJ+NrXviby+fxet79+/XoBiK985Sti48aNzZ8PfvCDIpFITItinHTSSdO+nb/+9a8XnZ2dIggCIYQQw8PDs0bLpv786Ec/EkI8/U28EbWaShRF4stf/rJYsWKFMAxj2vMvuOCC5nonnHCCeOELXzjj+T/5yU+mfcu/8cYb97pfUyNZu3LGGWfMGvF58sknxe233y5uv/120dPTIy677LLmsgOJ+Jx00kni3HPP3e3+CCHEmjVrxMKFC2c8ns/nBSA+9KEPCSFkVOXcc88VgOjs7BRXXXWVuPHGG6dF1E4++e
Q9npv3vOc9e9yXvV3Lczn/ruuKlpYW8ba3va35/PPOO0+cccYZzd8feOCBvW7voYceEkKoiI9ibiiPj0IxBV3XueC
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.scatter(df[\"bmi\"], df[\"avg_glucose_level\"], alpha=0.5)\n",
"plt.title(\"BMI vs Average Glucose Level\")\n",
"plt.xlabel(\"BMI\")\n",
"plt.ylabel(\"Average Glucose Level\")\n",
"plt.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Визуализация - круговая диаграмма"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoAAAAH4CAYAAADaVFwSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABVnUlEQVR4nO3dd3hUVeLG8Xdm0hMSIAmE3psUKYKCIiogINgVe0EB9ae74q7rLmvFuq59F7GggoqKXVdEQFSkiAhI7yWhhAAJ6T2Zub8/IpGY0JJJztyZ7+d55pHMTO68GUnycs495zosy7IEAACAgOE0HQAAAAB1iwIIAAAQYCiAAAAAAYYCCAAAEGAogAAAAAGGAggAABBgKIAAAAABhgIIAAAQYCiAAAAAAYYCCAAAEGAogIAPmj59uhwOR/ktLCxMHTt21F133aUDBw6YjgcAsLkg0wEAHN2jjz6qNm3aqLCwUIsXL9Yrr7yi2bNna/369YqIiDAdDwBgUxRAwIeNGDFCp512miRp7Nixio2N1fPPP68vv/xS11xzjeF0AAC7YgoYsJHzzjtPkpSYmChJSk9P17333qvu3bsrKipK0dHRGjFihNasWVPpcwsLC/XII4+oY8eOCgsLU5MmTXTZZZdpx44dkqSkpKQK085/vJ1zzjnlx1qwYIEcDoc+/PBD/fOf/1RCQoIiIyN10UUXac+ePZVee9myZRo+fLhiYmIUERGhQYMGacmSJVV+jeecc06Vr//II49Ueu6MGTPUp08fhYeHq2HDhrr66qurfP1jfW1H8ng8evHFF9W1a1eFhYWpcePGuu2225SRkVHhea1bt9aoUaMqvc5dd91V6ZhVZX/mmWcqvaeSVFRUpIcffljt27dXaGioWrRoofvuu09FRUVVvldHOtr7dviWlJRU4flTpkxR165dFRoaqqZNm+rOO+9UZmZmpeOe6Hv37LPPasCAAYqNjVV4eLj69OmjTz755Li5AZjBCCBgI4fLWmxsrCRp586d+uKLL3TllVeqTZs2OnDggF577TUNGjRIGzduVNOmTSVJbrdbo0aN0nfffaerr75ad999t3JycvTtt99q/fr1ateuXflrXHPNNbrgggsqvO7EiROrzPPEE0/I4XDo73//uw4ePKgXX3xRQ4YM0erVqxUeHi5J+v777zVixAj16dNHDz/8sJxOp6ZNm6bzzjtPixYtUr9+/Sodt3nz5nrqqackSbm5ubrjjjuqfO0HH3xQo0eP1tixY5Wamqr//ve/Ovvss7Vq1SrVr1+/0ueMHz9eAwcOlCR99tln+vzzzys8ftttt2n69OkaM2aM/vznPysxMVGTJ0/WqlWrtGTJEgUHB1f5PpyMzMzM8q/tSB6PRxdddJEWL16s8ePHq0uXLlq3bp1eeOEFbd26VV988cVxj33k+3bY7Nmz9cEHH1S475FHHtGkSZM0ZMgQ3XHHHdqyZYteeeUVLV++/Khf5/Heu5deekkXXXSRrrvuOhUXF2vmzJm68sorNWvWLI0cOfK42QHUMQuAz5k2bZolyZo/f76Vmppq7dmzx5o5c6YVGxtrhYeHW3v37rUsy7IKCwstt9td4XMTExOt0NBQ69FHHy2/76233rIkWc8//3yl1/J4POWfJ8l65plnKj2na9eu1qBBg8o//uGHHyxJVrNmzazs7Ozy+z/66CNLkvXSSy+VH7tDhw7WsGHDyl/HsiwrPz/fatOmjTV06NBKrzVgwACrW7du5R+npqZakqyHH364/L6kpCTL5XJZTzzxRIXPXbdunRUUFFTp/m3btlmSrLfffrv8vocfftg68kfgokWLLEnWe++9V+Fz58yZU+n+Vq1aWSNHjqyU/c4777T++GP1j9nvu+8+q1GjRlafPn0qvKfvvvuu5XQ6rUWLFlX4/FdffdWSZC1ZsqTS6x1p0KBBVteuXSvd/8wzz1iSrMTERMuyLOvgwYNWSEiIdf7551f4uz
N58mRLkvXWW29V+PwTee8sq+z/6ZGKi4utbt26Weedd94xcwMwgylgwIcNGTJE8fHxatGiha6++mpFRUXp888/V7NmzSRJoaGhcjrLvo3dbrcOHTqkqKgoderUSb/++mv5cT799FPFxcXpT3/6U6XX+ONU3sm48cYbVa9evfKPr7jiCjVp0kSzZ8+WJK1evVrbtm3Ttddeq0OHDiktLU1paWnKy8vT4MGDtXDhQnk8ngrHLCwsVFhY2DFf97PPPpPH49Ho0aPLj5mWlqaEhAR16NBBP/zwQ4XnFxcXSyp7v47m448/VkxMjIYOHVrhmH369FFUVFSlY5aUlFR4XlpamgoLC4+ZOzk5Wf/973/14IMPKioqqtLrd+nSRZ07d65wzMPT/n98/eqaP3++iouLNWHChPK/O5I0btw4RUdH6+uvv67w/BN57ySVj/hKUkZGhrKysjRw4MAKfw8B+A6mgAEf9vLLL6tjx44KCgpS48aN1alTpwq/tD0ej1566SVNmTJFiYmJcrvd5Y8dniaWyqaOO3XqpKAg737Ld+jQocLHDodD7du3Lz/fbNu2bZKkm2666ajHyMrKUoMGDco/TktLq3TcP9q2bZssyzrq8/44hXn43LY/lq4/HjMrK0uNGjWq8vGDBw9W+HjevHmKj48/Zs4/evjhh9W0aVPddtttlc6P27ZtmzZt2nTUY/7x9atr165dkqROnTpVuD8kJERt27Ytf/ywE3nvJGnWrFl6/PHHtXr16grnLNbkHxgAag8FEPBh/fr1K18FXJUnn3xSDz74oG655RY99thjatiwoZxOpyZMmFBpZM2EwxmeeeYZ9ezZs8rnHFksiouLlZKSoqFDhx73uA6HQ998841cLtcxjylJ+/fvlyQlJCQc85iNGjXSe++9V+Xjfyxmp59+uh5//PEK902ePFlffvlllZ+/adMmTZ8+XTNmzKjyHDuPx6Pu3bvr+eefr/LzW7RocdTstelE3rtFixbpoosu0tlnn60pU6aoSZMmCg4O1rRp0/T+++/XVVQAJ4ECCNjYJ598onPPPVdvvvlmhfszMzMVFxdX/nG7du20bNkylZSUeGUhw2GHR/gOsyxL27dvV48ePcpfV5Kio6M1ZMiQ4x5vzZo1KikpOWbpPXxcy7LUpk0bdezY8bjH3bhxoxwOR6VRrz8ec/78+TrzzDMrTGceTVxcXKWv6VgLNSZOnKiePXvqqquuOurrr1mzRoMHD67VUbNWrVpJkrZs2aK2bduW319cXKzExMRKX9OJvHeffvqpwsLCNHfu3ApTxdOmTfNyegDewjmAgI25XC5ZllXhvo8//ljJyckV7rv88suVlpamyZMnVzrGHz//ZLzzzjvKyckp//iTTz5RSkqKRowYIUnq06eP2rVrp2effVa5ubmVPj81NbVSdpfLVeUWK0e67LLL5HK5NGnSpEr5LcvSoUOHyj8uLS3Vp59+qn79+h1zGnP06NFyu9167LHHKj1WWlpa5RYpJ2rp0qX68ssv9a9//euo5W706NFKTk7W1KlTKz1WUFCgvLy8ar/+kYYMGaKQkBD95z//qfDevfnmm8rKyqqwYvdE3zuXyyWHw1HhFISkpKQTWrkMwAxGAAEbGzVqlB599FGNGTNGAwYM0Lp16/Tee+9VGNmRyhZrvPPOO/rLX/6iX375RQMHDlReXp7mz5+v//u//9PFF19crddv2LChzjrrLI0ZM0YHDhzQiy++qPbt22vcuHGSJKfTqTfeeEMjRoxQ165dNWbMGDVr1kzJycn64YcfFB0dra+++kp5eXl6+eWX9Z///EcdO3bUggULyl/jcHFcu3atli5dqv79+6tdu3Z6/PHHNXHiRCUlJemSSy5RvXr1lJiYqM8//1zjx4/Xvffeq/nz5+vBBx/U2rVr9dVXXx3zaxk0aJBuu+02PfXUU1q9erXOP/98BQcHa9u2bfr444/10ksv6YorrqjW+zRv3jwNHTr0mKOgN9
xwgz766CPdfvvt+uGHH3TmmWfK7XZr8+bN+uijjzR37tzjjoyeiPj4eE2cOFGTJk3S8OHDddFFF2nLli2aMmWK+vb
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"gender_counts = df[\"gender\"].value_counts()\n",
"\n",
"labels = [str(label) for label in gender_counts.index]\n",
"\n",
"plt.figure(figsize=(8, 6))\n",
"plt.pie(gender_counts, labels=labels, autopct=\"%1.1f%%\", startangle=90)\n",
"plt.title(\"Distribution of Gender\")\n",
"plt.axis(\"equal\")\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}