2024-09-13 21:28:22 +04:00
|
|
|
|
{
|
|
|
|
|
"cells": [
|
2024-09-20 17:38:49 +04:00
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Работа с Pandas DataFrame\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"https://pandas.pydata.org/docs/user_guide/10min.html"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Работа с данными - чтение и запись CSV"
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-09-13 21:28:22 +04:00
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-20 17:38:49 +04:00
|
|
|
|
"execution_count": 48,
|
2024-09-13 21:28:22 +04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"import pandas as pd\n",
|
2024-09-20 17:38:49 +04:00
|
|
|
|
"\n",
|
|
|
|
|
"df = pd.read_csv(\"data/healthcare-dataset-stroke-data.csv\", index_col=\"id\")\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"df.to_csv(\"test.csv\")"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Работа с данными - основные команды"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 49,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
|
|
|
"Index: 5110 entries, 9046 to 44679\n",
|
|
|
|
|
"Data columns (total 11 columns):\n",
|
|
|
|
|
" # Column Non-Null Count Dtype \n",
|
|
|
|
|
"--- ------ -------------- ----- \n",
|
|
|
|
|
" 0 gender 5110 non-null object \n",
|
|
|
|
|
" 1 age 5110 non-null float64\n",
|
|
|
|
|
" 2 hypertension 5110 non-null int64 \n",
|
|
|
|
|
" 3 heart_disease 5110 non-null int64 \n",
|
|
|
|
|
" 4 ever_married 5110 non-null object \n",
|
|
|
|
|
" 5 work_type 5110 non-null object \n",
|
|
|
|
|
" 6 Residence_type 5110 non-null object \n",
|
|
|
|
|
" 7 avg_glucose_level 5110 non-null float64\n",
|
|
|
|
|
" 8 bmi 4909 non-null float64\n",
|
|
|
|
|
" 9 smoking_status 5110 non-null object \n",
|
|
|
|
|
" 10 stroke 5110 non-null int64 \n",
|
|
|
|
|
"dtypes: float64(3), int64(3), object(5)\n",
|
|
|
|
|
"memory usage: 479.1+ KB\n",
|
|
|
|
|
" count mean std min 25% 50% \\\n",
|
|
|
|
|
"age 5110.0 43.226614 22.612647 0.08 25.000 45.000 \n",
|
|
|
|
|
"hypertension 5110.0 0.097456 0.296607 0.00 0.000 0.000 \n",
|
|
|
|
|
"heart_disease 5110.0 0.054012 0.226063 0.00 0.000 0.000 \n",
|
|
|
|
|
"avg_glucose_level 5110.0 106.147677 45.283560 55.12 77.245 91.885 \n",
|
|
|
|
|
"bmi 4909.0 28.893237 7.854067 10.30 23.500 28.100 \n",
|
|
|
|
|
"stroke 5110.0 0.048728 0.215320 0.00 0.000 0.000 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" 75% max \n",
|
|
|
|
|
"age 61.00 82.00 \n",
|
|
|
|
|
"hypertension 0.00 1.00 \n",
|
|
|
|
|
"heart_disease 0.00 1.00 \n",
|
|
|
|
|
"avg_glucose_level 114.09 271.74 \n",
|
|
|
|
|
"bmi 33.10 97.60 \n",
|
|
|
|
|
"stroke 0.00 1.00 \n",
|
|
|
|
|
" gender age hypertension heart_disease avg_glucose_level bmi \\\n",
|
|
|
|
|
"id \n",
|
|
|
|
|
"9046 Male 67.0 0 1 228.69 36.6 \n",
|
|
|
|
|
"51676 Female 61.0 0 0 202.21 NaN \n",
|
|
|
|
|
"31112 Male 80.0 0 1 105.92 32.5 \n",
|
|
|
|
|
"60182 Female 49.0 0 0 171.23 34.4 \n",
|
|
|
|
|
"1665 Female 79.0 1 0 174.12 24.0 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" smoking_status stroke \n",
|
|
|
|
|
"id \n",
|
|
|
|
|
"9046 formerly smoked 1 \n",
|
|
|
|
|
"51676 never smoked 1 \n",
|
|
|
|
|
"31112 never smoked 1 \n",
|
|
|
|
|
"60182 smokes 1 \n",
|
|
|
|
|
"1665 never smoked 1 \n",
|
|
|
|
|
" gender age hypertension heart_disease avg_glucose_level bmi \\\n",
|
|
|
|
|
"id \n",
|
|
|
|
|
"18234 Female 80.0 1 0 83.75 NaN \n",
|
|
|
|
|
"44873 Female 81.0 0 0 125.20 40.0 \n",
|
|
|
|
|
"19723 Female 35.0 0 0 82.99 30.6 \n",
|
|
|
|
|
"37544 Male 51.0 0 0 166.29 25.6 \n",
|
|
|
|
|
"44679 Female 44.0 0 0 85.28 26.2 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" smoking_status stroke \n",
|
|
|
|
|
"id \n",
|
|
|
|
|
"18234 never smoked 0 \n",
|
|
|
|
|
"44873 never smoked 0 \n",
|
|
|
|
|
"19723 never smoked 0 \n",
|
|
|
|
|
"37544 formerly smoked 0 \n",
|
|
|
|
|
"44679 Unknown 0 \n",
|
|
|
|
|
" gender age hypertension heart_disease avg_glucose_level bmi \\\n",
|
|
|
|
|
"id \n",
|
|
|
|
|
"72369 Female 14.0 0 0 65.41 19.5 \n",
|
|
|
|
|
"3135 Female 73.0 0 0 69.35 NaN \n",
|
|
|
|
|
"563 Female 41.0 0 0 216.71 36.2 \n",
|
|
|
|
|
"19364 Female 7.0 0 0 74.96 18.8 \n",
|
|
|
|
|
"55459 Female 60.0 0 0 91.82 28.3 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" smoking_status stroke \n",
|
|
|
|
|
"id \n",
|
|
|
|
|
"72369 Unknown 0 \n",
|
|
|
|
|
"3135 never smoked 0 \n",
|
|
|
|
|
"563 never smoked 0 \n",
|
|
|
|
|
"19364 Unknown 0 \n",
|
|
|
|
|
"55459 formerly smoked 0 \n",
|
|
|
|
|
" gender age hypertension heart_disease avg_glucose_level bmi \\\n",
|
|
|
|
|
"id \n",
|
|
|
|
|
"33622 Male 62.0 1 0 211.49 41.1 \n",
|
|
|
|
|
"51554 Male 42.0 0 0 177.91 NaN \n",
|
|
|
|
|
"2296 Male 78.0 1 0 90.19 NaN \n",
|
|
|
|
|
"13602 Male 73.0 1 0 102.06 NaN \n",
|
|
|
|
|
"56156 Other 26.0 0 0 143.33 22.4 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" smoking_status stroke \n",
|
|
|
|
|
"id \n",
|
|
|
|
|
"33622 Unknown 0 \n",
|
|
|
|
|
"51554 Unknown 0 \n",
|
|
|
|
|
"2296 Unknown 0 \n",
|
|
|
|
|
"13602 Unknown 0 \n",
|
|
|
|
|
"56156 formerly smoked 0 \n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"df.info()\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"print(df.describe().transpose())\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"cleared_df = df.drop([\"ever_married\", \"work_type\", \"Residence_type\"], axis=1)\n",
|
|
|
|
|
"print(cleared_df.head())\n",
|
|
|
|
|
"print(cleared_df.tail())\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"sorted_df = cleared_df.sort_values(by=\"gender\")\n",
|
|
|
|
|
"print(sorted_df.head())\n",
|
|
|
|
|
"print(sorted_df.tail())"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Работа с данными - работа с элементами"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 50,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"id\n",
|
|
|
|
|
"9046 67.0\n",
|
|
|
|
|
"51676 61.0\n",
|
|
|
|
|
"31112 80.0\n",
|
|
|
|
|
"60182 49.0\n",
|
|
|
|
|
"1665 79.0\n",
|
|
|
|
|
" ... \n",
|
|
|
|
|
"18234 80.0\n",
|
|
|
|
|
"44873 81.0\n",
|
|
|
|
|
"19723 35.0\n",
|
|
|
|
|
"37544 51.0\n",
|
|
|
|
|
"44679 44.0\n",
|
|
|
|
|
"Name: age, Length: 5110, dtype: float64\n",
|
|
|
|
|
"gender Male\n",
|
|
|
|
|
"age 62.0\n",
|
|
|
|
|
"hypertension 0\n",
|
|
|
|
|
"heart_disease 0\n",
|
|
|
|
|
"ever_married Yes\n",
|
|
|
|
|
"work_type Private\n",
|
|
|
|
|
"Residence_type Rural\n",
|
|
|
|
|
"avg_glucose_level 107.61\n",
|
|
|
|
|
"bmi 31.3\n",
|
|
|
|
|
"smoking_status Unknown\n",
|
|
|
|
|
"stroke 0\n",
|
|
|
|
|
"Name: 63864, dtype: object\n",
|
|
|
|
|
"Rural\n",
|
|
|
|
|
" age Residence_type\n",
|
|
|
|
|
"id \n",
|
|
|
|
|
"63864 62.0 Rural\n",
|
|
|
|
|
"24177 57.0 Urban\n",
|
|
|
|
|
"57274 14.0 Urban\n",
|
|
|
|
|
"37213 60.0 Rural\n",
|
|
|
|
|
"59992 63.0 Urban\n",
|
|
|
|
|
"... ... ...\n",
|
|
|
|
|
"65277 78.0 Rural\n",
|
|
|
|
|
"52679 82.0 Rural\n",
|
|
|
|
|
"36728 74.0 Urban\n",
|
|
|
|
|
"46797 31.0 Rural\n",
|
|
|
|
|
"63898 53.0 Urban\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"[198 rows x 2 columns]\n",
|
|
|
|
|
" gender age hypertension heart_disease ever_married work_type \\\n",
|
|
|
|
|
"id \n",
|
|
|
|
|
"9046 Male 67.0 0 1 Yes Private \n",
|
|
|
|
|
"51676 Female 61.0 0 0 Yes Self-employed \n",
|
|
|
|
|
"31112 Male 80.0 0 1 Yes Private \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" Residence_type avg_glucose_level bmi smoking_status stroke \n",
|
|
|
|
|
"id \n",
|
|
|
|
|
"9046 Urban 228.69 36.6 formerly smoked 1 \n",
|
|
|
|
|
"51676 Rural 202.21 NaN never smoked 1 \n",
|
|
|
|
|
"31112 Rural 105.92 32.5 never smoked 1 \n",
|
|
|
|
|
"gender Male\n",
|
|
|
|
|
"age 67.0\n",
|
|
|
|
|
"hypertension 0\n",
|
|
|
|
|
"heart_disease 1\n",
|
|
|
|
|
"ever_married Yes\n",
|
|
|
|
|
"work_type Private\n",
|
|
|
|
|
"Residence_type Urban\n",
|
|
|
|
|
"avg_glucose_level 228.69\n",
|
|
|
|
|
"bmi 36.6\n",
|
|
|
|
|
"smoking_status formerly smoked\n",
|
|
|
|
|
"stroke 1\n",
|
|
|
|
|
"Name: 9046, dtype: object\n",
|
|
|
|
|
" gender age\n",
|
|
|
|
|
"id \n",
|
|
|
|
|
"60182 Female 49.0\n",
|
|
|
|
|
"1665 Female 79.0\n",
|
|
|
|
|
" gender age\n",
|
|
|
|
|
"id \n",
|
|
|
|
|
"60182 Female 49.0\n",
|
|
|
|
|
"1665 Female 79.0\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"print(df[\"age\"])\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"print(df.loc[63864])\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"print(df.loc[63864, \"Residence_type\"])\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"print(df.loc[63864:63898, [\"age\", \"Residence_type\"]])\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"print(df[0:3])\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"print(df.iloc[0])\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"print(df.iloc[3:5, 0:2])\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"print(df.iloc[[3, 4], [0, 1]])"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Работа с данными - отбор и группировка"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 51,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"['Male' 'Female' 'Other']\n",
|
|
|
|
|
"Male count = 2115\n",
|
|
|
|
|
"Female count = 2994\n",
|
|
|
|
|
"Other count = 1\n",
|
|
|
|
|
"Total count = 5110\n",
|
|
|
|
|
" bmi smoking_status Count\n",
|
|
|
|
|
"0 10.3 Unknown 1\n",
|
|
|
|
|
"1 11.3 Unknown 1\n",
|
|
|
|
|
"2 11.5 never smoked 1\n",
|
|
|
|
|
"3 12.0 Unknown 1\n",
|
|
|
|
|
"4 12.3 Unknown 1\n",
|
|
|
|
|
"... ... ... ...\n",
|
|
|
|
|
"1185 66.8 Unknown 1\n",
|
|
|
|
|
"1186 71.9 never smoked 1\n",
|
|
|
|
|
"1187 78.0 smokes 1\n",
|
|
|
|
|
"1188 92.0 never smoked 1\n",
|
|
|
|
|
"1189 97.6 Unknown 1\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"[1190 rows x 3 columns]\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"s_values = df[\"gender\"].unique()\n",
|
|
|
|
|
"print(s_values)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"s_total = 0\n",
|
|
|
|
|
"for s_value in s_values:\n",
|
|
|
|
|
" count = df[df[\"gender\"] == s_value].shape[0]\n",
|
|
|
|
|
" s_total += count\n",
|
|
|
|
|
" print(s_value, \"count =\", count)\n",
|
|
|
|
|
"print(\"Total count = \", s_total)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"print(df.groupby([\"bmi\", \"smoking_status\"]).size().reset_index(name=\"Count\")) # type: ignore"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Визуализация - Исходные данные\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 52,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
" age work_type smoking_status\n",
|
|
|
|
|
"id \n",
|
|
|
|
|
"9046 67.0 Private formerly smoked\n",
|
|
|
|
|
"51676 61.0 Self-employed never smoked\n",
|
|
|
|
|
"31112 80.0 Private never smoked\n",
|
|
|
|
|
"60182 49.0 Private smokes\n",
|
|
|
|
|
"1665 79.0 Self-employed never smoked\n",
|
|
|
|
|
"... ... ... ...\n",
|
|
|
|
|
"18234 80.0 Private never smoked\n",
|
|
|
|
|
"44873 81.0 Self-employed never smoked\n",
|
|
|
|
|
"19723 35.0 Self-employed never smoked\n",
|
|
|
|
|
"37544 51.0 Private formerly smoked\n",
|
|
|
|
|
"44679 44.0 Govt_job Unknown\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"[5110 rows x 3 columns]\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"data = df[[\"age\", \"work_type\", \"smoking_status\"]].copy()\n",
|
|
|
|
|
"data.dropna(subset=[\"smoking_status\"], inplace=True)\n",
|
|
|
|
|
"print(data)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Визуализация - Линейная диаграмма"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 53,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjYAAAHHCAYAAACskBIUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB5fElEQVR4nO3dd1QU198G8Gd3gV16laYUKwi2iA1R7BI1aiLGXqNGjS2mGVN+aooa38SSaGKNGsXYk9hbYsOKErvYEQuKSgeBZfe+fyAbV1AXBAaW53MOJ9nZmTtfLsPyOHPnjkwIIUBERERkBORSF0BERERUVBhsiIiIyGgw2BAREZHRYLAhIiIio8FgQ0REREaDwYaIiIiMBoMNERERGQ0GGyIiIjIaDDZERERkNBhsiKhEDBo0CFZWVlKXUeKio6Mhk8nw/fffv3C9ffv2QSaTYd++fSVTGJGRYrChMuvnn3+GTCZD48aNpS6l1NJoNHB3d4dMJsP27dulLqdEZGVlYc6cOXjttddgY2MDOzs7+Pv7491330VUVJTU5ZVaZ8+eRffu3eHl5QWVSoWKFSuiXbt2+Omnn/TWmzp1Kv78889C7+fChQuYPHkyoqOjX61goudgsKEyKywsDN7e3jh+/DiuXr0qdTml0j///IPY2Fh4e3sjLCxM6nJKRGhoKD788EPUqlUL06dPx5QpUxAcHIzt27fj6NGjUpf3XMHBwXj8+DGCg4NLfN+HDx9GgwYNcPr0aQwbNgxz587F0KFDIZfLMWfOHL11iyLYTJkyhcGGio2J1AUQFcaNGzdw+PBhbNy4EcOHD0dYWBgmTZpUojVotVpkZWVBpVKV6H4LYuXKlahfvz4GDhyIzz77DGlpabC0tJS6rGITERGBLVu24Ntvv8Vnn32m997cuXORmJgoTWEGkMvlkh1L3377LWxtbREREQE7Ozu99+Li4iSpiaiweMaGyqSwsDDY29ujU6dO6N69u97ZCLVaDQcHBwwePDjPdsnJyVCpVPjoo490yzIzMzFp0iRUq1YNSqUSHh4e+OSTT5CZmam3rUwmw+jRoxEWFgZ/f38olUrs2LEDAPD999+jadOmcHR0hLm5OQICArB+/fo8+3/8+DHGjh0LJycnWFtbo0uXLrhz5w5kMhkmT56st+6dO3fwzjvvwMXFBUqlEv7+/vj1118N7qPHjx/jjz/+QK9evdCjRw88fvwYf/31V77rrlu3Dn5+flCpVKhVqxb++OMPDBo0CN7e3nrrabVazJ49G/7+/lCpVHBxccHw4cORkJBgcF3Xr19HSEgILC0t4e7ujq+++gpCCACAEALe3t7o2rVrnu0yMjJga2uL4cOHP7fta9euAQCCgoLyvKdQKODo6Kh7PXnyZMhkMly+fBn9+vWDra0tKlSogC+//BJCCNy6dQtdu3aFjY0NXF1d8cMPP+RpMy4uDkOGDIGLiwtUKhXq1q2L5cuXv7QPhBB49913YWZmho0bNwLIf4xNy5YtUatWLVy4cAGtWrWChYUFKlasiBkzZuRp8+bNm+jSpQssLS3h7OyM8ePHY+fOnQaN27l27Rr8/f3zhBoAcHZ21v2/TCZDWloali9fDplMBplMhkGDBun2/95778HHxwfm5uZwdHTE22+/rXdmZtmyZXj77bcBAK1atdK1kVtffr8HAODt7a3bD5DzOz5lyhRUr14dKpUKjo6OaNasGXbv3v3C75PKCUFUBvn6+oohQ4YIIYQ4cOCAACCOHz+ue/+dd94RdnZ2IjMzU2+75cuXCwAiIiJCCCGERqMR7du3FxYWFuL9998XCxYsEKNHjxYmJiaia9euetsCEDVr1hQVKlQQU6ZMEfPmzRP//vuvEEKISpUqiffee0/MnTtXzJw5UzRq1EgAEFu2bNFro0ePHgKA6N+/v5g3b57o0aOHqFu3rgAgJk2apFvv3r17olKlSsLDw0N89dVX4pdffhFdunQRAMSsWb
MM6qPVq1cLmUwmYmJihBBCtG7dWnTs2DHPelu2bBEymUzUqVNHzJw5U3z55ZfC3t5e1KpVS3h5eemtO3ToUGFiYiKGDRsm5s+fLyZMmCAsLS1Fw4YNRVZW1gvrGThwoFCpVKJ69eqif//+Yu7cueKNN94QAMSXX36pW+/zzz8Xpqam4tGjR3rbr127VgAQBw4ceO4+Dh8+LACIYcOGCbVa/cJ6Jk2aJACIevXqid69e4uff/5ZdOrUSQAQM2fOFD4+PmLkyJHi559/FkFBQQKA2L9/v2779PR0UbNmTWFqairGjx8vfvzxR9G8eXMBQMyePVu33o0bNwQA8X//939CCCGys7PFgAEDhFKp1Ds+9u7dKwCIvXv36pa1aNFCuLu7Cw8PDzFu3Djx888/i9atWwsAYtu2bbr1UlNTRZUqVYS5ubn49NNPxezZs0WjRo10x9bTbeanffv2wtraWpw9e/aF661YsUIolUrRvHlzsWLFCrFixQpx+PBhIYQQ69atE3Xr1hX/+9//xMKFC8Vnn30m7O3thZeXl0hLSxNCCHHt2jUxduxYAUB89tlnujbu3bsnhBB5fg9yeXl5iYEDB+pef/bZZ0Imk4lhw4aJRYsWiR9++EH07t1bTJ8+/YX1U/nAYENlzokTJwQAsXv3biGEEFqtVlSqVEmMGzdOt87OnTsFALF582a9bTt27CiqVKmie71ixQohl8vFwYMH9dabP3++ACAOHTqkWwZAyOVycf78+Tw1paen673OysoStWrVEq1bt9YtO3nypAAg3n//fb11Bw0alOcDfciQIcLNzU08fPhQb91evXoJW1vbPPvLzxtvvCGCgoJ0rxcuXChMTExEXFyc3nq1a9cWlSpVEikpKbpl+/btEwD0gs3BgwcFABEWFqa3/Y4dO/Jd/qyBAwcKAGLMmDG6ZVqtVnTq1EmYmZmJBw8eCCGEuHTpkgAgfvnlF73tu3TpIry9vYVWq33uPrRarWjRooUAIFxcXETv3r3FvHnzxM2bN/Osmxts3n33Xd2y7OxsUalSJSGTyfT+SCYkJAhzc3O9P66zZ88WAMTKlSt1y7KyskRgYKCwsrISycnJQgj9YKNWq0XPnj2Fubm52Llzp149zws2AMRvv/2mW5aZmSlcXV1FaGiobtkPP/wgAIg///xTt+zx48fC19fXoGCza9cuoVAohEKhEIGBgeKTTz4RO3fuzDesWlpa6vVDrvyOySNHjuSpf926dc+tydBgU7duXdGpU6cXfk9UfvFSFJU5YWFhcHFxQatWrQDknL7u2bMnVq9eDY1GAwBo3bo1nJycsGbNGt12CQkJ2L17N3r27Klbtm7dOtSsWRO+vr54+PCh7qt169YAgL179+rtu0WLFvDz88tTk7m5ud5+kpKS0Lx5c0RGRuqW5162eu+99/S2HTNmjN5rIQQ2bNiAzp07QwihV1dISAiSkpL02s3Po0ePsHPnTvTu3Vu3LDQ0FDKZDGvXrtUtu3v3Ls6ePYsBAwbo3YrdokUL1K5dW6/NdevWwdbWFu3atdOrKSAgAFZWVnn66nlGjx6t+//cy3tZWVnYs2cPAKBGjRpo3Lix3uXF+Ph4bN++HX379oVMJntu2zKZDDt37sQ333wDe3t7/P777xg1ahS8vLzQs2fPfMfYDB06VPf/CoUCDRo0gBACQ4YM0S23s7ODj48Prl+/rlu2bds2uLq66vWxqakpxo4di9TUVOzfv19vP1lZWXj77bexZcsWbNu2De3btzegtwArKyv069dP99rMzAyNGjXSq2XHjh2oWLEiunTpolumUqkwbNgwg/bRrl07HDlyBF26dMHp06cxY8YMhISEoGLFiti0aZNBbTz9O6BWq/Ho0SNUq1YNdnZ2Lz1eC8rOzg7nz5/HlStXirRdMg4MNlSmaDQarF69Gq1atcKNGzdw9epVXL16FY0bN8b9+/fx999/AwBMTEwQGhqKv/76SzdWZuPGjVCr1XrB5s
qVKzh//jwqVKig91WjRg0AeQdOVq5cOd+6tmzZgiZNmkClUsHBwQEVKlTAL7/8gqSkJN06N2/ehFwuz9NGtWrV9F4
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
|
"average_age = data.groupby(\"smoking_status\")[\"age\"].mean()\n",
|
|
|
|
|
"average_age.plot(\n",
|
|
|
|
|
" kind=\"line\",\n",
|
|
|
|
|
" marker=\"o\",\n",
|
|
|
|
|
" title=\"Average Age by Smoking Status\",\n",
|
|
|
|
|
" xlabel=\"Smoking Status\",\n",
|
|
|
|
|
" ylabel=\"Average Age\",\n",
|
|
|
|
|
")\n",
|
|
|
|
|
"plt.grid(True)\n",
|
|
|
|
|
"plt.show()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Визуализация - столбчатая диаграмма"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 62,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA90AAAJOCAYAAACqS2TfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACdmklEQVR4nOzdd3xO9///8eeVLUhiZdk7sTWUoGYIoq1aNSqoVRUtSlFao1WqVdVSSmu0pS2Kqr1XxG4IxceOlpiV1MiQnN8ffrm+rsaWKxfJ43675dZe57zP+7zO5WQ8r/f7nGMyDMMQAAAAAABId3a2LgAAAAAAgMyK0A0AAAAAgJUQugEAAAAAsBJCNwAAAAAAVkLoBgAAAADASgjdAAAAAABYCaEbAAAAAAArIXQDAAAAAGAlhG4AAAAAAKyE0A0AyNQ6d+6sHDlyPLBd3bp1VbduXesXlIUVKVJEzZo1s3UZVle3bl2VK1fO1mUAAJ4ShG4AQLqKiopSq1atVLhwYbm4uCh//vxq2LChvvrqK1uX9tRKTEzUxIkTVblyZbm5ucnDw0Nly5ZVjx49dPjwYXO7bdu2acSIEbp69epj7+vrr7/WrFmznrzop9CFCxdkMpn09ttvp1n39ttvy2Qyafjw4WnWhYaGytHRUTdu3MiIMu9q48aNMplMD/UFAHi2ONi6AABA5rFt2zbVq1dPhQoVUvfu3eXt7a0zZ85o+/btmjhxovr06WPrEu9p9erVNtt3y5YttWLFCrVr107du3dXUlKSDh8+rKVLl6pGjRry8/OTdPv9HTlypDp37iwPD4/H2tfXX3+tvHnzqnPnzul3AE8JT09PlSxZUlu3bk2zLjw8XA4ODgoPD7/rusqVK8vV1TUjyrwrf39//fDDDxbLhgwZohw5cmjo0KE2qgoAkB4I3QCAdDN69Gi5u7tr165daULhhQsXbFPUQ3JycrLJfnft2qWlS5dq9OjReu+99yzWTZo06YlGtbOiWrVq6fvvv9e1a9fMlxVcv35d+/btU5s2bbRkyRIlJyfL3t5eknTu3DmdOHFCL7/88hPv+/r168qePftjbevl5aXXXnvNYtnYsWOVN2/eNMsBAM8WppcDANLN8ePHVbZs2buOwnp6elq8NplMCgsL0/z581WmTBlly5ZNgYGBioqKkiR98803KlGihFxcXFS3bl2dOnUqTZ/z589XQECAsmXLZg4nf//99wPrjIyMVL58+VS3bl1du3ZNUtprulOn+86bN0+jR49WgQIF5OLiogYNGujYsWNp+pw8ebKKFSumbNmy6fnnn9eWLVse6jrx48ePS5Jq1qyZZp29vb3y5MkjSRoxYoQGDhwoSSpatKh5qnHq+zJz5kzVr19fnp6ecnZ2VpkyZTRlyhSL/ooUKaKDBw9q06ZN5u1T6xsxYsRdpy7PmjXLYj+StHv3bgUHBytv3rzKli2bihYtqtdff/2+x3mn1atXq1KlSnJxcVGZMmW0cOFC87oTJ07IZDJpwoQJabbbtm2bTCaTfvrpp3v2XatWLSUnJ2v79u3mZTt27NCtW7c0YMAAXbt2TZGRkeZ1qSPftWrVMi97mPMq9V4Bx48fV9OmTZUzZ0516NDhvsfs6uqqdu3a6datW/d+c+7BMAwVKVLkrh8OxMfHy93dXT179pT0f+fuL7/8ovfee0/e3t7Knj27XnrpJZ05cybN9jt27FDjxo3l7u4uV1dX1alT564zAgAAj4fQDQBIN4ULF9aePXt04MCBh2q/ZcsWvfPOO+rUqZNGjBihQ4cOqVmzZpo8ebK+/PJLvfnmmxo4cKAiIiLShLpZs2apTZs2sre315gxY9S9e3ctXLhQtWrVuu/o8K5du1S/fn1VrlxZK1aseOBN1saOHatFixZpwIABGjJkiLZv354mXE2ZMkVhYWEqUKCAxo0bpxdeeEHNmzfXX3/99cD3oHDhwpKkOXPm3DeMtWjRQu3atZMkTZ
gwQT/88IN++OEH5cuXz1xD4cKF9d5772n8+PEqWLCg3nzzTU2ePNncxxdffKECBQrIz8/PvP2jTl2+cOGCGjVqpFOnTmnw4MH66quv1KFDB4uQez9Hjx7Vq6++qiZNmmjMmDFycHBQ69attWbNGklSsWLFVLNmTc2ZMyfNtnPmzFHOnDnvOyqdGp7vnGIeHh6uUqVKqXLlyipQoIBFoPxv6H6U8+rWrVsKDg6Wp6enPvvsM7Vs2fKuNS1dulQvvfSSWrdurR9//FEODo8+0dBkMum1117TihUrdOXKFYt1v//+u+Li4tKMiI8ePVrLli3ToEGD9NZbb2nNmjUKCgrSzZs3zW3Wr1+v2rVrKy4uTsOHD9fHH3+sq1evqn79+tq5c+cj1wkAuAsDAIB0snr1asPe3t6wt7c3AgMDjXfffddYtWqVkZiYmKatJMPZ2dk4efKkedk333xjSDK8vb2NuLg48/IhQ4YYksxtExMTDU9PT6NcuXLGzZs3ze2WLl1qSDI++OAD87JOnToZ2bNnNwzDMLZu3Wq4ubkZISEhRnx8vEU9derUMerUqWN+vWHDBkOS4e/vbyQkJJiXT5w40ZBkREVFGYZhGAkJCUaePHmMqlWrGklJSeZ2s2bNMiRZ9Hk3KSkpRp06dQxJhpeXl9GuXTtj8uTJxunTp9O0/fTTTy3ehzvduHEjzbLg4GCjWLFiFsvKli1715qGDx9u3O3PgpkzZ1rsc9GiRYYkY9euXfc9rrspXLiwIcn49ddfzctiY2MNHx8fo3LlyuZlqefBoUOHzMsSExONvHnzGp06dXrgfjw9PY0GDRqYXwcHBxtdunQxDMMw2rRpY7Ru3dq8rkqVKkbJkiXN+3iU80qSMXjw4DT7r1OnjlG2bFnDMAzj119/NRwdHY3u3bsbycnJD6z9Tv/9tzpy5IghyZgyZYpFu5deeskoUqSIkZKSYhjG/527+fPnt/g+mjdvniHJmDhxomEYt8+9kiVLGsHBweZtDeP2uVS0aFGjYcOGj1QvAODuGOkGAKSbhg0bKiIiQi+99JL27duncePGKTg4WPnz59eSJUvStG/QoIGKFClifl2tWjVJt28sljNnzjTLT5w4Ien29OYLFy7ozTfflIuLi7ldSEiI/Pz8tGzZsjT72rBhg4KDg9WgQQMtXLhQzs7OD3VMXbp0sbje+4UXXkhTy+XLl9W9e3eLEcwOHTooV65cD+zfZDJp1apV+uijj5QrVy799NNP6t27twoXLqxXX331oa/pzpYtm/n/Y2NjdenSJdWpU0cnTpxQbGzsQ/XxMFIvHVi6dKmSkpIeeXtfX1+98sor5tdubm4KDQ3VH3/8oZiYGElSmzZt5OLiYjHavWrVKl26dOmhrm+uWbOmduzYoeTkZKWkpGj79u2qUaOGeV3q6PaNGzcUGRlpHuV+nPOqV69e96zjp59+0quvvqqePXvqm2++kZ3dk/3ZVapUKVWrVs3ifbly5YpWrFihDh06pLk8IDQ01OL7qFWrVvLx8dHy5csl3b7M4ujRo2rfvr0uX76sS5cu6dKlS7p+/boaNGigzZs3KyUl5YlqBgAwvRwAkM6qVq2qhQsX6p9//tHOnTs1ZMgQ/fvvv2rVqpX+/PNPi7aFChWyeO3u7i5JKliw4F2X//PPP5Kk06dPS5JKly6dZv9+fn7m9ani4+MVEhKiypUra968eY9007T/1pgapP9bS4kSJSzaOTg4WHygcD/Ozs4aOnSoDh06pLNnz+qnn35S9erVNW/ePIWFhT1UH+Hh4QoKClL27Nnl4eGhfPnymW/Mlp6hu06dOmrZsqVGjhypvHnz6uWXX9bMmTOVkJDwUNuXKFEiTTgsVaqUJJmvG/fw8NCLL76ouXPnmtvMmTNH+fPnV/369R+4j1q1apmv3T5w4IBiY2PN18zXqFFDZ8+e1alTp8zXeqeG7kc9rxwcHFSgQIG71nDy5Em99tpratmypb766qt0e9RXaG
iowsPDzbXMnz9fSUlJ6tixY5q2JUuWtHhtMplUokQJ8/t89OhRSVKnTp2UL18+i69vv/1WCQkJ6XruAEBWRegGAFi
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 1000x600 with 1 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"pivot_table = data.groupby([\"work_type\", \"smoking_status\"]).size().unstack()\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"pivot_table.plot(kind=\"bar\", stacked=True, figsize=(10, 6))\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"plt.title(\"Smoking Status by Work Type\")\n",
|
|
|
|
|
"plt.xlabel(\"Work Type\")\n",
|
|
|
|
|
"plt.ylabel(\"Count\")\n",
|
|
|
|
|
"plt.xticks(rotation=45)\n",
|
|
|
|
|
"plt.legend(title=\"Smoking Status\")\n",
|
|
|
|
|
"plt.grid(axis='y')\n",
|
|
|
|
|
"plt.tight_layout() \n",
|
|
|
|
|
"\n",
|
|
|
|
|
"plt.show()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Визуализация - Гистограмма"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 61,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAHHCAYAAABZbpmkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABCK0lEQVR4nO3de1xUdeL/8fcAw1WBRAFdQS0tZNU0r6RZJkFGbSZdTE0yNzfCMrG23C1Ty0xb7baabT/DeiRZbtaWa5qX0krKyxaVGmq6YcolVEDuA3N+f7TOtwktxIFhjq/n48Hj4XzOZ855nzkp786cM2MxDMMQAACASXm5OwAAAEBTouwAAABTo+wAAABTo+wAAABTo+wAAABTo+wAAABTo+wAAABTo+wAAABTo+wAAABTo+wAaNH++9//ymKxaNmyZU2+rWXLlslisei///2vY6xz58669tprm3zbkvTRRx/JYrHoo48+apbtAecKyg5wDli8eLEsFosGDhzo7iiyWCyOHx8fH7Vp00Z9+/bVlClTtHv3bpdtZ/Hixc1SkBqjJWcDzMjCd2MB5jd48GAdOXJE//3vf7Vv3z517drVbVksFouuuuoqjR8/XoZhqKSkRNnZ2Vq5cqXKy8s1b948paenO+YbhqHq6mpZrVZ5e3s3eDs9evRQ27Ztz+gsSV1dnWw2m/z8/GSxWCT9dGanR48eWr16dYPX09hsdrtdNTU18vX1lZcX/y8KuAp/mwCTO3jwoLZu3aqFCxeqXbt2Wr58ubsj6cILL9S4ceN02223afLkyXrppZf03XffqX///po2bZrWrFnjmGuxWOTv739GRedMlZeXS5K8vb3l7+/vKDrNzcvLS/7+/hQdwMX4GwWY3PLly3XeeecpKSlJN95442nLztGjR3XbbbcpODhYoaGhSklJUXZ29imvl/n222914403qk2bNvL391e/fv307rvvnlXOsLAwrVixQj4+PpozZ45j/FTX7OTn52vChAnq2LGj/Pz81L59e11//fWOa206d+6sXbt2afPmzY63zK644gpJ/3ddzubNm3X33XcrPDxcHTt2dFr282t2Tvrggw/Uu3dv+fv7KzY2VqtWrXJaPnPmzFOWpF+u89eyne6anZUrV6pv374KCAhQ27ZtNW7cOB0+fNhpzu23365WrVrp8OHDGjlypFq1aqV27drp/vvvV11d3W+8+oC5+bg7AICmtXz5co0aNUq+vr669dZb9cILL2j79u3q37+/Y47dbtd1112nbdu2KTU1VTExMfrXv/6llJSUeuvbtWuXBg8erN/97nd66KGHFBQUpDfffFMjR47UW2+9pRtuuKHRWaOjo3X55Zfrww8/VGlpqYKDg085Lzk5Wbt27dI999yjzp07q7CwUOvXr1dubq46d+6sZ555Rvfcc49atWqlv/71r5KkiIgIp3XcfffdateunWbMmOE4s3M6+/bt0y233KK77rpLKSkpysjI0E033aS1a9fqqquuOqN9bEi2n1u2bJkmTJig/v37a+7cuSooKNCzzz6rTz/9VF988YVCQ0Mdc+vq6pSYmKiBAwfqb3/7mzZs2KAFCxboggsuUGpq6hnlBEzFAGBaO3bsMCQZ69evNwzDMOx2u9GxY0djypQpTvPeeustQ5LxzDPPOMbq6uqMK6+80pBkZGRkOMaHDx9u9OzZ06iqqnKM2e1249JLLzW6dev2m5kkGWlpaaddPmXKFEOSkZ2dbRiGYRw8eNApw/Hjxw1JxlNPPfWr2/n9739vXH755fXGMzIyDEnGkCFDjNra2lMuO3jwoGOsU6dOhiTjrbfecoyVlJQY7du3N/r06eMYe/TRR41T/ZN6qnWeLtuHH35oSDI+/PBDwzAMo6amxggPDzd69OhhVFZWOuatXr3akGTMmDHDMZaSkmJIMmbPnu20zj59+hh9+/atty3gXMLbWICJLV++XBERERo2bJikn65/ueWWW7
RixQqntzbWrl0rq9WqO++80zHm5eWltLQ0p/UdO3ZMmzZt0s0336wTJ06oqKhIRUVFOnr0qBITE7Vv3756b6+cqVatWkmSTpw4ccrlAQEB8vX11UcffaTjx483ejt33nlng68D6tChg9MZq+DgYI0fP15ffPGF8vPzG53ht+zYsUOFhYW6++675e/v7xhPSkpSTEyM/v3vf9d7zl133eX0+LLLLtOBAweaLCPgCSg7gEnV1dVpxYoVGjZsmA4ePKj9+/dr//79GjhwoAoKCrRx40bH3O+//17t27dXYGCg0zp+edfW/v37ZRiGHnnkEbVr187p59FHH5UkFRYWnlXusrIySVLr1q1PudzPz0/z5s3T+++/r4iICA0dOlTz588/49LRpUuXBs/t2rVrvetxLrzwQkk65fU9rvL9999Lki666KJ6y2JiYhzLT/L391e7du2cxs4777yzKoWAGXDNDmBSmzZtUl5enlasWKEVK1bUW758+XIlJCSc0Trtdrsk6f7771diYuIp55ztbe3ffPONvL29f7WM3Hfffbruuuv0zjvvaN26dXrkkUc0d+5cbdq0SX369GnQdgICAs4q5y+d7g6u5rw4uCnvWAM8GWUHMKnly5crPDxcixYtqrds1apVevvtt7VkyRIFBASoU6dO+vDDD1VRUeF0dmf//v1Ozzv//PMlSVarVfHx8S7PnJubq82bNysuLu60Z3ZOuuCCCzRt2jRNmzZN+/btU+/evbVgwQK99tprkk5fPhrj5Bmtn69z7969kn66u0r66QyKJBUXFztdNPzLsy9nkq1Tp06SpJycHF155ZVOy3JychzLAfw63sYCTKiyslKrVq3StddeqxtvvLHez+TJk3XixAnH7eKJiYmy2Wx66aWXHOuw2+31ilJ4eLiuuOIKvfjii8rLy6u33R9//LHRmY8dO6Zbb71VdXV1jruUTqWiokJVVVVOYxdccIFat26t6upqx1hQUJCKi4sbnefnjhw5orffftvxuLS0VK+++qp69+6tyMhIRwZJ2rJli2NeeXm5XnnllXrra2i2fv36KTw8XEuWLHHat/fff1979uxRUlJSY3cJOKdwZgcwoXfffVcnTpzQH/7wh1MuHzRokOMDBm+55RaNHDlSAwYM0LRp07R//37FxMTo3Xff1bFjxyQ5n4lYtGiRhgwZop49e+rOO+/U+eefr4KCAmVlZemHH35Qdnb2b+bbu3evXnvtNRmGodLSUscnKJeVlWnhwoW6+uqrf/W5w4cP180336zY2Fj5+Pjo7bffVkFBgUaPHu2Y17dvX73wwgt6/PHH1bVrV4WHh9c7O9JQF154oSZOnKjt27crIiJCL7/8sgoKCpSRkeGYk5CQoOjoaE2cOFEPPPCAvL299fLLL6tdu3bKzc11Wl9Ds1mtVs2bN08TJkzQ5ZdfrltvvdVx63nnzp01derURu0PcM5x891gAJrAddddZ/j7+xvl5eWnnXP77bcbVqvVKCoqMgzDMH788UdjzJgxRuvWrY2QkBDj9ttvNz799FNDkrFixQqn53733XfG+PHjjcjISMNqtRq/+93vjGuvvdb45z//+ZvZJDl+vLy8jNDQUKNPnz7GlClTjF27dtWb/8tbz4uKioy0tDQjJibGCAoKMkJCQoyBAwcab775ptPz8vPzjaSkJKN169aGJMet3idvBd++fXu9bZ3u1vOkpCRj3bp1Rq9evQw/Pz8jJibGWLlyZb3n79y50xg4cKDh6+trREdHGwsXLjzlOk+X7Ze3np/0xhtvGH369DH8/PyMNm3aGGPHjjV++OEHpzkpKSlGUFBQvUynuyUeOJfw3VgATuudd97RDTfcoE8++USDBw92dxwAaBTKDgBJP13n8/M7lOrq6pSQkKAdO3YoPz/f5XcvAUBz4ZodAJKke+65R5WVlYqLi1N1dbVWrVqlrVu36oknnqDoAPBonNkBIEnKzMzUggULtH//flVVValr165KTU
3V5MmT3R0NAM4KZQcAAJgan7MDAABMjbIDAABMjQuU9dMnxR45ckStW7d26UfMAwCApmMYhk6cOKEOHTrIy+v0528
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"plt.hist(data[\"age\"], bins=10, edgecolor=\"black\")\n",
|
|
|
|
|
"plt.title(\"Age Distribution\")\n",
|
|
|
|
|
"plt.xlabel(\"Age\")\n",
|
|
|
|
|
"plt.ylabel(\"Frequency\")\n",
|
|
|
|
|
"plt.grid(axis=\"y\")\n",
|
|
|
|
|
"plt.show()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Визуализация - Ящик с усами"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 56,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0kAAAIjCAYAAADWYVDIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABU0klEQVR4nO3deVhV5f7//9cWZFIQRJlMRQ0FzQmcqUDFyBzLKXM2zY85nLThxNHUBrXMHCrTshI7TlmZxzTNjkOe1JxIswJFj6QnZ1NBxQnu3x/92N+1QxEM24LPx3XtS/e91rrXew83m9dea93YjDFGAAAAAABJUglnFwAAAAAAtxNCEgAAAABYEJIAAAAAwIKQBAAAAAAWhCQAAAAAsCAkAQAAAIAFIQkAAAAALAhJAAAAAGBBSAIAAAAAC0ISAPxF0tLSZLPZlJiY6OxSHKxatUr16tWTh4eHbDabzpw54+yS8i3nOZ08ebKzS/nL9e3bV6VLl77herGxsYqNjb31BQFAMUJIAlDkJCYmymazOdwCAgLUvHlzrVy58i+vZ/369Q61lCxZUlWrVlXv3r313//+t1D2sWnTJo0bN67QA8ypU6fUtWtXeXp6asaMGfrnP/+pUqVK3XC7d955RzabTY0bNy7Uem5nu3fvVufOnVW5cmV5eHioQoUKatWqld566y1nl3bbunz5sqZPn6769evLx8dHvr6+qlWrlp544gmlpKTY1yuM9/c777xz230BAaDocnV2AQBws1566SVVqVJFxhgdO3ZMiYmJeuihh/TFF1+obdu2f3k9w4cPV8OGDXXlyhUlJSXpvffe04oVK7R7926FhIT8qb43bdqkF198UX379pWvr2/hFCxp27ZtysjI0Msvv6y4uLh8bzd//nyFhoZq69at2rdvn+6+++5Cq+l2tGnTJjVv3lyVKlXSwIEDFRQUpEOHDum7777T9OnTNWzYMGeXeF2rV6922r47deqklStXqnv37ho4cKCuXLmilJQULV++XM2aNVN4eLikwnl/v/POOypXrpz69u1beA8AwB2LkASgyGrdurUaNGhgv//4448rMDBQCxcudEpIuu+++9S5c2dJUr9+/VS9enUNHz5cc+fOVUJCwl9eT34cP35ckgr0i+mBAwe0adMmLVmyRIMGDdL8+fM1duzYW1Th7WH8+PEqU6aMtm3bluu5ynkOb1dubm5O2e+2bdu0fPlyjR8/Xv/4xz8clr399ttF6rROAHceTrcDUGz4+vrK09NTrq6O3/+cP39eTz/9tCpWrCh3d3fVqFFDkydPljFGkpSZmanw8HCFh4crMzPTvt1vv/2m4OBgNWvWTFlZWQWup0WLFpJ+DxV5Wbt2re677z6VKlVKvr6+6tChg5KTk+3Lx40bp2effVaSVKVKFftpfWlpaXn2+8knnygqKkqenp4qV66cevbsqV9//dW+PDY2Vn369JEkNWzYUDabLV/fws+fP19+fn5q06aNOnfurPnz519zvVOnTqlXr17206z69OmjXbt2XfO6rJSUFHXu3Flly5aVh4eHGjRooGXLlt2wFqupU6eqcuXK8vT0VExMjH788Uf7sjlz5shms+n777/Ptd2ECRPk4uLi8Nz80f79+1WrVq1rhsmAgACH+zabTUOHDtUnn3yimjVrytPTU02bNtXu3bslSe+++67uvvtueXh4KDY29pqv441eu+vZuXOnypcvr9jYWJ07d05S7muSck4PXbx4scaPH6+77rpLHh4eatmypfbt25erzxkzZqhq1ary9PRUo0aN9J///Cdf1znt379fkhQdHZ1rmYuLi/z9/SXd+P09Z84ctWjRQgEBAXJ3d1fNmjU1c+ZMh/5CQ0P1008/6ZtvvrFvn1PfuHHjZLPZctWQc9qu9fnfvn274uPjVa5cOXl6eqpKlSrq379/no8TQPHEkSQARdbZs2d18uRJGWN0/PhxvfXWWzp37px69uxpX8
cYo/bt22vdunV6/PHHVa9ePX311Vd69tln9euvv2rq1Kny9PTU3LlzFR0drVGjRmnKlCmSpCFDhujs2bNKTEyUi4tLgevL+SUx55fBa/n3v/+t1q1bq2rVqho3bpwyMzP11ltvKTo6WklJSQoNDdUjjzyivXv3auHChZo6darKlSsnSSpfvvx1+01MTFS/fv3UsGFDTZw4UceOHdP06dO1ceNGff/99/L19dWoUaNUo0YNvffee/ZTF6tVq3bDxzV//nw98sgjcnNzU/fu3TVz5kxt27ZNDRs2tK+TnZ2tdu3aaevWrRo8eLDCw8P1r3/9yx7KrH766SdFR0erQoUKev7551WqVCktXrxYHTt21GeffaaHH374hjV99NFHysjI0JAhQ3Tx4kVNnz5dLVq00O7duxUYGKjOnTtryJAhmj9/vurXr5/r8cTGxqpChQrX7b9y5cravHmzfvzxR91zzz03rOc///mPli1bpiFDhkiSJk6cqLZt2+q5557TO++8oyeffFKnT5/WpEmT1L9/f61du9a+bX5eu2vZtm2b4uPj1aBBA/3rX/+Sp6dnnjW++uqrKlGihJ555hmdPXtWkyZNUo8ePbRlyxb7OjNnztTQoUN13333acSIEUpLS1PHjh3l5+enu+66K8/+K1euLOn35zc6OjrXlxc5bvT+njlzpmrVqqX27dvL1dVVX3zxhZ588kllZ2fbn99p06Zp2LBhKl26tEaNGiVJCgwMzLO+Pzp+/LgeeOABlS9fXs8//7x8fX2VlpamJUuWFKgfAMWEAYAiZs6cOUZSrpu7u7tJTEx0WHfp0qVGknnllVcc2jt37mxsNpvZt2+fvS0hIcGUKFHCbNiwwXzyySdGkpk2bdoN61m3bp2RZD788ENz4sQJc/jwYbNixQoTGhpqbDab2bZtmzHGmAMHDhhJZs6cOfZt69WrZwICAsypU6fsbbt27TIlSpQwvXv3tre9/vrrRpI5cODADeu5fPmyCQgIMPfcc4/JzMy0ty9fvtxIMmPGjLG35TyXOTXeyPbt240k8/XXXxtjjMnOzjZ33XWX+dvf/uaw3meffZbr+cvKyjItWrTI9Ry0bNnS1K5d21y8eNHelp2dbZo1a2bCwsLyrCfnOfX09DT/+9//7O1btmwxksyIESPsbd27dzchISEmKyvL3paUlJSrnmtZvXq1cXFxMS4uLqZp06bmueeeM1999ZW5fPlyrnVz3ovW1+rdd981kkxQUJBJT0+3tyckJDi8rgV57fr06WNKlSpljDHm22+/NT4+PqZNmzYOz6MxxsTExJiYmBj7/Zz3a0REhLl06ZK9ffr06UaS2b17tzHGmEuXLhl/f3/TsGFDc+XKFft6iYmJRpJDn9eSnZ1tYmJijCQTGBhounfvbmbMmGF++eWXXOvm9f6+cOFCrrb4+HhTtWpVh7ZatWpds6axY8eaa/26k/Pez9nn559/XqCxAKB443Q7AEXWjBkz9PXXX+vrr7/WvHnz1Lx5cw0YMMDhm98vv/xSLi4uGj58uMO2Tz/9tIwxDrPhjRs3TrVq1VKfPn305JNPKiYmJtd2eenfv7/Kly+vkJAQtWnTRufPn9fcuXMdrpuyOnLkiHbu3Km+ffuqbNmy9vY6deqoVatW+vLLL/O9b6vt27fr+PHjevLJJ+Xh4WFvb9OmjcLDw7VixYqb6lf6/ahAYGCgmjdvLun3U8u6deumRYsWOZySuGrVKpUsWVIDBw60t5UoUcL+zX+O3377TWvXrlXXrl2VkZGhkydP6uTJkzp16pTi4+OVmpqar9PMOnbs6HAkqFGjRmrcuLHDc9i7d28dPnxY69atc3g8np6e6tSpU579t2rVSps3b1b79u21a9cuTZo0SfHx8apQocI1Twts2bKlQkND7fdzZgHs1KmTvL29c7XnzIJ4M6/dunXrFB8fr5YtW2rJkiVyd3fP87Hk6Nevn8P1Svfdd1+uWk6dOqWBAwc6HAXq0a
OH/Pz8bti/zWbTV199pVdeeUV+fn5auHChhgwZosqVK6tbt275vibJekQs5+hxTEyM/vvf/+rs2bP56iM/co7QLV+
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 1000x600 with 1 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"data = df[[\"age\", \"work_type\", \"smoking_status\"]].copy()\n",
|
|
|
|
|
"data.dropna(subset=[\"smoking_status\"], inplace=True)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"plt.figure(figsize=(10, 6))\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"box_data = [\n",
|
|
|
|
|
" data[data[\"smoking_status\"] == status][\"age\"]\n",
|
|
|
|
|
" for status in data[\"smoking_status\"].unique()\n",
|
|
|
|
|
"]\n",
|
|
|
|
|
"plt.boxplot(box_data)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"plt.xticks(\n",
|
|
|
|
|
" range(1, len(data[\"smoking_status\"].unique()) + 1),\n",
|
|
|
|
|
" list(data[\"smoking_status\"].unique()), )\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"plt.title(\"Box Plot of Age by Smoking Status\")\n",
|
|
|
|
|
"plt.xlabel(\"Smoking Status\")\n",
|
|
|
|
|
"plt.ylabel(\"Age\")\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"plt.show()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Визуализация - диаграммы с областями"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 57,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkkAAAHHCAYAAACr0swBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3gUVdvA4d9sS++9E0qA0MEGKL1IUREERQRBsYIKdqzo+4qKBQsq2AARVPTDRo/Ul16U3lMIJT2bnu3z/RGysiRANtklyXLu68qlO3vmzHN2l82TmTPPkWRZlhEEQRAEQRBsKOo7AEEQBEEQhIZIJEmCIAiCIAjVEEmSIAiCIAhCNUSSJAiCIAiCUA2RJAmCIAiCIFRDJEmCIAiCIAjVEEmSIAiCIAhCNUSSJAiCIAiCUA2RJAmCIAiCIFRDJEmCcJHp06cjSRK5ubn1HUqdlJSUMHHiRMLDw5EkiSlTptR3SDWWlpaGJEm8//77l223YcMGJEliw4YNVyewa9D8+fORJIndu3fXdyhOVflZ+uWXX+o7FKEBEUmSUGeff/45kiRx44031ncol2U2m5k3bx69evUiMDAQNzc3mjRpwoQJE+r1F8CMGTP47bffnNLv/Pnzeeyxx1i4cCFjx469ZFuDwcDHH39Mp06d8PX1xd/fnzZt2vDwww9z9OhRh8fmKg4cOMBdd91FXFwc7u7uREVF0b9/fz799FObdnV9jw8fPsz06dNJS0urW8AN1OOPP45CoSA/P99me35+PgqFAjc3N3Q6nc1zKSkpSJLESy+9dDVDraJXr15IknTFn+nTp9drnELtqOo7AKHxW7RoEU2aNGHnzp2cPHmS5s2b13dIVZSXlzN8+HBWrVpFjx49eOmllwgMDCQtLY0lS5awYMEC0tPTiY6OvuqxzZgxg7vuuothw4Y5tN9169Zx00038frrr1+x7YgRI1i5ciWjR4/moYcewmg0cvToUZYtW0a3bt1o1aqVQ2NzlB49elBeXo5Go7nqx966dSu9e/cmNjaWhx56iPDwcE6fPs327dv5+OOPeeKJJ6xt6/oeHz58mDfeeINevXrRpEkTxwygAbn55pv54osv2LJlC7fddpt1+9atW1EoFBiNRnbv3s3NN99sfW7Lli3WfevTyy+/zMSJE62Pd+3axSeffMJLL71E69atrdvbt29fH+EJdSSSJKFOUlNT2bp1K0uXLuWRRx5h0aJFNfqlbDKZsFgsV+2X23PPPceqVauYNWtWlctOr7/+OrNmzboqcVSSZRmdToeHh4fTjpGdnU1iYuIV2+3atYtly5bx1ltvVfmrfPbs2RQUFDgpwrpTKBS4u7vXy7Hfeust/Pz82LVrF/7+/jbPZWdn10tMjVVlorN582abJGnLli20b9+e8vJyNm/ebJMQbd68GYVCQbdu3ep07Mrvotrq37+/zWN3d3c++eQT+vfvT69eveoUm1D/xOU2oU4WLVpEQEAAQ4YM4a677mLRokVV2lw4v+Sjjz6iWbNmuLm5cfjwYQCOHj3KXXfdRWBgIO7u7lx33XX88ccfNn3k5+fz7LPP0q5dO7y9vfH19WXQoEHs27fvijGeOXOGuXPn0r9//2rn5SiVSp599tkqZ5EKCgoYP348/v7++Pn5MWHCBMrKymzazJs3jz59+hAaGoqbmxuJiYl88cUXVY7RpEkThg4dyurVq7nuuuvw8PBg7ty5SJJEaWkpCxYssJ6WHz9+/GXHk52dzYMPPkhYWBju7u506NCBBQsWWJ+vnFuRmprK8uXLrf1e6lJNcnIyAN27d6/2tQkKCrI+rpyvdfz4ce677z78/PwICQnh1VdfRZZlTp8+zR133IGvry/h4eF88MEHdsd/KbIs8/DDD6PRaFi6dKnNWC+ck9SrVy/atm3L4cOH6d27N56enkRFRTFz5swqfZ46dYrbb78dLy8vQkNDmTp1KqtXr67RPKfk5GTatGlTJU
ECCA0Ntf7/5d7jU6dO8fjjj9OyZUs8PDwICgpi5MiRNu/V/PnzGTlyJAC9e/e29lEZ36Uu5TRp0sTms2Q0GnnjjTdo0aIF7u7uBAUFcfPNN5OUlHTZcVYqKyvjkUceISgoCF9fX8aNG4dWq7U+f//99xMcHIzRaKyy74ABA2jZsuUl+46NjSUmJsZ6dqjSli1b6N69O926dav2uQtf/5p8rq70XXQxvV7P0KFD8fPzY+vWrZd9fS5l3rx5SJLEP//8U+W5GTNmoFQqOXv2LPDvZ3fPnj1069YNDw8P4uPjmTNnTrWxvf766zRv3hw3NzdiYmJ4/vnn0ev1tYpTuARZEOqgVatW8oMPPijLsixv2rRJBuSdO3fatElNTZUBOTExUW7atKn8zjvvyLNmzZJPnTolHzx4UPbz85MTExPld999V549e7bco0cPWZIkeenSpdY+du3aJTdr1kx+8cUX5blz58pvvvmmHBUVJfv5+clnz569bIxffvmlDMjfffddjcb0+uuvy4DcqVMnefjw4fLnn38uT5w4UQbk559/3qbt9ddfL48fP16eNWuW/Omnn8oDBgyQAXn27Nk27eLi4uTmzZvLAQEB8osvvijPmTNHXr9+vbxw4ULZzc1NvuWWW+SFCxfKCxculLdu3XrJ2MrKyuTWrVvLarVanjp1qvzJJ5/It9xyiwzIH330kSzLspyZmSkvXLhQDg4Oljt27Gjtt6SkpNo+t27dKgPyQw89JBuNxhq9Nh07dpRHjx4tf/755/KQIUNkQP7www/lli1byo899pj8+eefy927d5cBeePGjXbFL8v/fmbee+89WZZl2WQyyePGjZPd3NzkZcuWWdutX79eBuT169dbt/Xs2VOOjIyUY2Ji5Keeekr+/PPP5T59+siAvGLFCmu7kpISuWnTprKHh4f84osvyh999JF8ww03yB06dKjSZ3UGDBgg+/j4yAcOHLhsu8u9xz///LPcoUMH+bXXXpO//PJL+aWXXpIDAgLkuLg4ubS0VJZlWU5OTpaffPJJGZBfeuklax+ZmZmyLMsyIL/++utVjhsXFyfff//91scvvfSSLEmS/NBDD8lfffWV/MEHH8ijR4+W33nnncvGP2/ePBmQ27VrJ99yyy3yJ598Ik+aNElWKBRyjx49ZIvFIsuyLCclJcmA/Oeff9rsn5GRISuVSvnNN9+87HFGjx4tu7m5yTqdTpZlWdbr9bK7u7u8ePFi+euvv5YDAwOtx8rPz5clSZIfe+wxWZbt/1xV911U+Vn6+eefrX32799fDggIqPKddjk///yzzeenqKhI9vDwkJ955pkqbRMTE+U+ffpYH1d+dkNDQ+XJkyfLn3zyiXzzzTfLgPzNN99Y25nNZnnAgAGyp6enPGXKFHnu3Lny5MmTZZVKJd9xxx01jlW4MpEkCbW2e/duGZCTkpJkWZZli8UiR0dHy0899ZRNu8ovJl9fXzk7O9vmub59+8rt2rWzfjFW9tOtWze5RYsW1m06nU42m81V+nVzc7vil+/UqVNlQP7nn39qNK7KROCBBx6w2X7nnXfKQUFBNtvKysqq7D9w4EC5adOmNtvi4uJkQF61alWV9l5eXja/zC7no48+kgH5+++/t24zGAxy165dZW9vb7moqMjmmEOGDLlinxaLRe7Zs6cMyGFhYfLo0aPlzz77TD516lSVtpWvzcMPP2zdZjKZ5OjoaFmSJJtfuFqtVvbw8LAZW03jvzBJMhqN8t133y17eHjIq1evtonnUknSxUmxXq+Xw8PD5REjRli3ffDBBzIg//bbb9Zt5eXlcqtWrWqUJK1Zs0ZWKpWyUqmUu3btKj///PPy6tWrZYPBUKXtpd7j6j4/27ZtqxL/xb94L1TTJKlDhw41+jxcrDJJ6tKli83YZs6cKQPy77//LstyxS/u6Oho+e6777bZ/8MPP5QlSZJTUlIue5zPPvtMBuT//e9/siz/+zqcOnVKPnz4sAzIhw
4dkmVZlpctWyYD8qJFi2RZtv9zVd130YVJUnFxsdyzZ085ODi4xt8blap7r0aPHi1HRkbafIf9/fffMiDPmzfPuq3
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"data = df[[\"age\", \"work_type\", \"smoking_status\"]].copy()\n",
|
|
|
|
|
"data.dropna(subset=[\"smoking_status\"], inplace=True)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"grouped_data = (\n",
|
|
|
|
|
" data.groupby([\"work_type\", \"smoking_status\"]).size().unstack(fill_value=0)\n",
|
|
|
|
|
")\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"grouped_data.plot(kind=\"area\", alpha=0.5, stacked=True)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"plt.title(\"Area Chart of Smoking Status by Work Type\")\n",
|
|
|
|
|
"plt.xlabel(\"Work Type\")\n",
|
|
|
|
|
"plt.ylabel(\"Number of Observations\")\n",
|
|
|
|
|
"plt.legend(title=\"Smoking Status\")\n",
|
|
|
|
|
"plt.grid(True)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"plt.show()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Визуализация - диаграммы рассеяния"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 58,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAj4AAAHHCAYAAAC/R1LgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOy9d5gkZ3W3fVfuPDnszuagtFoFJESQACW0CAmBAUvAhxH5NYgMBmPAJAPGxmD8YgOvjQEjCUkgEBklhAAlLBTQ7krapE2TU+eu/Hx/PN2tmZ3ZMJtX+9zXNdJOV3V1VXX11K/P+Z1zNCGEQKFQKBQKheI4QD/SO6BQKBQKhUJxuFDCR6FQKBQKxXGDEj4KhUKhUCiOG5TwUSgUCoVCcdyghI9CoVAoFIrjBiV8FAqFQqFQHDco4aNQKBQKheK4QQkfhUKhUCgUxw1K+CgUCoVCoThuUMJHoVAo9pPf/va3aJrGb3/72yO9K8c16n1QzAUlfBTHJd/5znfQNG3aT3d3NxdccAG/+tWvZqzfWOetb33rrNv72Mc+1lxnbGys+fgb3/hGMpnMITuO/eHKK69E0zQ+8pGPHOldOSopFot87nOf4+yzz6alpQXHcVi8eDFXXXUVv/jFL4707h0xjsZrWaHYH5TwURzXfOYzn+F73/se//M//8OHP/xhRkdHeelLX8rPf/7zGesmEgluvvlmfN+fsez73/8+iUTicOzyAVEsFvnZz37GkiVL+P73v48a1TedTZs2ceaZZ/LJT36SpUuX8tnPfpavf/3rvPnNb2br1q1cfvnlfO973zvSu6lQKA4A80jvgEJxJLn00ks5++yzm7+/5S1voaenh+9///tcfvnl09Z9yUtewk9/+lN+9atf8fKXv7z5+L333stTTz3Fq171Km6++ebDtu/7w80330wURfz3f/83F154Ib/73e940YtedFj3wXVdbNtG14+u711hGPIXf/EXDA8Pc/fdd3PuuedOW/7JT36S2267jSiKjtAeKhSKg8HR9ZdHoTjCtLa2kkwmMc2Z3wn6+vp44QtfyPXXXz/t8euuu47Vq1dz6qmn7tdrfulLX0LTNLZt2zZj2Uc/+lFs22ZychKAjRs38qpXvYre3l4SiQQLFizgNa95DYVCYZ9e67rrruPFL34xF1xwASeffDLXXXddc9mDDz6Ipml897vfnfG8W2+9FU3TpkXC+vv7efOb30xPTw+O47Bq1Sr++7//e9rzGt6LG264gY9//OP09fWRSqUoFotMTEzwoQ99iNWrV5PJZMjlclx66aU8+uijM15/27ZtXHHFFaTTabq7u3n/+9/f3KddfR0PPPAAL3nJS2hpaSGVSvGiF72Ie+65Z6/n5gc/+AFr167lE5/4xAzR0+CSSy7h0ksv3eN2lixZwhvf+MYZj59//vmcf/750x5zXZdPfepTnHDCCSQSCebNm8crX/lKNm/e3FynUqnwwQ9+kIULF+I4DieeeCJf+tKXZkTrbr/9ds477zxaW1vJZDKceOKJ/N3f/d20dTzP45Of/CQrVqzAcRwWLlzIhz/8YTzP2+MxzYW9nf8f/vCHaJrG3XffPeO53/zmN9E0jbVr1zYfe+KJJ3j1q19Ne3s7iUSCs88+m5/+9KcHbX8Vxx8q4qM4rikUCoyNjSGEYGRkhP/7f/8v5XKZ17/+9bOu/7rXvY73vve9lMtlMpkMYRjygx/8gA984AO4rrtf+3DllVfy4Q9/mJtuuom/+Zu/mbbspptu4pJLLqGtrQ3f91mzZg2e5/Hud7+b3t5e+vv7+fnPf04+n6elpWWPrzMwMMBdd93VFDavfe1r+cpXvsLXvvY1bNvm7LPPZtmyZdx0001cffXV055744030tbWxpo1awAYHh7muc99Lpqm8a53vYuuri5+9atf8Za3vIViscj73ve+ac//7Gc/i23bfOhDH8LzPGzbZv369dxyyy385V/+JUuXLm
V4eJhvfvObvOhFL2L9+vXMnz8fkDf+Cy+8kMHBQd773vfS29vL9ddfz1133TXjGH/zm99w6aWXctZZZ/HJT34SXdf59re/zYUXXsjvf/97zjnnnN2en5/97GcAu33vDzZRFHH55Zdz55138prXvIb3vve9lEolbr/9dtauXcvy5csRQnDFFVdw11138Za3vIUzzjiDW2+9lb/5m7+hv7+fr3zlKwCsW7eOyy+/nNNOO43PfOYzOI7Dpk2bpgmOOI654oor+MMf/sDb3/52Tj75ZB577DG+8pWvsGHDBm655ZYDPqZ9Of+XXXYZmUyGm266aUa08cYbb2TVqlXNLxHr1q3j3HPPpa+vj7/9278lnU5z00038YpXvIKbb76Zv/iLvzjgfVYchwiF4jjk29/+tgBm/DiOI77zne/MWB8Q11xzjZiYmBC2bYvvfe97QgghfvGLXwhN08TWrVvFJz/5SQGI0dHR5vOuvvpqkU6n97o/z3ve88RZZ5017bE//vGPAhD/8z//I4QQ4uGHHxaA+MEPfrBfx/ylL31JJJNJUSwWhRBCbNiwQQDixz/+cXOdj370o8KyLDExMdF8zPM80draKt785jc3H3vLW94i5s2bJ8bGxqa9xmte8xrR0tIiqtWqEEKIu+66SwBi2bJlzccauK4roiia9thTTz0lHMcRn/nMZ5qP/cu//IsAxC233NJ8rFariZNOOkkA4q677hJCCBHHsVi5cqVYs2aNiOO4uW61WhVLly4VL37xi/d4fs4880zR2to64/FyuSxGR0ebP4VCobmscXyNfRBCiMWLF4urr756xnZe9KIXiRe96EXN3//7v/9bAOLLX/7yjHUb+3/LLbcIQPzDP/zDtOWvfvWrhaZpYtOmTUIIIb7yla/MuPZ25Xvf+57QdV38/ve/n/b4N77xDQGIe+65Z7fPFWLv1/Jczv9rX/ta0d3dLcIwbD42ODgodF2f9t5fdNFFYvXq1cJ13Wmv8/znP1+sXLmy+dhs74NCsTtUqktxXPPv//7v3H777dx+++1ce+21XHDBBbz1rW/lRz/60azrt7W18ZKXvITvf//7AFx//fU8//nPZ/HixQe0H1dddRV/+tOfpqU4brzxRhzHafqJGhGdW2+9lWq1OufXuO6667jsssvIZrMArFy5krPOOmtauuuqq64iCIJpx3/bbbeRz+e56qqrABBCcPPNN/Oyl70MIQRjY2PNnzVr1lAoFHjooYemvfbVV19NMpmc9pjjOE2fTxRFjI+PN1M0U5//61//mr6+Pq644ormY4lEgre97W3TtvfII4+wceNGXve61zE+Pt7cp0qlwkUXXcTvfvc74jje7fkpFouzVi197GMfo6urq/nzute9brfbmAs333wznZ2dvPvd756xTNM0AH75y19iGAbvec97pi3/4Ac/iBCiWYHY2toKwE9+8pPdHuMPfvADTj75ZE466aRp79mFF14IMGsEbS7M5fxfddVVjIyMTEtT/vCHPySO4+Z1NjExwW9+8xuuvPJKSqVSc3vj4+OsWbOGjRs30t/ff0D7rDhOOcLCS6E4IjQiPv/7v/877fEoisRpp50m5s2bJzzPaz5OPeIjhBA33HCDsCxLbNu2TaTTafHv//7vQghxQBGf/v5+oeu6+NznPieEkN9qFy1aJF7xildMW+8DH/iAAEQymRSXXHKJ+NrXviby+fxet79+/XoBiK985Sti48aNzZ8PfvCDIpFITItinHTSSdO+nb/+9a8XnZ2dIggCIYQQw8PDs0bLpv786Ec/EkI8/U28EbWaShRF4stf/rJYsWKFMAxj2vMvuOCC5nonnHCCeOELXzjj+T/5yU+mfcu/8cYb97pfUyNZu3LGGWfMGvF58sknxe233y5uv/120dPTIy677LLmsgOJ+Jx00kni3HPP3e3+CCHEmjVrxMKFC2c8ns/nBSA+9KEPCSFkVOXcc88VgOjs7BRXXXWVuPHGG6dF1E4++e
Q9npv3vOc9e9yXvV3Lczn/ruuKlpYW8ba3va35/PPOO0+cccYZzd8feOCBvW7voYceEkKoiI9ibiiPj0IxBV3XueC
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"plt.scatter(df[\"bmi\"], df[\"avg_glucose_level\"], alpha=0.5)\n",
|
|
|
|
|
"plt.title(\"BMI vs Average Glucose Level\")\n",
|
|
|
|
|
"plt.xlabel(\"BMI\")\n",
|
|
|
|
|
"plt.ylabel(\"Average Glucose Level\")\n",
|
|
|
|
|
"plt.grid(True)\n",
|
|
|
|
|
"plt.show()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Визуализация - круговая диаграмма"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 59,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoAAAAH4CAYAAADaVFwSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABWBUlEQVR4nO3dd3hUVeLG8XfSe2gphBY6SEcQURAVEBDsrh0BBcvqKqyua1vXsro/14a7rl1BQcW6FjoKSlERkCbSIaEFkpDeMzP390ckGgkQkknO3Jnv53nmgcxM7rwJKS/n3HOuw7IsSwAAAPAbAaYDAAAAoGFRAAEAAPwMBRAAAMDPUAABAAD8DAUQAADAz1AAAQAA/AwFEAAAwM9QAAEAAPwMBRAAAMDPUAABH/Dwww/L4XA0yGudffbZOvvssyvf/vrrr+VwOPTRRx81yOuPHz9eycnJDfJatVVQUKCJEycqMTFRDodDkydPNh2pVqZPny6Hw6GUlBTTUQB4GAUQ8DJHfukeuYWFhSkpKUkjRozQv//9b+Xn53vkdQ4cOKCHH35Y69at88jxPMmbs9XEE088oenTp+vWW2/VjBkzNHbs2OM+3+126+2339bw4cPVrFkzBQcHKz4+Xuedd55effVVlZaWNlByAP4iyHQAANV79NFH1bZtW5WXl+vgwYP6+uuvNXnyZD377LP6/PPP1bNnz8rnPvjgg7r33ntP6vgHDhzQI488ouTkZPXu3bvG77dw4cKTep3aOF621157TW63u94z1MXixYt1+umn6+9///sJn1tcXKxLLrlECxYs0BlnnKG7775bCQkJysrK0jfffKM//vGPWrlypd54440GSA7AX1AAAS81atQo9evXr/Lt++67T4sXL9aYMWN04YUXavPmzQoPD5ckBQUFKSiofr+di4qKFBERoZCQkHp9nRMJDg42+vo1kZ6erlNOOaVGz50yZYoWLFigqVOn6s4776zy2F133aXt27dr0aJF9RGzwRUWFioyMtJ0DABiChiwlXPPPVd/+9vflJqaqpkzZ1beX905gIsWLdKgQYPUqFEjRUVFqXPnzrr//vslVZy3179/f0nShAkTKqebp0+fLqniPL/u3btrzZo1OuussxQREVH5vr8/B/AIl8ul+++/X4mJiYqMjNSFF16ovXv3VnlOcnKyxo8ff9T7/vaYJ8pW3TmAhYWFuuuuu9SqVSuFhoaqc+fOevrpp2VZVpXnORwO3X777fr000/VvXt3hYaGqlu3bpo/f371n/DfSU9P14033qiEhASFhYWpV69eeuuttyofP3I+5O7duzVnzpzK7Mc6h27v3r16/fXXNXLkyKPK3xEdO3bUH//4xyr3ud1uTZ06Vd26dVNYWJgSEhJ08803Kzs7u8rzkpOTNWbMGC1fvlynnXaawsLC1K5dO7399ttHvc6mTZt07rnnKjw8XC1bttQ//vGPY460zps3T4MHD1ZkZKSio6M1evRobdq0qcpzxo8fr6ioKO3cuVPnn3++oqOjde2111Z7PAANjxFAwGbGjh2r+++/XwsXLtSkSZOqfc6mTZs0ZswY9ezZU48++qhCQ0O1Y8cOrVixQpLUtWtXPfroo3rooYd00003afDgwZKkM844o/IYhw8f1qhRo3TVVVfpuuuuU0JCwnFzPf7443I4HPrrX/+q9PR0TZ06VcOGDdO6desqRyproibZfsuyLF144YVasmSJbrzxRvXu3VsLFizQX/7yF+3fv1/PPfdclecvX75cn3zyif74xz8qOjpa//73v3XZZZdpz549atq06TFzFRcX6+yzz9aOHTt0++23q23btvrwww81fvx45eTk6M4771TXrl01Y8YMTZkyRS1bttRdd90lSYqLi6v2mPPmzZPL5dJ1111X48+PJN18882aPn26JkyYoDvuuEO7d+/WCy+8oLVr12rFihVVRkl37Nihyy+/XDfeeKPGjRunN998U+
PHj9epp56qbt26SZIOHjyoc845R06nU/fee68iIyP16quvVvvvNmPGDI0bN04jRozQk08+qaKiIr300ksaNGiQ1q5dW6WcO51OjRgxQoMGDdLTTz+tiIiIk/o4AdQjC4BXmTZtmiXJWrVq1TGfExsba/Xp06fy7b///e/Wb7+dn3vuOUuSlZGRccxjrFq1ypJkTZs27ajHhgwZYkmyXn755WofGzJkSOXbS5YssSRZLVq0sPLy8irv/+CDDyxJ1vPPP195X5s2baxx48ad8JjHyzZu3DirTZs2lW9/+umnliTrH//4R5XnXX755ZbD4bB27NhReZ8kKyQkpMp969evtyRZ//nPf456rd+aOnWqJcmaOXNm5X1lZWXWwIEDraioqCofe5s2bazRo0cf93iWZVlTpkyxJFnr1q2rcn9paamVkZFRecvMzKx8bNmyZZYk65133qnyPvPnzz/q/jZt2liSrKVLl1bel56eboWGhlp33XVX5X2TJ0+2JFkrV66s8rzY2FhLkrV7927LsiwrPz/fatSokTVp0qQqr33w4EErNja2yv3jxo2zJFn33nvvCT8PABoeU8CADUVFRR13NXCjRo0kSZ999lmtF0yEhoZqwoQJNX7+9ddfr+jo6Mq3L7/8cjVv3lxz586t1evX1Ny5cxUYGKg77rijyv133XWXLMvSvHnzqtw/bNgwtW/fvvLtnj17KiYmRrt27Trh6yQmJurqq6+uvC84OFh33HGHCgoK9M0335x09ry8PEkV/56/f624uLjKW5s2bSof+/DDDxUbG6vhw4crMzOz8nbqqacqKipKS5YsqXKsU045pXIUVaoYjezcuXOVj3fu3Lk6/fTTddppp1V53u+nbBctWqScnBxdffXVVV47MDBQAwYMOOq1JenWW2896c8LgPpHAQRsqKCgoErZ+r0rr7xSZ555piZOnKiEhARdddVV+uCDD06qDLZo0eKkFnx07NixytsOh0MdOnSo9z3kUlNTlZSUdNTno2vXrpWP/1br1q2POkbjxo2POn+uutfp2LGjAgKq/tg81uvUxJHMBQUFVe4/88wztWjRIi1atEjnnXdelce2b9+u3NxcxcfHVymJcXFxKigoUHp6epXn1+TjPfKx/V7nzp2Pem2p4lzU37/2woULj3rtoKAgtWzZ8kSfBgAGcA4gYDP79u1Tbm6uOnTocMznhIeHa+nSpVqyZInmzJmj+fPn6/3339e5556rhQsXKjAw8ISvczLn7dXUsTardrlcNcrkCcd6Het3C0YaQpcuXSRJP/30k3r16lV5f1xcnIYNGyZJVRb7SBULQOLj4/XOO+9Ue8zfn2/oyY/3yH8gZsyYocTExKMe//1K9NDQ0KMKMwDvQAEEbGbGjBmSpBEjRhz3eQEBARo6dKiGDh2qZ599Vk888YQeeOABLVmyRMOGDfP4lUOOjA4dYVmWduzYUWW/wsaNGysnJ+eo901NTVW7du0q3z6ZbG3atNGXX36p/Pz8KqOAW7ZsqXzcE9q0aaMNGzbI7XZXKTV1eZ1Ro0YpMDBQ77zzTo1XyLZv315ffvmlzjzzTI+V9DZt2hz17ydJW7duPeq1JSk+Pr6yoAKwJ/5rBtjI4sWL9dhjj6lt27bHLQxZWVlH3XdkQ+UjV5U4sh9bdYWsNt5+++0q5yV+9NFHSktL06hRoyrva9++vb7//nuVlZVV3jd79uyjtos5mWznn3++XC6XXnjhhSr3P/fcc3I4HFVevy7OP/98HTx4UO+//37lfU6nU//5z38UFRWlIUOGnPQxW7durRtuuEHz5s07Kv8Rvx+pu+KKK+RyufTYY48d9Vyn01mrf8/zzz9f33//vX744YfK+zIyMo4aZRwxYoRiYmL0xBNPqLy8/KjjZGRknPRrAzCDEUDAS82bN09btmyR0+nUoUOHtHjxYi1atEht2rTR559/rrCwsGO+76OPPqqlS5dq9OjRatOmjdLT0/Xiiy+qZcuWGj
RokKSKMtaoUSO9/PLLio6OVmRkpAYMGKC2bdvWKm+TJk00aNAgTZgwQYcOHdLUqVPVoUOHKlvVTJw4UR999JFGjhy
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 800x600 with 1 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"gender_counts = df[\"gender\"].value_counts()\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"labels = [str(label) for label in gender_counts.index]\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"plt.figure(figsize=(8, 6))\n",
|
|
|
|
|
"plt.pie(gender_counts, labels=labels, autopct=\"%1.1f%%\", startangle=90)\n",
|
|
|
|
|
"plt.title(\"Distribution of Gender\")\n",
|
|
|
|
|
"plt.axis(\"equal\")\n",
|
|
|
|
|
"plt.show()"
|
2024-09-13 21:28:22 +04:00
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
"display_name": "Python 3",
|
|
|
|
|
"language": "python",
|
|
|
|
|
"name": "python3"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
2024-09-20 17:38:49 +04:00
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
"version": 3
|
|
|
|
|
},
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
"mimetype": "text/x-python",
|
2024-09-13 21:28:22 +04:00
|
|
|
|
"name": "python",
|
2024-09-20 17:38:49 +04:00
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython3",
|
2024-09-13 21:28:22 +04:00
|
|
|
|
"version": "3.12.6"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 2
|
|
|
|
|
}
|