766 lines
196 KiB
Plaintext
766 lines
196 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Работа с NumPy"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 1,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"matrix = \n",
|
|||
|
" [[4 5 0]\n",
|
|||
|
" [9 9 9]] \n",
|
|||
|
"\n",
|
|||
|
"tmatrix = \n",
|
|||
|
" [[4 9]\n",
|
|||
|
" [5 9]\n",
|
|||
|
" [0 9]] \n",
|
|||
|
"\n",
|
|||
|
"vector = \n",
|
|||
|
" [4 5 0 9 9 9] \n",
|
|||
|
"\n",
|
|||
|
"tvector = \n",
|
|||
|
" [[4]\n",
|
|||
|
" [5]\n",
|
|||
|
" [0]\n",
|
|||
|
" [9]\n",
|
|||
|
" [9]\n",
|
|||
|
" [9]] \n",
|
|||
|
"\n",
|
|||
|
"list_matrix = \n",
|
|||
|
" [array([4, 5, 0]), array([9, 9, 9])] \n",
|
|||
|
"\n",
|
|||
|
"matrix as str = \n",
|
|||
|
" [[4 5 0]\n",
|
|||
|
" [9 9 9]] \n",
|
|||
|
"\n",
|
|||
|
"matrix type is <class 'numpy.ndarray'> \n",
|
|||
|
"\n",
|
|||
|
"vector type is <class 'numpy.ndarray'> \n",
|
|||
|
"\n",
|
|||
|
"list_matrix type is <class 'list'> \n",
|
|||
|
"\n",
|
|||
|
"str_matrix type is <class 'str'> \n",
|
|||
|
"\n",
|
|||
|
"formatted_vector = \n",
|
|||
|
" 4; 5; 0; 9; 9; 9 \n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"matrix = np.array([[4, 5, 0], [9, 9, 9]])\n",
|
|||
|
"print(\"matrix = \\n\", matrix, \"\\n\")\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"tmatrix = matrix.T\n",
|
|||
|
"print(\"tmatrix = \\n\", tmatrix, \"\\n\")\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"vector = np.ravel(matrix)\n",
|
|||
|
"print(\"vector = \\n\", vector, \"\\n\")\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"tvector = np.reshape(vector, (6, 1))\n",
|
|||
|
"print(\"tvector = \\n\", tvector, \"\\n\")\n",
|
|||
|
"\n",
|
|||
|
"list_matrix = list(matrix)\n",
|
|||
|
"print(\"list_matrix = \\n\", list_matrix, \"\\n\")\n",
|
|||
|
"\n",
|
|||
|
"str_matrix = str(matrix)\n",
|
|||
|
"print(\"matrix as str = \\n\", str_matrix, \"\\n\")\n",
|
|||
|
"\n",
|
|||
|
"print(\"matrix type is\", type(matrix), \"\\n\")\n",
|
|||
|
"\n",
|
|||
|
"print(\"vector type is\", type(vector), \"\\n\")\n",
|
|||
|
"\n",
|
|||
|
"print(\"list_matrix type is\", type(list_matrix), \"\\n\")\n",
|
|||
|
"\n",
|
|||
|
"print(\"str_matrix type is\", type(str_matrix), \"\\n\")\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"formatted_vector = \"; \".join(map(str, vector))\n",
|
|||
|
"\n",
|
|||
|
"print(\"formatted_vector = \\n\", formatted_vector, \"\\n\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Работа с Pandas DataFrame\n",
|
|||
|
"\n",
|
|||
|
"https://pandas.pydata.org/docs/user_guide/10min.html"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Работа с данными - чтение и запись CSV"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 3,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"\n",
|
|||
|
"df = pd.read_csv(\"data/healthcare-dataset-stroke-data.csv\", index_col=\"id\")\n",
|
|||
|
"\n",
|
|||
|
"df.to_csv(\"test.csv\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Работа с данными - основные команды"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 20,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|||
|
"Index: 5110 entries, 9046 to 44679\n",
|
|||
|
"Data columns (total 11 columns):\n",
|
|||
|
" # Column Non-Null Count Dtype \n",
|
|||
|
"--- ------ -------------- ----- \n",
|
|||
|
" 0 gender 5110 non-null object \n",
|
|||
|
" 1 age 5110 non-null float64\n",
|
|||
|
" 2 hypertension 5110 non-null int64 \n",
|
|||
|
" 3 heart_disease 5110 non-null int64 \n",
|
|||
|
" 4 ever_married 5110 non-null object \n",
|
|||
|
" 5 work_type 5110 non-null object \n",
|
|||
|
" 6 Residence_type 5110 non-null object \n",
|
|||
|
" 7 avg_glucose_level 5110 non-null float64\n",
|
|||
|
" 8 bmi 4909 non-null float64\n",
|
|||
|
" 9 smoking_status 5110 non-null object \n",
|
|||
|
" 10 stroke 5110 non-null int64 \n",
|
|||
|
"dtypes: float64(3), int64(3), object(5)\n",
|
|||
|
"memory usage: 608.1+ KB\n",
|
|||
|
" count mean std min 25% 50% \\\n",
|
|||
|
"age 5110.0 43.226614 22.612647 0.08 25.000 45.000 \n",
|
|||
|
"hypertension 5110.0 0.097456 0.296607 0.00 0.000 0.000 \n",
|
|||
|
"heart_disease 5110.0 0.054012 0.226063 0.00 0.000 0.000 \n",
|
|||
|
"avg_glucose_level 5110.0 106.147677 45.283560 55.12 77.245 91.885 \n",
|
|||
|
"bmi 4909.0 28.893237 7.854067 10.30 23.500 28.100 \n",
|
|||
|
"stroke 5110.0 0.048728 0.215320 0.00 0.000 0.000 \n",
|
|||
|
"\n",
|
|||
|
" 75% max \n",
|
|||
|
"age 61.00 82.00 \n",
|
|||
|
"hypertension 0.00 1.00 \n",
|
|||
|
"heart_disease 0.00 1.00 \n",
|
|||
|
"avg_glucose_level 114.09 271.74 \n",
|
|||
|
"bmi 33.10 97.60 \n",
|
|||
|
"stroke 0.00 1.00 \n",
|
|||
|
" gender age hypertension work_type avg_glucose_level bmi \\\n",
|
|||
|
"id \n",
|
|||
|
"9046 Male 67.0 0 Private 228.69 36.6 \n",
|
|||
|
"51676 Female 61.0 0 Self-employed 202.21 NaN \n",
|
|||
|
"31112 Male 80.0 0 Private 105.92 32.5 \n",
|
|||
|
"60182 Female 49.0 0 Private 171.23 34.4 \n",
|
|||
|
"1665 Female 79.0 1 Self-employed 174.12 24.0 \n",
|
|||
|
"\n",
|
|||
|
" smoking_status stroke \n",
|
|||
|
"id \n",
|
|||
|
"9046 formerly smoked 1 \n",
|
|||
|
"51676 never smoked 1 \n",
|
|||
|
"31112 never smoked 1 \n",
|
|||
|
"60182 smokes 1 \n",
|
|||
|
"1665 never smoked 1 \n",
|
|||
|
" gender age hypertension work_type avg_glucose_level bmi \\\n",
|
|||
|
"id \n",
|
|||
|
"18234 Female 80.0 1 Private 83.75 NaN \n",
|
|||
|
"44873 Female 81.0 0 Self-employed 125.20 40.0 \n",
|
|||
|
"19723 Female 35.0 0 Self-employed 82.99 30.6 \n",
|
|||
|
"37544 Male 51.0 0 Private 166.29 25.6 \n",
|
|||
|
"44679 Female 44.0 0 Govt_job 85.28 26.2 \n",
|
|||
|
"\n",
|
|||
|
" smoking_status stroke \n",
|
|||
|
"id \n",
|
|||
|
"18234 never smoked 0 \n",
|
|||
|
"44873 never smoked 0 \n",
|
|||
|
"19723 never smoked 0 \n",
|
|||
|
"37544 formerly smoked 0 \n",
|
|||
|
"44679 Unknown 0 \n",
|
|||
|
" gender age hypertension work_type avg_glucose_level bmi \\\n",
|
|||
|
"id \n",
|
|||
|
"47350 Female 0.08 0 children 139.67 14.1 \n",
|
|||
|
"29955 Male 0.08 0 children 70.33 16.9 \n",
|
|||
|
"22877 Male 0.16 0 children 114.71 17.4 \n",
|
|||
|
"41500 Male 0.16 0 children 69.79 13.0 \n",
|
|||
|
"8247 Male 0.16 0 children 109.52 13.9 \n",
|
|||
|
"\n",
|
|||
|
" smoking_status stroke \n",
|
|||
|
"id \n",
|
|||
|
"47350 Unknown 0 \n",
|
|||
|
"29955 Unknown 0 \n",
|
|||
|
"22877 Unknown 0 \n",
|
|||
|
"41500 Unknown 0 \n",
|
|||
|
"8247 Unknown 0 \n",
|
|||
|
" gender age hypertension work_type avg_glucose_level bmi \\\n",
|
|||
|
"id \n",
|
|||
|
"38829 Female 82.0 0 Private 59.32 33.2 \n",
|
|||
|
"25510 Male 82.0 0 Self-employed 111.81 19.8 \n",
|
|||
|
"27705 Female 82.0 0 Self-employed 88.60 32.5 \n",
|
|||
|
"40163 Female 82.0 1 Private 222.52 NaN \n",
|
|||
|
"64778 Male 82.0 0 Private 208.30 32.5 \n",
|
|||
|
"\n",
|
|||
|
" smoking_status stroke \n",
|
|||
|
"id \n",
|
|||
|
"38829 never smoked 1 \n",
|
|||
|
"25510 formerly smoked 0 \n",
|
|||
|
"27705 Unknown 0 \n",
|
|||
|
"40163 formerly smoked 0 \n",
|
|||
|
"64778 Unknown 1 \n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df.info()\n",
|
|||
|
"\n",
|
|||
|
"print(df.describe().transpose())\n",
|
|||
|
"\n",
|
|||
|
"clear_df = df.drop([\"heart_disease\", \"ever_married\", \"Residence_type\"], axis=1)\n",
|
|||
|
"print(clear_df.head())\n",
|
|||
|
"print(clear_df.tail())\n",
|
|||
|
"\n",
|
|||
|
"sorted_df = clear_df.sort_values(by=\"age\")\n",
|
|||
|
"print(sorted_df.head())\n",
|
|||
|
"print(sorted_df.tail())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Работа с данными - работа с элементами"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 19,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"id\n",
|
|||
|
"9046 36.6\n",
|
|||
|
"51676 NaN\n",
|
|||
|
"31112 32.5\n",
|
|||
|
"60182 34.4\n",
|
|||
|
"1665 24.0\n",
|
|||
|
" ... \n",
|
|||
|
"18234 NaN\n",
|
|||
|
"44873 40.0\n",
|
|||
|
"19723 30.6\n",
|
|||
|
"37544 25.6\n",
|
|||
|
"44679 26.2\n",
|
|||
|
"Name: bmi, Length: 5110, dtype: float64\n",
|
|||
|
"gender Male\n",
|
|||
|
"age 67.0\n",
|
|||
|
"hypertension 0\n",
|
|||
|
"heart_disease 1\n",
|
|||
|
"ever_married Yes\n",
|
|||
|
"work_type Private\n",
|
|||
|
"Residence_type Urban\n",
|
|||
|
"avg_glucose_level 228.69\n",
|
|||
|
"bmi 36.6\n",
|
|||
|
"smoking_status formerly smoked\n",
|
|||
|
"stroke 1\n",
|
|||
|
"Name: 9046, dtype: object\n",
|
|||
|
"Male\n",
|
|||
|
" gender bmi\n",
|
|||
|
"id \n",
|
|||
|
"9046 Male 36.6\n",
|
|||
|
"51676 Female NaN\n",
|
|||
|
"31112 Male 32.5\n",
|
|||
|
"60182 Female 34.4\n",
|
|||
|
"1665 Female 24.0\n",
|
|||
|
"56669 Male 29.0\n",
|
|||
|
"53882 Male 27.4\n",
|
|||
|
" gender age hypertension heart_disease ever_married work_type \\\n",
|
|||
|
"id \n",
|
|||
|
"9046 Male 67.0 0 1 Yes Private \n",
|
|||
|
"51676 Female 61.0 0 0 Yes Self-employed \n",
|
|||
|
"31112 Male 80.0 0 1 Yes Private \n",
|
|||
|
"\n",
|
|||
|
" Residence_type avg_glucose_level bmi smoking_status stroke \n",
|
|||
|
"id \n",
|
|||
|
"9046 Urban 228.69 36.6 formerly smoked 1 \n",
|
|||
|
"51676 Rural 202.21 NaN never smoked 1 \n",
|
|||
|
"31112 Rural 105.92 32.5 never smoked 1 \n",
|
|||
|
"gender Male\n",
|
|||
|
"age 67.0\n",
|
|||
|
"hypertension 0\n",
|
|||
|
"heart_disease 1\n",
|
|||
|
"ever_married Yes\n",
|
|||
|
"work_type Private\n",
|
|||
|
"Residence_type Urban\n",
|
|||
|
"avg_glucose_level 228.69\n",
|
|||
|
"bmi 36.6\n",
|
|||
|
"smoking_status formerly smoked\n",
|
|||
|
"stroke 1\n",
|
|||
|
"Name: 9046, dtype: object\n",
|
|||
|
" gender age\n",
|
|||
|
"id \n",
|
|||
|
"60182 Female 49.0\n",
|
|||
|
"1665 Female 79.0\n",
|
|||
|
" gender age\n",
|
|||
|
"id \n",
|
|||
|
"60182 Female 49.0\n",
|
|||
|
"1665 Female 79.0\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"print(df[\"bmi\"])\n",
|
|||
|
"\n",
|
|||
|
"print(df.loc[9046])\n",
|
|||
|
"\n",
|
|||
|
"print(df.loc[9046, \"gender\"])\n",
|
|||
|
"\n",
|
|||
|
"print(df.loc[9046:53882, [\"gender\", \"bmi\"]])\n",
|
|||
|
"\n",
|
|||
|
"print(df[0:3])\n",
|
|||
|
"\n",
|
|||
|
"print(df.iloc[0])\n",
|
|||
|
"\n",
|
|||
|
"print(df.iloc[3:5, 0:2])\n",
|
|||
|
"\n",
|
|||
|
"print(df.iloc[[3, 4], [0, 1]])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Работа с данными - отбор и группировка"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 26,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"['Male' 'Female' 'Other']\n",
|
|||
|
"Male count = 2115\n",
|
|||
|
"Female count = 2994\n",
|
|||
|
"Other count = 1\n",
|
|||
|
"Total count = 5110\n",
|
|||
|
" ever_married avg_glucose_level Count\n",
|
|||
|
"0 No 55.12 1\n",
|
|||
|
"1 No 55.25 1\n",
|
|||
|
"2 No 55.34 1\n",
|
|||
|
"3 No 55.35 1\n",
|
|||
|
"4 No 55.39 1\n",
|
|||
|
"... ... ... ...\n",
|
|||
|
"4445 Yes 263.56 1\n",
|
|||
|
"4446 Yes 267.60 1\n",
|
|||
|
"4447 Yes 267.61 1\n",
|
|||
|
"4448 Yes 267.76 1\n",
|
|||
|
"4449 Yes 271.74 1\n",
|
|||
|
"\n",
|
|||
|
"[4450 rows x 3 columns]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"s_values = df[\"gender\"].unique()\n",
|
|||
|
"print(s_values)\n",
|
|||
|
"\n",
|
|||
|
"s_total = 0\n",
|
|||
|
"for s_value in s_values:\n",
|
|||
|
" count = df[df[\"gender\"] == s_value].shape[0]\n",
|
|||
|
" s_total += count\n",
|
|||
|
" print(s_value, \"count =\", count)\n",
|
|||
|
"print(\"Total count = \", s_total)\n",
|
|||
|
"\n",
|
|||
|
"print(df.groupby([\"ever_married\", \"avg_glucose_level\"]).size().reset_index(name=\"Count\")) # type: ignore"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Визуализация - Исходные данные"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 27,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" gender age bmi\n",
|
|||
|
"id \n",
|
|||
|
"9046 Male 67.0 36.6\n",
|
|||
|
"31112 Male 80.0 32.5\n",
|
|||
|
"60182 Female 49.0 34.4\n",
|
|||
|
"1665 Female 79.0 24.0\n",
|
|||
|
"56669 Male 81.0 29.0\n",
|
|||
|
"... ... ... ...\n",
|
|||
|
"14180 Female 13.0 18.6\n",
|
|||
|
"44873 Female 81.0 40.0\n",
|
|||
|
"19723 Female 35.0 30.6\n",
|
|||
|
"37544 Male 51.0 25.6\n",
|
|||
|
"44679 Female 44.0 26.2\n",
|
|||
|
"\n",
|
|||
|
"[4909 rows x 3 columns]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"data = df[[\"gender\", \"age\", \"bmi\"]].copy()\n",
|
|||
|
"data.dropna(subset=[\"bmi\"], inplace=True)\n",
|
|||
|
"print(data)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Визуализация - Сводка пяти чисел\n",
|
|||
|
"\n",
|
|||
|
"<img src=\"assets/quantile.png\" width=\"400\" style=\"background-color: white\">"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 28,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" age \n",
|
|||
|
" min q1 q2 median q3 max\n",
|
|||
|
"gender \n",
|
|||
|
"Female 0.08 26.0 44.0 44.0 60.0 82.0\n",
|
|||
|
"Male 0.08 21.0 45.0 45.0 60.5 82.0\n",
|
|||
|
"Other 26.00 26.0 26.0 26.0 26.0 26.0\n",
|
|||
|
" age \n",
|
|||
|
" low_iqr iqr high_iqr\n",
|
|||
|
"gender \n",
|
|||
|
"Female 0.0 34.0 111.00\n",
|
|||
|
"Male 0.0 39.5 119.75\n",
|
|||
|
"Other 26.0 0.0 26.00\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<Axes: title={'center': 'age'}, xlabel='gender'>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 28,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi8AAAHNCAYAAADWsJtQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA920lEQVR4nO3dd3gVVeL/8c9NbySEACkQepWAVDGCFClRAcG1LIoS7PsIStFlQQWlCIIIuIIgFlgRy9rdVYGAIqKggIIgRTr8gAQQQwiBkHJ+f7iZL9cE5EJuwkner+fJA3PmzJwz957kfjJzJuMyxhgBAABYwqe0OwAAAOAJwgsAALAK4QUAAFiF8AIAAKxCeAEAAFYhvAAAAKsQXgAAgFUILwAAwCqEFwAAYBXCC1BKXC6XnnrqqdLuRpm3bNkyuVwuLVu27Jz1nnrqKblcLh05cqRkOnYJGTBggGrVqlXa3QDOG+EFZc68efPkcrncvqpWrarOnTvr888/L+3uXbRNmzbpqaee0u7du0u7KwBQKvxKuwOAt4wdO1a1a9eWMUZpaWmaN2+err/+ev3nP/9Rz549S7t7F2zTpk0aM2aMOnXqxG/LAMolwgvKrOuuu06tW7d2lu+55x5FR0frrbfesjq8lKTc3Fzl5+crICCgtLsCSxhjdOrUKQUHB5d2V1CGcdkI5UbFihUVHBwsPz/3zH7ixAk98sgjio+PV2BgoBo2bKgpU6ao4IHrJ0+eVKNGjdSoUSOdPHnS2e7o0aOKjY3VVVddpby8PEm/zx0ICwvTzp07lZSUpNDQUMXFxWns2LE6nwe4//jjj7ruuusUHh6usLAwdenSRatWrXLWz5s3T7fccoskqXPnzs5lsT+bz/Huu+/qsssuU1BQkBISEvThhx8Wmuewe/duuVwuTZkyRdOnT1fdunUVGBioTZs2SZK++OILXX311QoNDVXFihXVu3dvbd682a2ds82dKJhPciaXy6VBgwZpwYIFatiwoYKCgtSqVSstX7680Pb79+/X3XffrejoaAUGBqpJkyZ67bXXCtX7f//v/6lPnz4KDQ1V1apVNXToUGVnZ5/ztfmjI0eO6NZbb1V4eLiioqI0ePBgnTp1ylnfsWNHXX755UVu27BhQyUlJZ1z//n5+XrqqacUFxenkJAQde7cWZs2bVKtWrU0YMAAt7rp6ekaMmSIMzbr1aunSZMmKT8/36lz5vs2Z84c531r06aNVq9eXaj9jz76SAkJCW5j4Wz9nD59upo0aaKgoCBFR0frgQce0G+//eZWr1atWurZs6cWLVqk1q1bKzg4WC+99NI5XwPgohmgjJk7d66RZJYsWWIOHz5sDh06ZDZu3GgeeOAB4+PjYxYvXuzUzc/PN9dcc41xuVzm3nvvNTNmzDC9evUyksyQIUOceqtWrTK+vr5m6NChTlnfvn1NcHCw2bp1q1OWnJxsgoKCTP369c2dd95pZsyYYXr27GkkmVGjRrn1U5J58sknneWNGzea0NBQExsba8aNG2eeeeYZU7t2bRMYGGhWrVpljDFmx44d5uGHHzaSzGOPPWbmz59v5s+fb1JTU8/6evz3v/81LpfLNGvWzEydOtWMGjXKREZGmoSEBFOzZk2n3q5du4wkc9lll5k6deqYZ555xkybNs3s2bPHpKSkGD8/P9OgQQMzefJkM2bMGFO5cmUTGRlpdu3a5Xb8Z+6zwJNPPmn++ONGkklISDCVK1c2Y8eONZMmTTI1a9Y0wcHBZsOGDU691NRUU716dRMfH2/Gjh1rZs2aZW644QYjyUybNs2pl5WVZRo0aGCCgoLM8OHDzfTp002rVq1Ms2bNjCTz5ZdfnvU1OrOPTZs2Nb169TIzZswwd9xxh5Fk7rzzTqfeyy+/bCS59dEYY77//nsjybz++uvnbGf48OFGktPGfffdZ6pXr24qV65skpOTnXonTpwwzZo1M1FRUeaxxx4zs2fPNv379zcul8sMHjzYqVfwvrVo0cLUq1fPTJo0yUyePNlUrlzZVK9e3Zw+fdqpu2jRIuPj42MSEhLM1KlTzeOPP24iIiJMkyZNCr1v9957r/Hz8zP33XefmT17tvnHP/5hQkNDTZs2bdz2WbNmTVOvXj0TGRlpRowYYWbPnv2nrzVwsQgvKHMKwssfvwIDA828efPc6n700UdGkhk/frxb+c0332xcLpfZvn27UzZy5Ejj4+Njli9fbt59910jyUyfPt1tu+TkZCPJPPTQQ05Zfn6+6dGjhwkICDCHDx92yv8YXvr06WMCAgLMjh07nLIDBw6YChUqmA4dOjhlBW2f7wdE06ZNTfXq1c3x48edsmXLlhlJRYaX8PBwc+jQIbd9NG/e3FStWtX8+uuvTtn69euNj4+P6d+/v9vxexJeJJk1a9Y4ZXv27DFBQUHmxhtvdMruueceExsba44cOeK2fd++fU1ERITJysoyxhgzffp0I8n8+9//duqcOHHC1KtXz6PwcsMNN7iVP/jgg0aSWb9+vTHGmPT0dBMUFGT+8Y9/uNV7+OGHTWhoqMnMzDxrG6mpqcbPz8/06dPHrfypp54yktzCy7hx40xoaKj55Zdf3OqOGDHC+Pr6mr179xpj/u99i4qKMkePHnXqffzxx0aS+c9//uOUNW/e3MTGxpr09HSnbPHixYXGwtdff20kmQULFri1vXDhwkLlNWvWNJLMwoULz3rcQHHjshHKrJkzZyolJUUpKSl644031LlzZ91777364IMPnDqfffaZfH199fDDD7tt+8gjj8gY43Z30lNPPaUmTZooOTlZDz74oDp27FhouwKDBg1y/l9weeT06dNasmRJkfXz8vK0ePFi9enTR3Xq1HHKY2Njdfvtt2vFihXKyMjw+DU4cOCANmzYoP79+yssLMwp79ixo5o2bVrkNjfddJOqVKniLB88eFDr1q3TgAEDVKlSJae8WbNm6tatmz777DOP+1UgMTFRrVq1cpZr1Kih3r17a9GiRcrLy5MxRu+//7569eolY4yOHDnifCUlJenYsWP64YcfJP3+XsbGxurmm2929hcSEqL777/foz4NHDjQbfmhhx5y9i9JERER6t27t9566y3nUmBeXp7eeecd55LV2SxdulS5ubl68MEHi2zjTO+++66uvvpqRUZGuh13165dlZeXV+jy2l//+ldFRkY6y1dffbUkaefOnZL+731MTk5WRESEU69bt2667LLLCrUdERGhbt26ubXdqlUrhYWF6csvv3SrX7t27T+9XAYUJybsosy64oor3Cbs3nbbbWrRooUGDRqknj17KiAgQHv27FFcXJwqVKjgtm3jxo0lSXv27HHKAgIC9Nprr6lNmzYKCgrS3LlzC83jkCQfHx+3ACJJDRo0kKSz3t58+PBhZWVlqWHDhoXWNW7cWPn5+dq3b5+aNGlyfgf/PwX9r1evXqF19erVcz74z1S7du0i93G2vi1atEgnTpw454f22dSvX79QWYMGDZSVlaXDhw/Lx8dH6enpmjNnjubMmVPkPg4dOuT0s169eoXek6L67Umf6tatKx8fH7f3rn///nrnnXf09ddfq0OHDlqyZInS0tJ05513nnPfZ3s/KlWq5BY8JGnbtm366aef3ILkmQqOu0CNGjXclgv2VzBHpaDtol7zhg0buo2Fbdu26dixY6patep5tf3HMQN4G+EF5YaPj486d+6s559/Xtu2bfM4CEjSokWLJEmnTp3Stm3byuQP7Yu5S6SoMCfJmdDsqYKJqXfccYeSk5OLrNOsWbML2vf5KuqYkpKSFB0drTfeeEMdOnTQG2+8oZiYGHXt2rXY2s3Pz1e3bt00fPjwItcXBOICvr6+RdYz5zFRvKi2q1atqgULFhS5/o+BijuLUNIILyhXcnNzJUmZmZmSpJo1a2rJkiU6fvy429mXLVu2OOsL/PTTTxo7dqzuuusurVu3Tvfee682bNjgdgpe+v0H/86dO90+XH755RdJOuvfZalSpYpCQkK0devWQuu2bNkiHx8fxcfHSzp7QChKQf+3b99eaF1RZefax9n6VrlyZeesS2RkpNL
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"def q1(x):\n",
|
|||
|
" return x.quantile(0.25)\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"# median = quantile(0.5)\n",
|
|||
|
"def q2(x):\n",
|
|||
|
" return x.quantile(0.5)\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"def q3(x):\n",
|
|||
|
" return x.quantile(0.75)\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"def iqr(x):\n",
|
|||
|
" return q3(x) - q1(x)\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"def low_iqr(x):\n",
|
|||
|
" return max(0, q1(x) - 1.5 * iqr(x))\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"def high_iqr(x):\n",
|
|||
|
" return q3(x) + 1.5 * iqr(x)\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"quantiles = data[[\"gender\", \"age\"]].groupby([\"gender\"]).aggregate([\"min\", q1, q2, \"median\", q3, \"max\"])\n",
|
|||
|
"print(quantiles)\n",
|
|||
|
"\n",
|
|||
|
"iqrs = data[[\"gender\", \"age\"]].groupby([\"gender\"]).aggregate([low_iqr, iqr, high_iqr])\n",
|
|||
|
"print(iqrs)\n",
|
|||
|
"\n",
|
|||
|
"data.boxplot(column=\"age\", by=\"gender\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Визуализация - Гистограмма"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 29,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<Axes: ylabel='Frequency'>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 29,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAGdCAYAAAD0e7I1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAsh0lEQVR4nO3df1SUZf7/8dcgv/wFCAUDGwgVpablD8pI+3wq+WRqrr8+tRaVmUe3whKpVCptswy1zcwyLT+p60lz66Ru2YnWRTfXIn9g2lqGVuSPdMCWYARXJOb+/tE23yaldBiYmcvn45w5p7nuay7e42Xx6rqv+75tlmVZAgAAMFSIvwsAAABoToQdAABgNMIOAAAwGmEHAAAYjbADAACMRtgBAABGI+wAAACjEXYAAIDRQv1dQCBwuVw6dOiQ2rdvL5vN5u9yAADAabAsS0ePHlVSUpJCQhpfvyHsSDp06JCSk5P9XQYAAPDCgQMHdN555zV6nLAjqX379pJ++MOKioryczUAAOB0OJ1OJScnu3+PN4awI7lPXUVFRRF2AAAIMr+2BYUNygAAwGiEHQAAYDTCDgAAMBp7dgAA8IOGhgbV19f7u4yA1qpVK4WGhjb5tjCEHQAAWlhNTY0OHjwoy7L8XUrAa9OmjRITExUeHu71GIQdAABaUENDgw4ePKg2bdro3HPP5Wa2jbAsSydOnNCRI0dUVlam9PT0X7xx4C8h7AAA0ILq6+tlWZbOPfdctW7d2t/lBLTWrVsrLCxM+/bt04kTJxQZGenVOGxQBgDAD1jROT3eruZ4jOGDOgAAAAIWYQcAABjNr3t2Nm7cqKefflolJSU6fPiwVq9eraFDh56y7913362XXnpJzz77rHJzc93tlZWVuu+++/T2228rJCREI0aM0HPPPad27dq1zJcAAMAHUqe806I/7+uZg1r05/mTX1d2amtrddlll2n+/Pm/2G/16tX66KOPlJSUdNKx7Oxsffrpp1q3bp3Wrl2rjRs3aty4cc1VMgAACDJ+DTsDBgzQk08+qWHDhjXa55tvvtF9992n5cuXKywszOPY7t27VVhYqP/7v/9T79691bdvXz3//PNauXKlDh061NzlAwBwViksLFTfvn0VExOjuLg43Xjjjfryyy/dxz/88EN1795dkZGRysjI0Jo1a2Sz2bRjxw53n127dmnAgAFq166dEhISdPvtt+vbb79t1roDes+Oy+XS7bffroceekiXXHLJSceLi4sVExOjjIwMd1tWVpZCQkK0efPmRsetq6uT0+n0eAEAgF9WW1urvLw8bdu2TUVFRQoJCdGwYcPkcrnkdDo1ePBgdevWTdu3b9cTTzyhyZMne3y+qqpK1113nXr06KFt27apsLBQ5eXluvnmm5u17oC+z86sWbMUGhqq+++//5THHQ6H4uPjPdpCQ0MVGxsrh8PR6LgFBQV6/PHHfVorgJbT2N6Gs2kPAuAPI0aM8Hi/ePFinXvuufrss8+0adMm2Ww2LVq0SJGRkerSpYu++eYbjR071t3/hRdeUI8ePfTUU095jJGcnKw9e/booosuapa6A3Zlp6SkRM8995yWLl3q83sR5Ofnq7q62v06cOCAT8cHAMBEe/fu1S233KLzzz9fUVFRSk1NlSTt379fpaWluvTSSz1u/HfFFVd4fH7nzp3asGGD2rVr53516tRJkjxOh/lawK7s/OMf/1BFRYVSUlLcbQ0NDXrggQc0d+5cff3117Lb7aqoqPD43Pfff6/KykrZ7fZGx46IiFBERESz1Q4AgIkGDx6sjh07atGiRUpKSpLL5VLXrl114sSJ0/p8TU2NBg8erFmzZp10LDEx0dflugVs2Ln99tuVlZXl0da/f3/dfvvtGj16tCQpMzNTVVVVKikpUa9evSRJ69evl8vlUu/evVu8ZgAATPWvf/1LpaWlWrRoka6++mpJ0qZNm9zHL774Yr366quqq6tzLyhs3brVY4yePXvqzTffVGpqqkJDWy6C+PU0Vk1NjXbs2OHepV1WVqYdO3Zo//79iouLU9euXT1eYWFhstvtuvjiiyVJnTt31g033KCxY8dqy5Yt+uCDDzR+/HiNHDnylJepAwAA73To0EFxcXF6+eWX9cUXX2j9+vXKy8tzH7/11lvlcrk0btw47d69W++9957++Mc/Svr/j8bIyclRZWWlbrnlFm3dulVffvml3nvvPY0ePVoNDQ3NVrtfw862bdvUo0cP9ejRQ5KUl5enHj16aNq0aac9xvLly9WpUyf169dPAwcOVN++ffXyyy83V8kAAJyVQkJCtHLlSpWUlKhr166aOHGinn76affxqKgovf3229qxY4e6d++uRx55xP37/Md9PElJSfrggw/U0NCg66+/Xt26dVNubq5iYmJ88gysxtgsy7KabfQg4XQ6FR0drerqakVFRfm7HAC/gquxEMyOHz+usrIypaWlef0U72CxfPlyjR49WtXV1V4/4f2X/rxO9/d3wO7ZAQAAwWXZsmU6//zz9Zvf/EY7d+7U5MmTdfPNN3sddHyFsAMAAHzC4XBo2rRpcjgcSkxM1E033aQZM2b4uyzCDgAA8I1JkyZp0qRJ/i7jJAF7U0EAAABfIOwAAACjEXYAAPADLoY+Pb74cyLsAADQglq1aiVJp/2IhbPdsWPHJElhYWFej8EGZQAAWlBoaKjatGmjI0eOKCwsrFlvphfMLMvSsWPHVFFRoZiYGHdI9AZhBwCAFmSz2ZSYmKiysjLt27fP3+UEvJiYmF98uPfpIOwAANDCwsPDlZ6ezqmsXxEWFtakFZ0fEXYAAPCDkJAQ4x8XESg4UQgAAIxG2AEAAEYj7AAAAKMRdgAAgNEIOwAAwGiEHQAAYDTCDgAAMBphBwAAGI2wAwAAjEbYAQAARiPsAAAAoxF2AACA0Qg7AADAaIQdAABgNMIOAAAwGmEHAAAYjbADAACMRtgBAABGI+wAAACjEXYAAIDRCDsAAMBohB0AAGA0wg4AADAaYQcAABiNsAMAAIxG2AEAAEYj7AAAAKMRdgAAgNH8GnY2btyowYMHKykpSTabTWvWrHEfq6+v1+TJk9WtWze1bdtWSUlJuuOOO3To0CGPMSorK5Wdna2oqCjFxMRozJgxqqmpaeFvAgAAApVfw05tba0uu+wyzZ8//6Rjx44d0/bt2zV16lRt375dq1atUmlpqX7729969MvOztann36qdevWae3atdq4caPGjRvXUl8BAAAEOJtlWZa/i5Akm82m1atXa+jQoY322bp1q6644grt27dPKSkp2r17t7p06aKtW7cqIyNDklRYWKiBAwfq4MGDSkpKOq2f7XQ6FR0drerqakVFRfni6wBoRqlT3jll+9czB7VwJQD86XR/fwfVnp3q6mrZbDbFxMRIkoqLixUTE+MOOpKUlZWlkJAQbd682U9VAgCAQBLq7wJO1/HjxzV58mTdcsst7vTmcDgUHx/v0S80NFSxsbFyOByNjlVXV6e6ujr3e6fT2TxFAwAAvwuKlZ36+nrdfPPNsixLCxYsaPJ4BQUFio6Odr+Sk5N9UCUAAAhEAR92fgw6+/bt07p16zzOydntdlVUVHj0//7771VZWSm73d7omPn5+aqurna/Dhw40Gz1AwAA/wro01g/Bp29e/dqw4YNiouL8ziemZmpqqoqlZSUqFevXpKk9evXy+VyqXfv3o2OGxERoYiIiGatHQAABAa/hp2amhp98cUX7vdlZWXasWOHYmNjlZiYqP/93//V9u3btXbtWjU0NLj34cTGxio8PFydO3fWDTfcoLFjx2rhwoWqr6/X+PHjNXLkyNO+EgsAAJjNr2Fn27Ztuvbaa93v8/LyJEmjRo3SH/7wB7311luSpO7du3t8bsOGDbrmmmskScuXL9f48ePVr18/hYSEaMSIEZo3b16L1A8AAAKfX8PONddco1+6zc/p3AIoNjZWK1as8GVZAADAIAG/QRkAAKApCDsAAMBohB0AAGA0wg4AADAaYQcAABiNsAMAAIwW0HdQNlnqlHdOavt65iA/VAIAgG+c6neb5P/fb6z
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"data.plot.hist(column=[\"age\"], bins=80)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Визуализация - Точечная диаграмма"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 41,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<Axes: xlabel='smoking_status', ylabel='age'>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 41,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlMAAAGwCAYAAACNeeBZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAApQUlEQVR4nO3deXRU9f3/8ddk30jCZkgwkITFQIESSvEHKMgiS5EDWKVSVBaVtgZl+SEEZSllc8P2KFUQK1SlYvsr1CpfQIyIhgIStq8IDWGTHUSTDCGQQOb+/qBMGcjgZD6TTAafj3PmnOTez3zyvvOZufPKvXc+Y7MsyxIAAAC8EuTvAgAAAAIZYQoAAMAAYQoAAMAAYQoAAMAAYQoAAMAAYQoAAMAAYQoAAMBAiL8L+CFwOBw6fvy4atWqJZvN5u9yAACAByzL0tmzZ5WUlKSgIPfHnwhT1eD48eNKTk72dxkAAMALR44c0a233up2PWGqGtSqVUvS5cGIjY31czUAAMATdrtdycnJzvdxdwhT1eDKqb3Y2FjCFAAAAeb7LtHhAnQAAAADhCkAAAADhCkAAAADhCkAAAADhCkAAAADhCkAAAADhCkAAAADhCkAAAADhCkAAAADP4gwtWTJEsXHx/u7DAAAcBMKqDB15MgRjRw5UklJSQoLC1Pjxo01ZswYffvtt842KSkp+sMf/uC/IoEAcOCbYq3LO62DZ875uxQACHgB8918Bw4cUMeOHdW8eXO9++67Sk1N1VdffaWnnnpKq1at0qZNm1SnTp1qrenixYsKDQ2t1r8JmCgsKdOT7+7QZ/nfOJd1aVZfrwzJUFwUz2UA8EbAHJnKzMxUWFiYPvroI3Xt2lWNGjVS37599fHHH+vYsWN65plndNddd+nrr7/WuHHjZLPZrvtiwjVr1qhFixaKiYlRnz59dOLECZf1b7zxhlq0aKGIiAilp6fr1Vdfda47dOiQbDab3nvvPXXt2lURERFaunRptWw74CtPvrtDG/adcVm2Yd8ZPfHudj9VBACBLyCOTH333Xdas2aNZs+ercjISJd1DRo00NChQ/Xee+8pPz9fbdu21ahRo/TYY4+5tCspKdGLL76ot99+W0FBQXrwwQc1YcIEZyBaunSppk2bpvnz5ysjI0Pbt2/XY489pujoaA0bNszZT1ZWlubNm6eMjAxFRERUWG9paalKS0udv9vtdl89FIDXDnxT7HJE6opyy9Jn+d/o4JlzSq0X7YfKACCwBUSYys/Pl2VZatGiRYXrW7RooYKCApWXlys4OFi1atVSgwYNXNpcvHhRCxYsUJMmTSRJo0eP1u9+9zvn+unTp2vevHm69957JUmpqanavXu3Fi5c6BKmxo4d62zjzty5czVjxgyvthWoKl9/V3LD9Ye+JUwBgDcC5jSfJFmW5fV9o6KinEFKkhITE3X69GlJ0rlz57R//3498sgjiomJcd5mzZql/fv3u/TTvn377/1bkydPVlFRkfN25MgRr+sGfKVxnagbrk+pS5ACAG8ExJGppk2bymazac+ePRo0aNB16/fs2aPatWurfv36bvu49kJxm83mDGfFxcWSpEWLFun22293aRccHOzye3T097/hhIeHKzw8/HvbAdUprX6MujSrrw37zqj8qn9Mgm02dW5aj6NSAOClgDgyVbduXd1999169dVXdf78eZd1J0+e1NKlS/WLX/xCNptNYWFhKi8vr1T/CQkJSkpK0oEDB9S0aVOXW2pqqi83BfCrV4ZkqHPTei7LOjetp1eGZPipIgAIfAFxZEqS5s+fr06dOql3796aNWuWy9QIDRs21OzZsyVdnmfqs88+0wMPPKDw8HDVq1fve3q+bMaMGXryyScVFxenPn36qLS0VLm5uSooKND48eOrctOAahMXFaq3Humgg2fO6dC355RSN5ojUgBgKCCOTElSs2bNlJubq7S0NA0ePFhNmjTRqFGj1K1bN23cuNE5x9Tvfvc7HTp0SE2aNLnhab9rPfroo3rjjTe0ePFitW7dWl27dtWSJUs4MoWbUmq9aHW77RaCFAD4gM0yuaobHrHb7YqLi1NRUZFiY2P9XQ4AAPCAp+/fAXNkCgAAoCYiTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABggTAEAABiodJgqLy/XZ599psLCwiooBwAAILBUOkwFBwerV69eKigoqIp6AAAAAopXp/latWqlAwcO+LoWAACAgONVmJo1a5YmTJigDz/8UCdOnJDdbne5AQAA/FDYLMuyKnunoKD/ZjCbzeb82bIs2Ww2lZeX+6a6m4TdbldcXJyKiooUGxvr73IAAIAHPH3/DvGm83Xr1nldGAAAwM3EqzDVtWtXX9cBAAAQkLyeZ+rzzz/Xgw8+qE6dOunYsWOSpLfffls5OTk+Kw4AAKCm8ypM/f3vf1fv3r0VGRmpbdu2qbS0VJJUVFSkOXPm+LRAAACAmszrT/MtWLBAixYtUmhoqHN5586dtW3bNp8VBwAAUNN5Faby8vLUpUuX65bHxcUxMzoAAPhB8SpMNWjQQPv27btueU5OjtLS0oyLAgAACBRehanHHntMY8aM0ebNm2Wz2XT8+HEtXbpUEyZM0G9+8xtf1wgAAFBjeTU1QlZWlhwOh3r06KGSkhJ16dJF4eHhmjBhgp544glf1wgAAFBjeTUD+hVlZWXat2+fiouL1bJlS8XExPiytpsGM6ADABB4qnQG9CvCwsLUsmVLky4AAAACmsdh6t577/W40+XLl3tVDAAAQKDx+AL0uLg45y02NlbZ2dnKzc11rt+6dauys7MVFxdXJYUCAADURB4fmVq8eLHz50mTJmnw4MFasGCBgoODJUnl5eV6/PHHA/6aoJSUFI0dO1Zjx471dykAACAAeHXN1JtvvqmcnBxnkJKk4OBgjR8/Xp06ddILL7zgUT/Dhw/Xn//85+uW5+fnq2nTpt6U9oNy4Jtivb3xkDYe+FbnSi/pfFm5ZJMKz13Upas+VmCTdPWnDA4
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjwAAAGxCAYAAABmyWwBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABSbklEQVR4nO3deVhV5do/8C8g8wwiYiIiCCIq5jzjgHFsTjPzNbNOZac0c8jK1JxyLPN48nQ6eko9pWX1am/H2XBKnHAiRxDnSlFRmVREeH5/9HPXPm6exdqy1t5r8f1cF9eV+wtr39Ji7du9bp7HRQghQERERGRiro4ugIiIiEhrbHiIiIjI9NjwEBERkemx4SEiIiLTY8NDREREpseGh4iIiEyPDQ8RERGZHhseIiIiMr0aji5Aa+Xl5fj111/h7+8PFxcXR5dDRERElSCEQGFhIerUqQNX13t/f8b0Dc+vv/6KyMhIR5dBREREdjh37hzq1q17z8cxfcPj7+8P4LdvWEBAgIOrISIiosooKChAZGSk5XX8Xpm+4blzGysgIIANDxERkcFU1TgKh5aJiIjI9NjwEBERkemx4SEiIiLTY8NDREREpseGh4iIiEyPDQ8RERGZHhseIiIiMj02PERERGR6bHiIiIjI9NjwEBERkemZfmsJrcSPXYWSMsDLDTg29SFHl0MmEvP2KpThtx/OnBk8t6hqjFp2ALtO56F9g5p4v2+So8shk+g9bxuO5hYiMSIA377a0dHlSLkIIYSji9BSQUEBAgMDkZ+fXyV7afWZtxV7fy686/F29QPx1V863fPxqfpK/XATsi5ev+vxJhF+WPl6sgMqIjNYsfccRnzz012P/61fczx6/30OqIjMYO6GLMxJy7nr8dEPxGFI94ZV8hxV/frNW1oq2Wp2AGDn6XydKyGzsdXsAMCh80U6V0JmYqvZAYBhyw7oWwiZiq1mBwDeX5+tcyWVx4ZHhfixq6R5I4WcqCIxb8vPnViFnMiWUQpNzehvMvUphEyl97xt0vzJj9N1qkQdNjwqlJTJ85sKOVFFlE6d27pUQWaz63SeNN9x8rJOlZCZHM21fafjjsPnC3SqRB02PCp4uslzL4WcqCJKpw5/u4Ds0bZ+qDRv36CmTpWQmSSE+0vzxIh7n7fRAhseFbIUfhuLv61F9jqh8NtY/G0tssfsfs2lOX9bi+yxfKj8F3Sc9be12PCo0HlGmjRPnrlRp0rIbOorzOgo5US2TP7+sDR/b+URnSohM2k3dYM07zDtB50qUcehDU9ZWRnGjx+P6OhoeHt7IyYmBlOmTMEff1NeCIF3330XERER8Pb2RkpKCo4fP+6Qen/JvynNz127oVMlRETKtivM6GzLuaRTJWQmuYW3pPn5ghKdKlHHoQ3PzJkz8Y9//APz5s3D0aNHMXPmTMyaNQsfffSR5XNmzZqFv/3tb/jkk0+wa9cu+Pr6IjU1FTdvypsPLdwX6CXNI4O8daqEiEhZB4UZnU6xYTpVQmYS7u8hzSMCPHWqRB2HLjz48MMPIzw8HJ9++qnlsT59+sDb2xtffPEFhBCoU6cORo0ahTfeeAMAkJ+fj/DwcCxatAhPP/204nNU9cJFslsLpzlnQfeA5xZpgecVaUGP88pUCw926NABaWlpyM7+baGizMxMbNu2Db169QIAnDp1ChcuXEBKSorlawIDA9G2bVvs2LHD5jFLSkpQUFBg9VFVEsevkeZNFHKiinCGh7QweHGGNP/L53t0qoTMxKgzPA79bde3334bBQUFaNSoEdzc3FBWVoapU6diwIABAIALFy4AAMLDw62+Ljw83JL9t+nTp2PSpEma1FtcWi7NixRyIiI97T13VZpnnLmiUyVkJpzhscPXX3+NJUuWYOnSpdi3bx8WL16MDz74AIsXL7b7mGPGjEF+fr7l49y5c1VWr6+7/Nvlp5ATEempZWSwNG8dFaJTJWQmRp3hcegr9OjRo/H222/j6aefRtOmTTFw4ECMGDEC06dPBwDUrl0bAJCbm2v1dbm5uZbsv3l6eiIgIMDqo6ocntJLmh9SyIkqonTPm7MWZI/5g1pL808GttKpEjKTnWN7SvPt76RIc0dxaMNz/fp1uLpal+Dm5oby8t9uDUVHR6N27dpIS/t9/ZuCggLs2rUL7du317VWAGj4jnyOIk4hJ6oIZ3hIC0bd84icm1HnWR06w/PII49g6tSpqFevHhITE7F//358+OGH+POf/wwAcHFxwfDhw/Hee++hYcOGiI6Oxvjx41GnTh08/vjjuterNKJziyM8ROREjLrnETk3o86zOrTh+eijjzB+/Hi8+uqruHjxIurUqYOXX34Z7777ruVz3nzzTRQXF2Pw4MG4du0aOnXqhLVr18LLS74mjhbcXeVNjwdHeIjIiSSE+2Pfz/kV5s665xE5N193V2nT46zzrA5dh0cPXIeHjILnFmmB5xVpgevwmBznLEgrPLdIC1yHh7SgdD2KdtLrFRseIiKT4jo85AjOetuIDQ8RkUlxHR5yBBdHF1ABNjwqcK0U0grPLdIC1+EhLShdj0456fWKDY8KA+bb3r/rjoH/2qlTJWQ2Rl3Xgpzb5O8PS/P3Vh7RqRIyk84z0qR58syNOlWiDhseFX76teJf7wSAAz9f06cQMh2jrmtBzm37ycvSfFvOJZ0qITP5Jf+mND937YZOlajDhkeFZnUCpXnzukH6FEKmw33aSAsdGtSU5p1iw3SqhMzkvkD5OniRQd46VaIOr6IqLBks387i8xfb6VQJmQ33aSMtvPtoojQf93BjnSohM/nx7R7SfMtb3XWqRB02PCpwzoK0Ej9Wvm5FI4WcyJZRyw5I89HfZOpTCJmKUedZ2fCowDkL0kpJmTy/qZAT2bLrdJ4036Ew40Nki1HnWdnwqMA5C9KKp5s891LIiWxpWz9UmrdXmPEhssWo86x8hVaBcxaklayp8nUrjinkRLbM7tdcmr/fN0mfQshUjDrPyoZHhVZT1kvzNgo5UUU4w0Na4F5apIV2UzdI8w7TftCpEnXY8KhwubhUml9UyIkqwhke0gL30iIt5BbekubnC0p0qkQdNjwq1PR1l+a1FHKiinCGh7TAvbRIC+H+HtI8IsBTp0rUYcOjwp7xD0jz3Qo5UUU4w0Na4F5apIWdY3tK8+3vpOhUiTpseIiITMy7gncHK3qcyKzY8KjQe942af7kx+k6VUJmw6Fl0sKWrIu4UcH8140y4Mfj3EuL1HtwzhZp/vDcrTpVog4bHhWO5hZK88PnC3SqhMyGQ8ukBaUF4PadlQ81E9mSc7lYmmdfLNKpEnXY8KiQEO4vzRMjAnSqhMyGQ8ukBaUF4FrUkw81E9kSW9NXmsfV8tOpEnXY8KiwfGgnaf7tqx11qoTMhkPLpIXk+FrSvHND7pZO6q0ekSzNV77eRadK1GHDo0LnGWnSPHnmRp0qIbNp8LZ8RidGISeyZfL3h6X5eyuP6FQJmYlRN6Vlw6PCL/k3pfm5azd0qoTMRmnbWY7wkD22K2wOui2HQ8uknlE3pWXDo8J9gV7SPDLIW6dKyGyUfhA5wkP26KCwOWinWN7SIvWMuiktGx4Vfny7hzTf8lZ3nSohszk5Qz6jc0IhJ7Ll3UcTpfm4hxvrVAmZiVE3pWXDo0LDd+RzFHEKOVFFuDEtaYGbh5IW5qUdl+Yfb8rRqRJ12PCoUKowaHFLaRCDqALcmJa0wM1DSQvpJ+QzOs66oCUbHhXcFb5bHvxukp24MS1pgZuHkhY6xshndJx1uQO+RKtwfJp8jiJbISeqCDemJS1w81DSwtAeDaX5q91idapEHTY8KnCGh7SitM5OLNfhITsYdc8jcm5G3VeSDY8KnOEhrSits3NblyrIbIy65xE5N6PuK8mGRwXO8JBWlNbZqaFLFWQ2Rt3ziJybUfeV5Eu0CpzhIa0orbOTw3V4yA5G3fOInJtR95Vkw6NC7Bj5HEVDhZyoIu2mbpDmHab9oFMlZCZGXS+FnJtR13diw6PCbSHPSxVyoorkFt6S5ucLSnSqhMzEqOulkHMz6vpObHhUqOEiz90
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df.plot.scatter(x=\"age\", y=\"gender\")\n",
|
|||
|
"\n",
|
|||
|
"df.plot.scatter(x=\"smoking_status\", y=\"age\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Визуализация - Столбчатая диаграмма"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 34,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA18AAAJGCAYAAACk6D+OAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB7OElEQVR4nO3deXgN5///8ddJIrsktiSCoKit9jX2rXZa1daSaixFS6qovXZqq6qlFK0KLaUL2mqr1Fq1R5UPiqqtJbEnEpVEMr8//DJfRxJC4hzh+biuc3Fm7pn7PSdzzskrM3OPxTAMQwAAAACAh8rB3gUAAAAAwJOA8AUAAAAANkD4AgAAAAAbIHwBAAAAgA0QvgAAAADABghfAAAAAGADhC8AAAAAsAHCFwAAAADYAOELAAAAAGzAyd4FAAAAAI+q/fv3a+vWrTpx4oSioqIUGxuryMhIrVixQl5eXvYuD1kM4Qs2U6hQIZ06dcpqmrOzs/z8/BQUFKTQ0FDVrl3bTtUBAAD8n127dqlXr14KDw9PMc/Z2Vk7duxQ48aN7VAZsjKLYRiGvYvAkyE5fNWsWVNFixaVJF29elV79uzRv//+K4vFoqlTp6p///52rhQAADzJNmzYoObNmys+Pl5dunRRz549VaZMGbm5udm7NGRxhC/YTHL4WrhwoTp37mxOv3Hjhnr27KnFixfL0dFRhw4d0tNPP22/QgEAwBMrKipKxYsX1+XLl7Vs2TK98MIL9i4JjxEG3IDdubq6avbs2fLw8FBiYqJWrFhh75IAAMATauHChYqMjNTw4cMJXsh0hC88Ejw9PVW8eHFJ0smTJ83pFy5c0MyZM9W8eXMVLlxYbm5u8vLyUuXKlTV58mTduHEjzXVev35d06dPV61atZQjRw65uLioYMGCatWqlZYuXWrVtl69erJYLPd81KtXz2q5TZs2mdOvX7+uYcOGqWjRonJ1dVVAQIC6deumf//9N80ar1y5olGjRql8+fLKnj273N3dVaZMGY0fP17Xr19Pc7mTJ0/es9bbX8fM6DMsLOyu/RUqVCjV5c6ePav+/furZMmScnd3V/bs2VWlShV9+OGHunnzZor2nTt3lsViUVhY2F23/c7+0pqeLCQkxKx106ZNqbb5+uuv1bRpU+XJk0fOzs7Kly+fXnnlFR06dCiNVyVt6dmfLBaLRo8eneryy5YtU8OGDZUzZ05z3+3atauOHj2a7hrSs5+kVUuhQoXS3I8WLlyYZv2jR49Od3/Jjh8/LkdHR+XIkeOu+2Dp0qVlsVj0448/pmv77fW+/vPPP9WlSxcVLFhQLi4uypkzpxo2bKgvv/wyRdukpCS1b99eFotFHTt2VFJSUqqv552vc1JSkjp27CiLxaIOHTpYLZfWMpJ08eJF5cqV667vodsfDg4O8vPzU40aNTR//vxU37O7du3SoEGDVLVqVfn7+5vX8rZq1Uq//PJLqq9R8ufJ7WdB3CmtffBB983k7R8wYIBKlSold3f3u+6X6XG3ZUaNGmXOT+vz7G7Cw8MVEhKiwoULy9XVVTlz5lS5cuU0cOBA8/rp5M/L+33PJSQk6PPPP1dwcLBKlCghLy8vubm5qXjx4urTp4/Onj1rVcuDfJZER0fLy8tLTk5OOnPmTJrb2bx5c1ksFs2ZM8dqumEYWrFihVq2bGnuV/7+/qpVq5YmT56s//77T9L/fbZPnDgxzT6+/PJLWSwWVa1aNV2vffJnTFBQkLp166aCBQvK2dlZOXLkUN26dbVw4cIU71XJer++dOmSevfurcDAQPMzvF+/frpy5Uqqfa5YsUKvvfaannnmGeXIkUOurq4qXLiwunbtqiNHjqS6zN2+L8ePHy+LxaJixYrpn3/+yXBfyDwMuIFHRnR0tCTJxcXFnPbzzz/rrbfeUr58+VS0aFFVr15dFy5c0M6dOzVkyBB9++232rhxo9UyknTmzBk1bdpUhw4dkru7u2rWrKlcuXLp33//1a+//qoDBw6oY8eOKWpo0qSJ/P39U0yPiIjQzz//nGbt8fHxatiwofbv36969eqpYsWK2rp1qz799FP9+OOP2rJli4oVK2a1zKFDh9S0aVOdOXNGefPmVa1atZQtWzbt2rVLI0aM0DfffKNNmzbJ29s7zX49PDz04osvWk37+uuvFRsbm2r7zOizSJEiqlWrlvk8JiZG33zzTaptt2zZoueff15XrlxRoUKF9OyzzyouLk67du3Sm2++qe+//16rV69WtmzZ0uwvM2zdulWLFy9Oc/7NmzcVHBysL7/8Ui4uLqpUqZLy5cuno0ePasmSJVqxYoVWrFihpk2b3nffISEhqU7ft2+f/vjjjxTTDcNQ586dtXjxYjk5OalOnTry9fXV3r17tXDhQi1fvlzffPNNumrx9PRM0X/yvuzn55diHeXLl7/nOq9cuaLBgwffs125cuXSXN+iRYusnhcpUkQtWrTQ999/ryVLlqh79+4pltm4caMOHTqkIkWKqFmzZvfs/3a2fF//8MMPevHFF3Xjxg0VL15cL7zwgs6fP6/Nmzdrw4YN+vnnn7VgwQKzvYODgz777DPFxsbqiy++kJeXl+bOnXvPbXrjjTf0xRdfqGXLlvrss8/k4JC+v6UOGTJEly9fvmub2z9XEhMTdfr0aW3dulXbt2/Xr7/+qs8++8yq/bBhw7Rx40aVLl1alSpVkoeHh44fP67Vq1dr9erVmj59ut5666101ZcR99o3r1+/rho1aujYsWNyd3dXvXr1lCdPHjOU3LlfZsTx48c1efLkB17+vffe05AhQ5SUlKSnn35azz33nP777z/99ddfmjp1qkqXLq3OnTtbfRYnW7NmjSIjI9Pc7yUpMjJSnTp1kre3t0qWLKmyZcsqNjZW+/bt06xZs7Rs2TJt27bNvD77QT5LvLy81LlzZ82aNUtz587Vu+++m6KO48ePa82aNfLy8tKrr75qTk9ISFD79u21YsUKOTg4qGrVqmrQoIEuXryoQ4cOaciQIWrXrp0KFSqkt956S4sXL9bcuXM1aNAgOTo6puhn9uzZkqTQ0NB7vPK3JIeV5Ne9aNGieuGFF3TlyhVt2rRJW7Zs0apVq/TNN9/IySnlr9JXrlxRtWrVdOnSJfOPQJs2bdL06dP1008/6ddff1WePHmslnn55Zfl4uKiUqVKqUGDBrp586b+97//aeHChfryyy+1du1a1ahRI131jx8/XiNGjFDRokW1adMm5cuX76H1hQdgADZSsGBBQ5KxcOHCFPP++OMPw8HBwZBkfPrpp+b0Q4cOGdu3b0/R/vLly0bjxo0NScaUKVOs5iUmJhqVK1c2JBmNGzc2zp8/bzX/v//+M3744QeraXXr1jUkGRs3bky19o0bNxqSjLp166Y6XZJRtGhR49SpU1b9tG3b1pBkVK9e3Wq569evG0WKFDEkGcOHDzfi4uLMebGxsUaHDh0MSUaXLl1Sref48eOGJKNgwYIp5iW/zidOnMjUPj/55BNDktG5c2er6SdOnEi1lnPnzhm5cuUyLBaLMWfOHCMxMdGcd/HiRaNBgwaGJGPMmDFWy4WEhKS5n9ytv7SmJyQkGGXKlDEcHR2NgICAVH/Ow4YNMyQZ1apVM/7++2+reV999ZXh6Oho5MiRw7hy5UqqNaUmeb9Iy6hRowxJxqhRo6ymf/TRR4YkI3fu3Mbvv/9uTk9KSjKX8fHxSbFfp1da+/Kd0tqP3njjDUOSERgYmGr9aW3X7VJ7bdatW2dIMsqVK5fqMsnvpffff/+udd/O1u/riIgIw9vb25BkjB8/3khKSjLn7d6928iRI4chyZg/f36KWv777z+jfv36hiRj4MCB5vTUXs8BAwYYkowGDRoY//33X4p1pfUz2L59u2GxWMyfXXrfQ7cv6+DgYERFRVnN+/HHH42zZ8+mWGbbtm2Gl5eXkS1bNuOff/6xmrdw4UJDkhESEpJiuWRp7YMPum9+9tlnhiQjICDAOHfuXIr+7vWeTU1ayzRr1syqlrQ+z1Lz7bffGpIMV1dXY/ny5SnmHzx40Dh06FCay99rvzcMw4iOjja
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"\n",
|
|||
|
"plot = (\n",
|
|||
|
" df.groupby(\n",
|
|||
|
" [\"Residence_type\", \"ever_married\"]\n",
|
|||
|
" ) \n",
|
|||
|
" .size()\n",
|
|||
|
" .unstack() # Преобразование таблицы для корректной визуализации\n",
|
|||
|
" .plot.bar(\n",
|
|||
|
" color=[\"pink\", \"green\"], figsize=(10, 6)\n",
|
|||
|
" )\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"plot.legend([\"Never married\", \"Ever married\"], title=\"Marital Status\")\n",
|
|||
|
"plot.set_title(\"Распределение по типу проживания и статусу брака\", fontsize=16)\n",
|
|||
|
"plot.set_xlabel(\"Тип проживания\", fontsize=12)\n",
|
|||
|
"plot.set_ylabel(\"Количество\", fontsize=12)\n",
|
|||
|
"\n",
|
|||
|
"# Показать диаграмму\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Визуализация - Временные ряды"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 36,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|||
|
"RangeIndex: 243 entries, 0 to 242\n",
|
|||
|
"Data columns (total 6 columns):\n",
|
|||
|
" # Column Non-Null Count Dtype \n",
|
|||
|
"--- ------ -------------- ----- \n",
|
|||
|
" 0 my_date 243 non-null object \n",
|
|||
|
" 1 my_value 243 non-null float64 \n",
|
|||
|
" 2 bullet 2 non-null object \n",
|
|||
|
" 3 bulletClass 2 non-null object \n",
|
|||
|
" 4 label 2 non-null object \n",
|
|||
|
" 5 date 243 non-null datetime64[ns]\n",
|
|||
|
"dtypes: datetime64[ns](1), float64(1), object(4)\n",
|
|||
|
"memory usage: 11.5+ KB\n",
|
|||
|
" my_date my_value bullet bulletClass label date\n",
|
|||
|
"0 28.03.2023 76.5662 NaN NaN NaN 2023-03-28\n",
|
|||
|
"1 31.03.2023 77.0863 NaN NaN NaN 2023-03-31\n",
|
|||
|
"2 01.04.2023 77.3233 NaN NaN NaN 2023-04-01\n",
|
|||
|
"3 04.04.2023 77.9510 NaN NaN NaN 2023-04-04\n",
|
|||
|
"4 05.04.2023 79.3563 NaN NaN NaN 2023-04-05\n",
|
|||
|
".. ... ... ... ... ... ...\n",
|
|||
|
"238 20.03.2024 92.2243 NaN NaN NaN 2024-03-20\n",
|
|||
|
"239 21.03.2024 92.6861 NaN NaN NaN 2024-03-21\n",
|
|||
|
"240 22.03.2024 91.9499 NaN NaN NaN 2024-03-22\n",
|
|||
|
"241 23.03.2024 92.6118 NaN NaN NaN 2024-03-23\n",
|
|||
|
"242 26.03.2024 92.7761 NaN NaN NaN 2024-03-26\n",
|
|||
|
"\n",
|
|||
|
"[243 rows x 6 columns]\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAHGCAYAAABeq3DqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAChdUlEQVR4nOzdd3xT9foH8M/JaNK994aWvRHKEhCZooBywYEyHCBe9aL3yhWvoIheRP2J4zq5iqgo6FVBVEAEWbIrlA2lLbRQ2tKZDrqS5/dHmkPTpOOkSXvSPu/Xqy9Ncs75PpyTnDz5ToGICIwxxhhjMqJo7QAYY4wxxuriBIUxxhhjssMJCmOMMcZkhxMUxhhjjMkOJyiMMcYYkx1OUBhjjDEmO5ygMMYYY0x2VK0dgC0MBgMyMzPh6ekJQRBaOxzGGGOMNQERobi4GGFhYVAoGq4jccoEJTMzE5GRka0dBmOMMcZskJGRgYiIiAa3ccoExdPTE4DxH+jl5dXK0TDGGGOsKXQ6HSIjI8Xv8YY4ZYJiatbx8vLiBIUxxhhzMk3pnsGdZBljjDEmO5ygMMYYY0x2OEFhjDHGmOw4ZR8UxhhjbY9er0dVVVVrh8GaQa1WQ6lU2uVYnKAwxhhrVUSErKwsFBYWtnYozA58fHwQEhLS7HnKOEFhjDHWqkzJSVBQENzc3HgCTidFRCgrK0NOTg4AIDQ0tFnH4wSFMcZYq9Hr9WJy4u/v39rhsGZydXUFAOTk5CAoKKhZzT3cSZYxxlirMfU5cXNza+VImL2YrmVz+xNxgsIYY6zVcbNO22Gva8kJCmOMMcZkhxMUxlibVF6lb+0QGGPNwAkKY6zNWbPvIros3oKd53JaOxTGZOPixYsQBAHHjh1r7VCahBMUxlibYjAQPtyVAgA4kJrfytEwxmzFCQpjrE05kJqHq0XlAIDCsspWjoYxZitOUBhzcuezi5F4qaC1w5CN7/68Iv5/AScoTomIUFZZ3Sp/RNTkOEeOHIknnngCCxYsgK+vL4KDg7Fq1SqUlpZizpw58PT0RFxcHDZv3gwiQlxcHN544w2zYxw7dgyCIODChQsNlnXffffh7rvvNnuuqqoKAQEB+PzzzwEAW7ZswbBhw+Dj4wN/f3/cfvvtSElJqfeYn332GXx8fMye27Bhg8UonI0bN6Jfv37QarXo0KEDli5diurq6sZOT7PxRG2MOTEiwn2rDqDoehUSF4+Bl1bd2iG1qrLKamw+eVV8XFDG67o4o+tVenRbsrVVyj790ji4uTT9q3HNmjVYuHAhDh06hPXr12P+/Pn44YcfcOedd+K5557DypUr8cADDyA9PR0PPvggVq9ejX/84x/i/qtXr8bw4cMRFxfXYDkzZszAtGnTUFJSAg8PDwDA1q1bUVZWhjvvvBMAUFpaiqeffhq9evVCSUkJlixZgjvvvBPHjh2DQmFbfcSePXswc+ZMvPPOO7j55puRkpKCuXPnAgBeeOEFm47ZVFyDwpgTKyirQm5JJar0hMJS/jLeeioLZZV6mH4AchMPc7TevXvj+eefR3x8PBYtWgStVouAgAA88sgjiI+Px5IlS5CXl4fjx49j9uzZOHfuHA4dOgTAWAPy1Vdf4cEHH2y0nHHjxsHd3R0//PCD+NxXX32FSZMmwdPTEwAwdepU3HXXXYiLi0OfPn3w6aef4sSJEzh9+rTN/76lS5fi2WefxaxZs9ChQweMGTMGy5Ytw0cffWTzMZuKa1AYc2KZhdfF/6+o5mG139c074zpGoxfT2dzDYqTclUrcfqlca1WthS9evUS/1+pVMLf3x89e/YUnwsODgZgnPp90KBBmDhxIj799FMMHDgQmzZtQkVFBaZNm9ZoOSqVCtOnT8fatWvxwAMPoLS0FBs3bsS6devEbZKTk7FkyRIcPHgQubm5MBgMAID09HT06NFD0r/LJCkpCX/88QdeeeUV8Tm9Xo/y8nKUlZU5dAZgTlAYc2KmzqAAUFFtaMVIWl9WUTn2XsgFADw4LBa/ns5GYVkliIhnKXUygiBIamZpTWq1ebOqIAhmz5nee6Zk4eGHH8YDDzyAlStXYvXq1bj77rub/CU/Y8YMjBgxAjk5Odi2bRtcXV0xfvx48fU77rgD0dHRWLVqFcLCwmAwGNCjRw9UVlqvSVQoFBZ9bupOT19SUoKlS5firrvusthfq9U2KW5bOcc7gDFm1dWi2jUo7TtB2XDsCoiAgTF+6BXhDQCo0hNKK/Xw0PCtjsnDbbfdBnd3d3zwwQfYsmULdu/e3eR9hwwZgsjISKxfvx6bN2/GtGnTxGQoLy8P586dw6pVq3DzzTcDAPbu3dvg8QIDA1FcXIzS0lK4u7sDgMUcKf369cO5c+ca7SPjCPypZcyJZRbWrkFpv008RITvEi8DAO7sFw5XtRIuKgUqqw0oKK3kBIXJhlKpxOzZs7Fo0SLEx8dj8ODBkva/77778OGHH+L8+fP4/fffxed9fX3h7++Pjz/+GKGhoUhPT8ezzz7b4LESEhLg5uaG5557Dk8++SQOHjyIzz77zGybJUuW4Pbbb0dUVBT+8pe/QKFQICkpCSdPnsTLL78sKXapuJMsY06sdg1KZTuuQTl9VYfknBK4qBS4rWcoBEGAr5vxlyUPNWZy89BDD6GyshJz5syRvO+MGTNw+vRphIeHY+jQoeLzCoUC69atQ2JiInr06IGnnnoKr7/+eoPH8vPzw5dffolffvkFPXv2xNdff40XX3zRbJtx48bhp59+wq+//ooBAwZg0KBBWLlyJaKjoyXHLhX/rGDMiV0t5D4oAHDichEAICHWD96uxsTE180F2boK7ijLHGbnzp0Wz128eNHiubr9PK5cuQK1Wo2ZM2dKLrNr1671ztUyevRoixE7tbeNiYmx2HfKlCmYMmWK2XOPPPKI2eNx48Zh3LiW77TMCQpjTiyTa1AAAOn5ZQCAGH938TlfNxcAPNSYyUdFRQWuXbuGF198EdOmTRNH+DDrJDfx7N69G3fccQfCwsIgCAI2bNhg9joRYcmSJQgNDYWrqytGjx6N5ORks23y8/MxY8YMeHl5wcfHBw899BBKSkqa9Q9hrL0xGAjZOq5BAYCMAmOiFunnKj7n617TxFPKCQqTh6+//hrR0dEoLCzEa6+9Zvba2rVr4eHhYfWve/furRRx65Jcg1JaWorevXvjwQcftDrs6LXXXsM777yDNWvWIDY2FosXL8a4ceNw+vRpcUjSjBkzcPXqVWzbtg1VVVWYM2cO5s6di6+++qr5/yLG2onckgpU6W9U17bnGpSMmhqUKL8bwzV9ampQuImHycXs2bMxe/Zsq69NmjQJCQkJVl+rO5S5vZCcoEyYMAETJkyw+hoR4a233sLzzz+PyZMnAwA+//xzBAcHY8OGDbjnnntw5swZbNmyBYcPH8ZNN90EAHj33Xdx22234Y033kBYWFgz/jmMtR+ZteZAAdr3KB5TghLheyNBqd1J1mAgKBQ8F4qcSVkDpy3y9PQUZ4R1dva6lnYdxZOWloasrCyMHj1afM7b2xsJCQnYv38/AGD//v3w8fERkxPA2LFHoVDg4MGDVo9bUVEBnU5n9sdYe3e11iyyQPutQSmtqEZeTTNOlH/tBMVYg/L5/ku4/xPr9xbW+ky1A2VlZa0cCbMX07Vsbs2PXTvJZmVlAYBFx5/g4GDxtaysLAQFBZkHoVLBz89P3Kau5cuXY+nSpfYMlTGnZ1mD0j4TlIwC483Q21VttlhisNeNWS73peS1eFysaZRKJXx8fJCTkwMAcHNz45l/nRQRoaysDDk5OfDx8YFSKW3ZgLqcYhTPokWL8PTTT4uPdTodIiMjWzEixlof16AYZeRbdpAFgDHdgvHXWzrivd+Ny83rDQQlN/PIUkhICACISQpzbj4+PuI1bQ67JiimgLKzsxEaGio+n52djT59+ojb1H0TVldXIz8/v95/kEajgUajsWeojDk90zo8GpUCFdWGdtsHJd1KB1kA0KqVePyWeDFBKa/Sw51nlJUlQRAQGhqKoKAgi7VgmHNRq9XNrjkxseunNTY2FiE
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from datetime import datetime\n",
|
|||
|
"import matplotlib.dates as md\n",
|
|||
|
"\n",
|
|||
|
"ts = pd.read_csv(\"data/dollar.csv\")\n",
|
|||
|
"ts[\"date\"] = ts.apply(lambda row: datetime.strptime(row[\"my_date\"], \"%d.%m.%Y\"), axis=1)\n",
|
|||
|
"ts.info()\n",
|
|||
|
"\n",
|
|||
|
"print(ts)\n",
|
|||
|
"\n",
|
|||
|
"plot = ts.plot.line(x=\"date\", y=\"my_value\")\n",
|
|||
|
"plot.xaxis.set_major_locator(md.DayLocator(interval=10))\n",
|
|||
|
"plot.xaxis.set_major_formatter(md.DateFormatter(\"%d.%m.%Y\"))\n",
|
|||
|
"plot.tick_params(axis=\"x\", labelrotation=90)"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": ".venv",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.2"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|