514 lines
192 KiB
Plaintext
514 lines
192 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"работа с данными, чтение и запись csv"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 1,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"\n",
|
|||
|
"df = pd.read_csv(\"data/country.csv\")\n",
|
|||
|
"\n",
|
|||
|
"df.to_csv(\"test1.csv\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"работа с данными, основные команды"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 2,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|||
|
"RangeIndex: 234 entries, 0 to 233\n",
|
|||
|
"Data columns (total 3 columns):\n",
|
|||
|
" # Column Non-Null Count Dtype \n",
|
|||
|
"--- ------ -------------- ----- \n",
|
|||
|
" 0 Country/Territory 234 non-null object\n",
|
|||
|
" 1 Capital 232 non-null object\n",
|
|||
|
" 2 Continent 234 non-null object\n",
|
|||
|
"dtypes: object(3)\n",
|
|||
|
"memory usage: 5.6+ KB\n",
|
|||
|
" count unique top freq\n",
|
|||
|
"Country/Territory 234 234 Afghanistan 1\n",
|
|||
|
"Capital 232 232 Kabul 1\n",
|
|||
|
"Continent 234 6 Africa 57\n",
|
|||
|
" Country/Territory Capital\n",
|
|||
|
"0 Afghanistan Kabul\n",
|
|||
|
"1 Albania Tirana\n",
|
|||
|
"2 Algeria Algiers\n",
|
|||
|
"3 American Samoa Pago Pago\n",
|
|||
|
"4 Andorra Andorra la Vella\n",
|
|||
|
" Country/Territory Capital\n",
|
|||
|
"229 Wallis and Futuna Mata-Utu\n",
|
|||
|
"230 Western Sahara El Aain\n",
|
|||
|
"231 Yemen Sanaa\n",
|
|||
|
"232 Zambia Lusaka\n",
|
|||
|
"233 Zimbabwe Harare\n",
|
|||
|
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n",
|
|||
|
" Country/Territory Capital\n",
|
|||
|
"219 United Arab Emirates Abu Dhabi\n",
|
|||
|
"149 Nigeria Abuja\n",
|
|||
|
"75 Ghana Accra\n",
|
|||
|
"63 Ethiopia Addis Ababa\n",
|
|||
|
"2 Algeria Algiers\n",
|
|||
|
" Country/Territory Capital\n",
|
|||
|
"142 Nauru Yaren\n",
|
|||
|
"9 Armenia Yerevan\n",
|
|||
|
"46 Croatia Zagreb\n",
|
|||
|
"121 Malawi NaN\n",
|
|||
|
"127 Martinique NaN\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df.info()\n",
|
|||
|
"\n",
|
|||
|
"print(df.describe().transpose())\n",
|
|||
|
"\n",
|
|||
|
"cleared_df = df.drop([\"Continent\"], axis=1) # удаляет колонку\n",
|
|||
|
"print(cleared_df.head())\n",
|
|||
|
"print(cleared_df.tail())\n",
|
|||
|
"\n",
|
|||
|
"print(\"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\")\n",
|
|||
|
"\n",
|
|||
|
"sorted_df = cleared_df.sort_values(by=\"Capital\")\n",
|
|||
|
"print(sorted_df.head())\n",
|
|||
|
"print(sorted_df.tail())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"работа с данными, работа с элементами"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 3,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"0 Asia\n",
|
|||
|
"1 Europe\n",
|
|||
|
"2 Africa\n",
|
|||
|
"3 Oceania\n",
|
|||
|
"4 Europe\n",
|
|||
|
" ... \n",
|
|||
|
"229 Oceania\n",
|
|||
|
"230 Africa\n",
|
|||
|
"231 Asia\n",
|
|||
|
"232 Africa\n",
|
|||
|
"233 Africa\n",
|
|||
|
"Name: Continent, Length: 234, dtype: object\n",
|
|||
|
"Country/Territory Ivory Coast\n",
|
|||
|
"Capital Yamoussoukro\n",
|
|||
|
"Continent Africa\n",
|
|||
|
"Name: 100, dtype: object\n",
|
|||
|
"Ivory Coast\n",
|
|||
|
" Country/Territory Capital\n",
|
|||
|
"100 Ivory Coast Yamoussoukro\n",
|
|||
|
"101 Jamaica Kingston\n",
|
|||
|
"102 Japan Tokyo\n",
|
|||
|
"103 Jersey Saint Helier\n",
|
|||
|
"104 Jordan Amman\n",
|
|||
|
".. ... ...\n",
|
|||
|
"196 Spain Madrid\n",
|
|||
|
"197 Sri Lanka Colombo\n",
|
|||
|
"198 Sudan Khartoum\n",
|
|||
|
"199 Suriname Paramaribo\n",
|
|||
|
"200 Sweden Stockholm\n",
|
|||
|
"\n",
|
|||
|
"[101 rows x 2 columns]\n",
|
|||
|
" Country/Territory Capital Continent\n",
|
|||
|
"0 Afghanistan Kabul Asia\n",
|
|||
|
"1 Albania Tirana Europe\n",
|
|||
|
"2 Algeria Algiers Africa\n",
|
|||
|
"Country/Territory Afghanistan\n",
|
|||
|
"Capital Kabul\n",
|
|||
|
"Continent Asia\n",
|
|||
|
"Name: 0, dtype: object\n",
|
|||
|
" Country/Territory Capital\n",
|
|||
|
"0 Afghanistan Kabul\n",
|
|||
|
"1 Albania Tirana\n",
|
|||
|
"2 Algeria Algiers\n",
|
|||
|
"3 American Samoa Pago Pago\n",
|
|||
|
"4 Andorra Andorra la Vella\n",
|
|||
|
".. ... ...\n",
|
|||
|
"229 Wallis and Futuna Mata-Utu\n",
|
|||
|
"230 Western Sahara El Aain\n",
|
|||
|
"231 Yemen Sanaa\n",
|
|||
|
"232 Zambia Lusaka\n",
|
|||
|
"233 Zimbabwe Harare\n",
|
|||
|
"\n",
|
|||
|
"[234 rows x 2 columns]\n",
|
|||
|
" Country/Territory Capital\n",
|
|||
|
"3 American Samoa Pago Pago\n",
|
|||
|
"6 Anguilla The Valley\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"print(df[\"Continent\"]) # выводит колонку таблицы\n",
|
|||
|
"\n",
|
|||
|
"print(df.loc[100]) # выводит данные по одному объекту таблицы(по строке)\n",
|
|||
|
"\n",
|
|||
|
"print(df.loc[100, \"Country/Territory\"]) # выводит данные по конкретному столбцу конкретной строки\n",
|
|||
|
"\n",
|
|||
|
"print(df.loc[100:200, [\"Country/Territory\", \"Capital\"]]) # выводит данные из диапазона строк по указанным столбцам (в loc обе границы среза включаются)\n",
|
|||
|
"\n",
|
|||
|
"print(df[0:3]) # выводит строки таблицы из диапазона позиций 0:3 (правая граница не включается)\n",
|
|||
|
"\n",
|
|||
|
"print(df.iloc[0])\n",
|
|||
|
"\n",
|
|||
|
"print(df.iloc[:, 0:2]) # iloc работает с позиционными индексами: «:» выбирает все строки, а 0:2 — столбцы с позициями 0 и 1 (правая граница не включается)\n",
|
|||
|
"\n",
|
|||
|
"print(df.iloc[[3, 6], [0, 1]]) # здесь [3, 6] — номера отдельных строк, а не диапазон; [0, 1] — номера отдельных столбцов, тоже не диапазон\n",
|
|||
|
"\n",
|
|||
|
"# loc обращается к данным по меткам (значениям индекса и названиям столбцов), срез по меткам включает обе границы; iloc обращается по целочисленным позициям, срез не включает правую границу"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"работа с данными - отбор и группировка"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 4,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"['Asia' 'Europe' 'Africa' 'Oceania' 'North America' 'South America']\n",
|
|||
|
"Asia count = 50\n",
|
|||
|
"Europe count = 50\n",
|
|||
|
"Africa count = 57\n",
|
|||
|
"Oceania count = 23\n",
|
|||
|
"North America count = 40\n",
|
|||
|
"South America count = 14\n",
|
|||
|
"Total count = 234\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"s_values = df[\"Continent\"].unique() # возвращает все уникальные значения столбца\n",
|
|||
|
"print(s_values)\n",
|
|||
|
"\n",
|
|||
|
"s_total = 0\n",
|
|||
|
"for s_value in s_values:\n",
|
|||
|
" count = df[df[\"Continent\"] == s_value].shape[0] # shape возвращает кортеж (число строк, число столбцов); таблица только что отфильтрована, поэтому shape[0] — это число строк с данным значением\n",
|
|||
|
" s_total += count\n",
|
|||
|
" print(s_value, \"count =\", count)\n",
|
|||
|
"print(\"Total count = \", s_total)\n",
|
|||
|
"\n",
|
|||
|
"# print(df.groupby([\"Pclass\", \"Survived\"]).size().reset_index(name=\"Count\")) # невозможно применить к данным таблицы"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"визуализация - исходные данные "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 5,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" Country/Territory Capital Continent\n",
|
|||
|
"0 Afghanistan Kabul Asia\n",
|
|||
|
"1 Albania Tirana Europe\n",
|
|||
|
"2 Algeria Algiers Africa\n",
|
|||
|
"3 American Samoa Pago Pago Oceania\n",
|
|||
|
"4 Andorra Andorra la Vella Europe\n",
|
|||
|
".. ... ... ...\n",
|
|||
|
"229 Wallis and Futuna Mata-Utu Oceania\n",
|
|||
|
"230 Western Sahara El Aain Africa\n",
|
|||
|
"231 Yemen Sanaa Asia\n",
|
|||
|
"232 Zambia Lusaka Africa\n",
|
|||
|
"233 Zimbabwe Harare Africa\n",
|
|||
|
"\n",
|
|||
|
"[232 rows x 3 columns]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"data = df.copy()\n",
|
|||
|
"data.dropna(subset=[\"Capital\"], inplace=True) # dropna удаляет строки с пустым значением в столбцах из subset; inplace=True изменяет саму таблицу data, а не возвращает новую\n",
|
|||
|
"print(data)\n",
|
|||
|
"data.to_csv('test2.csv')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 7,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"dd = pd.read_csv(\"data/healthcare.csv\")\n",
|
|||
|
"ddata = dd[[\"age\", \"work_type\", \"avg_glucose_level\"]].copy()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 8,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" avg_glucose_level \n",
|
|||
|
" min q1 q2 median q3 max\n",
|
|||
|
"work_type \n",
|
|||
|
"Govt_job 55.27 76.6600 91.93 91.93 114.3200 266.59\n",
|
|||
|
"Never_worked 59.99 78.4575 86.02 86.02 112.8075 161.28\n",
|
|||
|
"Private 55.12 77.8200 91.92 91.92 114.4600 271.74\n",
|
|||
|
"Self-employed 55.23 76.6050 93.60 93.60 124.9900 267.61\n",
|
|||
|
"children 55.34 76.2550 90.22 90.22 108.7100 219.81\n",
|
|||
|
" avg_glucose_level \n",
|
|||
|
" low_iqr iqr high_iqr\n",
|
|||
|
"work_type \n",
|
|||
|
"Govt_job 20.1700 37.660 170.8100\n",
|
|||
|
"Never_worked 26.9325 34.350 164.3325\n",
|
|||
|
"Private 22.8600 36.640 169.4200\n",
|
|||
|
"Self-employed 4.0275 48.385 197.5675\n",
|
|||
|
"children 27.5725 32.455 157.3925\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<Axes: title={'center': 'avg_glucose_level'}, xlabel='work_type'>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 8,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjgAAAHNCAYAAAATwgHBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB8wElEQVR4nO3dd1xTV/8H8E8IEJYs2YqAOEDBha3iYLg3FLe2amuXs+7WPq2DVm2to7WutrbS1lGfqsXWVXGAuFq34kTEDbhBAZnn9we/3IcAatCQhPB5v16+JPd+c++5OcnNN+eec65MCCFAREREZECMdF0AIiIiIk1jgkNEREQGhwkOERERGRwmOERERGRwmOAQERGRwWGCQ0RERAaHCQ4REREZHCY4REREZHCY4BAREZHBYYJDpAUymQwzZszQdTEMXmxsLGQyGWJjY58ZN2PGDMhkMty9e1c7BdMy5euwfv16XReFSGeY4FClFhUVBZlMpvLPyckJoaGh2LZtm66L99LOnj2LGTNm4MqVK7ouClVBWVlZmDFjxnMTRiJ9ZKzrAhBpQmRkJLy8vCCEQFpaGqKiotCtWzf89ddf6NGjh66L98LOnj2LmTNnIiQkBJ6enrouDlUxWVlZmDlzJgAgJCREt4UhKicmOGQQunbtiubNm0uPhw8fDmdnZ6xdu7ZSJzjalJ+fj8LCQpiamuq6KPSClHVIRLxERQbK1tYW5ubmMDZWzeEzMzMxceJEuLu7Q6FQoH79+pg3bx6EEACA7Oxs+Pj4wMfHB9nZ2dLz7t+/D1dXV7Rq1QoFBQUAgGHDhsHKygqXL19G586dYWlpCTc3N0RGRkrbe5bjx4+ja9eusLa2hpWVFdq3b49Dhw5J66OiotC3b18AQGhoqHQJ7nmXC37//Xc0aNAAZmZm8PPzwx9//IFhw4aptABduXIFMpkM8+bNw9dffw1vb28oFAqcPXsWALB79260bdsWlpaWsLW1RVhYGM6dO6eyn5LbVFL2bylOJpNh9OjRWL16NerXrw8zMzMEBARg7969pZ5/8+ZNvPXWW3B2doZCoUDDhg3x008/lYq7ceMGwsPDYWlpCScnJ4wfPx45OTnPfG1Kunv3Lvr16wdra2tUr14dH3zwAZ48eSKtDw4ORuPGjct8bv369dG5c+enbnvChAmoXr26ynthzJgxkMlkWLRokbQsLS0NMpkMy5Ytk5bdvn1bStLNzMzQuHFj/Pzzzyrbf14dlpSTk4MePXrAxsYGBw4cePYL8//bd3R0BADMnDlTev/NmDEDK1euhEwmw/Hjx0s9b/bs2ZDL5bh58yaAopYfPz8/HD16FK1atYK5uTm8vLywfPnyMss4ffp01KlTBwqFAu7u7pgyZUq565UIACCIKrGVK1cKAGLnzp3izp074vbt2yIhIUG89957wsjISOzYsUOKLSwsFO3atRMymUy8/fbbYvHixaJnz54CgBg3bpwUd+jQISGXy8X48eOlZQMGDBDm5ubiwoUL0rKhQ4cKMzMzUbduXfHGG2+IxYsXix49eggA4tNPP1UpJwAxffp06XFCQoKwtLQUrq6u4rPPPhNffPGF8PLyEgqFQhw6dEgIIURSUpIYO3asACA+/vhj8euvv4pff/1VpKamPvX12Lx5s5DJZKJRo0ZiwYIF4tNPPxV2dnbCz89PeHh4SHHJyckCgGjQoIGoXbu2+OKLL8TChQvF1atXRUxMjDA2Nhb16tUTc+fOFTNnzhQODg7Czs5OJCcnqxx/8W0qTZ8+XZQ8tQAQfn5+wsHBQURGRoovv/xSeHh4CHNzc3H69GkpLjU1VdSsWVO4u7uLyMhIsWzZMtGrVy8BQCxcuFCKy8rKEvXq1RNmZmZiypQp4uuvvxYBAQGiUaNGAoDYs2fPU1+j4mX09/cXPXv2FIsXLxavv/66ACDeeOMNKe6HH34QAFTKKIQQ//77rwAgfvnll6fuY+PGjaWe27hxY2
FkZCT69OkjLfv9998FAJGQkCAdm6+vrzAxMRHjx48XixYtEm3bthUAxNdffy0971l1uGfPHgFA/P7779I2O3bsKOzs7MS///77zNdG6fHjx2LZsmUCgHjttdek99/JkydFRkaGMDc3FxMnTiz1vAYNGoh27dpJj4ODg4Wbm5twcnISo0ePFosWLRJt2rQRAMSPP/4oxRUUFIhOnToJCwsLMW7cOPHdd9+J0aNHC2NjYxEWFqZWmYmKY4JDlZoywSn5T6FQiKioKJXY6OhoAUB8/vnnKsv79OkjZDKZuHTpkrRs6tSpwsjISOzdu1f6Air+5SJE0Rc8ADFmzBhpWWFhoejevbswNTUVd+7ckZaXTHDCw8OFqampSEpKkpbdunVLVKtWTQQFBUnLlPt+3he2kr+/v6hZs6Z49OiRtCw2NlYAKDPBsba2Frdv31bZRpMmTYSTk5O4d++etOzkyZPCyMhIDBkyROX4y5PgABBHjhyRll29elWYmZmJ1157TVo2fPhw4erqKu7evavy/AEDBggbGxuRlZUlhBDi66+/FgDEf//7XykmMzNT1KlTp1wJTq9evVSWjxw5UgAQJ0+eFEII8fDhQ2FmZiY+/PBDlbixY8cKS0tL8fjx46fu4/bt2wKAWLp0qbQtIyMj0bdvX+Hs7KyyLXt7e1FYWKhybKtWrZJicnNzRWBgoLCyshIZGRlCiGfXYfEE59GjRyI4OFg4ODiI48ePP/N1KenOnTul3rtKAwcOFG5ubqKgoEBaduzYMQFArFy5UloWHBwsAIj58+dLy3JycqT3WW5urhBCiF9//VUYGRmJ+Ph4lf0sX75cABD79+8vV9mJeImKDMKSJUsQExODmJgYrFq1CqGhoXj77bexceNGKWbr1q2Qy+UYO3asynMnTpwIIYTKqKsZM2agYcOGGDp0KEaOHIng4OBSz1MaPXq09LfyUkxubi527txZZnxBQQF27NiB8PBw1K5dW1ru6uqKQYMGYd++fcjIyCj3a3Dr1i2cPn0aQ4YMgZWVlbQ8ODgY/v7+ZT6nd+/e0mUIAEhJScGJEycwbNgw2NvbS8sbNWqEjh07YuvWreUul1JgYCACAgKkx7Vq1UJYWBj+/vtvFBQUQAiBDRs2oGfPnhBC4O7du9K/zp07Iz09HceOHQNQVJeurq7o06ePtD0LCwu8++675SrTqFGjVB6PGTNG2j4A2NjYICwsDGvXrpUuNRUUFGDdunXS5bGncXR0hI+Pj3QZbv/+/ZDL5Zg8eTLS0tKQmJgIAIiPj0ebNm2ky3pbt26Fi4sLBg4cKG3LxMQEY8eOxePHjxEXF6eyn5J1WFx6ejo6deqE8+fPIzY2Fk2aNFH3pXmuIUOG4NatW9izZ4+0bPXq1TA3N0fv3r1VYo2NjfHee+9Jj01NTfHee+/h9u3bOHr0KICiS6u+vr7w8fFRqft27doBgMp+iNTBBIcMwquvvooOHTqgQ4cOGDx4MLZs2YIGDRpIyQYAXL16FW5ubqhWrZrKc319faX1Sqampvjpp5+QnJyMR48eSX0OSjIyMlJJUgCgXr16APDUod137txBVlYW6tevX2qdr68vCgsLcf36dfUP/v8py1+nTp1S68paBgBeXl5lbuNpZbt79y4yMzPLXTYAqFu3bqll9erVQ1ZWFu7cuYM7d+7g4cOH+P777+Ho6Kjy78033wRQ1DdFWc46deqUqpOyyl2eMnl7e8PIyEil7oYMGYJr164hPj4eALBz506kpaXhjTfeeO7227ZtKz0vPj4ezZs3R/PmzWFvb4/4+HhkZGTg5MmTaNu2rfScq1evom7dujAyUj09l/U+BUrXYXHjxo3D4cOHsXPnTjRs2PC55S2Pjh07wtXVFatXrwYAFBYWYu3atQgLCyv1GXNzcyuVDJb8nCQmJuLMmTOl6l4Zp6x7InVxFBUZJCMjI4SGhuKbb75BYmLiC53c//77bwDAkydPkJiY+MwvksrK3Nz8hZ
9bVsIHQOqEXV7K0T+vv/46hg4dWmZMo0aNXmjb6irrmDp37gxnZ2esWrUKQUFBWLVqFVxcXNChQ4fnbq9Nmzb44Yc
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"\n",
|
|||
|
"# функции для поиска квартилей\n",
|
|||
|
"def q1(x):\n",
|
|||
|
" return x.quantile(0.25)\n",
|
|||
|
"\n",
|
|||
|
"# median = quantile(0.5)\n",
|
|||
|
"def q2(x):\n",
|
|||
|
" return x.quantile(0.5)\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"def q3(x):\n",
|
|||
|
" return x.quantile(0.75)\n",
|
|||
|
"\n",
|
|||
|
"# интерквартильный размах\n",
|
|||
|
"def iqr(x):\n",
|
|||
|
" return q3(x) - q1(x)\n",
|
|||
|
"\n",
|
|||
|
"# нижняя граница для обнаружения выбросов (не меньше 0)\n",
|
|||
|
"def low_iqr(x):\n",
|
|||
|
" return max(0, q1(x) - 1.5 * iqr(x))\n",
|
|||
|
"\n",
|
|||
|
"# верхняя граница для обнаружения выбросов\n",
|
|||
|
"def high_iqr(x):\n",
|
|||
|
" return q3(x) + 1.5 * iqr(x)\n",
|
|||
|
"\n",
|
|||
|
"# aggregate позволяет применить все эти функции к данным каждой группы и собрать результаты в таблицу\n",
|
|||
|
"quantiles = ddata[[\"work_type\", \"avg_glucose_level\"]].groupby([\"work_type\"]).aggregate([\"min\", q1, q2, \"median\", q3, \"max\"])\n",
|
|||
|
"print(quantiles)\n",
|
|||
|
"\n",
|
|||
|
"iqrs = ddata[[\"work_type\", \"avg_glucose_level\"]].groupby([\"work_type\"]).aggregate([low_iqr, iqr, high_iqr])\n",
|
|||
|
"print(iqrs)\n",
|
|||
|
"\n",
|
|||
|
"ddata.boxplot(column=\"avg_glucose_level\", by=\"work_type\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"визуализация - гистограмма"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 9,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<Axes: ylabel='Frequency'>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 9,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAGdCAYAAAD0e7I1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAyGklEQVR4nO3de1RVdf7/8RfIJVAuIvcixftdU4sYzbEk8ZJfbzOlUV7Gr35rsDKyi02j3SbKJqd7rmZKa32liyu11KK8oVlkal7SjIRB0QHUNEE0EeHz+6Of+9tJVDgcOIft87HWXquz9+fs8958hPPqsz97by9jjBEAAIBNebu7AAAAgPpE2AEAALZG2AEAALZG2AEAALZG2AEAALZG2AEAALZG2AEAALZG2AEAALbm4+4CPEFVVZUKCwsVFBQkLy8vd5cDAABqwBij48ePKzY2Vt7e5x+/IexIKiwsVFxcnLvLAAAATti/f7+uuOKK824n7EgKCgqS9MsPKzg42M3VAACAmigtLVVcXJz1PX4+hB3JOnUVHBxM2AEAoJG52BQUJigDAABbI+wAAABbI+wAAABbY84OAFwiKisrVVFR4e4ygBpr0qSJfHx86nxbGMIOAFwCysrKdODAARlj3F0KUCuBgYGKiYmRn5+f0/sg7ACAzVVWVurAgQMKDAxUREQEN09Fo2CM0enTp3X48GHl5+erXbt2F7xx4IUQdgDA5ioqKmSMUUREhAICAtxdDlBjAQEB8vX11b59+3T69GlddtllTu2HCcoAcIlgRAeNkbOjOQ77cEEdAAAAHouwAwAAbI05OwBwiWr10IoG/by9Tw9r0M9rCHv37lV8fLy2bt2qnj17urucOhswYIB69uyp559/vsE+c+LEiTp27JiWLl1ab5/ByA4AALA1wg4AALA1wg4AwGNlZmaqX79+Cg0NVYsWLXTTTTcpLy9PkvS73/1ODz74oEP7w4cPy9fXV+vXr5ckFRUVadiwYQoICFB8fLwyMjLUqlWrGp+m+f7779WvXz9ddtll6ty5s1atWiUvL6/znnJZsGCBQkNDHdYtXbr0nCvhli1bpquvvlqXXXaZwsPDNWrUKGvbTz/9pPHjx6t58+YKDAzUkCFDtGfPHmv7vn37NHz4cDVv3lxNmzZVly5d9PHHH1vbd+7cqSFDhqhZs2aKiorS7bffrh9//LFGx/tb5eXlmjFjhi6//HI1bdpUCQkJysrKkiSVlpYqICBAn3zyicN7lixZoqCgIJ08eVKStH//ft18880KDQ1VWFiYRowYob179zpVj7MIO6izVg+tOGcBAFc4ceKE0tLStHnzZq1evVre3t4aNWqUqqqqlJKSonfffdfhrtDvvfeeYmNjdd1110mSxo8fr8LCQmVlZemDDz7Q66+/rkOHDtXosysrKzVy5EgFBgZq48aNev311/WXv/ylzse0YsUKjRo1SkOHDtXWrVu1evVqXXPNNdb2iRMnavPmzfroo4+UnZ0tY4yGDh1qPeojNTVV5eXlWr9+vb799ls988wzatasmSTp2LFjuuGGG3TVVVdp8+bNyszM1MGDB3XzzTc7Veu0adOUnZ2td999Vzt27NAf//hHDR48WHv27FFwcLBuuukmZWRkOLxn4cKF1s+toqJCycnJCgoK0ueff64vvvhCzZo10+DBg3X69Gknf4K1xwRlAIDHGjNmjMPrN998UxEREfruu+908803a/r06dqwYYMVbjIyMjRu3Dh5eXnp+++/16pVq7Rp0yb16dNHkvSvf/1L7dq1q9Fnr1y5Unl5ecrKylJ0dLQk6W9/+5tuvPHGOh3T3/72N40dO1aPPfaYta5Hjx6SpD179uijjz7SF198od/97neSfgkPcXFxWrp0qf74xz+qoKBAY8aMUbdu3SRJrVu3tvbz8ssv66qrrtJTTz1lrXvzzTcVFxenH374Qe3bt69xnQUFBZo/f74KCgoUGxsrSZoxY4
YyMzM1f/58PfXUU0pJSdHtt9+ukydPKjAwUKWlpVqxYoWWLFki6ZfwWVVVpX/961/W6Nb8+fMVGhqqrKwsDRo0yJkfYa0xsgMA8Fh79uzRuHHj1Lp1awUHB6tVq1aSfvkijoiI0KBBg7Rw4UJJUn5+vrKzs5WSkiJJysnJkY+Pj3r16mXtr23btmrevHmNPjsnJ0dxcXFW0JHkMALjrG3btmngwIHVbtu9e7d8fHyUkJBgrWvRooU6dOig3bt3S5LuvvtuPfnkk+rbt69mz56tHTt2WG23b9+utWvXqlmzZtbSsWNHSbJO/9XUt99+q8rKSrVv395hf+vWrbP2NXToUPn6+uqjjz6SJH3wwQcKDg5WUlKSVU9ubq6CgoKs94eFhenUqVO1rqcuGNkBAHis4cOHq2XLlvrnP/+p2NhYVVVVqWvXrtYpkJSUFN1999166aWXlJGRoW7dulkjHu7g7e19zsNWf/uk+bo+suO///u/lZycrBUrVuizzz5Tenq6nnvuOd11110qKyvT8OHD9cwzz5zzvpiYmFp9TllZmZo0aaItW7aoSZMmDtvOnjbz8/PTH/7wB2VkZGjs2LHKyMjQLbfcIh8fH2sfvXv3tgLpr0VERNSqnrpgZAcA4JGOHDminJwcPfLIIxo4cKA6deqkn376yaHNiBEjdOrUKWVmZiojI8Ma1ZGkDh066MyZM9q6dau1Ljc395x9nE+HDh20f/9+HTx40Fq3adOmC74nIiJCx48f14kTJ6x127Ztc2jTvXt3rV69utr3d+rUSWfOnNHGjRutdWd/Dp07d7bWxcXF6Y477tDixYt133336Z///KckqVevXtq1a5datWqltm3bOixNmzat0XGfddVVV6myslKHDh06Z1+/Hu1KSUlRZmamdu3apTVr1jj0Qa9evbRnzx5FRkaes4+QkJBa1VMXhB0AgEdq3ry5WrRooddff125ublas2aN0tLSHNo0bdpUI0eO1F//+lft3r1b48aNs7Z17NhRSUlJmjp1qr7++mtt3bpVU6dOVUBAQI2eE3bjjTeqTZs2mjBhgnbs2KEvvvhCjzzyiKTzP2csISFBgYGBevjhh5WXl6eMjAwtWLDAoc3s2bP1zjvvaPbs2dq9e7c1yViS2rVrpxEjRmjKlCnasGGDtm/frttuu02XX365RowYIUmaPn26Pv30U+Xn5+ubb77R2rVr1alTJ0m/TF4+evSoxo0bp02bNikvL0+ffvqpJk2apMrKypr94P+/9u3bKyUlRePHj9fixYuVn5+vr7/+Wunp6Vqx4v8uROnfv7+io6OVkpKi+Ph4h1NwKSkpCg8P14gRI/T5558rPz9fWVlZuvvuu3XgwIFa1VMnBqakpMRIMiUlJe4upVFq+eDycxYAnuPnn3823333nfn555/dXUqtrVy50nTq1Mn4+/ub7t27m6ysLCPJLFmyxGrz8ccfG0mmf//+57y/sLDQDBkyxPj7+5uWLVuajIwMExkZaebNm1ejz9+9e7fp27ev8fPzMx07djTLli0zkkxmZqYxxpj8/HwjyWzdutV6z5IlS0zbtm1NQECAuemmm8zrr79ufvt1+8EHH5iePXsaPz8/Ex4ebkaPHm1tO3r0qLn99ttNSEiICQgIMMnJyeaHH36wtk+bNs20adPG+Pv7m4iICHP77bebH3/80dr+ww8/mFGjRpnQ0FATEBBgOnbsaKZPn26qqqouery///3vzT333GO9Pn36tJk1a5Zp1aqV8fX1NTExMWbUqFFmx44dDu974IEHjCQza9asc/ZZVFRkxo8fb8LDw42/v79p3bq1mTJlivWdO2HCBDNixIjz1nShf781/f72MuY3JxcvQaWlpQoJCVFJSYmCg4PdXU6jU92l5na8LTzQWJ06dUr5+fmKj4/XZZdd5u5y3OrAgQOKi4vTqlWrzjtJ+EK++OIL9evXT7m5uWrTpk09VIjfutC/35p+fzNBGQBgW2vWrFFZWZ
m6deumoqIiPfDAA2rVqpX69+9fo/cvWbJEzZo1U7t27ZSbm6t77rlHffv2Jeg0MszZAQDYVkVFhR5++GF16dJFo0a
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"ddata.plot.hist(column=[\"avg_glucose_level\"], bins=80)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Визуализация - точечная диаграмма"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 10,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<Axes: xlabel='age', ylabel='work_type'>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 10,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjIAAAGxCAYAAAB4AFyyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAADHaklEQVR4nOy9e3wU9b3//9okC2RDstlsQiAh5AIJdzRRLgmXimKotQWkp6dSTxXqsb9TsFiFttLaqv22R+sRbaVoT3sU6mmLbU9RsRcLAkIlXNQEBcEkkISES0iyuZEsIQmZ3x9hltnZmfl8PnPZWz7Px8NHS3bmc5/Zz877Na+3TRAEARwOh8PhcDgRSEyoG8DhcDgcDoejF76R4XA4HA6HE7HwjQyHw+FwOJyIhW9kOBwOh8PhRCx8I8PhcDgcDidi4RsZDofD4XA4EQvfyHA4HA6Hw4lY+EaGw+FwOBxOxBIX6gZYzcDAAM6fP4/ExETYbLZQN4fD4XA4HA4FgiDg0qVLyMjIQEyM+nOXqN/InD9/HllZWaFuBofD4XA4HB00NDRg7Nixqp9H/UYmMTERwOBAJCUlhbg1HA6Hw+FwaOjs7ERWVpbve1yNqN/IiOGkpKQkvpHhcDgcDifCIMlCuNiXw+FwOBxOxMI3MhwOh8PhcCIWvpHhcDgcDocTsfCNDIfD4XA4nIiFb2Q4HA6Hw+FELHwjw+FwOBwOJ2LhGxkOh8PhcDgRC9/IcDgcDofDiVj4RobD4XA4HE7EwjcyHA6Hw+FwIpaoT1EQLGqau3Cm1YtYmw3n2r0AbMhMHoGrApDjTkBuaoLsOAR8Zkb9TR09aLzUg6JxLszPT1M99nBtK2wAZue5kZuagH2VTTh6th1jnCOQljgCOe4ECILgd1y9pxt7K5sRAyB/dCLm5LkhCALOtHp9/RDbIZ4v/YylL/L2Sfsolie2WdpX+TGDZXkwOB/xuCoImu1ROl/eB5pjogXp+M2RzEWo2xSt420WWmNkdPxIZSutF9/9JWkErgqC3+e07aE5Tl6/eA8y+35Lg9nrlK97dfhGxiDt3l6s3XYU+6ubNY8rznPDZgPKTnsCPluQn4ZNKwrhdNhNrd/lsGPHmnnIcjt8x37jt+U4WOPfhrgYG/oHBOa6lepr8/YpfkbTR7X2zcpJQWyMze/v8jYnx9uRP2ok3j/T5vubM96Ojst07VEaR3l/Ssa7IQjwa4f8GCNzGU60e3ux+nflAeu1OM+NX/7bTSHpn9IcRct4m4XWGAkQDI0fqWyl9XLjWCfqPF60q1yH8mtUqT008662XpWwes2YvU75uidjEwTB+DdYGNPZ2Qmn04mOjg5Lkkbe+/IRHDjVcu1Xhj5ibTbMnZCKV++fZXr9LocdFT8s9R1L2nBZBU0fg9k+eXvMmEelciMVrblYkJ8Wkv4pzVG0jLdZaI0RAEPjRyrbjGtXqT00885y77B6zZi9Tofyuqf9/uYaGQPUNHdhf3Wz4S+/q4KA/dXNqG3pNr3+Nm8f/lnd7Ds2VJD6GOz2Sdtj1jzKy41USHMRiv6pzVE0jLdZkMbIyPjRlG0G8vbQzDvrvcPKNWP2OuXrng4eWjLAmVavqeXVebqZYr9vfXyeqtzy+jZTQkdm8GpZHRZOSsO59suQxskP17aGpD11HmtuBKS5VNIR6dUUaZWvpxyada3VPysgtanO031N06Wu52EdF73jSKtPMWu+RYzcj0jzafa9joQ4n6R7nJHr93CNx3T9EGmcDtW0oM7TTa0HutjRo1lesK/DcIVvZAyQneIwtbwX95xCUZYrID5MG/tV451PLuKWglFmNNEwW8rqsKWszu9vSSPi0NnTH5L2iF8oVpQrh1ZPBeiLgZsVS6dZ10r9sxJSm154pxoVDe1+fxP1PKzaEL3jyKpPkWKG5sHI/Yg0n2bf60i8uOeUn95NDSPX76Pbj+FvxxpN0Q+JkMZpw/bjmuWz3COA4F+H4Q
oPLRkgL20kFqi8GaSH8vp2fHNbhd/f1m47amgTAwAnLlzCxp1VcIWpMCwUm5hYmw0L8tOQm5rgm8dYm82Usl0Ou+KvpLXbjuLAqRaqMg6caglYCySUytdTDmldi+MWTNTmKNZmG9SByTYxwKAo+5vbKpjHRe84ap1Hmns98yRHa4wW5KdpfkaaT7OvES3iYmwor2/XPEbp+tWD1vwYuXZoxkmpfNp7BO28DRX4RsYg60oLTCtLLT5sVrlqbxRFK8549Y3b3Amp2LSi0PfvTSsKfaJFEb0bvzZvX0DsmlWHwxoDNzuWvmlFIUrGuwP+Xpzn9hu3YKI0R4XjkjXXNas2RO846tGnsJRPi9IYiWtd6zO9ZVtB/4BAvE6Url+l9UrCDP2QHNpxotUDKcEyb0OBkIaWrl69iieeeAK//e1v0djYiIyMDKxcuRKPPfYYbNd2tIIg4PHHH8evf/1rtLe3Y+7cuXjppZeQn58fyqb7aPX2ml6mGEc9frbd9LKjnaIsJ1JGDsfiqaPxpZuzsL+qCXs+bUbqyGG4ISsZ/QPKPjKe7itYNS8HDyzI9Ttm25F6bNh+jLkdb310DgLg87jRqzGgjYHTaEhYfr05HXb8/oE5qG3pxuEaDwRA1UcmWF4zTocdr94/C7Ut3T6dQZ2nG6u2vK+7TPm4sI4jrZZBb3tYUVvHIvLxU5tPJZ2IdPx3fHQOz++q1t1OvawsycHEax5W0pCM2noFBsf0+LkObNxZpatONV0LzTjVebrR2NGjeQ8R55y09laWZGPS6CQ/by0SQ8V7JqQbmZ/+9Kd46aWX8Jvf/AZTp07FBx98gFWrVsHpdGLt2rUAgGeeeQYvvPACfvOb3yA3Nxc/+MEPsHjxYpw4cQIjRowIZfMBACkWhGukcVQOG+UNHQCAd0424dE/f4yrkh83rB4V7d5ebC8/q6sdz0lu8i6HHb/Q+euJNgZOis3rjaXnpqrfAEPlNSNtk1F9k3xcSOMo6thImhez2kOL1jqWozantNqg3NQEfGFGRkg2Mlsl+jq1tsn7lpuaAFe8XfdGRq5r+fGyqXjsjU+oxkk0/NNCnHPS2ttadka1LjlDzXsmpKGlsrIyLF26FHfeeSdycnLwL//yLygtLcWRI0cADN6kfvazn+Gxxx7D0qVLMWPGDLz66qs4f/483njjjVA23cfGncG/mIcKRjU9V2XfcbQxaWncvPxMu6E2AIOhpge3VTBrDFhi4CR9hBW/xtT0W6I2JRjQ6HlYxoWkcRB1bCx6J5fDTpx7o/NkhsaDpQwrNTNxMTbdGhM1njNp03XgVAuWbj7ANNa01ybtmBrVa0UjId3IlJSUYPfu3aiqGtwpf/TRR3jvvfdwxx13AABqa2vR2NiIRYsW+c5xOp2YPXs2Dh48qFjmlStX0NnZ6fefVYTamyXaMVvTw+pRYZa3DDDYl6/MymLSGKxfzKa/MqqBYCGcvGZIeh7Wcdm0ohBF2cmKn+lZG23ePqwvLdCceyPzZIY+Sk8ZWuNkhP4BAUXjyOWyeuCYwVVBQJu3j3msadcgjb7GqF4rGr1nQhpaevTRR9HZ2YlJkyYhNjYWV69exU9+8hPcc889AIDGxkYAQHp6ut956enpvs/kPPXUU3jyySetbfg1gu2tEE1kuUagoc0cXQErtDFpWtaXFmBqphP7Kpv9Hn3LqWy6xKQxOHG+EyOHxynmilGKfStpSKyKi4eT1wyNnkc6LuJYtnp7FR+zOx12LC/KxPt15Nd/afF4ewPmBoAp80TrsSP3r4m12Xy5x1i0QdIcZ6sXTmDWKK0rLYCn64ovVKLE6lsn+DRQFzt68CiFxgRQzoNG67dlBvKxZr02WXRIateX2Xq5SCCkG5k//vGP+N3vfoff//73mDp1Ko4ePYpvfetbyMjIwH333aerzA0bNuCRRx7x/buzsxNZWVlmNdmPYHsrRBOh2sQAwIt7B3UOZs3fszursCA/DUtuGKN5XNE4FwB6jY
HazZuU30lL12IW4eg1Q+q3y2HH42/WEXP2WKl9kbfRjHmi0fWQPFlm5rg0P39oWwV+9dWb8I3flfutvaQR7F8hNFo
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoQAAAGwCAYAAAAwtHRpAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABJhUlEQVR4nO3dd3wUdf7H8fem90ISAtEAwdBbKIIQBFQQEJHiAYecwFlOT6qcCqg0lar81NOzwUk5g2IDRe7gMEIQRKkBJTFAAhKklyQQIIRkfn94rCzZJZuQsAnzej4e+3gk3/3Odz4zE3bfzOx812IYhiEAAACYlpurCwAAAIBrEQgBAABMjkAIAABgcgRCAAAAkyMQAgAAmByBEAAAwOQIhAAAACbn4eoCUDkUFhbq4MGDCgwMlMVicXU5AADACYZh6PTp04qKipKbm+PzgARCOOXgwYOKjo52dRkAAKAUMjMzdfPNNzt8nkAIpwQGBkr67Q8qKCjIxdUAAABn5OTkKDo62vo+7giBEE65dJk4KCiIQAgAQCVT3Me9uKkEAADA5AiEAAAAJkcgBAAAMDkCIQAAgMkRCAEAAEyOQAgAAGByBEIAAACTIxACAACYHIEQAADA5AiEFVynTp00evToMh1z6NCh6t27d5mOCQAAKi+XBsKhQ4fKYrFoxowZNu1Lly4t9itWgBvd4o37NXrxNn2yObPY9qS0o3o9cZe+3X3Mpu+bibs18L0Nemv1HmvbC1/uVLfXkvTSVynFjptx7IxWpx3V3uO5Zblp5TYuAKB0XP5dxj4+Ppo5c6Yee+wxhYaGurocq/z8fHl6erps/RcuXJCXl5fL1g/X+fFAlvq89Z0uFhqSpKXbDmr85z9q9h+a6G+f/mjTPu6zHfL38lBO3kXr8qF+nprUo6FGf7Ld2rYh46RmrUyzWc/Ph89o7rq9mtCjnqb/Z1eR9TWJCtK2A9nW/h3qROiNgc0V7Ff6fxdZZy9o5IfJWntZcC2LcQEA18bll4w7d+6satWqafr06Q77rFu3Trfffrt8fX0VHR2tkSNHKjf3tzMLzz77rNq0aVNkmWbNmumFF16w/j537lw1aNBAPj4+ql+/vt566y3rc/v27ZPFYtHixYvVsWNH+fj4KCEhwWE9hmEoIiJCn376qbUtLi5O1atXt6nZ29tbZ8+elSTt379fvXr1UkBAgIKCgtS/f38dOXLE2n/y5MmKi4vT3LlzFRMTIx8fH7vrXr58uYKDg631ZWZmqn///goJCVGVKlXUq1cv7du3z9q/oKBAY8aMUUhIiMLCwvTMM8/IMAyH2wbXuzwMXnKx0NCoj3cUaS8wZBMGJenU2XybMFicF5en2V3f5WFQktbvOa4RH25zelx7Rn6YrPV7jpf5uACAa+PyQOju7q5p06bpjTfe0IEDB4o8n56erm7duun+++/Xjh07tHjxYq1bt07Dhw+XJA0aNEgbN25Uenq6dZmdO3dqx44deuCBByRJCQkJmjhxoqZOnarU1FRNmzZNEyZM0IIFC2zWNW7cOI0aNUqpqanq2rWrw5otFos6dOigNWvWSJJOnTql1NRUnTt3Tj///LMkKSkpSbfeeqv8/PxUWFioXr166eTJk0pKStKqVauUkZGhAQMG2Iy7Z88effbZZ/r888+VnJxcZL2LFi3SwIEDlZCQoEGDBik/P19du3ZVYGCgvv32W61fv14BAQHq1q2bLly4IEmaPXu25s+fr/fff1/r1q3TyZMntWTJkmKOipSXl6ecnBybB8rf4o37i4SziqLAMLR297FSX+bNOHZGa3cfU4FxZai9tnEBANfO5YFQkvr06aO4uDhNmjSpyHPTp0/XoEGDNHr0aNWpU0ft2rXT3//+dy1cuFDnz59Xo0aN1KxZMy1atMi6TEJCgtq0aaPY2FhJ0qRJkzR79mz17dtXMTEx6tu3r5588km9++67NusaPXq0tc/lZ/vs6dSpkzUQrl
27Vs2bN7dpW7NmjTp27ChJSkxM1I8//qhFixapZcuWatOmjRYuXKikpCRt2rTJOuaFCxe0cOFCNW/eXE2bNrVZ3z/+8Q898cQTWrZsme69915J0uLFi1VYWKi5c+eqSZMmatCggebNm6f9+/db63jttdc0fvx49e3bVw0aNNA777yj4ODgYo7Ib/s9ODjY+oiOji52GVy7DXtPuLqEYu07Ubrg9svJs+UyLgDg2lWIQChJM2fO1IIFC5SammrTvn37ds2fP18BAQHWR9euXVVYWKi9e/dK+u0s4aVAaBiGPvzwQw0aNEiSlJubq/T0dD388MM2Y7z00ks2ZxUlqVWrVk7X27FjR6WkpOjYsWNKSkpSp06drIEwPz9f3333nTp16iRJSk1NVXR0tE2oatiwoUJCQmy2t2bNmoqIiCiyrk8//VRPPvmkVq1aZQ2Zl/bNnj17FBgYaN2uKlWq6Pz580pPT1d2drYOHTpkc0ndw8PDqe0cP368srOzrY/MzMxil8G1axsT5uoSilUrzL9Uy9Ws4lcu4wIArp3Lbyq5pEOHDuratavGjx+voUOHWtvPnDmjxx57TCNHjiyyTI0aNSRJAwcO1NixY7V161adO3dOmZmZ1suxZ86ckSTNmTOnyGcN3d3dbX7393f+DalJkyaqUqWKkpKSlJSUpKlTp6patWqaOXOmNm3apPz8fLVr187p8a62/ubNm2vr1q16//331apVK+sd2GfOnFHLli3tft7RXrAsCW9vb3l7e1/TGCi5Aa1r6LmlP1XIy8buFoviY8MVE1664FY7IkAd6kRo/Z7jNpeNr3VcAMC1qzBnCCVpxowZWrZsmTZs2GBta9GihVJSUhQbG1vkceku3JtvvlkdO3ZUQkKCEhIS1KVLF1WtWlWSFBkZqaioKGVkZBRZPiYmptS1WiwW3X777friiy+0c+dOtW/fXk2bNlVeXp7effddtWrVyhrwGjRooMzMTJuzbCkpKcrKylLDhg2LXdctt9yi1atX64svvtCIESNs9s3u3btVtWrVItt26VJv9erV9cMPP1iXuXjxorZs2VLq7Ub5+3JYvDzcbKdd8nCz6O8D4oq0u1ukIG/b/9eF+nnq7wPinF7fxB4N7K6v+c22Hy2Ijw3XGwObOz2uPW8MbK742PAyHxcAcI0MFxoyZIjRq1cvm7YHH3zQ8PHxMS6Vtn37dsPX19cYNmyYsW3bNmPXrl3G0qVLjWHDhtksN2fOHCMqKsoIDw83/vWvfxV5ztfX13j99deNtLQ0Y8eOHcb7779vzJ492zAMw9i7d68hydi2bVuJ6n/ttdcMd3d3o02bNta2Xr16Ge7u7sa4ceOsbYWFhUZcXJxx++23G1u2bDF++OEHo2XLlkbHjh2tfSZNmmQ0a9asyDo6duxojBo1yjAMw/j555+NatWqWX/Pzc016tSpY3Tq1MlYu3atkZGRYaxevdoYMWKEkZmZaRiGYcyYMcOoUqWKsWTJEiM1NdV49NFHjcDAwCL7vTjZ2dmGJCM7O7tEy6H0Pt603xj10Vbj4037i21fu+uo8drXacbaXUdt+v7jm93GH9/9zvjHN7utbS8u22l0fXWN8eKyncWOm3HsjPHNz0eMjGNnynLTym1cAIAtZ9+/K1wg3Lt3r+Hl5WVcnlU3btxodOnSxQgICDD8/f2Npk2bGlOnTrVZ7tSpU4a3t7fh5+dnnD59usi6EhISjLi4OMPLy8sIDQ01OnToYHz++efWdZYmEG7bts2QZIwdO9ba9uqrrxqSjBUrVtj0/eWXX4z77rvP8Pf3NwIDA41+/foZhw8ftj7vTCA0DMNISUkxqlataowZM8YwDMM4dOiQMXjwYCM8PNzw9vY2ateubTz66KPWA5+fn2+MGjXKCAoKMkJCQowxY8YYgwcPJhACAGACzr5/WwyDSelQvJycHAUHBys7O1tBQUGuLgcAADjB2ffvCvUZQgAAAFx/BEIHunfvbjNNzeWPad
Omubo8AACAMlNhpp2paObOnatz587Zfa5KlSrXuRoAAIDyQyB04KabbnJ1CQAAANcFl4wBAABMjkAIAABgcgRCAAA
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"ddata.plot.scatter(x=\"avg_glucose_level\", y=\"age\")\n",
|
|||
|
"\n",
|
|||
|
"ddata.plot.scatter(x=\"age\", y=\"work_type\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"столбчатая диаграмма"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 11,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.legend.Legend at 0x243abeb0140>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 11,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjAAAAIGCAYAAABK0rXfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABNLUlEQVR4nO3deVyU5f7/8feAgKACLshSiPu+4ZJRLrgccUlzKXMpN9Qy1BJT41RundTUSjst1im1Tm6nc9RKz8/jUu6UogG54RKJlWipMOICAvP7o69zmoOaC3DPzbyej8f9eMxc9zX3fIYpeHvd133dFpvNZhMAAICJuBldAAAAwO0iwAAAANMhwAAAANMhwAAAANMhwAAAANMhwAAAANMhwAAAANMhwAAAANMpZXQBRSU/P18///yzypUrJ4vFYnQ5AADgFthsNl24cEEhISFyc7vxOEuJDTA///yzQkNDjS4DAADcgZMnT+ree++94f4SG2DKlSsn6bcfgK+vr8HVAACAW2G1WhUaGmr/O34jJTbAXDtt5OvrS4ABAMBk/mj6B5N4AQCA6RBgAACA6RBgAACA6ZTYOTAAAPyv/Px85eTkGF2GS/Pw8JC7u/tdH4cAAwBwCTk5OUpNTVV+fr7Rpbg8f39/BQUF3dU6bQQYAECJZ7PZdOrUKbm7uys0NPSmC6Sh6NhsNl26dElnzpyRJAUHB9/xsQgwAIASLzc3V5cuXVJISIh8fHyMLseleXt7S5LOnDmjypUr3/HpJCIoAKDEy8vLkyR5enoaXAkk2UPk1atX7/gYBBgAgMvg3njOoTC+BwIMAAAwHQIMAAAu4IcffpDFYlFiYqLRpRSK2w4w27ZtU48ePRQSEiKLxaI1a9Y47LdYLNfd5s6da+9TtWrVAvtnz57tcJzk5GS1adNGpUuXVmhoqObMmXNnnxAAgBvZmlC8220aOnSoLBaLnnrqqQL7YmJiZLFYNHTo0EL4QZjPbQeYixcvqkmTJnr77bevu//UqVMO26JFi2SxWNS3b1+HfjNmzHDoN3bsWPs+q9Wqzp07KywsTHv37tXcuXM1bdo0vf/++7dbLgAAphYaGqoVK1bo8uXL9rYrV65o2bJlqlKlioGVGeu2A0zXrl31l7/8Rb17977u/qCgIIfts88+U/v27VW9enWHfuXKlXPoV6ZMGfu+pUuXKicnR4sWLVKDBg3Uv39/jRs3Tq+//vrtlgsAgKk1a9ZMoaGhWrVqlb1t1apVqlKlisLDw+1t69evV+vWreXv76+KFSvqoYce0vHjx2967P3796tr164qW7asAgMD9cQTT+jXX38tss9SmIp0Dszp06e1bt06RUdHF9g3e/ZsVaxYUeHh4Zo7d65yc3Pt++Lj49W2bVuHy92ioqKUkpKi8+fPX/e9srOzZbVaHTYABjFgqB0oyYYPH67Fixfbny9atEjDhg1z6HPx4kXFxsYqISFBmzdvlpubm3r37n3DlYczMjLUoUMHhYeHKyEhQevXr9fp06fVr1+/Iv0shaVIF7L76KOPVK5cOfXp08ehfdy4cWrWrJkqVKigXbt2KS4uTqdOnbKPsKSnp6tatWoOrwkMDLTvK1++fIH3mjVrlqZPn15EnwQAAOM8/vjjiouL04kTJyRJO3fu1IoVK7RlyxZ7n/+dqrFo0SIFBATo4MGDatiwYYFjvvXWWwoPD9fMmTMdXhMaGqojR46odu3aRfNhCkmRBphFixZp0KBBKl26tEN7bGys/XHjxo3l6empJ598UrNmzZKXl9cdvVdcXJzDca1Wq0JDQ++scAAAnEhAQIC6d++uJUuWyGazqXv37qpUqZJDn6NHj2rKlCn65ptv9Ouvv9pHXtLS0q4bYJKSkvTVV1+pbNmyBfYdP37cdQPM9u3blZKSopUrV/5h31atWik3N1c//PCD6tSpo6CgIJ0+fdqhz7XnQUFB1z2Gl5fXHYcfAACc3fDhwzVmzBhJuu
6FND169FBYWJj+9re/KSQkRPn5+WrYsOEN776dlZWlHj166NVXXy2w727uUVRciizAfPjhh2revLmaNGnyh30TExPl5uamypUrS5IiIiL0wgsv6OrVq/Lw8JAkbdy4UXXq1Lnu6SMAAEq6Ll26KCcnRxaLRVFRUQ77zp49q5SUFP3tb39TmzZtJEk7duy46fGaNWumf/3rX6patapKlTLfrRFvexJvVlaWEhMT7QvhpKamKjExUWlpafY+VqtVn376qUaMGFHg9fHx8Zo/f76SkpL0/fffa+nSpRo/frwef/xxezgZOHCgPD09FR0drQMHDmjlypVasGCBwykiAABcibu7uw4dOqSDBw8WuAFi+fLlVbFiRb3//vs6duyYvvzyyz/8mxkTE6Nz585pwIAB2rNnj44fP67//Oc/GjZsmP3eUc7stiNXQkKC2rdvb39+7Qc0ZMgQLVmyRJK0YsUK2Ww2DRgwoMDrvby8tGLFCk2bNk3Z2dmqVq2axo8f7/CD9vPz04YNGxQTE6PmzZurUqVKmjJlikaNGnW75QIAcGPtWhhdwW3x9fW9brubm5tWrFihcePGqWHDhqpTp47efPNNRUZG3vBYISEh2rlzpyZPnqzOnTsrOztbYWFh6tKli9zcnH+hfovNZrMZXURRsFqt8vPzU2Zm5g2/cABFpDAugzbZHxY4tytXrig1NVXVqlUrcGEJit/Nvo9b/fvt/BELAADgfxBgAACA6RBgAACA6RBgAACA6RBgAACA6RBgAACA6RBgAACA6RBgAACA6RBgAADALatatarmz59vdBkEGAAAnNXQoUNlsVgKbMeOHTO6NMOZ7/aTAAAUEst0S7G+n23q7d+9p0uXLlq8eLFDW0BAQGGVZFqMwAAA4MS8vLwUFBTksLm7u+uzzz5Ts2bNVLp0aVWvXl3Tp09Xbm6u/XUWi0XvvfeeHnroIfn4+KhevXqKj4/XsWPHFBkZqTJlyuiBBx7Q8ePH7a85fvy4Hn74YQUGBqps2bJq2bKlNm3adNP6MjIyNGLECAUEBMjX11cdOnRQUlJSkf08riHAAABgMtu3b9fgwYP1zDPP6ODBg3rvvfe0ZMkSvfLKKw79Xn75ZQ0ePFiJiYmqW7euBg4cqCeffFJxcXFKSEiQzWbTmDFj7P2zsrLUrVs3bd68Wd9++626dOmiHj16KC0t7Ya1PProozpz5oz+3//7f9q7d6+aNWumjh076ty5c0X2+SVOIQEA4NTWrl2rsmXL2p937dpV58+f1/PPP68hQ4ZIkqpXr66XX35ZkyZN0tSpU+19hw0bpn79+kmSJk+erIiICL300kuKioqSJD3zzDMaNmyYvX+TJk3UpEkT+/OXX35Zq1ev1ueff+4QdK7ZsWOHdu/erTNnzsjLy0uSNG/ePK1Zs0b//Oc/NWrUqEL8STgiwAAA4MTat2+vd9991/68TJkyaty4sXbu3Okw4pKXl6crV67o0qVL8vHxkSQ1btzYvj8wMFCS1KhRI4e2K1euyGq1ytfXV1lZWZo2bZrWrVunU6dOKTc3V5cvX77hCExSUpKysrJUsWJFh/bLly87nJoqCgQYAACcWJkyZVSzZk2HtqysLE2fPl19+vQp0L906dL2xx4eHvbHFovlhm35+fmSpOeee04bN27UvHnzVLNmTXl7e+uRRx5RTk7OdWvLyspScHCwtmzZUmCfv7//rX3AO0SAAQDAZJo1a6aUlJQCweZu7dy5U0OHDlXv3r0l/RZQfvjhh5vWkZ6erlKlSqlq1aqFWssfIcAAAGAyU6ZM0UMPPaQqVarokUcekZubm5KSkrR//3795S9/uePj1qpVS6tWrVKPHj1ksVj00ksv2UdnrqdTp06KiIhQr169NGfOHNWuXVs///yz1q1bp969e6tFixZ3XMsf4SokAABMJioqSmvXrtWGDRvUsmVL3X///XrjjTcUFhZ2V8d9/fXXVb58eT3wwAPq0aOHoqKi1KxZsx
v2t1gs+ve//622bdtq2LBhql27tvr3768TJ07Y59wUFYvNZrv9VXVMwGq1ys/PT5mZmfL19TW6HMC1bE24+2O0K7p
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"plot = dd.groupby([\"work_type\", \"gender\"]).size().unstack().plot.bar(color=[\"pink\", \"green\"])\n",
|
|||
|
"plot.legend([\"Male\", \"Female\"])"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": ".venv",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.5"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|