738 lines
132 KiB
Plaintext
738 lines
132 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Работа с NumPy"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 2,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"matrix = \n",
|
|||
|
" [[4 5 0]\n",
|
|||
|
" [9 9 9]] \n",
|
|||
|
"\n",
|
|||
|
"tmatrix = \n",
|
|||
|
" [[4 9]\n",
|
|||
|
" [5 9]\n",
|
|||
|
" [0 9]] \n",
|
|||
|
"\n",
|
|||
|
"vector = \n",
|
|||
|
" [4 5 0 9 9 9] \n",
|
|||
|
"\n",
|
|||
|
"tvector = \n",
|
|||
|
" [[4]\n",
|
|||
|
" [5]\n",
|
|||
|
" [0]\n",
|
|||
|
" [9]\n",
|
|||
|
" [9]\n",
|
|||
|
" [9]] \n",
|
|||
|
"\n",
|
|||
|
"list_matrix = \n",
|
|||
|
" [array([4, 5, 0]), array([9, 9, 9])] \n",
|
|||
|
"\n",
|
|||
|
"matrix as str = \n",
|
|||
|
" [[4 5 0]\n",
|
|||
|
" [9 9 9]] \n",
|
|||
|
"\n",
|
|||
|
"matrix type is <class 'numpy.ndarray'> \n",
|
|||
|
"\n",
|
|||
|
"vector type is <class 'numpy.ndarray'> \n",
|
|||
|
"\n",
|
|||
|
"list_matrix type is <class 'list'> \n",
|
|||
|
"\n",
|
|||
|
"str_matrix type is <class 'str'> \n",
|
|||
|
"\n",
|
|||
|
"formatted_vector = \n",
|
|||
|
" 4; 5; 0; 9; 9; 9 \n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import numpy as np\n",
"\n",
"\n",
"def _show(label, value):\n",
"    \"\"\"Print `label` and `value` separated by a space, followed by a blank line.\"\"\"\n",
"    print(label, value, \"\\n\")\n",
"\n",
"\n",
"# 2x3 integer matrix; every other object in this cell is derived from it.\n",
"matrix = np.array([[4, 5, 0], [9, 9, 9]])\n",
"_show(\"matrix = \\n\", matrix)\n",
"\n",
"# Transpose: rows and columns swap, giving a 3x2 array.\n",
"tmatrix = matrix.T\n",
"_show(\"tmatrix = \\n\", tmatrix)\n",
"\n",
"# Flatten row by row into a 1-D array of six elements.\n",
"vector = matrix.ravel()\n",
"_show(\"vector = \\n\", vector)\n",
"\n",
"# The same six values arranged as a 6x1 column.\n",
"tvector = vector.reshape((6, 1))\n",
"_show(\"tvector = \\n\", tvector)\n",
"\n",
"# Iterating a 2-D array yields its rows, so this is a list of 1-D arrays,\n",
"# not a nested Python list.\n",
"list_matrix = [row for row in matrix]\n",
"_show(\"list_matrix = \\n\", list_matrix)\n",
"\n",
"str_matrix = str(matrix)\n",
"_show(\"matrix as str = \\n\", str_matrix)\n",
"\n",
"_show(\"matrix type is\", type(matrix))\n",
"\n",
"_show(\"vector type is\", type(vector))\n",
"\n",
"_show(\"list_matrix type is\", type(list_matrix))\n",
"\n",
"_show(\"str_matrix type is\", type(str_matrix))\n",
"\n",
"# Convert each element with str() and join the pieces with \"; \".\n",
"formatted_vector = \"; \".join(str(item) for item in vector)\n",
"_show(\"formatted_vector = \\n\", formatted_vector)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Работа с Pandas DataFrame\n",
|
|||
|
"\n",
|
|||
|
"https://pandas.pydata.org/docs/user_guide/10min.html"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Работа с данными - чтение и запись CSV"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 3,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
"\n",
"# Load the 2020 population table; the \"no\" column becomes the row index.\n",
"df = pd.read_csv(\n",
"    filepath_or_buffer=\"data/world-population-by-country-2020.csv\",\n",
"    index_col=\"no\",\n",
")\n",
"\n",
"# Write the unmodified frame back out as CSV (the \"writing\" half of this section).\n",
"df.to_csv(path_or_buf=\"test.csv\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Работа с данными - основные команды"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 4,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" Country (or dependency) Population 2020 Yearly Change Net Change\n",
|
|||
|
"no \n",
|
|||
|
"1 China 1439323776 0.39 5540090\n",
|
|||
|
"2 India 1380004385 0.99 13586631\n",
|
|||
|
"3 United States 331002651 0.59 1937734\n",
|
|||
|
"4 Indonesia 273523615 1.07 2898047\n",
|
|||
|
"5 Pakistan 220892340 2.00 4327022\n",
|
|||
|
" Country (or dependency) Population 2020 Yearly Change Net Change\n",
|
|||
|
"no \n",
|
|||
|
"231 Montserrat 4992 0.06 3\n",
|
|||
|
"232 Falkland Islands 3480 3.05 103\n",
|
|||
|
"233 Niue 1626 0.68 11\n",
|
|||
|
"234 Tokelau 1357 1.27 17\n",
|
|||
|
"235 Holy See 801 0.25 2\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# NOTE(review): `clear` is not used anywhere in this cell and looks like an IDE\n",
"# auto-import; confirm that no later cell calls it, then delete this import.\n",
"from click import clear\n",
"\n",
"# Keep only the four columns used in the rest of this cell; every other column is dropped.\n",
"cleared_df = df.drop(\n",
"    columns=df.columns.difference([\n",
"        \"Country (or dependency)\", \"Population 2020\", \"Yearly Change\", \"Net Change\"\n",
"    ])\n",
")\n",
"\n",
"# Counts are stored as text with thousands separators (e.g. \"8,655,535\"):\n",
"# strip the commas and convert to 64-bit integers with vectorised string methods\n",
"# instead of a per-element Python lambda.\n",
"for column in [\"Population 2020\", \"Net Change\"]:\n",
"    cleared_df[column] = (\n",
"        cleared_df[column].str.replace(\",\", \"\", regex=False).astype(\"int64\")\n",
"    )\n",
"\n",
"# Percentages are stored as text with a trailing \"%\" (e.g. \"1.60%\").\n",
"cleared_df[\"Yearly Change\"] = (\n",
"    cleared_df[\"Yearly Change\"].str.rstrip(\"%\").astype(float)\n",
")\n",
"\n",
"# Largest population first; ties fall back to net change (descending), then\n",
"# to country name (ascending).\n",
"sorted_df = cleared_df.sort_values(\n",
"    by=[\"Population 2020\", \"Net Change\", \"Country (or dependency)\"],\n",
"    ascending=[False, False, True],\n",
")\n",
"print(sorted_df.head())\n",
"print(sorted_df.tail())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Работа с данными - работа с элементами"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 5,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"no\n",
|
|||
|
"1 China\n",
|
|||
|
"2 India\n",
|
|||
|
"3 United States\n",
|
|||
|
"4 Indonesia\n",
|
|||
|
"5 Pakistan\n",
|
|||
|
" ... \n",
|
|||
|
"231 Montserrat\n",
|
|||
|
"232 Falkland Islands\n",
|
|||
|
"233 Niue\n",
|
|||
|
"234 Tokelau\n",
|
|||
|
"235 Holy See\n",
|
|||
|
"Name: Country (or dependency), Length: 235, dtype: object\n",
|
|||
|
"Country (or dependency) Israel\n",
|
|||
|
"Population 2020 8,655,535\n",
|
|||
|
"Yearly Change 1.60%\n",
|
|||
|
"Net Change 136,158\n",
|
|||
|
"Density (P/Km²) 400\n",
|
|||
|
"Land Area (Km²) 21,640\n",
|
|||
|
"Migrants (net) 10,000\n",
|
|||
|
"Fert. Rate 3\n",
|
|||
|
"Med. Age 30\n",
|
|||
|
"Urban Pop % 93%\n",
|
|||
|
"World Share 0.11%\n",
|
|||
|
"Name: 100, dtype: object\n",
|
|||
|
"Israel\n",
|
|||
|
" Country (or dependency) Population 2020\n",
|
|||
|
"no \n",
|
|||
|
"100 Israel 8,655,535\n",
|
|||
|
"101 Switzerland 8,654,622\n",
|
|||
|
"102 Togo 8,278,724\n",
|
|||
|
"103 Sierra Leone 7,976,983\n",
|
|||
|
"104 Hong Kong 7,496,981\n",
|
|||
|
".. ... ...\n",
|
|||
|
"196 St. Vincent & Grenadines 110,940\n",
|
|||
|
"197 Aruba 106,766\n",
|
|||
|
"198 Tonga 105,695\n",
|
|||
|
"199 U.S. Virgin Islands 104,425\n",
|
|||
|
"200 Seychelles 98,347\n",
|
|||
|
"\n",
|
|||
|
"[101 rows x 2 columns]\n",
|
|||
|
" Country (or dependency) Population 2020 Yearly Change Net Change \\\n",
|
|||
|
"no \n",
|
|||
|
"1 China 1,439,323,776 0.39% 5,540,090 \n",
|
|||
|
"2 India 1,380,004,385 0.99% 13,586,631 \n",
|
|||
|
"3 United States 331,002,651 0.59% 1,937,734 \n",
|
|||
|
"\n",
|
|||
|
" Density (P/Km²) Land Area (Km²) Migrants (net) Fert. Rate Med. Age \\\n",
|
|||
|
"no \n",
|
|||
|
"1 153 9,388,211 -348,399 1.7 38 \n",
|
|||
|
"2 464 2,973,190 -532,687 2.2 28 \n",
|
|||
|
"3 36 9,147,420 954,806 1.8 38 \n",
|
|||
|
"\n",
|
|||
|
" Urban Pop % World Share \n",
|
|||
|
"no \n",
|
|||
|
"1 61% 18.47% \n",
|
|||
|
"2 35% 17.70% \n",
|
|||
|
"3 83% 4.25% \n",
|
|||
|
"Country (or dependency) China\n",
|
|||
|
"Population 2020 1,439,323,776\n",
|
|||
|
"Yearly Change 0.39%\n",
|
|||
|
"Net Change 5,540,090\n",
|
|||
|
"Density (P/Km²) 153\n",
|
|||
|
"Land Area (Km²) 9,388,211\n",
|
|||
|
"Migrants (net) -348,399\n",
|
|||
|
"Fert. Rate 1.7\n",
|
|||
|
"Med. Age 38\n",
|
|||
|
"Urban Pop % 61%\n",
|
|||
|
"World Share 18.47%\n",
|
|||
|
"Name: 1, dtype: object\n",
|
|||
|
" Country (or dependency) Population 2020\n",
|
|||
|
"no \n",
|
|||
|
"3 United States 331,002,651\n",
|
|||
|
"4 Indonesia 273,523,615\n",
|
|||
|
"5 Pakistan 220,892,340\n",
|
|||
|
" Country (or dependency) Yearly Change\n",
|
|||
|
"no \n",
|
|||
|
"4 Indonesia 1.07%\n",
|
|||
|
"5 Pakistan 2.00%\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Each selection below is printed in turn. Together they contrast label-based\n",
"# access (`[]` with a column name, `.loc`) with position-based access (`[]` with\n",
"# a slice, `.iloc`).\n",
"country_col = \"Country (or dependency)\"\n",
"population_col = \"Population 2020\"\n",
"\n",
"selections = (\n",
"    df[country_col],  # one column, by name\n",
"    df.loc[100],  # one row, by index label\n",
"    df.loc[100, country_col],  # one cell: row label + column name\n",
"    df.loc[100:200, [country_col, population_col]],  # label slice, end label included\n",
"    df[0:3],  # first three rows, by position\n",
"    df.iloc[0],  # first row, by position\n",
"    df.iloc[2:5, 0:2],  # rows 2-4 and columns 0-1, by position\n",
"    df.iloc[[3, 4], [0, 2]],  # explicit row and column positions\n",
")\n",
"for selection in selections:\n",
"    print(selection)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Работа с данными - отбор и группировка"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 6,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" Population 2020 Yearly Change Net Change \\\n",
|
|||
|
"Country (or dependency) \n",
|
|||
|
"China 1439323776 0.39 5540090 \n",
|
|||
|
"India 1380004385 0.99 13586631 \n",
|
|||
|
"United States 331002651 0.59 1937734 \n",
|
|||
|
"Indonesia 273523615 1.07 2898047 \n",
|
|||
|
"Pakistan 220892340 2.00 4327022 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"Montserrat 4992 0.06 3 \n",
|
|||
|
"Falkland Islands 3480 3.05 103 \n",
|
|||
|
"Niue 1626 0.68 11 \n",
|
|||
|
"Tokelau 1357 1.27 17 \n",
|
|||
|
"Holy See 801 0.25 2 \n",
|
|||
|
"\n",
|
|||
|
" Capital Continent \n",
|
|||
|
"Country (or dependency) \n",
|
|||
|
"China Beijing Asia \n",
|
|||
|
"India New Delhi Asia \n",
|
|||
|
"United States Washington, D.C. North America \n",
|
|||
|
"Indonesia Jakarta Asia \n",
|
|||
|
"Pakistan Islamabad Asia \n",
|
|||
|
"... ... ... \n",
|
|||
|
"Montserrat Brades North America \n",
|
|||
|
"Falkland Islands Stanley South America \n",
|
|||
|
"Niue Alofi Oceania \n",
|
|||
|
"Tokelau Nukunonu Oceania \n",
|
|||
|
"Holy See NaN NaN \n",
|
|||
|
"\n",
|
|||
|
"[235 rows x 5 columns]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Lookup table indexed by country name; the file is read as ISO-8859-1.\n",
"df2 = pd.read_csv(\n",
"    filepath_or_buffer=\"data/countries-continents-capitals.csv\",\n",
"    index_col=\"Country/Territory\",\n",
"    encoding=\"ISO-8859-1\",\n",
")\n",
"\n",
"# Re-index the cleaned population table by country name and left-join the lookup\n",
"# table on that index; countries missing from the lookup get NaN in the new columns.\n",
"extended_df = cleared_df.set_index(keys=\"Country (or dependency)\").join(\n",
"    other=df2, how=\"left\"\n",
")\n",
"print(extended_df)\n",
"\n",
"# NOTE(review): a per-continent aggregation was left disabled at the end of this\n",
"# cell; it referenced a \"population\" column, while the joined frame's column is\n",
"# \"Population 2020\"."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Визуализация - Исходные данные"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 7,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" Population 2020 Yearly Change Continent\n",
|
|||
|
"Country (or dependency) \n",
|
|||
|
"China 1439323776 0.39 Asia\n",
|
|||
|
"India 1380004385 0.99 Asia\n",
|
|||
|
"United States 331002651 0.59 North America\n",
|
|||
|
"Indonesia 273523615 1.07 Asia\n",
|
|||
|
"Pakistan 220892340 2.00 Asia\n",
|
|||
|
"... ... ... ...\n",
|
|||
|
"Montserrat 4992 0.06 North America\n",
|
|||
|
"Falkland Islands 3480 3.05 South America\n",
|
|||
|
"Niue 1626 0.68 Oceania\n",
|
|||
|
"Tokelau 1357 1.27 Oceania\n",
|
|||
|
"Holy See 801 0.25 NaN\n",
|
|||
|
"\n",
|
|||
|
"[235 rows x 3 columns]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Keep three columns and drop rows without a population value. Written as one\n",
"# chain so no frame is mutated in place; the trailing copy() keeps `data`\n",
"# independent of `extended_df`.\n",
"data = (\n",
"    extended_df[[\"Population 2020\", \"Yearly Change\", \"Continent\"]]\n",
"    .dropna(subset=[\"Population 2020\"])\n",
"    .copy()\n",
")\n",
"print(data)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Визуализация - Сводка пяти чисел\n",
|
|||
|
"\n",
|
|||
|
"<img src=\"assets/quantile.png\" width=\"400\" style=\"background-color: white\">"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 13,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" Population 2020 \\\n",
|
|||
|
" min q1 q2 median \n",
|
|||
|
"Continent \n",
|
|||
|
"Africa 98347 2509845.75 13042506.5 13042506.5 \n",
|
|||
|
"Asia 437479 5985985.50 18138682.5 18138682.5 \n",
|
|||
|
"Europe 33691 1326535.00 5459642.0 5459642.0 \n",
|
|||
|
"North America 4992 67288.00 395436.0 395436.0 \n",
|
|||
|
"Oceania 1357 27368.25 144112.0 144112.0 \n",
|
|||
|
"South America 3480 1458346.50 14658037.5 14658037.5 \n",
|
|||
|
"\n",
|
|||
|
" \n",
|
|||
|
" q3 max \n",
|
|||
|
"Continent \n",
|
|||
|
"Africa 31118563.75 206139589 \n",
|
|||
|
"Asia 52054338.75 1439323776 \n",
|
|||
|
"Europe 10423054.00 145934462 \n",
|
|||
|
"North America 6589966.75 331002651 \n",
|
|||
|
"Oceania 488471.75 25499884 \n",
|
|||
|
"South America 31837875.50 212559417 \n",
|
|||
|
" Population 2020 \n",
|
|||
|
" low_iqr iqr high_iqr\n",
|
|||
|
"Continent \n",
|
|||
|
"Africa 0 28608718.00 7.403164e+07\n",
|
|||
|
"Asia 0 46068353.25 1.211569e+08\n",
|
|||
|
"Europe 0 9096519.00 2.406783e+07\n",
|
|||
|
"North America 0 6522678.75 1.637398e+07\n",
|
|||
|
"Oceania 0 461103.50 1.180127e+06\n",
|
|||
|
"South America 0 30379529.00 7.740717e+07\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<Axes: title={'center': 'Population 2020'}, xlabel='Continent'>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 13,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjwAAAHNCAYAAAAaKaG7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABrVklEQVR4nO3de1zO9/8/8MfVVV2dRHQmlUNIUTKJkZAYTcPmNGJjNmxIQzbHbc6nfT9OHxtyZk7ZMLQmchpGm7OisKkckw5yVa/fH37X++PS8aor1eVxv926bdfr/Xq/3q/363rX++n1er3fL5kQQoCIiIhIh+lVdAWIiIiIyhsDHiIiItJ5DHiIiIhI5zHgISIiIp3HgIeIiIh0HgMeIiIi0nkMeIiIiEjnMeAhIiIinceAh4iIiHQeAx6iciSTyTB9+vSKrobOi46OhkwmQ3R0dJH5pk+fDplMhgcPHryeilUyiYmJkMlkCA8Pr+iqEL12DHioSgoPD4dMJlP7sba2hp+fH3799deKrl6ZXb58GdOnT0diYmJFV4XKYPfu3ejWrRssLS1haGgIe3t7fPDBB/j999/L9bibN2/GkiVLyvUY2sDrnF4n/YquAFFZzJw5E87OzhBCICUlBeHh4XjnnXfwyy+/oEePHhVdvVK7fPkyZsyYgQ4dOsDJyamiq0MaEkLgo48+Qnh4ODw9PRESEgJbW1skJSVh9+7d6NSpE44fP442bdqUy/E3b96MixcvYuzYsWrpjo6OyMrKgoGBQbkcV1O8zul1YsBDVVq3bt3QsmVL6fPHH38MGxsbbNmypUoHPK9TTk4O8vLyYGhoWNFV0RkLFy5EeHg4xo4di0WLFkEmk0nbvvrqK2zYsAH6+q//z69MJoORkdFrPy5RZcAhLdIpNWrUgLGxcb6bSUZGBsaPHw8HBwcoFAo0atQICxYsgBACAJCVlYXGjRujcePGyMrKkvZ79OgR7Ozs0KZNG+Tm5gIAhgwZAjMzM9y8eRMBAQEwNTWFvb09Zs6cKZVXlPPnz6Nbt24wNzeHmZkZOnXqhFOnTknbw8PD8f777wMA/Pz8pCG74uanbN++Ha6urjAyMoKbmxt2796NIUOGqP3LWTWHY8GCBViyZAnq168PhUKBy5cvAwB+//13tGvXDqampqhRowZ69uyJK1euqB3n1TJVVPNjXiaTyTB69Ghs2rQJjRo1gpGREby8vHD06NF8+//777/46KOPYGNjA4VCgaZNm2LNmjX58v3zzz8ICgqCqakprK2tMW7cOGRnZxfZNq968OABPvjgA5ibm6NWrVoYM2YMnj17Jm339fVF8+bNC9y3UaNGCAgIKLTsrKwszJ49G40bN8aCBQvytQkADBo0CK1atZI+37x5E++//z5q1qwJExMTtG7dGvv27VPbRzVP6aeffsJ3332HOnXqwMjICJ06dUJ8fLyUr0OHDti3bx9u3bolXTuq76ugOTyq6/nff/9FUFAQzMzMYGVlhdDQUOmaV8nLy8OSJUvQtGlTGBkZwcbGBiNGjMDjx4/V8jk5OaFHjx44duwYWrVqBSMjI9SrVw/r16+X8pT2OicqNUFUBa1du1YAEL/99pu4f/++uHfvnrh48aIYMWKE0NPTE4cOHZLy5uXliY4dOwqZTCaGDRsmli5dKgIDAwUAMXbsWCnfqVOnhFwuF+PGjZPS+vXrJ4yNjcW1a9ektODgYGFkZCQaNmwoBg0aJJYuXSp69OghAIgpU6ao1ROAmDZtmvT54sWLwtTUVNjZ2YlvvvlGzJkzRzg7OwuFQiFOnTolhBDixo0b4osvvhAAxOTJk8WGDRvEhg0bRHJycqHtsXfvXiGTyUSzZs3EokWLxJQpU4SFhYVwc3MTjo6OUr6EhAQBQLi6uop69eqJOXPmiMWLF4tbt26JyMhIoa+vL1xcXMS8efPEjBkzhKWlpbCwsBAJCQlq5/9ymSrTpk0Tr/5JASDc3NyEpa
WlmDlzppg7d65wdHQUxsbG4sKFC1K+5ORkUadOHeHg4CBmzpwpVqxYId59910BQCxevFjKl5mZKVxcXISRkZGYMGGCWLJkifDy8hLNmjUTAMThw4cLbaOX6+ju7i4CAwPF0qVLxYcffigAiEGDBkn5fvjhBwFArY5CCHH69GkBQKxfv77QYxw6dEgAEDNnziyyLi+fu42NjahWrZr46quvxKJFi0Tz5s2Fnp6e2LVrl5Tv8OHDAoDw9PQUXl5eYvHixWL69OnCxMREtGrVSu34Hh4ewtLSUrp2du/eLYT43/e/du1aKb/qem7atKn46KOPxIoVK0Tv3r0FALF8+XK1ug4bNkzo6+uL4cOHi5UrV4qJEycKU1NT8dZbb4nnz59L+RwdHUWjRo2EjY2NmDx5sli6dKlo0aKFkMlk4uLFi0KI0l3nRGXBgIeqJFXA8+qPQqEQ4eHhankjIiIEAPHtt9+qpffp00fIZDIRHx8vpYWFhQk9PT1x9OhRsX37dgFALFmyRG2/4OBgAUB8/vnnUlpeXp7o3r27MDQ0FPfv35fSXw14goKChKGhobhx44aUdvfuXVGtWjXRvn17KU117OJu4Cru7u6iTp064unTp1JadHS0AFBgwGNubi7u3bunVoaHh4ewtrYWDx8+lNL++usvoaenJwYPHqx2/poEPADE2bNnpbRbt24JIyMj8d5770lpH3/8sbCzsxMPHjxQ279fv36ievXqIjMzUwghxJIlSwQA8dNPP0l5MjIyRIMGDTQKeN5991219JEjRwoA4q+//hJCCJGamiqMjIzExIkT1fJ98cUXwtTUVKSnpxd6jO+//14AkIKM4owdO1YAEDExMVLa06dPhbOzs3BychK5ublCiP8FPE2aNBHZ2dn5jvdycNa9e/cCv6PCAp6CAjRVYKUSExMjAIhNmzap5Ttw4EC+dEdHRwFAHD16VEq7d++eUCgUYvz48VKaptc5UVlwSIuqtGXLliEyMhKRkZHYuHEj/Pz8MGzYMOzatUvKs3//fsjlcnzxxRdq+44fPx5CCLWnuqZPn46mTZsiODgYI0eOhK+vb779VEaPHi39v2ro5vnz5/jtt98KzJ+bm4tDhw4hKCgI9erVk9Lt7OwwYMAAHDt2DGlpaRq3wd27d3HhwgUMHjwYZmZmUrqvry/c3d0L3Kd3796wsrKSPiclJSE2NhZDhgxBzZo1pfRmzZrB398f+/fv17heKj4+PvDy8pI+161bFz179sTBgweRm5sLIQR27tyJwMBACCHw4MED6ScgIABPnjzBuXPnALz4Lu3s7NCnTx+pPBMTE3zyySca1WnUqFFqnz///HOpfACoXr06evbsiS1btkjDlLm5udi2bZs0nFYY1XdYrVq1EtVl//79aNWqFd5++20pzczMDJ988gkSExOl4UaVoUOHqs23ateuHYAXw2Jl8emnn6p9bteunVqZ27dvR/Xq1eHv76/2HXl5ecHMzAyHDx9W29/V1VWqGwBYWVmhUaNGZa4nUWkx4KEqrVWrVujcuTM6d+6MgQMHYt++fXB1dZWCDwC4desW7O3t892AmjRpIm1XMTQ0xJo1a5CQkICnT59i7dq1Bc7B0NPTUwtaAMDFxQUACn3E9v79+8jMzESjRo3ybWvSpAny8vJw586dkp/8/6eqf4MGDfJtKygNAJydnQsso7C6PXjwABkZGRrXDQAaNmyYL83FxQWZmZm4f/8+7t+/j9TUVKxatQpWVlZqP0OHDgUA3Lt3T6pngwYN8n0nBdVbkzrVr18fenp6at/d4MGDcfv2bcTExAAAfvvtN6SkpGDQoEFFlm1ubg4AePr0aYnqcuvWrULbXbX9ZXXr1lX7bGFhAQD55tFowsjISC0AVpX7cplxcXF48uQJrK2t831P6enp0ndUWD0LKpPodeJTWqRT9PT04Ofnh++//x5xcXFo2rSpxmUcPHgQAPDs2TPExcXlCw50gbGxcan3LSgABJBvgm
tJ5eXlAQA+/PBDBAcHF5inWbNmpSq7pAo6p4CAANjY2GDjxo1o3749Nm7cCFtbW3Tu3LnIsho3bgwAuHDhAoKCgrR
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"def q1(x):\n",
"    \"\"\"Return the 0.25 quantile of `x` (the first quartile), via `x.quantile`.\"\"\"\n",
"    return x.quantile(q=0.25)\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"# The median is the 0.5 quantile, so q2 matches the built-in \"median\" aggregation.\n",
"def q2(x):\n",
"    \"\"\"Return the 0.5 quantile of `x` (the second quartile), via `x.quantile`.\"\"\"\n",
"    return x.quantile(q=0.5)\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"def q3(x):\n",
"    \"\"\"Return the 0.75 quantile of `x` (the third quartile), via `x.quantile`.\"\"\"\n",
"    return x.quantile(q=0.75)\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"def iqr(x):\n",
"    \"\"\"Return the interquartile range of `x`: third quartile minus first quartile.\"\"\"\n",
"    lower_quartile = q1(x)\n",
"    upper_quartile = q3(x)\n",
"    return upper_quartile - lower_quartile\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"def low_iqr(x):\n",
"    \"\"\"Return the lower fence of `x` (first quartile minus 1.5 * IQR), floored at 0.\"\"\"\n",
"    fence = q1(x) - 1.5 * iqr(x)\n",
"    # Same result as max(0, fence): a non-positive fence is replaced by 0.\n",
"    return fence if fence > 0 else 0\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"def high_iqr(x):\n",
"    \"\"\"Return the upper fence of `x`: third quartile plus 1.5 * IQR.\"\"\"\n",
"    spread = iqr(x)\n",
"    return q3(x) + 1.5 * spread\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"# Group once by continent and reuse the grouping for both summary tables.\n",
"population_by_continent = data[[\"Continent\", \"Population 2020\"]].groupby([\"Continent\"])\n",
"\n",
"# Minimum, quartiles and maximum per continent; q2 and \"median\" are shown side by side.\n",
"quantiles = population_by_continent.agg([\"min\", q1, q2, \"median\", q3, \"max\"])\n",
"print(quantiles)\n",
"\n",
"# Interquartile range and the 1.5 * IQR fences per continent.\n",
"iqrs = population_by_continent.agg([low_iqr, iqr, high_iqr])\n",
"print(iqrs)\n",
"\n",
"# One box per continent; kept as the last expression so the Axes repr is displayed.\n",
"data.boxplot(by=\"Continent\", column=\"Population 2020\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Визуализация - Гистограмма"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 9,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<Axes: ylabel='Frequency'>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 9,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAGvCAYAAAC9yRSTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA4D0lEQVR4nO3de1yUZf7/8fcgB/EAiApIgZJ5TFPzFKklSuEhV9PdNM3TWq6bmIpWumVkWqipmaayta7o5qEs9WtaluFpS7TEQ+kaah6wdNAyQHBFhPv3hw/nt5OHYBiY4fb1fDzux6O57muu+VzCMu+95rrnthiGYQgAAMCkPFxdAAAAQGki7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFPzdHUB7qCwsFCnT59W1apVZbFYXF0OAAAoAsMwdOHCBYWGhsrD4+brN4QdSadPn1ZYWJirywAAAA44deqU7rzzzpueJ+xIqlq1qqSr/1h+fn4urgYAABRFdna2wsLCbO/jN0PYkWwfXfn5+RF2AAAoZ35vCwoblAEAgKkRdgAAgKnxMRYA4JYKCgqUn5/v6jJwG/Ly8lKFChVKPA5hBwBwQ4ZhyGq1KjMz09Wl4DYWEBCgkJCQEn01DGEHAHBD14JOUFCQKlWqxPeQoUwZhqGLFy/q7NmzkqRatWo5PBZhBwBwnYKCAlvQqV69uqvLwW3K19dXknT27FkFBQU5/JEWG5QBANe5tkenUqVKLq4Et7trv4Ml2TdG2AEA3BQfXcHVnPE7SNgBAACmRtgBAKAUdOzYUWPGjHGbcW5nbFAGABRLnQkbyvT1TkzrXqz+Q4YM0ZIlSyRd/Z6W8PBwDRo0SH/729/k6em+b3tbt25VVFSUfv31VwUEBNjaV69eLS8vr1J97f3792vatGn68ssv9fPPP6tOnToaMWKERo8efV2NcXFxOnjwoMLCwvTSSy9pyJAhtvMJCQlavXq1vv/+e/n6+uqBBx7Q9OnT1aBBA1ufS5cuady4cVq5cqXy8vIUExOjBQsWKDg4uNTmx8oOAMB0unTpojNnzujIkSMaN26cXnnlFb3xxhuuLsshgYGBv3ujy5JKTU1VUFCQ3nvvPR08eFAvvviiJk6cqLffftvW5/jx4+revbuioqK0b98+jRkzRk899ZQ+++wzW59t27Zp5MiR2rlzpzZt2qT8/Hw98sgjys3NtfUZO3asPv74Y61atUrbtm3T6dOn1bt371KdH2EHAGA6Pj4+CgkJUe3atfXXv/5V0dHRWrdunSTp119/1aBBg1StWjVVqlRJXbt21ZEjR2zPTUpKUkBAgNauXat69eqpYsWKiomJ0alTp2x9hgwZol69etm95pgxY9SxY8eb1vSvf/1LrVq1UtWqVRUSEqL+/fvbvkPmxIkTioqKkiRVq1ZNFovFtmLy24+xilr/Z599pkaNGqlKlSq28Hczf/7zn/XWW2/poYce0l133aUnn3xSQ4cO1erVq219EhMTFRERoVmzZqlRo0aKjY3VH//4R7355pu2Phs3btSQIUN0zz33qFmzZkpKSlJ6erpSU1MlSVlZWVq0aJFmz56tTp06qWXLllq8eLF27NihnTt33rS+kiLsAABMz9fXV5cvX5Z0Najs3r1b69atU0pKigzDULdu3ewubb548aJee+01LV26VF999ZUyMzPVr1+/EtWQn5+vKVOmaP/+/Vq7dq1OnDhhCzRhYWH66KOPJElpaWk6c+aM3nrrrRuOU9T6Z86cqX/961/avn270tPTNX78+GLVm5WVpcDAQNvjlJQURUdH2/WJiYlRSkrKLceQZBsnNTVV+fn5duM0bNhQ4eHhtxynpNz3w0uTuNln28X9DBoAUHyGYSg5OVmfffaZRo0apSNHjmjdunX66quv9MADD0iSli1bprCwMK1du1Z/+tOfJF0NJm+//bbatm0rSVqyZIkaNWqkr7/+Wm
3atHGolj//+c+2/77rrrs0d+5ctW7dWjk5OapSpYotEAQFBdnt2flfxak/MTFRdevWlSTFxsbq1VdfLXKtO3bs0Pvvv68NG/7/e5jVar1uX01wcLCys7P13//+1/YFgNcUFhZqzJgxateunZo0aWIbw9vb+7r5BQcHy2q1Frm+4mJlBwBgOuvXr1eVKlVUsWJFde3aVX379tUrr7yiQ4cOydPT0xZiJKl69epq0KCBDh06ZGvz9PRU69atbY8bNmyogIAAuz7FlZqaqh49eig8PFxVq1bVQw89JElKT08v8hhFrb9SpUq2oCNdvdXCtY/Mfs+BAwfUs2dPxcfH65FHHilybb81cuRIHThwQCtXrnR4DGdhZQcAYDpRUVFauHChvL29FRoa6vSrsDw8PGQYhl3brb7hNzc3VzExMYqJidGyZctUs2ZNpaenKyYmxvbxmjP99uoti8VyXb038p///EedO3fW8OHD9dJLL9mdCwkJUUZGhl1bRkaG/Pz8rlvViY2N1fr167V9+3bdeeeddmNcvnxZmZmZdqs7GRkZCgkJKer0io2VHQCA6VSuXFl33323wsPD7YJOo0aNdOXKFe3atcvW9ssvvygtLU2NGze2tV25ckW7d++2PU5LS1NmZqYaNWokSapZs+Z1G3737dt303q+//57/fLLL5o2bZo6dOighg0bXrfS4u3tLenqfclupqj1O+LgwYOKiorS4MGD9dprr113PjIyUsnJyXZtmzZtUmRkpO2xYRiKjY3VmjVrtHnzZkVERNj1b9mypby8vOzGSUtLU3p6ut04zkbYAQDcNurVq6eePXvq6aef1pdffqn9+/frySef1B133KGePXva+nl5eWnUqFHatWuXUlNTNWTIEN1///22/TqdOnXS7t27tXTpUh05ckTx8fE6cODATV83PDxc3t7emjdvno4dO6Z169ZpypQpdn1q164ti8Wi9evX69y5c8rJyXG4/uI6cOCAoqKi9MgjjyguLk5Wq1VWq1Xnzp2z9RkxYoSOHTum559/Xt9//70WLFigDz74QGPHjrX1GTlypN577z0tX75cVatWtY3z3//+V5Lk7++vYcOGKS4uTlu2bFFqaqqGDh2qyMhI3X///Q7X/3sIOwCA28rixYvVsmVLPfroo4qMjJRhGPrkk0/sPvqpVKmSXnjhBfXv31/t2rVTlSpV9P7779vOx8TEaNKkSXr++efVunVrXbhwQYMGDbrpa9asWVNJSUlatWqVGjdurGnTpmnmzJl2fe644w5NnjxZEyZMUHBwsGJjYx2uv7g+/PBDnTt3Tu+9955q1aplO/5331JERIQ2bNigTZs2qVmzZpo1a5b+8Y9/KCYmxtZn4cKFysrKUseOHe3G+d9/uzfffFOPPvqo+vTpowcffFAhISF2l7iXBotRlA/xTC47O1v+/v7KysqSn5+fU8fmaiwA5dGlS5d0/PhxRUREqGLFiq4up0wlJSVpzJgxyszMdHUp0K1/F4v6/s3KDgAAMDXCDgAAMDXCDgAA/2PIkCF8hGUyhB0AAGBqhB0AAGBqhB0AwE1xwS5czRm/g4QdAMB1rn1ny8WLF11cCW53134HS/I9QtwbCwBwnQoVKiggIMB2S4NKlSrJYrG4uCrcTgzD0MWLF3X27FkFBASoQoUKDo9F2AEA3NC1GzMW9W7ZQGkICAgo8U1CCTsAgBuyWCyqVauWgoKCbnlHb6C0eHl5lWhF5xqXhp3t27frjTfeUGpqqs6cOaM1a9aoV69edn0OHTqkF154Qdu2bdOVK1fUuHFjffTRRwoPD5d09Wukx40bp5UrVyovL08xMTFasGCBgoODXTAjADCfChUqOOUNB3AVl25Qzs3NVbNmzTR//vwbnv/hhx/Uvn17NWzYUFu3btW3336rSZMm2d0bY+zYsfr444+1atUqbdu2TadPn1bv3r3LagoAAMDNuXRlp2vXruratetNz7/44ovq1q2bZsyYYWurW7
eu7b+zsrK0aNEiLV++XJ06dZJ09W6wjRo10s6dO0v1dvEAAKB8cNtLzwsLC7VhwwbVr19fMTExCgoKUtu2bbV27Vp
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Histogram of the \"Population 2020\" column using 80 bins.\n",
"data.plot.hist(bins=80, column=[\"Population 2020\"])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Визуализация - Точечная диаграмма"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 14,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" Country (or dependency) Population 2020 Yearly Change Net Change\n",
|
|||
|
"no \n",
|
|||
|
"1 China 1439323776 0.39 5540090\n",
|
|||
|
"2 India 1380004385 0.99 13586631\n",
|
|||
|
"3 United States 331002651 0.59 1937734\n",
|
|||
|
"4 Indonesia 273523615 1.07 2898047\n",
|
|||
|
"5 Pakistan 220892340 2.00 4327022\n",
|
|||
|
".. ... ... ... ...\n",
|
|||
|
"231 Montserrat 4992 0.06 3\n",
|
|||
|
"232 Falkland Islands 3480 3.05 103\n",
|
|||
|
"233 Niue 1626 0.68 11\n",
|
|||
|
"234 Tokelau 1357 1.27 17\n",
|
|||
|
"235 Holy See 801 0.25 2\n",
|
|||
|
"\n",
|
|||
|
"[235 rows x 4 columns]\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<Axes: xlabel='Country (or dependency)', ylabel='Population 2020'>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 14,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAj0AAAHACAYAAABJddlbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABFRklEQVR4nO3deVxWZf7/8fcNCogIgigukTvmimjpqLlTmBNp0+JXHUWzGsutzCmdcm0hM/c0p2VcmkatXGq0tELNJZdcUDNUXDEFBTcEFQ2u3x/9vMc7UW/kxhs8r+fjcT8e3te5zjmfcw5wvz3nOue2GWOMAAAA7nAe7i4AAADgdiD0AAAASyD0AAAASyD0AAAASyD0AAAASyD0AAAASyD0AAAASyD0AAAASyD0AAAASyD0AAAAS7B06Fm9erWio6NVsWJF2Ww2LV68OM/L+Oyzz9SwYUP5+vqqcuXKGjdunOsLBQAA+Wbp0JOZmanw8HBNmzbtlub/5ptv1L17d/Xt21c///yzpk+frokTJ+q9995zcaUAACC/bHzh6O9sNpsWLVqkzp0729uysrL06quvau7cuTpz5ozq1aunsWPHqk2bNpKkbt266fLly/r888/t80ydOlXvvPOOkpKSZLPZbvNWAACA67H0mZ6b6d+/v9avX6958+Zpx44deuKJJ9ShQwclJiZK+j0U+fj4OMxTokQJ/frrrzp8+LA7SgYAANdB6LmOpKQkzZw5U59//rlatmyp6tWra8iQIbr//vs1c+ZMSVJUVJQWLlyouLg45eTkaO/evRo/frwkKTk52Z3lAwCAPyjm7gIKq507dyo7O1thYWEO7VlZWSpTpowk6ZlnntH+/fv18MMP6/Lly/L399egQYM0atQoeXiQJwEAKEwIPdeRkZEhT09PbdmyRZ6eng7T/Pz8JP0+Dmjs2LF66623lJKSorJlyyouLk6SVK1atdteMwAAuD5Cz3VEREQoOztbJ06cUMuWLW/Y19PTU5UqVZIkzZ07V82aNVPZsmVvR5kAAMBJlg49GRkZ2rdvn/39wYMHFR8fr6CgIIWFhal79+7q2bOnxo8fr4iICKWmpiouLk4NGjTQn//8Z6WlpemLL75QmzZtdPHiRfsYoB9++MGNWwUAAHJj6VvWV61apbZt217THhMTo1mzZuny5ct64403NGfOHB09elTBwcH605/+pNGjR6t+/fpKS0tTdHS0du7cKWOMmjVrpjfffFNNmzZ1w9YAAIAbsXToAQAA1sEtRgAAwBIIPQAAwBIsN5A5JydHx44dU6lSpfiaCAAAighjjM6dO6eKFSve8rPwLBd6jh07ptDQUHeXAQAAbsGRI0d011133dK8lgs9pUqVkvT7TvP393dzNQAAwBnp6ekKDQ21f47fCsuFniuXtPz9/Qk9AAAUMfkZmsJAZgAAYAmEHgAAYAmEHgAAYAmEHgAAYAmEHgAAYAmEHgAAYAmEHgAAYAmEHgAAYAmEHgAAYAmEHgAAYAmW+xqKgnQgNUOHT51XlTIlVTW4pLvLAQAAVyH0uMCZ85c0cG68Viem2tta1SyrqV0jFOBb3I2VAQCAK7i85QID58Zr3b40h7Z1+9I0YO42N1UEAAD+iNCTTwdSM7Q6MVXZxji0Zxuj1YmpOpiW6abKAADA1Qg9+XT41PkbTj90ktADAEBhQOjJp8pBvjecXqUMA5oBACgMCD35VK2sn1rVLCtPm82h3dNmU6uaZbmLCwCAQoLQ4wJTu0aoRY1gh7YWNYI1tWuEmyoCAAB/xC3rLhDgW1xz+jTRwbRMHTqZyXN6AAAohAg9LlQ1mLADAEBh5dbLW6tXr1Z0dLQqVqwom82mxYsXOz3vunXrVKxYMTVs2LDA6gMAAHcOt4aezMxMhYeHa9q0aXma78yZM+rZs6fat29fQJUBAIA7jVsvbz300EN66KGH8jxf37591a1bN3l6eubp7BAAALCuInf31syZM3
XgwAGNHDnS3aUAAIAipEgNZE5MTNTQoUO1Zs0aFSvmXOlZWVnKysqyv09PTy+o8gAAQCFWZM70ZGdnq1u3bho9erTCwsKcni82NlYBAQH2V2hoaAFWCQAACiubMX/4pkw3sdlsWrRokTp37pzr9DNnzigwMFCenp72tpycHBlj5OnpqW+//Vbt2rW7Zr7czvSEhobq7Nmz8vf3d/l2oGg7kJqhw6fO86wlAChk0tPTFRAQkK/P7yJzecvf3187d+50aJs+fbpWrFihL774QlWrVs11Pm9vb3l7e9+OElGEnTl/SQPnxmt1Yqq9rVXNspraNUIBvsXdWBkAwFXcGnoyMjK0b98++/uDBw8qPj5eQUFBuvvuuzVs2DAdPXpUc+bMkYeHh+rVq+cwf7ly5eTj43NNO5BXA+fGa92+NIe2dfvSNGDuNs3p08RNVQEAXMmtY3o2b96siIgIRUT8/h1VgwcPVkREhEaMGCFJSk5OVlJSkjtLhAUcSM3Q6sRUZf/hSm+2MVqdmKqDaZluqgwA4EqFZkzP7eKKa4K4s6zcc0K9Z/503ekze9+ntrXK3caKAAB/5IrP7yJz9xZQUCoH+d5wepUyDGgGgDsBoQeWV62sn1rVLCtPm82h3dNmU6uaZbmLCwDuEIQeQNLUrhFqUSPYoa1FjWBN7RrhpooAAK5WZG5ZBwpSgG9xzenTRAfTMnXoZCbP6QGAOxChB7hK1WDCDgDcqbi8BQAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALMGtoWf16tWKjo5WxYoVZbPZtHjx4hv2X7hwoR544AGVLVtW/v7+atasmZYvX357igUAAEWaW0NPZmamwsPDNW3aNKf6r169Wg888IC+/vprbdmyRW3btlV0dLS2bdtWwJUCAICizmaMMe4uQpJsNpsWLVqkzp0752m+unXrqkuXLhoxYoRT/dPT0xUQEKCzZ8/K39//FioFAAC3mys+v4v0mJ6cnBydO3dOQUFB7i4FAAAUcsXcXUB+vPvuu8rIyNCTTz553T5ZWVnKysqyv09PT78dpQEAgEKmyJ7p+c9//qPRo0frs88+U7ly5a7bLzY2VgEBAfZXaGjobawSAAAUFkUy9MybN09PP/20PvvsM0VGRt6w77Bhw3T27Fn768iRI7epSgAAUJgUuctbc+fO1VNPPaV58+bpz3/+8037e3t7y9vb+zZUBgAACjO3hp6MjAzt27fP/v7gwYOKj49XUFCQ7r77bg0bNkxHjx7VnDlzJP1+SSsmJkaTJ09W06ZNlZKSIkkqUaKEAgIC3LINAACgaHDr5a3NmzcrIiJCERERkqTBgwcrIiLCfvt5cnKykpKS7P0/+OAD/fbbb+rXr58qVKhgfw0aNMgt9QMAgKKj0Dyn53bhOT0AABQ9ln9ODwAAgLMIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwB
IIPQAAwBIIPQAAwBKK5XWGTZs2af369UpJSZEklS9fXs2aNVOTJk1cXhwAAICrOB16Tpw4occee0zr1q3T3XffrZC
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"print(cleared_df)\n",
"\n",
"# Scatter of population against country name for the first five rows of the table;\n",
"# kept as the last expression so the Axes repr is displayed.\n",
"first_five = cleared_df.head(n=5)\n",
"first_five.plot.scatter(y=\"Population 2020\", x=\"Country (or dependency)\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Визуализация - Столбчатая диаграмма"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 11,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# plot = data.groupby([\"Pclass\", \"Survived\"]).size().unstack().plot.bar(color=[\"pink\", \"green\"])\n",
|
|||
|
"# plot.legend([\"Not survived\", \"Survived\"])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Визуализация - Временные ряды"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 12,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" Year Population Yearly % Yearly Median Fertility Density\n",
|
|||
|
"0 2020 7,794,798,739 1.10% 83,000,320 31 2.47 52\n",
|
|||
|
"1 2025 8,184,437,460 0.98% 77,927,744 32 2.54 55\n",
|
|||
|
"2 2030 8,548,487,400 0.87% 72,809,988 33 2.62 57\n",
|
|||
|
"3 2035 8,887,524,213 0.78% 67,807,363 34 2.70 60\n",
|
|||
|
"4 2040 9,198,847,240 0.69% 62,264,605 35 2.77 62\n",
|
|||
|
"5 2045 9,481,803,274 0.61% 56,591,207 35 2.85 64\n",
|
|||
|
"6 2050 9,735,033,990 0.53% 50,646,143 36 2.95 65\n",
|
|||
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|||
|
"RangeIndex: 7 entries, 0 to 6\n",
|
|||
|
"Data columns (total 7 columns):\n",
|
|||
|
" # Column Non-Null Count Dtype \n",
|
|||
|
"--- ------ -------------- ----- \n",
|
|||
|
" 0 Year 7 non-null int64 \n",
|
|||
|
" 1 Population 7 non-null object \n",
|
|||
|
" 2 Yearly % 7 non-null object \n",
|
|||
|
" 3 Yearly 7 non-null object \n",
|
|||
|
" 4 Median 7 non-null int64 \n",
|
|||
|
" 5 Fertility 7 non-null float64\n",
|
|||
|
" 6 Density 7 non-null int64 \n",
|
|||
|
"dtypes: float64(1), int64(3), object(3)\n",
|
|||
|
"memory usage: 524.0+ bytes\n",
|
|||
|
"['Year' 'Population' 'Yearly %' 'Yearly' 'Median' 'Fertility' 'Density']\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAiwAAAHACAYAAACBGTONAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABYcklEQVR4nO3deXQUVd7G8W+ns4csAoEkEMKeQNi3COIIYzAwwKDiAqKyiKMDLsiAwIxsIiLqKAgKjrKjIIgCyghKEBwEZV/CHgQTIBDWrGTrrvcPXnsmJkAaknQneT7n9Dl09a3Kr4oi/VB17y2TYRgGIiIiIk7MxdEFiIiIiNyMAouIiIg4PQUWERERcXoKLCIiIuL0FFhERETE6SmwiIiIiNNTYBERERGnp8AiIiIiTk+BRURERJyeAouIiIg4vXIXWH744Qd69uxJSEgIJpOJlStX2r2NZcuW0aJFC7y9vQkLC+Ott94q/kJFRESkyMpdYMnIyKB58+a8//77t7T+N998Q79+/Xj22WeJi4vjgw8+4N1332XmzJnFXKmIiIgUlak8P/zQZDLx5Zdfcv/999uWZWdn849//IMlS5Zw5coVmjRpwtSpU+nUqRMAjz32GLm5uSxfvty2zowZM3jzzTdJSEjAZDKV8l6IiIhIubvCcjPPPfccW7duZenSpezbt4+HH36Yrl27cuzYMeBaoPH09My3jpeXF6dOneLXX391RMkiIiIVXoUKLAkJCcybN4/ly5dz9913U69ePUaMGEHHjh2ZN28eADExMXzxxRfExsZitVo5evQo//znPwFISkpyZPkiIiIVlqujCyhN+/fvx2Kx0LBhw3zLs7OzqVKlCgBPP/00x48fp0ePHuTm5uLn58eLL77IhAkTcHGpUPlORETEaVSowJKeno7ZbGbnzp2YzeZ8n1WqVAm41u9l6tSpvP7665w9e5bAwEBiY2MBqFu3bqnXLCIiIhUssLRs2RKLxUJycjJ33333DduazWZq1KgBwJIlS2jfvj2BgYGlUaaIiIj8TrkLLOnp6cTHx9venzhxgj179lC5cmUaNmxIv379ePLJJ/nnP/9Jy5YtOX/+PLGxsTRr1ozu3btz4cIFPv/8czp16kRWVpatz8umTZscuFciIiIVW7kb1rxx40Y6d+5cYHn//v2ZP38+ubm5vPbaayxcuJDTp09TtWpV7rzzTiZOnEjTpk25cOECPXv2ZP/+/RiGQfv27Zk8eTJRUVEO2BsRERGBchhYREREpPzRsBcRERFxegosIiIi4vTKRadbq9XKmTNn8PX11dT5IiIiZYRhGKSlpRESEnLTuc7KRWA5c+YMoaGhji5DREREbkFiYiI1a9a8YZtyEVh8fX2Bazvs5+fn4GpERESkKFJTUwkNDbV9j99IuQgsv90G8vPzU2AREREpY4rSnUOdbkVERMTpKbCIiIiI01NgEREREadXLvqwFJXFYiE3N9fRZUgJcXNzK/AUbhERKR8qRGAxDIOzZ89y5coVR5ciJSwgIICgoCDNxyMiUs5UiMDyW1ipVq0a3t7e+jIrhwzDIDMzk+TkZACCg4MdXJGIiBSnch9YLBaLLaxUqVLF0eVICfLy8gIgOTmZatWq6faQiEg5Ynen27S0NIYNG0ZYWBheXl506NCB7du3X7f9gAEDMJlMBV6RkZG2NhMmTCjweURExK3t0e/81mfF29u7WLYnzu23v2f1VRIRKV/sDiyDBw/mu+++Y9GiRezfv5/77ruP6OhoTp8+XWj76dOnk5SUZHslJiZSuXJlHn744XztIiMj87XbvHnzre3Rdeg2UMWgv2cRkfLJrltCV69eZcWKFaxatYo//OEPwLWrI1999RWzZs3itddeK7COv78//v7+tvcrV67k8uXLDBw4MH8hrq4EBQXdyj6IiIhIOWfXFZa8vDwsFguenp75lnt5eRX5isicOXOIjo4mLCws3/Jjx44REhJC3bp16d
evHwkJCdfdRnZ2NqmpqfleUjI6derEsGHDnGY7IiJSMdkVWHx9fWnfvj2TJk3izJkzWCwWFi9ezNatW0lKSrrp+mfOnOGbb75h8ODB+ZZHRUUxf/581q5dy6xZszhx4gR33303aWlphW5nypQptis3/v7+5fZJzf/b/8fd3Z369evz6quvkpeX5+jSrmvjxo2YTKYCQ8i/+OILJk2a5JiiRESkzLO7D8uiRYswDIMaNWrg4eHBe++9R9++fXFxufmmFixYQEBAAPfff3++5d26dePhhx+mWbNmxMTE8O9//5srV66wbNmyQrczZswYUlJSbK/ExER7d6PM6Nq1K0lJSRw7doy//e1vTJgwgbfeesvRZdmtcuXKRXoap4iIOJ/k1Cz2nbri0BrsDiz16tVj06ZNpKenk5iYyLZt28jNzaVu3bo3XM8wDObOncsTTzyBu7v7DdsGBATQsGFD4uPjC/3cw8PD9mTm8v6EZg8PD4KCgggLC+Ovf/0r0dHRrF69msuXL/Pkk09yxx134O3tTbdu3Th27Jhtvfnz5xMQEMDKlStp0KABnp6exMTE5At3AwYMKBAehw0bRqdOna5bz6JFi2jTpg2+vr4EBQXx2GOP2eY+OXnyJJ07dwbgjjvuwGQyMWDAAKDgLaGi1r9u3ToaNWpEpUqVbOFNRERKR06elQ83Hafz2xt5fslusvMsDqvllp8l5OPjQ3BwMJcvX2bdunX06tXrhu03bdpEfHw8Tz311E23nZ6ezvHjx0ts8i/DMMjMyXPIyzCM26rdy8uLnJwcBgwYwI4dO1i9ejVbt27FMAz+9Kc/5RvOm5mZyeTJk1m4cCE//vgjV65coU+fPrf183Nzc5k0aRJ79+5l5cqVnDx50hZKQkNDWbFiBQBHjhwhKSmJ6dOnF7qdotb/9ttvs2jRIn744QcSEhIYMWLEbdUvIiJF8/3hZGKm/cCUbw6TkWMhwMuNC+k5DqvH7onj1q1bh2EYhIeHEx8fz8iRI4mIiLCN+hkzZgynT59m4cKF+dabM2cOUVFRNGnSpMA2R4wYQc+ePQkLC+PMmTOMHz8es9lM3759b3G3buxqroXG49aVyLZv5uCrMXi72z9fn2EYxMbGsm7dOrp168bKlSv58ccf6dChAwCffPIJoaGhrFy50jZkPDc3l5kzZxIVFQVcuyXXqFEjtm3bRrt27W6p/kGDBtn+XLduXd577z3atm1Leno6lSpVonLlygBUq1aNgICAQrdx7NgxVq9eXaT6Z8+eTb169QB47rnnePXVV2+pbhERKZpfzqcz6euDfH/kPABVK3kwqms4vVvVxMXFcVNH2P3NmZKSwpgxYzh16hSVK1emd+/eTJ48GTc3NwCSkpIKjPBJSUlhxYoV1/3f9qlTp+jbty8XL14kMDCQjh078tNPPxEYGHgLu1S+fP3111SqVInc3FysViuPPfYYDz74IF9//bUtiABUqVKF8PBwDh06ZFvm6upK27Ztbe8jIiIICAjg0KFDtxxYdu7cyYQJE9i7dy+XL1/GarUCkJCQQOPGjYu0jUOHDuHq6nrT+r29vW1hBa5Nt//b7ScRESleaVm5zNwQz9wfT5BrMXAzmxh4Vx2e/2N9fD3dHF2e/YHlkUce4ZFHHrnu5/Pnzy+wzN/fn8zMzOuus3TpUnvLuC1ebmYOvhpTqj/zf3+2PTp37sysWbNwd3cnJCQEV1dXVq9eXSy1uLi4FLhFdaMZYjMyMoiJiSEmJoZPPvmEwMBAEhISiImJISen+C8T/haCf2MymW77lpqIiORntRp8sfs0U9ce5nxaNgCdwgMZ26Mx9QIrObi6/yr3zxIqjMlkuqXbMo7g4+ND/fr18y1r1KgReXl5/Pzzz7ZbKhcvXuTIkSP5rnLk5eWxY8cO29WUI0eOcOXKFRo1agRAYGAgcXFx+ba9Z8+eAkHhN4cPH+bixYu88cYbtqHkO3bsyNfmtw7VFsv1O2YVtX4RESlZex
KvMH71AfYmXgGgTlUfxvZoxB8jqju2sELccqdbcZwGDRrQq1cvnn76aTZv3szevXt5/PHHqVGjRr7Oz25ubjz//PP
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from datetime import datetime\n",
|
|||
|
"import matplotlib.dates as md\n",
|
|||
|
"\n",
|
|||
|
"# Load the 2020-2050 world population forecast table and show it as read.\n",
|
|||
|
"ts = pd.read_csv(\"data/world-population-forcast-2020-2050.csv\", encoding=\"ISO-8859-1\")\n",
|
|||
|
"print(ts)\n",
|
|||
|
"\n",
|
|||
|
"# Population is read as strings with thousands separators (e.g. 7,794,798,739).\n",
|
|||
|
"# Strip the commas and cast to int64, assigning by column name so the column is\n",
|
|||
|
"# replaced with a numeric one; writing through ts.iloc[:, 1] left it as object dtype.\n",
|
|||
|
"ts[\"Population\"] = ts[\"Population\"].str.replace(\",\", \"\", regex=False).astype(\"int64\")\n",
|
|||
|
"ts.info()\n",
|
|||
|
"\n",
|
|||
|
"print(ts.columns.values)\n",
|
|||
|
"\n",
|
|||
|
"# Line chart of the forecast population over time; binding the Axes to a name\n",
|
|||
|
"# keeps its repr out of the cell output.\n",
|
|||
|
"plot = ts.plot.line(x=\"Year\", y=\"Population\")"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": ".venv",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.4"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|