MII_Salin_Oleg_PIbd-33/lec1.ipynb

738 lines
132 KiB
Plaintext
Raw Normal View History

2024-09-21 09:46:46 +04:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Работа с NumPy"
]
},
{
"cell_type": "code",
2024-10-22 18:54:39 +04:00
"execution_count": 1,
2024-09-21 09:46:46 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"matrix = \n",
" [[4 5 0]\n",
" [9 9 9]] \n",
"\n",
"tmatrix = \n",
" [[4 9]\n",
" [5 9]\n",
" [0 9]] \n",
"\n",
"vector = \n",
" [4 5 0 9 9 9] \n",
"\n",
"tvector = \n",
" [[4]\n",
" [5]\n",
" [0]\n",
" [9]\n",
" [9]\n",
" [9]] \n",
"\n",
"list_matrix = \n",
" [array([4, 5, 0]), array([9, 9, 9])] \n",
"\n",
"matrix as str = \n",
" [[4 5 0]\n",
" [9 9 9]] \n",
"\n",
"matrix type is <class 'numpy.ndarray'> \n",
"\n",
"vector type is <class 'numpy.ndarray'> \n",
"\n",
"list_matrix type is <class 'list'> \n",
"\n",
"str_matrix type is <class 'str'> \n",
"\n",
"formatted_vector = \n",
" 4; 5; 0; 9; 9; 9 \n",
"\n"
]
}
],
"source": [
"import numpy as np\n",
"\n",
"matrix = np.array([[4, 5, 0], [9, 9, 9]])\n",
"print(\"matrix = \\n\", matrix, \"\\n\")\n",
"\n",
"tmatrix = matrix.T\n",
"print(\"tmatrix = \\n\", tmatrix, \"\\n\")\n",
"\n",
"vector = np.ravel(matrix)\n",
"print(\"vector = \\n\", vector, \"\\n\")\n",
"\n",
"tvector = np.reshape(vector, (6, 1))\n",
"print(\"tvector = \\n\", tvector, \"\\n\")\n",
"\n",
"list_matrix = list(matrix)\n",
"print(\"list_matrix = \\n\", list_matrix, \"\\n\")\n",
"\n",
"str_matrix = str(matrix)\n",
"print(\"matrix as str = \\n\", str_matrix, \"\\n\")\n",
"\n",
"print(\"matrix type is\", type(matrix), \"\\n\")\n",
"\n",
"print(\"vector type is\", type(vector), \"\\n\")\n",
"\n",
"print(\"list_matrix type is\", type(list_matrix), \"\\n\")\n",
"\n",
"print(\"str_matrix type is\", type(str_matrix), \"\\n\")\n",
"\n",
"formatted_vector = \"; \".join(map(str, vector))\n",
"print(\"formatted_vector = \\n\", formatted_vector, \"\\n\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Работа с Pandas DataFrame\n",
"\n",
"https://pandas.pydata.org/docs/user_guide/10min.html"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Работа с данными - чтение и запись CSV"
]
},
{
"cell_type": "code",
2024-10-22 18:54:39 +04:00
"execution_count": 2,
2024-09-21 09:46:46 +04:00
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.read_csv(\"data/world-population-by-country-2020.csv\", index_col=\"no\")\n",
"\n",
"df.to_csv(\"test.csv\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Работа с данными - основные команды"
]
},
{
"cell_type": "code",
2024-10-22 18:54:39 +04:00
"execution_count": 3,
2024-09-21 09:46:46 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Country (or dependency) Population 2020 Yearly Change Net Change\n",
"no \n",
"1 China 1439323776 0.39 5540090\n",
"2 India 1380004385 0.99 13586631\n",
"3 United States 331002651 0.59 1937734\n",
"4 Indonesia 273523615 1.07 2898047\n",
"5 Pakistan 220892340 2.00 4327022\n",
" Country (or dependency) Population 2020 Yearly Change Net Change\n",
"no \n",
"231 Montserrat 4992 0.06 3\n",
"232 Falkland Islands 3480 3.05 103\n",
"233 Niue 1626 0.68 11\n",
"234 Tokelau 1357 1.27 17\n",
"235 Holy See 801 0.25 2\n"
]
}
],
"source": [
"# df.info()\n",
"\n",
"# print(df.describe().transpose())\n",
"\n",
"from click import clear\n",
"\n",
"\n",
"cleared_df = df.drop(\n",
" df.columns.difference([\n",
" \"Country (or dependency)\", \"Population 2020\", \"Yearly Change\", \"Net Change\"\n",
" ]\n",
" ),\n",
" axis=1,\n",
")\n",
"# print(cleared_df.head())\n",
"# print(cleared_df.tail())\n",
"cleared_df['Population 2020'] = cleared_df['Population 2020'].apply(\n",
" lambda x: int(\"\".join(x.split(\",\")))\n",
")\n",
"cleared_df[\"Net Change\"] = cleared_df[\"Net Change\"].apply(\n",
" lambda x: int(\"\".join(x.split(\",\")))\n",
")\n",
"cleared_df[\"Yearly Change\"] = cleared_df[\"Yearly Change\"].apply(\n",
" lambda x: float(\"\".join(x.rstrip('%')))\n",
")\n",
"\n",
"sorted_df = cleared_df.sort_values(\n",
" [\"Population 2020\", \"Net Change\", \"Country (or dependency)\"], ascending=[False, False, True]\n",
")\n",
"print(sorted_df.head())\n",
"print(sorted_df.tail())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Работа с данными - работа с элементами"
]
},
{
"cell_type": "code",
2024-10-22 18:54:39 +04:00
"execution_count": 4,
2024-09-21 09:46:46 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"no\n",
"1 China\n",
"2 India\n",
"3 United States\n",
"4 Indonesia\n",
"5 Pakistan\n",
" ... \n",
"231 Montserrat\n",
"232 Falkland Islands\n",
"233 Niue\n",
"234 Tokelau\n",
"235 Holy See\n",
"Name: Country (or dependency), Length: 235, dtype: object\n",
"Country (or dependency) Israel\n",
"Population 2020 8,655,535\n",
"Yearly Change 1.60%\n",
"Net Change 136,158\n",
"Density (P/Km²) 400\n",
"Land Area (Km²) 21,640\n",
"Migrants (net) 10,000\n",
"Fert. Rate 3\n",
"Med. Age 30\n",
"Urban Pop % 93%\n",
"World Share 0.11%\n",
"Name: 100, dtype: object\n",
"Israel\n",
" Country (or dependency) Population 2020\n",
"no \n",
"100 Israel 8,655,535\n",
"101 Switzerland 8,654,622\n",
"102 Togo 8,278,724\n",
"103 Sierra Leone 7,976,983\n",
"104 Hong Kong 7,496,981\n",
".. ... ...\n",
"196 St. Vincent & Grenadines 110,940\n",
"197 Aruba 106,766\n",
"198 Tonga 105,695\n",
"199 U.S. Virgin Islands 104,425\n",
"200 Seychelles 98,347\n",
"\n",
"[101 rows x 2 columns]\n",
" Country (or dependency) Population 2020 Yearly Change Net Change \\\n",
"no \n",
"1 China 1,439,323,776 0.39% 5,540,090 \n",
"2 India 1,380,004,385 0.99% 13,586,631 \n",
"3 United States 331,002,651 0.59% 1,937,734 \n",
"\n",
" Density (P/Km²) Land Area (Km²) Migrants (net) Fert. Rate Med. Age \\\n",
"no \n",
"1 153 9,388,211 -348,399 1.7 38 \n",
"2 464 2,973,190 -532,687 2.2 28 \n",
"3 36 9,147,420 954,806 1.8 38 \n",
"\n",
" Urban Pop % World Share \n",
"no \n",
"1 61% 18.47% \n",
"2 35% 17.70% \n",
"3 83% 4.25% \n",
"Country (or dependency) China\n",
"Population 2020 1,439,323,776\n",
"Yearly Change 0.39%\n",
"Net Change 5,540,090\n",
"Density (P/Km²) 153\n",
"Land Area (Km²) 9,388,211\n",
"Migrants (net) -348,399\n",
"Fert. Rate 1.7\n",
"Med. Age 38\n",
"Urban Pop % 61%\n",
"World Share 18.47%\n",
"Name: 1, dtype: object\n",
" Country (or dependency) Population 2020\n",
"no \n",
"3 United States 331,002,651\n",
"4 Indonesia 273,523,615\n",
"5 Pakistan 220,892,340\n",
" Country (or dependency) Yearly Change\n",
"no \n",
"4 Indonesia 1.07%\n",
"5 Pakistan 2.00%\n"
]
}
],
"source": [
"print(df[\"Country (or dependency)\"])\n",
"\n",
"print(df.loc[100])\n",
"\n",
"print(df.loc[100, \"Country (or dependency)\"])\n",
"\n",
"print(df.loc[100:200, [\"Country (or dependency)\", \"Population 2020\"]])\n",
"\n",
"print(df[0:3])\n",
"\n",
"print(df.iloc[0])\n",
"\n",
"print(df.iloc[2:5, 0:2])\n",
"\n",
"print(df.iloc[[3, 4], [0, 2]])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Работа с данными - отбор и группировка"
]
},
{
"cell_type": "code",
2024-10-22 18:54:39 +04:00
"execution_count": 5,
2024-09-21 09:46:46 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Population 2020 Yearly Change Net Change \\\n",
"Country (or dependency) \n",
"China 1439323776 0.39 5540090 \n",
"India 1380004385 0.99 13586631 \n",
"United States 331002651 0.59 1937734 \n",
"Indonesia 273523615 1.07 2898047 \n",
"Pakistan 220892340 2.00 4327022 \n",
"... ... ... ... \n",
"Montserrat 4992 0.06 3 \n",
"Falkland Islands 3480 3.05 103 \n",
"Niue 1626 0.68 11 \n",
"Tokelau 1357 1.27 17 \n",
"Holy See 801 0.25 2 \n",
"\n",
" Capital Continent \n",
"Country (or dependency) \n",
"China Beijing Asia \n",
"India New Delhi Asia \n",
"United States Washington, D.C. North America \n",
"Indonesia Jakarta Asia \n",
"Pakistan Islamabad Asia \n",
"... ... ... \n",
"Montserrat Brades North America \n",
"Falkland Islands Stanley South America \n",
"Niue Alofi Oceania \n",
"Tokelau Nukunonu Oceania \n",
"Holy See NaN NaN \n",
"\n",
"[235 rows x 5 columns]\n"
]
}
],
"source": [
"# s_values = df[\"Sex\"].unique()\n",
"# print(s_values)\n",
"df2 = pd.read_csv(\n",
" \"data/countries-continents-capitals.csv\", index_col=\"Country/Territory\",\n",
" encoding = \"ISO-8859-1\"\n",
")\n",
"\n",
"\n",
"# for s_value in s_values:\n",
"\n",
"\n",
"# count = df[df[\"Sex\"] == s_value].shape[0]\n",
"\n",
"\n",
"# s_total += count\n",
"\n",
"\n",
"# print(s_value, \"count =\", count)\n",
"\n",
"\n",
"# print(\"Total count = \", s_total)\n",
"\n",
"extended_df = cleared_df.set_index(\"Country (or dependency)\").join(\n",
" df2\n",
")\n",
"print(extended_df)\n",
"\n",
"\n",
"# print(extended_df.groupby([\"Continent\"]).agg({\"population\" : [\"sum\"]}))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Визуализация - Исходные данные"
]
},
{
"cell_type": "code",
2024-10-22 18:54:39 +04:00
"execution_count": 6,
2024-09-21 09:46:46 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Population 2020 Yearly Change Continent\n",
"Country (or dependency) \n",
"China 1439323776 0.39 Asia\n",
"India 1380004385 0.99 Asia\n",
"United States 331002651 0.59 North America\n",
"Indonesia 273523615 1.07 Asia\n",
"Pakistan 220892340 2.00 Asia\n",
"... ... ... ...\n",
"Montserrat 4992 0.06 North America\n",
"Falkland Islands 3480 3.05 South America\n",
"Niue 1626 0.68 Oceania\n",
"Tokelau 1357 1.27 Oceania\n",
"Holy See 801 0.25 NaN\n",
"\n",
"[235 rows x 3 columns]\n"
]
}
],
"source": [
"data = extended_df[[\"Population 2020\", \"Yearly Change\", \"Continent\"]].copy()\n",
"data.dropna(subset=[\"Population 2020\"], inplace=True)\n",
"print(data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Визуализация - Сводка пяти чисел\n",
"\n",
"<img src=\"assets/quantile.png\" width=\"400\" style=\"background-color: white\">"
]
},
{
"cell_type": "code",
2024-10-22 18:54:39 +04:00
"execution_count": 7,
2024-09-21 09:46:46 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Population 2020 \\\n",
" min q1 q2 median \n",
"Continent \n",
"Africa 98347 2509845.75 13042506.5 13042506.5 \n",
"Asia 437479 5985985.50 18138682.5 18138682.5 \n",
"Europe 33691 1326535.00 5459642.0 5459642.0 \n",
"North America 4992 67288.00 395436.0 395436.0 \n",
"Oceania 1357 27368.25 144112.0 144112.0 \n",
"South America 3480 1458346.50 14658037.5 14658037.5 \n",
"\n",
" \n",
" q3 max \n",
"Continent \n",
"Africa 31118563.75 206139589 \n",
"Asia 52054338.75 1439323776 \n",
"Europe 10423054.00 145934462 \n",
"North America 6589966.75 331002651 \n",
"Oceania 488471.75 25499884 \n",
"South America 31837875.50 212559417 \n",
" Population 2020 \n",
" low_iqr iqr high_iqr\n",
"Continent \n",
"Africa 0 28608718.00 7.403164e+07\n",
"Asia 0 46068353.25 1.211569e+08\n",
"Europe 0 9096519.00 2.406783e+07\n",
"North America 0 6522678.75 1.637398e+07\n",
"Oceania 0 461103.50 1.180127e+06\n",
"South America 0 30379529.00 7.740717e+07\n"
]
},
{
"data": {
"text/plain": [
"<Axes: title={'center': 'Population 2020'}, xlabel='Continent'>"
]
},
2024-10-22 18:54:39 +04:00
"execution_count": 7,
2024-09-21 09:46:46 +04:00
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjwAAAHNCAYAAAAaKaG7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABrVklEQVR4nO3de1zO9/8/8MfVVV2dRHQmlUNIUTKJkZAYTcPmNGJjNmxIQzbHbc6nfT9OHxtyZk7ZMLQmchpGm7OisKkckw5yVa/fH37X++PS8aor1eVxv926bdfr/Xq/3q/363rX++n1er3fL5kQQoCIiIhIh+lVdAWIiIiIyhsDHiIiItJ5DHiIiIhI5zHgISIiIp3HgIeIiIh0HgMeIiIi0nkMeIiIiEjnMeAhIiIinceAh4iIiHQeAx6iciSTyTB9+vSKrobOi46OhkwmQ3R0dJH5pk+fDplMhgcPHryeilUyiYmJkMlkCA8Pr+iqEL12DHioSgoPD4dMJlP7sba2hp+fH3799deKrl6ZXb58GdOnT0diYmJFV4XKYPfu3ejWrRssLS1haGgIe3t7fPDBB/j999/L9bibN2/GkiVLyvUY2sDrnF4n/YquAFFZzJw5E87OzhBCICUlBeHh4XjnnXfwyy+/oEePHhVdvVK7fPkyZsyYgQ4dOsDJyamiq0MaEkLgo48+Qnh4ODw9PRESEgJbW1skJSVh9+7d6NSpE44fP442bdqUy/E3b96MixcvYuzYsWrpjo6OyMrKgoGBQbkcV1O8zul1YsBDVVq3bt3QsmVL6fPHH38MGxsbbNmypUoHPK9TTk4O8vLyYGhoWNFV0RkLFy5EeHg4xo4di0WLFkEmk0nbvvrqK2zYsAH6+q//z69MJoORkdFrPy5RZcAhLdIpNWrUgLGxcb6bSUZGBsaPHw8HBwcoFAo0atQICxYsgBACAJCVlYXGjRujcePGyMrKkvZ79OgR7Ozs0KZNG+Tm5gIAhgwZAjMzM9y8eRMBAQEwNTWFvb09Zs6cKZVXlPPnz6Nbt24wNzeHmZkZOnXqhFOnTknbw8PD8f777wMA/Pz8pCG74uanbN++Ha6urjAyMoKbmxt2796NIUOGqP3LWTWHY8GCBViyZAnq168PhUKBy5cvAwB+//13tGvXDqampqhRowZ69uyJK1euqB3n1TJVVPNjXiaTyTB69Ghs2rQJjRo1gpGREby8vHD06NF8+//777/46KOPYGNjA4VCgaZNm2LNmjX58v3zzz8ICgqCqakprK2tMW7cOGRnZxfZNq968OABPvjgA5ibm6NWrVoYM2YMnj17Jm339fVF8+bNC9y3UaNGCAgIKLTsrKwszJ49G40bN8aCBQvytQkADBo0CK1atZI+37x5E++//z5q1qwJExMTtG7dGvv27VPbRzVP6aeffsJ3332HOnXqwMjICJ06dUJ8fLyUr0OHDti3bx9u3bolXTuq76ugOTyq6/nff/9FUFAQzMzMYGVlhdDQUOmaV8nLy8OSJUvQtGlTGBkZwcbGBiNGjMDjx4/V8jk5OaFHjx44duwYWrVqBSMjI9SrVw/r16+X8pT2OicqNUFUBa1du1YAEL/99pu4f/++uHfvnrh48aIYMWKE0NPTE4cOHZLy5uXliY4dOwqZTCaGDRsmli5dKgIDAwUAMXbsWCnfqVOnhFwuF+PGjZPS+vXrJ4yNjcW1a9ektODgYGFkZCQaNmwoBg0aJJYuXSp69OghAIgpU6ao1ROAmDZtmvT54sWLwtTUVNjZ2YlvvvlGzJkzRzg7OwuFQiFOnTolhBDixo0b4osvvhAAxOTJk8WGDRvEhg0bRHJycqHtsXfvXiGTyUSzZs3EokWLxJQpU4SFhYVwc3MTjo6OUr6EhAQBQLi6uop69eqJOXPmiMWLF4tbt26JyMhIoa+vL1xcXMS8efPEjBkzhKWlpbCwsBAJCQlq5/9ymSrTpk0Tr/5JASDc3NyEpaWlmDlzppg7d65wdHQUxsbG4sKFC1K+5ORkUadOHeHg4CBmzpwpVqxYId59910BQCxevFjKl5mZKVxcXISRkZGYMGGCWLJkifDy8hLNmjUTAMThw4cLbaOX6+ju7i4CAwPF0qVLxYcffigAiEGDBkn5fvjhBwFArY5CCHH69GkBQKxfv77QYxw6dEgAEDNnziyyLi+fu42NjahWrZr46quvxKJFi0Tz5s2Fnp6e2LVrl5Tv8OHDAoDw9PQUXl5eYvHixWL69OnCxMREtGrVSu34Hh4ewtLSUrp2du/eLYT43/e/du1aKb/qem7atKn46KOPxIoVK0Tv3r0FALF8+XK1ug4bNkzo6+uL4cOHi5UrV4qJEycKU1NT8dZbb4nnz59L+RwdHUWjRo2EjY2NmDx5sli6dKlo0aKFkMlk4uLFi0KI0l3nRGXBgIeqJFXA8+qPQqEQ4eHhankjIiIEAPHtt9+qpffp00fIZDIRHx8vpYWFhQk9PT1x9OhRsX37dgFALFmyRG2/4OBgAUB8/vnnUlpeXp7o3r27MDQ0FPfv35fSXw14goKChKGhobhx44aUdvfuXVGtWjXRvn17KU117OJu4Cru7u6iTp064unTp1JadHS0AFBgwGNubi7u3bunVoaHh4ewtrYWDx8+lNL++usvoaenJwYPHqx2/poEPADE2bNnpbRbt24JIyMj8d5770lpH3/8sbCzsxMPHjxQ279fv36ievXqIjMzUwghxJIlSwQA8dNPP0l5MjIyRIMGDTQKeN5991219JEjRwoA4q+//hJCCJGamiqMjIzExIkT1fJ98cUXwtTUVKSnpxd6jO+//14AkIKM4owdO1YAEDExMVLa06dPhbOzs3BychK5ublCiP8FPE2aNBHZ2dn5jvdycNa9e/cCv6PCAp6CAjRVYKUSExMjAIhNmzap5Ttw4EC+dEdHRwFAHD16VEq7d++eUCgUYvz48VKaptc5UVlwSIuqtGXLliEyMhKRkZHYuHEj/Pz8MGzYMOzatUvKs3//fsjlcnzxxRdq+44fPx5CCLWnuqZPn46mTZsiODgYI0eOhK+vb779VEaPHi39v2ro5vnz5/jtt98KzJ+bm4tDhw4hKCgI9erVk9Lt7OwwYMAAHDt2DGlpaRq3wd27d3HhwgUMHjwYZmZmUrqvry/c3d0L3Kd3796wsrKSPiclJSE2NhZDhgxBzZo1pfRmzZrB398f+/fv17heKj4+PvDy8pI+161bFz179sTBgweRm5sLIQR27tyJwMBACCHw4MED6ScgIABPnjzBuXPnALz4Lu3s7NCnTx+pPBMTE3zyySca1WnUqFFqnz///HOpfACoXr06evbsiS1btkjDlLm5udi2bZs0nFYY1XdYrVq1EtVl//79aNWqFd5++20pzczMDJ988gkSExOl4UaVoUOHqs23ateuHYAXw2Jl8emnn6p9bteunVqZ27dvR/Xq1eHv76/2HXl5ecHMzAyHDx9W29/V1VWqGwBYWVmhUaNGZa4nUWkx4KEqrVWrVujcuTM6d+6MgQMHYt++fXB1dZWCDwC4desW7O3t892AmjRpIm1XMTQ0xJo1a5CQkICnT59i7dq1Bc7B0NPTUwtaAMDFxQUACn3E9v79+8jMzESjRo3ybWvSpAny8vJw586dkp/8/6eqf4MGDfJtKygNAJydnQsso7C6PXjwABkZGRrXDQAaNmyYL83FxQWZmZm4f/8+7t+/j9TUVKxatQpWVlZqP0OHDgUA3Lt3T6pngwYN8n0nBdVbkzrVr18fenp6at/d4MGDcfv2bcTExAAAfvvtN6SkpGDQoEFFlm1ubg4AePr0aYnqcuvWrULbXbX9ZXXr1lX7bGFhAQD55tFowsjISC0AVpX7cplxcXF48uQJrK2t831P6enp0ndUWD0LKpPodeJTWqRT9PT04Ofnh++//x5xcXFo2rSpxmUcPHgQAPDs2TPExcXlCw50gbGxcan3LSgABJBvgmtJ5eXlAQA+/PBDBAcHF5inWbNmpSq7pAo6p4CAANjY2GDjxo1o3749Nm7cCFtbW3Tu3LnIsho3bgwAuHDhAoKCgrR
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"def q1(x):\n",
" return x.quantile(0.25)\n",
"\n",
"\n",
"# median = quantile(0.5)\n",
"def q2(x):\n",
" return x.quantile(0.5)\n",
"\n",
"\n",
"def q3(x):\n",
" return x.quantile(0.75)\n",
"\n",
"\n",
"def iqr(x):\n",
" return q3(x) - q1(x)\n",
"\n",
"\n",
"def low_iqr(x):\n",
" return max(0, q1(x) - 1.5 * iqr(x))\n",
"\n",
"\n",
"def high_iqr(x):\n",
" return q3(x) + 1.5 * iqr(x)\n",
"\n",
"\n",
"quantiles = (\n",
" data[[\"Continent\", \"Population 2020\"]]\n",
" .groupby([\"Continent\"])\n",
" .aggregate([\"min\", q1, q2, \"median\", q3, \"max\"])\n",
")\n",
"print(quantiles)\n",
"\n",
"iqrs = (\n",
" data[[\"Continent\", \"Population 2020\"]]\n",
" .groupby([\"Continent\"])\n",
" .aggregate([low_iqr, iqr, high_iqr])\n",
")\n",
"print(iqrs)\n",
"\n",
"data.boxplot(column=\"Population 2020\", by=\"Continent\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Визуализация - Гистограмма"
]
},
{
"cell_type": "code",
2024-10-22 18:54:39 +04:00
"execution_count": 8,
2024-09-21 09:46:46 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Axes: ylabel='Frequency'>"
]
},
2024-10-22 18:54:39 +04:00
"execution_count": 8,
2024-09-21 09:46:46 +04:00
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAGvCAYAAAC9yRSTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA4D0lEQVR4nO3de1yUZf7/8fcgB/EAiApIgZJ5TFPzFKklSuEhV9PdNM3TWq6bmIpWumVkWqipmaayta7o5qEs9WtaluFpS7TEQ+kaah6wdNAyQHBFhPv3hw/nt5OHYBiY4fb1fDzux6O57muu+VzCMu+95rrnthiGYQgAAMCkPFxdAAAAQGki7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFPzdHUB7qCwsFCnT59W1apVZbFYXF0OAAAoAsMwdOHCBYWGhsrD4+brN4QdSadPn1ZYWJirywAAAA44deqU7rzzzpueJ+xIqlq1qqSr/1h+fn4urgYAABRFdna2wsLCbO/jN0PYkWwfXfn5+RF2AAAoZ35vCwoblAEAgKkRdgAAgKnxMRYA4JYKCgqUn5/v6jJwG/Ly8lKFChVKPA5hBwBwQ4ZhyGq1KjMz09Wl4DYWEBCgkJCQEn01DGEHAHBD14JOUFCQKlWqxPeQoUwZhqGLFy/q7NmzkqRatWo5PBZhBwBwnYKCAlvQqV69uqvLwW3K19dXknT27FkFBQU5/JEWG5QBANe5tkenUqVKLq4Et7trv4Ml2TdG2AEA3BQfXcHVnPE7SNgBAACmRtgBAKAUdOzYUWPGjHGbcW5nbFAGABRLnQkbyvT1TkzrXqz+Q4YM0ZIlSyRd/Z6W8PBwDRo0SH/729/k6em+b3tbt25VVFSUfv31VwUEBNjaV69eLS8vr1J97f3792vatGn68ssv9fPPP6tOnToaMWKERo8efV2NcXFxOnjwoMLCwvTSSy9pyJAhtvMJCQlavXq1vv/+e/n6+uqBBx7Q9OnT1aBBA1ufS5cuady4cVq5cqXy8vIUExOjBQsWKDg4uNTmx8oOAMB0unTpojNnzujIkSMaN26cXnnlFb3xxhuuLsshgYGBv3ujy5JKTU1VUFCQ3nvvPR08eFAvvviiJk6cqLffftvW5/jx4+revbuioqK0b98+jRkzRk899ZQ+++wzW59t27Zp5MiR2rlzpzZt2qT8/Hw98sgjys3NtfUZO3asPv74Y61atUrbtm3T6dOn1bt371KdH2EHAGA6Pj4+CgkJUe3atfXXv/5V0dHRWrdunSTp119/1aBBg1StWjVVqlRJXbt21ZEjR2zPTUpKUkBAgNauXat69eqpYsWKiomJ0alTp2x9hgwZol69etm95pgxY9SxY8eb1vSvf/1LrVq1UtWqVRUSEqL+/fvbvkPmxIkTioqKkiRVq1ZNFovFtmLy24+xilr/Z599pkaNGqlKlSq28Hczf/7zn/XWW2/poYce0l133aUnn3xSQ4cO1erVq219EhMTFRERoVmzZqlRo0aKjY3VH//4R7355pu2Phs3btSQIUN0zz33qFmzZkpKSlJ6erpSU1MlSVlZWVq0aJFmz56tTp06qWXLllq8eLF27NihnTt33rS+kiLsAABMz9fXV5cvX5Z0Najs3r1b69atU0pKigzDULdu3ewubb548aJee+01LV26VF999ZUyMzPVr1+/EtWQn5+vKVOmaP/+/Vq7dq1OnDhhCzRhYWH66KOPJElpaWk6c+aM3nrrrRuOU9T6Z86cqX/961/avn270tPTNX78+GLVm5WVpcDAQNvjlJQURUdH2/WJiYlRSkrKLceQZBsnNTVV+fn5duM0bNhQ4eHhtxynpNz3w0uTuNln28X9DBoAUHyGYSg5OVmfffaZRo0apSNHjmjdunX66quv9MADD0iSli1bprCwMK1du1Z/+tOfJF0NJm+//bbatm0rSVqyZIkaNWqkr7/+Wm3atHGolj//+c+2/77rrrs0d+5ctW7dWjk5OapSpYotEAQFBdnt2flfxak/MTFRdevWlSTFxsbq1VdfLXKtO3bs0Pvvv68NG/7/e5jVar1uX01wcLCys7P13//+1/YFgNcUFhZqzJgxateunZo0aWIbw9vb+7r5BQcHy2q1Frm+4mJlBwBgOuvXr1eVKlVUsWJFde3aVX379tUrr7yiQ4cOydPT0xZiJKl69epq0KCBDh06ZGvz9PRU69atbY8bNmyogIAAuz7FlZqaqh49eig8PFxVq1bVQw89JElKT08v8hhFrb9SpUq2oCNdvdXCtY/Mfs+BAwfUs2dPxcfH65FHHilybb81cuRIHThwQCtXrnR4DGdhZQcAYDpRUVFauHChvL29FRoa6vSrsDw8PGQYhl3brb7hNzc3VzExMYqJidGyZctUs2ZNpaenKyYmxvbxmjP99uoti8VyXb038p///EedO3fW8OHD9dJLL9mdCwkJUUZGhl1bRkaG/Pz8rlvViY2N1fr167V9+3bdeeeddmNcvnxZmZmZdqs7GRkZCgkJKer0io2VHQCA6VSuXFl33323wsPD7YJOo0aNdOXKFe3atcvW9ssvvygtLU2NGze2tV25ckW7d++2PU5LS1NmZqYaNWokSapZs+Z1G3737dt303q+//57/fLLL5o2bZo6dOighg0bXrfS4u3tLenqfclupqj1O+LgwYOKiorS4MGD9dprr113PjIyUsnJyXZtmzZtUmRkpO2xYRiKjY3VmjVrtHnzZkVERNj1b9mypby8vOzGSUtLU3p6ut04zkbYAQDcNurVq6eePXvq6aef1pdffqn9+/frySef1B133KGePXva+nl5eWnUqFHatWuXUlNTNWTIEN1///22/TqdOnXS7t27tXTpUh05ckTx8fE6cODATV83PDxc3t7emjdvno4dO6Z169ZpypQpdn1q164ti8Wi9evX69y5c8rJyXG4/uI6cOCAoqKi9MgjjyguLk5Wq1VWq1Xnzp2z9RkxYoSOHTum559/Xt9//70WLFigDz74QGPHjrX1GTlypN577z0tX75cVatWtY3z3//+V5Lk7++vYcOGKS4uTlu2bFFqaqqGDh2qyMhI3X///Q7X/3sIOwCA28rixYvVsmVLPfroo4qMjJRhGPrkk0/sPvqpVKmSXnjhBfXv31/t2rVTlSpV9P7779vOx8TEaNKkSXr++efVunVrXbhwQYMGDbrpa9asWVNJSUlatWqVGjdurGnTpmnmzJl2fe644w5NnjxZEyZMUHBwsGJjYx2uv7g+/PBDnTt3Tu+9955q1aplO/5331JERIQ2bNigTZs2qVmzZpo1a5b+8Y9/KCYmxtZn4cKFysrKUseOHe3G+d9/uzfffFOPPvqo+vTpowcffFAhISF2l7iXBotRlA/xTC47O1v+/v7KysqSn5+fU8fmaiwA5dGlS5d0/PhxRUREqGLFiq4up0wlJSVpzJgxyszMdHUp0K1/F4v6/s3KDgAAMDXCDgAAMDXCDgAA/2PIkCF8hGUyhB0AAGBqhB0AAGBqhB0AwE1xwS5czRm/g4QdAMB1rn1ny8WLF11cCW53134HS/I9QtwbCwBwnQoVKiggIMB2S4NKlSrJYrG4uCrcTgzD0MWLF3X27FkFBASoQoUKDo9F2AEA3NC1GzMW9W7ZQGkICAgo8U1CCTsAgBuyWCyqVauWgoKCbnlHb6C0eHl5lWhF5xqXhp3t27frjTfeUGpqqs6cOaM1a9aoV69edn0OHTqkF154Qdu2bdOVK1fUuHFjffTRRwoPD5d09Wukx40bp5UrVyovL08xMTFasGCBgoODXTAjADCfChUqOOUNB3AVl25Qzs3NVbNmzTR//vwbnv/hhx/Uvn17NWzYUFu3btW3336rSZMm2d0bY+zYsfr444+1atUqbdu2TadPn1bv3r3LagoAAMDNuXRlp2vXruratetNz7/44ovq1q2bZsyYYWurW7eu7b+zsrK0aNEiLV++XJ06dZJ09W6wjRo10s6dO0v1dvEAAKB8cNtLzwsLC7VhwwbVr19fMTExCgoKUtu2bbV27Vp
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"data.plot.hist(column=[\"Population 2020\"], bins=80)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Визуализация - Точечная диаграмма"
]
},
{
"cell_type": "code",
2024-10-22 18:54:39 +04:00
"execution_count": 9,
2024-09-21 09:46:46 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Country (or dependency) Population 2020 Yearly Change Net Change\n",
"no \n",
"1 China 1439323776 0.39 5540090\n",
"2 India 1380004385 0.99 13586631\n",
"3 United States 331002651 0.59 1937734\n",
"4 Indonesia 273523615 1.07 2898047\n",
"5 Pakistan 220892340 2.00 4327022\n",
".. ... ... ... ...\n",
"231 Montserrat 4992 0.06 3\n",
"232 Falkland Islands 3480 3.05 103\n",
"233 Niue 1626 0.68 11\n",
"234 Tokelau 1357 1.27 17\n",
"235 Holy See 801 0.25 2\n",
"\n",
"[235 rows x 4 columns]\n"
]
},
{
"data": {
"text/plain": [
"<Axes: xlabel='Country (or dependency)', ylabel='Population 2020'>"
]
},
2024-10-22 18:54:39 +04:00
"execution_count": 9,
2024-09-21 09:46:46 +04:00
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAj0AAAHACAYAAABJddlbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABFRklEQVR4nO3deVxWZf7/8fcNCogIgigukTvmimjpqLlTmBNp0+JXHUWzGsutzCmdcm0hM/c0p2VcmkatXGq0tELNJZdcUDNUXDEFBTcEFQ2u3x/9vMc7UW/kxhs8r+fjcT8e3te5zjmfcw5wvz3nOue2GWOMAAAA7nAe7i4AAADgdiD0AAAASyD0AAAASyD0AAAASyD0AAAASyD0AAAASyD0AAAASyD0AAAASyD0AAAASyD0AAAAS7B06Fm9erWio6NVsWJF2Ww2LV68OM/L+Oyzz9SwYUP5+vqqcuXKGjdunOsLBQAA+Wbp0JOZmanw8HBNmzbtlub/5ptv1L17d/Xt21c///yzpk+frokTJ+q9995zcaUAACC/bHzh6O9sNpsWLVqkzp0729uysrL06quvau7cuTpz5ozq1aunsWPHqk2bNpKkbt266fLly/r888/t80ydOlXvvPOOkpKSZLPZbvNWAACA67H0mZ6b6d+/v9avX6958+Zpx44deuKJJ9ShQwclJiZK+j0U+fj4OMxTokQJ/frrrzp8+LA7SgYAANdB6LmOpKQkzZw5U59//rlatmyp6tWra8iQIbr//vs1c+ZMSVJUVJQWLlyouLg45eTkaO/evRo/frwkKTk52Z3lAwCAPyjm7gIKq507dyo7O1thYWEO7VlZWSpTpowk6ZlnntH+/fv18MMP6/Lly/L399egQYM0atQoeXiQJwEAKEwIPdeRkZEhT09PbdmyRZ6eng7T/Pz8JP0+Dmjs2LF66623lJKSorJlyyouLk6SVK1atdteMwAAuD5Cz3VEREQoOztbJ06cUMuWLW/Y19PTU5UqVZIkzZ07V82aNVPZsmVvR5kAAMBJlg49GRkZ2rdvn/39wYMHFR8fr6CgIIWFhal79+7q2bOnxo8fr4iICKWmpiouLk4NGjTQn//8Z6WlpemLL75QmzZtdPHiRfsYoB9++MGNWwUAAHJj6VvWV61apbZt217THhMTo1mzZuny5ct64403NGfOHB09elTBwcH605/+pNGjR6t+/fpKS0tTdHS0du7cKWOMmjVrpjfffFNNmzZ1w9YAAIAbsXToAQAA1sEtRgAAwBIIPQAAwBIsN5A5JydHx44dU6lSpfiaCAAAighjjM6dO6eKFSve8rPwLBd6jh07ptDQUHeXAQAAbsGRI0d011133dK8lgs9pUqVkvT7TvP393dzNQAAwBnp6ekKDQ21f47fCsuFniuXtPz9/Qk9AAAUMfkZmsJAZgAAYAmEHgAAYAmEHgAAYAmEHgAAYAmEHgAAYAmEHgAAYAmEHgAAYAmEHgAAYAmEHgAAYAmEHgAAYAmW+xqKgnQgNUOHT51XlTIlVTW4pLvLAQAAVyH0uMCZ85c0cG68Viem2tta1SyrqV0jFOBb3I2VAQCAK7i85QID58Zr3b40h7Z1+9I0YO42N1UEAAD+iNCTTwdSM7Q6MVXZxji0Zxuj1YmpOpiW6abKAADA1Qg9+XT41PkbTj90ktADAEBhQOjJp8pBvjecXqUMA5oBACgMCD35VK2sn1rVLCtPm82h3dNmU6uaZbmLCwCAQoLQ4wJTu0aoRY1gh7YWNYI1tWuEmyoCAAB/xC3rLhDgW1xz+jTRwbRMHTqZyXN6AAAohAg9LlQ1mLADAEBh5dbLW6tXr1Z0dLQqVqwom82mxYsXOz3vunXrVKxYMTVs2LDA6gMAAHcOt4aezMxMhYeHa9q0aXma78yZM+rZs6fat29fQJUBAIA7jVsvbz300EN66KGH8jxf37591a1bN3l6eubp7BAAALCuInf31syZM3XgwAGNHDnS3aUAAIAipEgNZE5MTNTQoUO1Zs0aFSvmXOlZWVnKysqyv09PTy+o8gAAQCFWZM70ZGdnq1u3bho9erTCwsKcni82NlYBAQH2V2hoaAFWCQAACiubMX/4pkw3sdlsWrRokTp37pzr9DNnzigwMFCenp72tpycHBlj5OnpqW+//Vbt2rW7Zr7czvSEhobq7Nmz8vf3d/l2oGg7kJqhw6fO86wlAChk0tPTFRAQkK/P7yJzecvf3187d+50aJs+fbpWrFihL774QlWrVs11Pm9vb3l7e9+OElGEnTl/SQPnxmt1Yqq9rVXNspraNUIBvsXdWBkAwFXcGnoyMjK0b98++/uDBw8qPj5eQUFBuvvuuzVs2DAdPXpUc+bMkYeHh+rVq+cwf7ly5eTj43NNO5BXA+fGa92+NIe2dfvSNGDuNs3p08RNVQEAXMmtY3o2b96siIgIRUT8/h1VgwcPVkREhEaMGCFJSk5OVlJSkjtLhAUcSM3Q6sRUZf/hSm+2MVqdmKqDaZluqgwA4EqFZkzP7eKKa4K4s6zcc0K9Z/503ekze9+ntrXK3caKAAB/5IrP7yJz9xZQUCoH+d5wepUyDGgGgDsBoQeWV62sn1rVLCtPm82h3dNmU6uaZbmLCwDuEIQeQNLUrhFqUSPYoa1FjWBN7RrhpooAAK5WZG5ZBwpSgG9xzenTRAfTMnXoZCbP6QGAOxChB7hK1WDCDgDcqbi8BQAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALIHQAwAALMGtoWf16tWKjo5WxYoVZbPZtHjx4hv2X7hwoR544AGVLVtW/v7+atasmZYvX357igUAAEWaW0NPZmamwsPDNW3aNKf6r169Wg888IC+/vprbdmyRW3btlV0dLS2bdtWwJUCAICizmaMMe4uQpJsNpsWLVqkzp0752m+unXrqkuXLhoxYoRT/dPT0xUQEKCzZ8/K39//FioFAAC3mys+v4v0mJ6cnBydO3dOQUFB7i4FAAAUcsXcXUB+vPvuu8rIyNCTTz553T5ZWVnKysqyv09PT78dpQEAgEKmyJ7p+c9//qPRo0frs88+U7ly5a7bLzY2VgEBAfZXaGjobawSAAAUFkUy9MybN09PP/20PvvsM0VGRt6w77Bhw3T27Fn768iRI7epSgAAUJgUuctbc+fO1VNPPaV58+bpz3/+8037e3t7y9vb+zZUBgAACjO3hp6MjAzt27fP/v7gwYOKj49XUFCQ7r77bg0bNkxHjx7VnDlzJP1+SSsmJkaTJ09W06ZNlZKSIkkqUaKEAgIC3LINAACgaHDr5a3NmzcrIiJCERERkqTBgwcrIiLCfvt5cnKykpKS7P0/+OAD/fbbb+rXr58qVKhgfw0aNMgt9QMAgKKj0Dyn53bhOT0AABQ9ln9ODwAAgLMIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBIIPQAAwBKK5XWGTZs2af369UpJSZEklS9fXs2aNVOTJk1cXhwAAICrOB16Tpw4occee0zr1q3T3XffrZC
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"print(cleared_df)\n",
"cleared_df.head(5).plot.scatter(x=\"Country (or dependency)\", y=\"Population 2020\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Визуализация - Столбчатая диаграмма"
]
},
{
"cell_type": "code",
2024-10-22 18:54:39 +04:00
"execution_count": 10,
2024-09-21 09:46:46 +04:00
"metadata": {},
"outputs": [],
"source": [
"# plot = data.groupby([\"Pclass\", \"Survived\"]).size().unstack().plot.bar(color=[\"pink\", \"green\"])\n",
"# plot.legend([\"Not survived\", \"Survived\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Визуализация - Временные ряды"
]
},
{
"cell_type": "code",
2024-10-22 18:54:39 +04:00
"execution_count": 11,
2024-09-21 09:46:46 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Year Population Yearly % Yearly Median Fertility Density\n",
"0 2020 7,794,798,739 1.10% 83,000,320 31 2.47 52\n",
"1 2025 8,184,437,460 0.98% 77,927,744 32 2.54 55\n",
"2 2030 8,548,487,400 0.87% 72,809,988 33 2.62 57\n",
"3 2035 8,887,524,213 0.78% 67,807,363 34 2.70 60\n",
"4 2040 9,198,847,240 0.69% 62,264,605 35 2.77 62\n",
"5 2045 9,481,803,274 0.61% 56,591,207 35 2.85 64\n",
"6 2050 9,735,033,990 0.53% 50,646,143 36 2.95 65\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 7 entries, 0 to 6\n",
"Data columns (total 7 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Year 7 non-null int64 \n",
" 1 Population 7 non-null object \n",
" 2 Yearly % 7 non-null object \n",
" 3 Yearly 7 non-null object \n",
" 4 Median 7 non-null int64 \n",
" 5 Fertility 7 non-null float64\n",
" 6 Density 7 non-null int64 \n",
"dtypes: float64(1), int64(3), object(3)\n",
"memory usage: 524.0+ bytes\n",
"['Year' 'Population' 'Yearly %' 'Yearly' 'Median' 'Fertility' 'Density']\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAiwAAAHACAYAAACBGTONAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABYcklEQVR4nO3deXQUVd7G8W+ns4csAoEkEMKeQNi3COIIYzAwwKDiAqKyiKMDLsiAwIxsIiLqKAgKjrKjIIgCyghKEBwEZV/CHgQTIBDWrGTrrvcPXnsmJkAaknQneT7n9Dl09a3Kr4oi/VB17y2TYRgGIiIiIk7MxdEFiIiIiNyMAouIiIg4PQUWERERcXoKLCIiIuL0FFhERETE6SmwiIiIiNNTYBERERGnp8AiIiIiTk+BRURERJyeAouIiIg4vXIXWH744Qd69uxJSEgIJpOJlStX2r2NZcuW0aJFC7y9vQkLC+Ott94q/kJFRESkyMpdYMnIyKB58+a8//77t7T+N998Q79+/Xj22WeJi4vjgw8+4N1332XmzJnFXKmIiIgUlak8P/zQZDLx5Zdfcv/999uWZWdn849//IMlS5Zw5coVmjRpwtSpU+nUqRMAjz32GLm5uSxfvty2zowZM3jzzTdJSEjAZDKV8l6IiIhIubvCcjPPPfccW7duZenSpezbt4+HH36Yrl27cuzYMeBaoPH09My3jpeXF6dOneLXX391RMkiIiIVXoUKLAkJCcybN4/ly5dz9913U69ePUaMGEHHjh2ZN28eADExMXzxxRfExsZitVo5evQo//znPwFISkpyZPkiIiIVlqujCyhN+/fvx2Kx0LBhw3zLs7OzqVKlCgBPP/00x48fp0ePHuTm5uLn58eLL77IhAkTcHGpUPlORETEaVSowJKeno7ZbGbnzp2YzeZ8n1WqVAm41u9l6tSpvP7665w9e5bAwEBiY2MBqFu3bqnXLCIiIhUssLRs2RKLxUJycjJ33333DduazWZq1KgBwJIlS2jfvj2BgYGlUaaIiIj8TrkLLOnp6cTHx9venzhxgj179lC5cmUaNmxIv379ePLJJ/nnP/9Jy5YtOX/+PLGxsTRr1ozu3btz4cIFPv/8czp16kRWVpatz8umTZscuFciIiIVW7kb1rxx40Y6d+5cYHn//v2ZP38+ubm5vPbaayxcuJDTp09TtWpV7rzzTiZOnEjTpk25cOECPXv2ZP/+/RiGQfv27Zk8eTJRUVEO2BsRERGBchhYREREpPzRsBcRERFxegosIiIi4vTKRadbq9XKmTNn8PX11dT5IiIiZYRhGKSlpRESEnLTuc7KRWA5c+YMoaGhji5DREREbkFiYiI1a9a8YZtyEVh8fX2Bazvs5+fn4GpERESkKFJTUwkNDbV9j99IuQgsv90G8vPzU2AREREpY4rSnUOdbkVERMTpKbCIiIiI01NgEREREadXLvqwFJXFYiE3N9fRZUgJcXNzK/AUbhERKR8qRGAxDIOzZ89y5coVR5ciJSwgIICgoCDNxyMiUs5UiMDyW1ipVq0a3t7e+jIrhwzDIDMzk+TkZACCg4MdXJGIiBSnch9YLBaLLaxUqVLF0eVICfLy8gIgOTmZatWq6faQiEg5Ynen27S0NIYNG0ZYWBheXl506NCB7du3X7f9gAEDMJlMBV6RkZG2NhMmTCjweURExK3t0e/81mfF29u7WLYnzu23v2f1VRIRKV/sDiyDBw/mu+++Y9GiRezfv5/77ruP6OhoTp8+XWj76dOnk5SUZHslJiZSuXJlHn744XztIiMj87XbvHnzre3Rdeg2UMWgv2cRkfLJrltCV69eZcWKFaxatYo//OEPwLWrI1999RWzZs3itddeK7COv78//v7+tvcrV67k8uXLDBw4MH8hrq4EBQXdyj6IiIhIOWfXFZa8vDwsFguenp75lnt5eRX5isicOXOIjo4mLCws3/Jjx44REhJC3bp16devHwkJCdfdRnZ2NqmpqfleUjI6derEsGHDnGY7IiJSMdkVWHx9fWnfvj2TJk3izJkzWCwWFi9ezNatW0lKSrrp+mfOnOGbb75h8ODB+ZZHRUUxf/581q5dy6xZszhx4gR33303aWlphW5nypQptis3/v7+5fZJzf/b/8fd3Z369evz6quvkpeX5+jSrmvjxo2YTKYCQ8i/+OILJk2a5JiiRESkzLO7D8uiRYswDIMaNWrg4eHBe++9R9++fXFxufmmFixYQEBAAPfff3++5d26dePhhx+mWbNmxMTE8O9//5srV66wbNmyQrczZswYUlJSbK/ExER7d6PM6Nq1K0lJSRw7doy//e1vTJgwgbfeesvRZdmtcuXKRXoap4iIOJ/k1Cz2nbri0BrsDiz16tVj06ZNpKenk5iYyLZt28jNzaVu3bo3XM8wDObOncsTTzyBu7v7DdsGBATQsGFD4uPjC/3cw8PD9mTm8v6EZg8PD4KCgggLC+Ovf/0r0dHRrF69msuXL/Pkk09yxx134O3tTbdu3Th27Jhtvfnz5xMQEMDKlStp0KABnp6exMTE5At3AwYMKBAehw0bRqdOna5bz6JFi2jTpg2+vr4EBQXx2GOP2eY+OXnyJJ07dwbgjjvuwGQyMWDAAKDgLaGi1r9u3ToaNWpEpUqVbOFNRERKR06elQ83Hafz2xt5fslusvMsDqvllp8l5OPjQ3BwMJcvX2bdunX06tXrhu03bdpEfHw8Tz311E23nZ6ezvHjx0ts8i/DMMjMyXPIyzCM26rdy8uLnJwcBgwYwI4dO1i9ejVbt27FMAz+9Kc/5RvOm5mZyeTJk1m4cCE//vgjV65coU+fPrf183Nzc5k0aRJ79+5l5cqVnDx50hZKQkNDWbFiBQBHjhwhKSmJ6dOnF7qdotb/9ttvs2jRIn744QcSEhIYMWLEbdUvIiJF8/3hZGKm/cCUbw6TkWMhwMuNC+k5DqvH7onj1q1bh2EYhIeHEx8fz8iRI4mIiLCN+hkzZgynT59m4cKF+dabM2cOUVFRNGnSpMA2R4wYQc+ePQkLC+PMmTOMHz8es9lM3759b3G3buxqroXG49aVyLZv5uCrMXi72z9fn2EYxMbGsm7dOrp168bKlSv58ccf6dChAwCffPIJoaGhrFy50jZkPDc3l5kzZxIVFQVcuyXXqFEjtm3bRrt27W6p/kGDBtn+XLduXd577z3atm1Leno6lSpVonLlygBUq1aNgICAQrdx7NgxVq9eXaT6Z8+eTb169QB47rnnePXVV2+pbhERKZpfzqcz6euDfH/kPABVK3kwqms4vVvVxMXFcVNH2P3NmZKSwpgxYzh16hSVK1emd+/eTJ48GTc3NwCSkpIKjPBJSUlhxYoV1/3f9qlTp+jbty8XL14kMDCQjh078tNPPxEYGHgLu1S+fP3111SqVInc3FysViuPPfYYDz74IF9//bUtiABUqVKF8PBwDh06ZFvm6upK27Ztbe8jIiIICAjg0KFDtxxYdu7cyYQJE9i7dy+XL1/GarUCkJCQQOPGjYu0jUOHDuHq6nrT+r29vW1hBa5Nt//b7ScRESleaVm5zNwQz9wfT5BrMXAzmxh4Vx2e/2N9fD3dHF2e/YHlkUce4ZFHHrnu5/Pnzy+wzN/fn8zMzOuus3TpUnvLuC1ebmYOvhpTqj/zf3+2PTp37sysWbNwd3cnJCQEV1dXVq9eXSy1uLi4FLhFdaMZYjMyMoiJiSEmJoZPPvmEwMBAEhISiImJISen+C8T/haCf2MymW77lpqIiORntRp8sfs0U9ce5nxaNgCdwgMZ26Mx9QIrObi6/yr3zxIqjMlkuqXbMo7g4+ND/fr18y1r1KgReXl5/Pzzz7ZbKhcvXuTIkSP5rnLk5eWxY8cO29WUI0eOcOXKFRo1agRAYGAgcXFx+ba9Z8+eAkHhN4cPH+bixYu88cYbtqHkO3bsyNfmtw7VFsv1O2YVtX4RESlZexKvMH71AfYmXgGgTlUfxvZoxB8jqju2sELccqdbcZwGDRrQq1cvnn76aTZv3szevXt5/PHHqVGjRr7Oz25ubjz//PP
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from datetime import datetime\n",
"import matplotlib.dates as md\n",
"\n",
"ts = pd.read_csv(\"data/world-population-forcast-2020-2050.csv\", encoding=\"ISO-8859-1\")\n",
"print(ts)\n",
"ts.iloc[:, 1] = ts.iloc[:, 1].apply(lambda row: int(\"\".join(str(row).split(\",\"))))\n",
"ts.info()\n",
"\n",
"print(ts.columns.values)\n",
"plot = ts.plot.line(x=\"Year\", y=\"Population\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}