738 lines
132 KiB
Plaintext
738 lines
132 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Работа с NumPy"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"matrix = \n",
|
||
" [[4 5 0]\n",
|
||
" [9 9 9]] \n",
|
||
"\n",
|
||
"tmatrix = \n",
|
||
" [[4 9]\n",
|
||
" [5 9]\n",
|
||
" [0 9]] \n",
|
||
"\n",
|
||
"vector = \n",
|
||
" [4 5 0 9 9 9] \n",
|
||
"\n",
|
||
"tvector = \n",
|
||
" [[4]\n",
|
||
" [5]\n",
|
||
" [0]\n",
|
||
" [9]\n",
|
||
" [9]\n",
|
||
" [9]] \n",
|
||
"\n",
|
||
"list_matrix = \n",
|
||
" [array([4, 5, 0]), array([9, 9, 9])] \n",
|
||
"\n",
|
||
"matrix as str = \n",
|
||
" [[4 5 0]\n",
|
||
" [9 9 9]] \n",
|
||
"\n",
|
||
"matrix type is <class 'numpy.ndarray'> \n",
|
||
"\n",
|
||
"vector type is <class 'numpy.ndarray'> \n",
|
||
"\n",
|
||
"list_matrix type is <class 'list'> \n",
|
||
"\n",
|
||
"str_matrix type is <class 'str'> \n",
|
||
"\n",
|
||
"formatted_vector = \n",
|
||
" 4; 5; 0; 9; 9; 9 \n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import numpy as np\n",
|
||
"\n",
|
||
"# Build one 2x3 matrix, derive every representation first, then report them in order.\n",
|
||
"matrix = np.array([[4, 5, 0], [9, 9, 9]])\n",
|
||
"tmatrix = matrix.T\n",
|
||
"vector = np.ravel(matrix)\n",
|
||
"tvector = np.reshape(vector, (6, 1))\n",
|
||
"list_matrix = list(matrix)  # a Python list holding one ndarray per row\n",
|
||
"str_matrix = str(matrix)\n",
|
||
"formatted_vector = \"; \".join(map(str, vector))\n",
|
||
"\n",
|
||
"# The spaces inside each f-string reproduce print()'s default separator between arguments.\n",
|
||
"print(f\"matrix = \\n {matrix} \\n\")\n",
|
||
"print(f\"tmatrix = \\n {tmatrix} \\n\")\n",
|
||
"print(f\"vector = \\n {vector} \\n\")\n",
|
||
"print(f\"tvector = \\n {tvector} \\n\")\n",
|
||
"print(f\"list_matrix = \\n {list_matrix} \\n\")\n",
|
||
"print(f\"matrix as str = \\n {str_matrix} \\n\")\n",
|
||
"\n",
|
||
"for label, value in (\n",
|
||
"    (\"matrix\", matrix),\n",
|
||
"    (\"vector\", vector),\n",
|
||
"    (\"list_matrix\", list_matrix),\n",
|
||
"    (\"str_matrix\", str_matrix),\n",
|
||
"):\n",
|
||
"    print(f\"{label} type is {type(value)} \\n\")\n",
|
||
"\n",
|
||
"print(f\"formatted_vector = \\n {formatted_vector} \\n\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Работа с Pandas DataFrame\n",
|
||
"\n",
|
||
"https://pandas.pydata.org/docs/user_guide/10min.html"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Работа с данными - чтение и запись CSV"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"# Read the population table using its \"no\" column as the index, then write it back out as CSV.\n",
|
||
"population_csv = \"data/world-population-by-country-2020.csv\"\n",
|
||
"df = pd.read_csv(population_csv, index_col=\"no\")\n",
|
||
"\n",
|
||
"df.to_csv(\"test.csv\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Работа с данными - основные команды"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" Country (or dependency) Population 2020 Yearly Change Net Change\n",
|
||
"no \n",
|
||
"1 China 1439323776 0.39 5540090\n",
|
||
"2 India 1380004385 0.99 13586631\n",
|
||
"3 United States 331002651 0.59 1937734\n",
|
||
"4 Indonesia 273523615 1.07 2898047\n",
|
||
"5 Pakistan 220892340 2.00 4327022\n",
|
||
" Country (or dependency) Population 2020 Yearly Change Net Change\n",
|
||
"no \n",
|
||
"231 Montserrat 4992 0.06 3\n",
|
||
"232 Falkland Islands 3480 3.05 103\n",
|
||
"233 Niue 1626 0.68 11\n",
|
||
"234 Tokelau 1357 1.27 17\n",
|
||
"235 Holy See 801 0.25 2\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Keep only the columns used in the rest of the notebook; .copy() makes an\n",
|
||
"# independent frame so the conversions below never write into `df`.\n",
|
||
"kept_columns = [\n",
|
||
"    \"Country (or dependency)\",\n",
|
||
"    \"Population 2020\",\n",
|
||
"    \"Yearly Change\",\n",
|
||
"    \"Net Change\",\n",
|
||
"]\n",
|
||
"cleared_df = df[kept_columns].copy()\n",
|
||
"\n",
|
||
"# The CSV stores counts as strings with thousands separators (\"1,439,323,776\")\n",
|
||
"# and the yearly change as a percent string (\"0.39%\"); convert both to numbers.\n",
|
||
"for count_column in (\"Population 2020\", \"Net Change\"):\n",
|
||
"    cleared_df[count_column] = (\n",
|
||
"        cleared_df[count_column].str.replace(\",\", \"\", regex=False).astype(\"int64\")\n",
|
||
"    )\n",
|
||
"cleared_df[\"Yearly Change\"] = cleared_df[\"Yearly Change\"].str.rstrip(\"%\").astype(float)\n",
|
||
"\n",
|
||
"# Largest population first; ties fall back to net change, then country name.\n",
|
||
"sorted_df = cleared_df.sort_values(\n",
|
||
"    [\"Population 2020\", \"Net Change\", \"Country (or dependency)\"],\n",
|
||
"    ascending=[False, False, True],\n",
|
||
")\n",
|
||
"print(sorted_df.head())\n",
|
||
"print(sorted_df.tail())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Работа с данными - работа с элементами"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"no\n",
|
||
"1 China\n",
|
||
"2 India\n",
|
||
"3 United States\n",
|
||
"4 Indonesia\n",
|
||
"5 Pakistan\n",
|
||
" ... \n",
|
||
"231 Montserrat\n",
|
||
"232 Falkland Islands\n",
|
||
"233 Niue\n",
|
||
"234 Tokelau\n",
|
||
"235 Holy See\n",
|
||
"Name: Country (or dependency), Length: 235, dtype: object\n",
|
||
"Country (or dependency) Israel\n",
|
||
"Population 2020 8,655,535\n",
|
||
"Yearly Change 1.60%\n",
|
||
"Net Change 136,158\n",
|
||
"Density (P/Km²) 400\n",
|
||
"Land Area (Km²) 21,640\n",
|
||
"Migrants (net) 10,000\n",
|
||
"Fert. Rate 3\n",
|
||
"Med. Age 30\n",
|
||
"Urban Pop % 93%\n",
|
||
"World Share 0.11%\n",
|
||
"Name: 100, dtype: object\n",
|
||
"Israel\n",
|
||
" Country (or dependency) Population 2020\n",
|
||
"no \n",
|
||
"100 Israel 8,655,535\n",
|
||
"101 Switzerland 8,654,622\n",
|
||
"102 Togo 8,278,724\n",
|
||
"103 Sierra Leone 7,976,983\n",
|
||
"104 Hong Kong 7,496,981\n",
|
||
".. ... ...\n",
|
||
"196 St. Vincent & Grenadines 110,940\n",
|
||
"197 Aruba 106,766\n",
|
||
"198 Tonga 105,695\n",
|
||
"199 U.S. Virgin Islands 104,425\n",
|
||
"200 Seychelles 98,347\n",
|
||
"\n",
|
||
"[101 rows x 2 columns]\n",
|
||
" Country (or dependency) Population 2020 Yearly Change Net Change \\\n",
|
||
"no \n",
|
||
"1 China 1,439,323,776 0.39% 5,540,090 \n",
|
||
"2 India 1,380,004,385 0.99% 13,586,631 \n",
|
||
"3 United States 331,002,651 0.59% 1,937,734 \n",
|
||
"\n",
|
||
" Density (P/Km²) Land Area (Km²) Migrants (net) Fert. Rate Med. Age \\\n",
|
||
"no \n",
|
||
"1 153 9,388,211 -348,399 1.7 38 \n",
|
||
"2 464 2,973,190 -532,687 2.2 28 \n",
|
||
"3 36 9,147,420 954,806 1.8 38 \n",
|
||
"\n",
|
||
" Urban Pop % World Share \n",
|
||
"no \n",
|
||
"1 61% 18.47% \n",
|
||
"2 35% 17.70% \n",
|
||
"3 83% 4.25% \n",
|
||
"Country (or dependency) China\n",
|
||
"Population 2020 1,439,323,776\n",
|
||
"Yearly Change 0.39%\n",
|
||
"Net Change 5,540,090\n",
|
||
"Density (P/Km²) 153\n",
|
||
"Land Area (Km²) 9,388,211\n",
|
||
"Migrants (net) -348,399\n",
|
||
"Fert. Rate 1.7\n",
|
||
"Med. Age 38\n",
|
||
"Urban Pop % 61%\n",
|
||
"World Share 18.47%\n",
|
||
"Name: 1, dtype: object\n",
|
||
" Country (or dependency) Population 2020\n",
|
||
"no \n",
|
||
"3 United States 331,002,651\n",
|
||
"4 Indonesia 273,523,615\n",
|
||
"5 Pakistan 220,892,340\n",
|
||
" Country (or dependency) Yearly Change\n",
|
||
"no \n",
|
||
"4 Indonesia 1.07%\n",
|
||
"5 Pakistan 2.00%\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"country_col = \"Country (or dependency)\"\n",
|
||
"population_col = \"Population 2020\"\n",
|
||
"\n",
|
||
"# Label-based access: a whole column, one row, one cell, then a row/column slice\n",
|
||
"# (.loc slices include both end labels).\n",
|
||
"print(df[country_col])\n",
|
||
"\n",
|
||
"print(df.loc[100])\n",
|
||
"\n",
|
||
"print(df.loc[100, country_col])\n",
|
||
"\n",
|
||
"print(df.loc[100:200, [country_col, population_col]])\n",
|
||
"\n",
|
||
"# Position-based access: leading rows, the first row, then slices and explicit positions.\n",
|
||
"print(df[:3])\n",
|
||
"\n",
|
||
"print(df.iloc[0])\n",
|
||
"\n",
|
||
"print(df.iloc[2:5, :2])\n",
|
||
"\n",
|
||
"print(df.iloc[[3, 4], [0, 2]])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Работа с данными - отбор и группировка"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" Population 2020 Yearly Change Net Change \\\n",
|
||
"Country (or dependency) \n",
|
||
"China 1439323776 0.39 5540090 \n",
|
||
"India 1380004385 0.99 13586631 \n",
|
||
"United States 331002651 0.59 1937734 \n",
|
||
"Indonesia 273523615 1.07 2898047 \n",
|
||
"Pakistan 220892340 2.00 4327022 \n",
|
||
"... ... ... ... \n",
|
||
"Montserrat 4992 0.06 3 \n",
|
||
"Falkland Islands 3480 3.05 103 \n",
|
||
"Niue 1626 0.68 11 \n",
|
||
"Tokelau 1357 1.27 17 \n",
|
||
"Holy See 801 0.25 2 \n",
|
||
"\n",
|
||
" Capital Continent \n",
|
||
"Country (or dependency) \n",
|
||
"China Beijing Asia \n",
|
||
"India New Delhi Asia \n",
|
||
"United States Washington, D.C. North America \n",
|
||
"Indonesia Jakarta Asia \n",
|
||
"Pakistan Islamabad Asia \n",
|
||
"... ... ... \n",
|
||
"Montserrat Brades North America \n",
|
||
"Falkland Islands Stanley South America \n",
|
||
"Niue Alofi Oceania \n",
|
||
"Tokelau Nukunonu Oceania \n",
|
||
"Holy See NaN NaN \n",
|
||
"\n",
|
||
"[235 rows x 5 columns]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Capital / continent table, indexed by country name.\n",
|
||
"df2 = pd.read_csv(\n",
|
||
"    \"data/countries-continents-capitals.csv\",\n",
|
||
"    index_col=\"Country/Territory\",\n",
|
||
"    encoding=\"ISO-8859-1\",\n",
|
||
")\n",
|
||
"\n",
|
||
"# Join on the country name; countries absent from df2 keep NaN in the added columns.\n",
|
||
"countries_by_name = cleared_df.set_index(\"Country (or dependency)\")\n",
|
||
"extended_df = countries_by_name.join(df2)\n",
|
||
"print(extended_df)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Визуализация - Исходные данные"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" Population 2020 Yearly Change Continent\n",
|
||
"Country (or dependency) \n",
|
||
"China 1439323776 0.39 Asia\n",
|
||
"India 1380004385 0.99 Asia\n",
|
||
"United States 331002651 0.59 North America\n",
|
||
"Indonesia 273523615 1.07 Asia\n",
|
||
"Pakistan 220892340 2.00 Asia\n",
|
||
"... ... ... ...\n",
|
||
"Montserrat 4992 0.06 North America\n",
|
||
"Falkland Islands 3480 3.05 South America\n",
|
||
"Niue 1626 0.68 Oceania\n",
|
||
"Tokelau 1357 1.27 Oceania\n",
|
||
"Holy See 801 0.25 NaN\n",
|
||
"\n",
|
||
"[235 rows x 3 columns]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Columns used by the plots below; rows without a population value are dropped.\n",
|
||
"plot_columns = [\"Population 2020\", \"Yearly Change\", \"Continent\"]\n",
|
||
"data = extended_df[plot_columns].dropna(subset=[\"Population 2020\"])\n",
|
||
"print(data)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Визуализация - Сводка пяти чисел\n",
|
||
"\n",
|
||
"<img src=\"assets/quantile.png\" width=\"400\" style=\"background-color: white\">"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" Population 2020 \\\n",
|
||
" min q1 q2 median \n",
|
||
"Continent \n",
|
||
"Africa 98347 2509845.75 13042506.5 13042506.5 \n",
|
||
"Asia 437479 5985985.50 18138682.5 18138682.5 \n",
|
||
"Europe 33691 1326535.00 5459642.0 5459642.0 \n",
|
||
"North America 4992 67288.00 395436.0 395436.0 \n",
|
||
"Oceania 1357 27368.25 144112.0 144112.0 \n",
|
||
"South America 3480 1458346.50 14658037.5 14658037.5 \n",
|
||
"\n",
|
||
" \n",
|
||
" q3 max \n",
|
||
"Continent \n",
|
||
"Africa 31118563.75 206139589 \n",
|
||
"Asia 52054338.75 1439323776 \n",
|
||
"Europe 10423054.00 145934462 \n",
|
||
"North America 6589966.75 331002651 \n",
|
||
"Oceania 488471.75 25499884 \n",
|
||
"South America 31837875.50 212559417 \n",
|
||
" Population 2020 \n",
|
||
" low_iqr iqr high_iqr\n",
|
||
"Continent \n",
|
||
"Africa 0 28608718.00 7.403164e+07\n",
|
||
"Asia 0 46068353.25 1.211569e+08\n",
|
||
"Europe 0 9096519.00 2.406783e+07\n",
|
||
"North America 0 6522678.75 1.637398e+07\n",
|
||
"Oceania 0 461103.50 1.180127e+06\n",
|
||
"South America 0 30379529.00 7.740717e+07\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"<Axes: title={'center': 'Population 2020'}, xlabel='Continent'>"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"def q1(x):\n",
|
||
"    \"\"\"Return the first quartile (0.25 quantile) of `x`.\"\"\"\n",
|
||
"    return x.quantile(q=0.25)\n",
|
||
"\n",
|
||
"\n",
|
||
"def q2(x):\n",
|
||
"    \"\"\"Return the second quartile (0.5 quantile) of `x`, i.e. its median.\"\"\"\n",
|
||
"    return x.quantile(q=0.5)\n",
|
||
"\n",
|
||
"\n",
|
||
"def q3(x):\n",
|
||
"    \"\"\"Return the third quartile (0.75 quantile) of `x`.\"\"\"\n",
|
||
"    return x.quantile(q=0.75)\n",
|
||
"\n",
|
||
"\n",
|
||
"def iqr(x):\n",
|
||
"    \"\"\"Return the interquartile range of `x`: q3(x) minus q1(x).\"\"\"\n",
|
||
"    upper_quartile = q3(x)\n",
|
||
"    lower_quartile = q1(x)\n",
|
||
"    return upper_quartile - lower_quartile\n",
|
||
"\n",
|
||
"\n",
|
||
"def low_iqr(x):\n",
|
||
"    \"\"\"Return q1(x) - 1.5 * iqr(x), floored at 0.\"\"\"\n",
|
||
"    lower_fence = q1(x) - 1.5 * iqr(x)\n",
|
||
"    return max(0, lower_fence)\n",
|
||
"\n",
|
||
"\n",
|
||
"def high_iqr(x):\n",
|
||
"    \"\"\"Return q3(x) + 1.5 * iqr(x).\"\"\"\n",
|
||
"    upper_quartile = q3(x)\n",
|
||
"    return upper_quartile + 1.5 * iqr(x)\n",
|
||
"\n",
|
||
"\n",
|
||
"# Both summaries use the same per-continent grouping, so build it once.\n",
|
||
"population_by_continent = data[[\"Continent\", \"Population 2020\"]].groupby([\"Continent\"])\n",
|
||
"\n",
|
||
"# Five-number summary; q2 and \"median\" are both listed so the output shows they match.\n",
|
||
"quantiles = population_by_continent.aggregate([\"min\", q1, q2, \"median\", q3, \"max\"])\n",
|
||
"print(quantiles)\n",
|
||
"\n",
|
||
"iqrs = population_by_continent.aggregate([low_iqr, iqr, high_iqr])\n",
|
||
"print(iqrs)\n",
|
||
"\n",
|
||
"data.boxplot(column=\"Population 2020\", by=\"Continent\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Визуализация - Гистограмма"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"<Axes: ylabel='Frequency'>"
|
||
]
|
||
},
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Number of histogram bins, named so it is easy to tune.\n",
|
||
"HIST_BINS = 80\n",
|
||
"data.plot.hist(column=[\"Population 2020\"], bins=HIST_BINS)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Визуализация - Точечная диаграмма"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" Country (or dependency) Population 2020 Yearly Change Net Change\n",
|
||
"no \n",
|
||
"1 China 1439323776 0.39 5540090\n",
|
||
"2 India 1380004385 0.99 13586631\n",
|
||
"3 United States 331002651 0.59 1937734\n",
|
||
"4 Indonesia 273523615 1.07 2898047\n",
|
||
"5 Pakistan 220892340 2.00 4327022\n",
|
||
".. ... ... ... ...\n",
|
||
"231 Montserrat 4992 0.06 3\n",
|
||
"232 Falkland Islands 3480 3.05 103\n",
|
||
"233 Niue 1626 0.68 11\n",
|
||
"234 Tokelau 1357 1.27 17\n",
|
||
"235 Holy See 801 0.25 2\n",
|
||
"\n",
|
||
"[235 rows x 4 columns]\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"<Axes: xlabel='Country (or dependency)', ylabel='Population 2020'>"
|
||
]
|
||
},
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"print(cleared_df)\n",
|
||
"# Plot only the first five rows of cleared_df.\n",
|
||
"first_five = cleared_df.head(5)\n",
|
||
"first_five.plot.scatter(x=\"Country (or dependency)\", y=\"Population 2020\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Визуализация - Столбчатая диаграмма"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# plot = data.groupby([\"Pclass\", \"Survived\"]).size().unstack().plot.bar(color=[\"pink\", \"green\"])\n",
|
||
"# plot.legend([\"Not survived\", \"Survived\"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Визуализация - Временные ряды"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" Year Population Yearly % Yearly Median Fertility Density\n",
|
||
"0 2020 7,794,798,739 1.10% 83,000,320 31 2.47 52\n",
|
||
"1 2025 8,184,437,460 0.98% 77,927,744 32 2.54 55\n",
|
||
"2 2030 8,548,487,400 0.87% 72,809,988 33 2.62 57\n",
|
||
"3 2035 8,887,524,213 0.78% 67,807,363 34 2.70 60\n",
|
||
"4 2040 9,198,847,240 0.69% 62,264,605 35 2.77 62\n",
|
||
"5 2045 9,481,803,274 0.61% 56,591,207 35 2.85 64\n",
|
||
"6 2050 9,735,033,990 0.53% 50,646,143 36 2.95 65\n",
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"RangeIndex: 7 entries, 0 to 6\n",
|
||
"Data columns (total 7 columns):\n",
|
||
" # Column Non-Null Count Dtype \n",
|
||
"--- ------ -------------- ----- \n",
|
||
" 0 Year 7 non-null int64 \n",
|
||
" 1 Population 7 non-null object \n",
|
||
" 2 Yearly % 7 non-null object \n",
|
||
" 3 Yearly 7 non-null object \n",
|
||
" 4 Median 7 non-null int64 \n",
|
||
" 5 Fertility 7 non-null float64\n",
|
||
" 6 Density 7 non-null int64 \n",
|
||
"dtypes: float64(1), int64(3), object(3)\n",
|
||
"memory usage: 524.0+ bytes\n",
|
||
"['Year' 'Population' 'Yearly %' 'Yearly' 'Median' 'Fertility' 'Density']\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"from datetime import datetime\n",
|
||
"import matplotlib.dates as md\n",
|
||
"\n",
|
||
"ts = pd.read_csv(\"data/world-population-forcast-2020-2050.csv\", encoding=\"ISO-8859-1\")\n",
|
||
"print(ts)\n",
|
||
"\n",
|
||
"# Replace the whole column instead of writing through `ts.iloc[:, 1] = ...`:\n",
|
||
"# the positional assignment stores the converted values in the existing\n",
|
||
"# object-dtype column (ts.info() kept reporting \"object\" for Population),\n",
|
||
"# whereas assigning a new Series yields a real int64 column.\n",
|
||
"population_column = ts.columns[1]\n",
|
||
"ts[population_column] = (\n",
|
||
"    ts[population_column].astype(str).str.replace(\",\", \"\", regex=False).astype(\"int64\")\n",
|
||
")\n",
|
||
"ts.info()\n",
|
||
"\n",
|
||
"print(ts.columns.values)\n",
|
||
"plot = ts.plot.line(x=\"Year\", y=\"Population\")"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": ".venv",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.12.4"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|