{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Работа с NumPy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "matrix = np.array([[4, 5, 0], [10, 9, 9]])\n",
    "print(\"matrix = \\n\", matrix, \"\\n\")\n",
    "\n",
    "tmatrix = matrix.T\n",
    "print(\"tmatrix = \\n\", tmatrix, \"\\n\")\n",
    "\n",
    "# Flatten the 2-D matrix into a 1-D vector.\n",
    "vector = np.ravel(matrix)\n",
    "print(\"vector = \\n\", vector, \"\\n\")\n",
    "\n",
    "# -1 lets NumPy derive the row count, so the column shape follows the data.\n",
    "tvector = np.reshape(vector, (-1, 1))\n",
    "print(\"tvector = \\n\", tvector, \"\\n\")\n",
    "\n",
    "# list() only unpacks the first axis: the result is a list of 1-D arrays.\n",
    "list_matrix = list(matrix)\n",
    "print(\"list_matrix = \\n\", list_matrix, \"\\n\")\n",
    "\n",
    "str_matrix = str(matrix)\n",
    "print(\"matrix as str = \\n\", str_matrix, \"\\n\")\n",
    "\n",
    "print(\"matrix type is\", type(matrix), \"\\n\")\n",
    "\n",
    "print(\"vector type is\", type(vector), \"\\n\")\n",
    "\n",
    "print(\"list_matrix type is\", type(list_matrix), \"\\n\")\n",
    "\n",
    "print(\"str_matrix type is\", type(str_matrix), \"\\n\")\n",
    "\n",
    "formatted_vector = \"; \".join(map(str, vector))\n",
    "print(\"formatted_vector = \\n\", formatted_vector, \"\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Работа с Pandas DataFrame\n",
    "\n",
    "https://pandas.pydata.org/docs/user_guide/10min.html"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Работа с данными - чтение и запись CSV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# PassengerId becomes the row index, so the .loc lookups below are by passenger id.\n",
    "df = pd.read_csv(\"data/titanic.csv\", index_col=\"PassengerId\")\n",
    "\n",
    "df.to_csv(\"test.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Работа с данными - основные команды"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.info()\n",
    "\n",
    "print(df.describe().transpose())\n",
    "\n",
    "# drop() returns a new frame; df itself keeps all of its columns.\n",
    "cleared_df = df.drop(columns=[\"Name\", \"Ticket\", \"Embarked\"])\n",
    "print(cleared_df.head())\n",
    "print(cleared_df.tail())\n",
    "\n",
    "sorted_df = cleared_df.sort_values(by=\"Age\")\n",
    "print(sorted_df.head())\n",
    "print(sorted_df.tail())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Работа с данными - работа с элементами"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A single column.\n",
    "print(df[\"Age\"])\n",
    "\n",
    "# .loc selects by index label (here: PassengerId) and column name.\n",
    "print(df.loc[100])\n",
    "\n",
    "print(df.loc[100, \"Name\"])\n",
    "\n",
    "# Label slices with .loc include both end points.\n",
    "print(df.loc[100:200, [\"Age\", \"Name\"]])\n",
    "\n",
    "# Plain integer slicing on the frame selects rows by position.\n",
    "print(df[0:3])\n",
    "\n",
    "# .iloc selects by integer position for both rows and columns.\n",
    "print(df.iloc[0])\n",
    "\n",
    "print(df.iloc[3:5, 0:2])\n",
    "\n",
    "print(df.iloc[[3, 4], [0, 1]])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Работа с данными - отбор и группировка"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "s_values = df[\"Sex\"].unique()\n",
    "print(s_values)\n",
    "\n",
    "# Count the rows for every distinct value by filtering with a boolean mask.\n",
    "s_total = 0\n",
    "for s_value in s_values:\n",
    "    count = df[df[\"Sex\"] == s_value].shape[0]\n",
    "    s_total += count\n",
    "    print(s_value, \"count =\", count)\n",
    "print(\"Total count = \", s_total)\n",
    "\n",
    "# size() counts the rows per (Pclass, Survived) pair; reset_index turns the result into a flat table.\n",
    "print(df.groupby([\"Pclass\", \"Survived\"]).size().reset_index(name=\"Count\"))  # type: ignore"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Визуализация - Исходные данные"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# dropna() returns a new frame, so df is left untouched and this cell can be re-run safely.\n",
    "data = df[[\"Pclass\", \"Survived\", \"Age\"]].dropna(subset=[\"Age\"])\n",
    "print(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Визуализация - Сводка пяти чисел\n",
    "\n",
    ""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def q1(x):\n",
    "    \"\"\"Return the first quartile (0.25 quantile) of x.\"\"\"\n",
    "    return x.quantile(0.25)\n",
    "\n",
    "\n",
    "# median = quantile(0.5)\n",
    "def q2(x):\n",
    "    \"\"\"Return the second quartile (0.5 quantile) of x.\"\"\"\n",
    "    return x.quantile(0.5)\n",
    "\n",
    "\n",
    "def q3(x):\n",
    "    \"\"\"Return the third quartile (0.75 quantile) of x.\"\"\"\n",
    "    return x.quantile(0.75)\n",
    "\n",
    "\n",
    "def iqr(x):\n",
    "    \"\"\"Return the interquartile range of x: q3(x) - q1(x).\"\"\"\n",
    "    return q3(x) - q1(x)\n",
    "\n",
    "\n",
    "def low_iqr(x):\n",
    "    \"\"\"Return the lower fence q1 - 1.5 * IQR, clamped so it is never below 0.\"\"\"\n",
    "    return max(0, q1(x) - 1.5 * iqr(x))\n",
    "\n",
    "\n",
    "def high_iqr(x):\n",
    "    \"\"\"Return the upper fence q3 + 1.5 * IQR.\"\"\"\n",
    "    return q3(x) + 1.5 * iqr(x)\n",
    "\n",
    "\n",
    "# One groupby shared by both summaries; the helper names become the column labels.\n",
    "age_by_class = data[[\"Pclass\", \"Age\"]].groupby([\"Pclass\"])\n",
    "\n",
    "quantiles = age_by_class.aggregate([\"min\", q1, q2, \"median\", q3, \"max\"])\n",
    "print(quantiles)\n",
    "\n",
    "iqrs = age_by_class.aggregate([low_iqr, iqr, high_iqr])\n",
    "print(iqrs)\n",
    "\n",
    "data.boxplot(column=\"Age\", by=\"Pclass\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Визуализация - Гистограмма"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.plot.hist(column=[\"Age\"], bins=80)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Визуализация - Точечная диаграмма"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.plot.scatter(x=\"Age\", y=\"Sex\")\n",
    "\n",
    "df.plot.scatter(x=\"Pclass\", y=\"Age\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Визуализация - Столбчатая диаграмма"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# unstack() moves Survived into the columns, giving one bar per Survived value within each Pclass.\n",
    "plot = data.groupby([\"Pclass\", \"Survived\"]).size().unstack().plot.bar(color=[\"pink\", \"green\"])\n",
    "plot.legend([\"Not survived\", \"Survived\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Визуализация - Временные ряды"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "import matplotlib.dates as md\n",
    "\n",
    "ts = pd.read_csv(\"data/dollar.csv\")\n",
    "# Parse the whole column in one vectorised call instead of calling strptime row by row.\n",
    "ts[\"date\"] = pd.to_datetime(ts[\"my_date\"], format=\"%d.%m.%Y\")\n",
    "ts.info()\n",
    "\n",
    "print(ts)\n",
    "\n",
    "plot = ts.plot.line(x=\"date\", y=\"my_value\")\n",
    "# One major tick every 10 days, labelled as day.month.year and rotated to stay readable.\n",
    "plot.xaxis.set_major_locator(md.DayLocator(interval=10))\n",
    "plot.xaxis.set_major_formatter(md.DateFormatter(\"%d.%m.%Y\"))\n",
    "plot.tick_params(axis=\"x\", labelrotation=90)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}