MII/mai/lab1.ipynb

2705 lines
302 KiB
Plaintext
Raw Permalink Normal View History

2024-12-14 15:49:48 +04:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Лабораторная работа 1\n",
"\n",
"Вариант - 11\n",
"\n",
"Датасет - цены на бриллианты"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Загрузка и сохранение данных"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.read_csv(\"data/Diamonds Prices2022.csv\")\n",
"df.to_csv(\"data/Diamonds Prices2022 updated.csv\", index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Получение сведений о датафрейме с данными"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. Общая информация о датафрейме"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 53943 entries, 0 to 53942\n",
"Data columns (total 11 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 id 53943 non-null int64 \n",
" 1 carat 53943 non-null float64\n",
" 2 cut 53943 non-null object \n",
" 3 color 53943 non-null object \n",
" 4 clarity 53943 non-null object \n",
" 5 depth 53943 non-null float64\n",
" 6 table 53943 non-null float64\n",
" 7 price 53943 non-null int64 \n",
" 8 x 53943 non-null float64\n",
" 9 y 53943 non-null float64\n",
" 10 z 53943 non-null float64\n",
"dtypes: float64(6), int64(2), object(3)\n",
"memory usage: 4.5+ MB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"2. Статистическая информация"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>carat</th>\n",
" <th>depth</th>\n",
" <th>table</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>53943.000000</td>\n",
" <td>53943.000000</td>\n",
" <td>53943.000000</td>\n",
" <td>53943.000000</td>\n",
" <td>53943.000000</td>\n",
" <td>53943.000000</td>\n",
" <td>53943.000000</td>\n",
" <td>53943.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>26972.000000</td>\n",
" <td>0.797935</td>\n",
" <td>61.749322</td>\n",
" <td>57.457251</td>\n",
" <td>3932.734294</td>\n",
" <td>5.731158</td>\n",
" <td>5.734526</td>\n",
" <td>3.538730</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>15572.147122</td>\n",
" <td>0.473999</td>\n",
" <td>1.432626</td>\n",
" <td>2.234549</td>\n",
" <td>3989.338447</td>\n",
" <td>1.121730</td>\n",
" <td>1.142103</td>\n",
" <td>0.705679</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>0.200000</td>\n",
" <td>43.000000</td>\n",
" <td>43.000000</td>\n",
" <td>326.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>13486.500000</td>\n",
" <td>0.400000</td>\n",
" <td>61.000000</td>\n",
" <td>56.000000</td>\n",
" <td>950.000000</td>\n",
" <td>4.710000</td>\n",
" <td>4.720000</td>\n",
" <td>2.910000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>26972.000000</td>\n",
" <td>0.700000</td>\n",
" <td>61.800000</td>\n",
" <td>57.000000</td>\n",
" <td>2401.000000</td>\n",
" <td>5.700000</td>\n",
" <td>5.710000</td>\n",
" <td>3.530000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>40457.500000</td>\n",
" <td>1.040000</td>\n",
" <td>62.500000</td>\n",
" <td>59.000000</td>\n",
" <td>5324.000000</td>\n",
" <td>6.540000</td>\n",
" <td>6.540000</td>\n",
" <td>4.040000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>53943.000000</td>\n",
" <td>5.010000</td>\n",
" <td>79.000000</td>\n",
" <td>95.000000</td>\n",
" <td>18823.000000</td>\n",
" <td>10.740000</td>\n",
" <td>58.900000</td>\n",
" <td>31.800000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id carat depth table price \\\n",
"count 53943.000000 53943.000000 53943.000000 53943.000000 53943.000000 \n",
"mean 26972.000000 0.797935 61.749322 57.457251 3932.734294 \n",
"std 15572.147122 0.473999 1.432626 2.234549 3989.338447 \n",
"min 1.000000 0.200000 43.000000 43.000000 326.000000 \n",
"25% 13486.500000 0.400000 61.000000 56.000000 950.000000 \n",
"50% 26972.000000 0.700000 61.800000 57.000000 2401.000000 \n",
"75% 40457.500000 1.040000 62.500000 59.000000 5324.000000 \n",
"max 53943.000000 5.010000 79.000000 95.000000 18823.000000 \n",
"\n",
" x y z \n",
"count 53943.000000 53943.000000 53943.000000 \n",
"mean 5.731158 5.734526 3.538730 \n",
"std 1.121730 1.142103 0.705679 \n",
"min 0.000000 0.000000 0.000000 \n",
"25% 4.710000 4.720000 2.910000 \n",
"50% 5.700000 5.710000 3.530000 \n",
"75% 6.540000 6.540000 4.040000 \n",
"max 10.740000 58.900000 31.800000 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Получение сведений о колонках датафрейма"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. Названия колонок"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['id', 'carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price',\n",
" 'x', 'y', 'z'],\n",
" dtype='object')"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Вывод отдельных строк и столбцов"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. Столбец \"carat\""
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>carat</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53938</th>\n",
" <td>0.86</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53939</th>\n",
" <td>0.75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53940</th>\n",
" <td>0.71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53941</th>\n",
" <td>0.71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53942</th>\n",
" <td>0.70</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>53943 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" carat\n",
"0 0.23\n",
"1 0.21\n",
"2 0.23\n",
"3 0.29\n",
"4 0.31\n",
"... ...\n",
"53938 0.86\n",
"53939 0.75\n",
"53940 0.71\n",
"53941 0.71\n",
"53942 0.70\n",
"\n",
"[53943 rows x 1 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"carat\"]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"2. Несколько столбцов"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>cut</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>Premium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>Premium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53938</th>\n",
" <td>53939</td>\n",
" <td>Premium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53939</th>\n",
" <td>53940</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53940</th>\n",
" <td>53941</td>\n",
" <td>Premium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53941</th>\n",
" <td>53942</td>\n",
" <td>Premium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53942</th>\n",
" <td>53943</td>\n",
" <td>Very Good</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>53943 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" id cut\n",
"0 1 Ideal\n",
"1 2 Premium\n",
"2 3 Good\n",
"3 4 Premium\n",
"4 5 Good\n",
"... ... ...\n",
"53938 53939 Premium\n",
"53939 53940 Ideal\n",
"53940 53941 Premium\n",
"53941 53942 Premium\n",
"53942 53943 Very Good\n",
"\n",
"[53943 rows x 2 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"id\", \"cut\"]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"3. Первая строка"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>carat</th>\n",
" <th>cut</th>\n",
" <th>color</th>\n",
" <th>clarity</th>\n",
" <th>depth</th>\n",
" <th>table</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.23</td>\n",
" <td>Ideal</td>\n",
" <td>E</td>\n",
" <td>SI2</td>\n",
" <td>61.5</td>\n",
" <td>55.0</td>\n",
" <td>326</td>\n",
" <td>3.95</td>\n",
" <td>3.98</td>\n",
" <td>2.43</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id carat cut color clarity depth table price x y z\n",
"0 1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[[0]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"4. Вывод по условию (цена больше 400)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>carat</th>\n",
" <th>cut</th>\n",
" <th>color</th>\n",
" <th>clarity</th>\n",
" <th>depth</th>\n",
" <th>table</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>31</td>\n",
" <td>0.23</td>\n",
" <td>Very Good</td>\n",
" <td>F</td>\n",
" <td>VS1</td>\n",
" <td>60.0</td>\n",
" <td>57.0</td>\n",
" <td>402</td>\n",
" <td>4.00</td>\n",
" <td>4.03</td>\n",
" <td>2.41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>32</td>\n",
" <td>0.23</td>\n",
" <td>Very Good</td>\n",
" <td>F</td>\n",
" <td>VS1</td>\n",
" <td>59.8</td>\n",
" <td>57.0</td>\n",
" <td>402</td>\n",
" <td>4.04</td>\n",
" <td>4.06</td>\n",
" <td>2.42</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>33</td>\n",
" <td>0.23</td>\n",
" <td>Very Good</td>\n",
" <td>E</td>\n",
" <td>VS1</td>\n",
" <td>60.7</td>\n",
" <td>59.0</td>\n",
" <td>402</td>\n",
" <td>3.97</td>\n",
" <td>4.01</td>\n",
" <td>2.42</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>34</td>\n",
" <td>0.23</td>\n",
" <td>Very Good</td>\n",
" <td>E</td>\n",
" <td>VS1</td>\n",
" <td>59.5</td>\n",
" <td>58.0</td>\n",
" <td>402</td>\n",
" <td>4.01</td>\n",
" <td>4.06</td>\n",
" <td>2.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>35</td>\n",
" <td>0.23</td>\n",
" <td>Very Good</td>\n",
" <td>D</td>\n",
" <td>VS1</td>\n",
" <td>61.9</td>\n",
" <td>58.0</td>\n",
" <td>402</td>\n",
" <td>3.92</td>\n",
" <td>3.96</td>\n",
" <td>2.44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53938</th>\n",
" <td>53939</td>\n",
" <td>0.86</td>\n",
" <td>Premium</td>\n",
" <td>H</td>\n",
" <td>SI2</td>\n",
" <td>61.0</td>\n",
" <td>58.0</td>\n",
" <td>2757</td>\n",
" <td>6.15</td>\n",
" <td>6.12</td>\n",
" <td>3.74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53939</th>\n",
" <td>53940</td>\n",
" <td>0.75</td>\n",
" <td>Ideal</td>\n",
" <td>D</td>\n",
" <td>SI2</td>\n",
" <td>62.2</td>\n",
" <td>55.0</td>\n",
" <td>2757</td>\n",
" <td>5.83</td>\n",
" <td>5.87</td>\n",
" <td>3.64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53940</th>\n",
" <td>53941</td>\n",
" <td>0.71</td>\n",
" <td>Premium</td>\n",
" <td>E</td>\n",
" <td>SI1</td>\n",
" <td>60.5</td>\n",
" <td>55.0</td>\n",
" <td>2756</td>\n",
" <td>5.79</td>\n",
" <td>5.74</td>\n",
" <td>3.49</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53941</th>\n",
" <td>53942</td>\n",
" <td>0.71</td>\n",
" <td>Premium</td>\n",
" <td>F</td>\n",
" <td>SI1</td>\n",
" <td>59.8</td>\n",
" <td>62.0</td>\n",
" <td>2756</td>\n",
" <td>5.74</td>\n",
" <td>5.73</td>\n",
" <td>3.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53942</th>\n",
" <td>53943</td>\n",
" <td>0.70</td>\n",
" <td>Very Good</td>\n",
" <td>E</td>\n",
" <td>VS2</td>\n",
" <td>60.5</td>\n",
" <td>59.0</td>\n",
" <td>2757</td>\n",
" <td>5.71</td>\n",
" <td>5.76</td>\n",
" <td>3.47</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>53692 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" id carat cut color clarity depth table price x y \\\n",
"30 31 0.23 Very Good F VS1 60.0 57.0 402 4.00 4.03 \n",
"31 32 0.23 Very Good F VS1 59.8 57.0 402 4.04 4.06 \n",
"32 33 0.23 Very Good E VS1 60.7 59.0 402 3.97 4.01 \n",
"33 34 0.23 Very Good E VS1 59.5 58.0 402 4.01 4.06 \n",
"34 35 0.23 Very Good D VS1 61.9 58.0 402 3.92 3.96 \n",
"... ... ... ... ... ... ... ... ... ... ... \n",
"53938 53939 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 \n",
"53939 53940 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 \n",
"53940 53941 0.71 Premium E SI1 60.5 55.0 2756 5.79 5.74 \n",
"53941 53942 0.71 Premium F SI1 59.8 62.0 2756 5.74 5.73 \n",
"53942 53943 0.70 Very Good E VS2 60.5 59.0 2757 5.71 5.76 \n",
"\n",
" z \n",
"30 2.41 \n",
"31 2.42 \n",
"32 2.42 \n",
"33 2.40 \n",
"34 2.44 \n",
"... ... \n",
"53938 3.74 \n",
"53939 3.64 \n",
"53940 3.49 \n",
"53941 3.43 \n",
"53942 3.47 \n",
"\n",
"[53692 rows x 11 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df[\"price\"] > 400]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Группировка и агрегация данных"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. Средняя стоимость по типу огранки"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>price</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cut</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Fair</th>\n",
" <td>4358.757764</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Good</th>\n",
" <td>3928.864452</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Ideal</th>\n",
" <td>3457.541970</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Premium</th>\n",
" <td>4583.992605</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Very Good</th>\n",
" <td>3981.658529</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" price\n",
"cut \n",
"Fair 4358.757764\n",
"Good 3928.864452\n",
"Ideal 3457.541970\n",
"Premium 4583.992605\n",
"Very Good 3981.658529"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.groupby([\"cut\"])[[\"price\"]].mean()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"2. Средний вес по типу огранки"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>carat</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cut</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Fair</th>\n",
" <td>1.046137</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Good</th>\n",
" <td>0.849185</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Ideal</th>\n",
" <td>0.702837</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Premium</th>\n",
" <td>0.891929</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Very Good</th>\n",
" <td>0.806373</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" carat\n",
"cut \n",
"Fair 1.046137\n",
"Good 0.849185\n",
"Ideal 0.702837\n",
"Premium 0.891929\n",
"Very Good 0.806373"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.groupby(\"cut\")[[\"carat\"]].mean()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Сортировка данных"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. Сортировка по цене по убыванию"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>carat</th>\n",
" <th>cut</th>\n",
" <th>color</th>\n",
" <th>clarity</th>\n",
" <th>depth</th>\n",
" <th>table</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>27749</th>\n",
" <td>27750</td>\n",
" <td>2.29</td>\n",
" <td>Premium</td>\n",
" <td>I</td>\n",
" <td>VS2</td>\n",
" <td>60.8</td>\n",
" <td>60.0</td>\n",
" <td>18823</td>\n",
" <td>8.50</td>\n",
" <td>8.47</td>\n",
" <td>5.16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27748</th>\n",
" <td>27749</td>\n",
" <td>2.00</td>\n",
" <td>Very Good</td>\n",
" <td>G</td>\n",
" <td>SI1</td>\n",
" <td>63.5</td>\n",
" <td>56.0</td>\n",
" <td>18818</td>\n",
" <td>7.90</td>\n",
" <td>7.97</td>\n",
" <td>5.04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27747</th>\n",
" <td>27748</td>\n",
" <td>1.51</td>\n",
" <td>Ideal</td>\n",
" <td>G</td>\n",
" <td>IF</td>\n",
" <td>61.7</td>\n",
" <td>55.0</td>\n",
" <td>18806</td>\n",
" <td>7.37</td>\n",
" <td>7.41</td>\n",
" <td>4.56</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27746</th>\n",
" <td>27747</td>\n",
" <td>2.07</td>\n",
" <td>Ideal</td>\n",
" <td>G</td>\n",
" <td>SI2</td>\n",
" <td>62.5</td>\n",
" <td>55.0</td>\n",
" <td>18804</td>\n",
" <td>8.20</td>\n",
" <td>8.13</td>\n",
" <td>5.11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27745</th>\n",
" <td>27746</td>\n",
" <td>2.00</td>\n",
" <td>Very Good</td>\n",
" <td>H</td>\n",
" <td>SI1</td>\n",
" <td>62.8</td>\n",
" <td>57.0</td>\n",
" <td>18803</td>\n",
" <td>7.95</td>\n",
" <td>8.00</td>\n",
" <td>5.01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0.31</td>\n",
" <td>Good</td>\n",
" <td>J</td>\n",
" <td>SI2</td>\n",
" <td>63.3</td>\n",
" <td>58.0</td>\n",
" <td>335</td>\n",
" <td>4.34</td>\n",
" <td>4.35</td>\n",
" <td>2.75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.29</td>\n",
" <td>Premium</td>\n",
" <td>I</td>\n",
" <td>VS2</td>\n",
" <td>62.4</td>\n",
" <td>58.0</td>\n",
" <td>334</td>\n",
" <td>4.20</td>\n",
" <td>4.23</td>\n",
" <td>2.63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.23</td>\n",
" <td>Good</td>\n",
" <td>E</td>\n",
" <td>VS1</td>\n",
" <td>56.9</td>\n",
" <td>65.0</td>\n",
" <td>327</td>\n",
" <td>4.05</td>\n",
" <td>4.07</td>\n",
" <td>2.31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.21</td>\n",
" <td>Premium</td>\n",
" <td>E</td>\n",
" <td>SI1</td>\n",
" <td>59.8</td>\n",
" <td>61.0</td>\n",
" <td>326</td>\n",
" <td>3.89</td>\n",
" <td>3.84</td>\n",
" <td>2.31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.23</td>\n",
" <td>Ideal</td>\n",
" <td>E</td>\n",
" <td>SI2</td>\n",
" <td>61.5</td>\n",
" <td>55.0</td>\n",
" <td>326</td>\n",
" <td>3.95</td>\n",
" <td>3.98</td>\n",
" <td>2.43</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>53943 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" id carat cut color clarity depth table price x y \\\n",
"27749 27750 2.29 Premium I VS2 60.8 60.0 18823 8.50 8.47 \n",
"27748 27749 2.00 Very Good G SI1 63.5 56.0 18818 7.90 7.97 \n",
"27747 27748 1.51 Ideal G IF 61.7 55.0 18806 7.37 7.41 \n",
"27746 27747 2.07 Ideal G SI2 62.5 55.0 18804 8.20 8.13 \n",
"27745 27746 2.00 Very Good H SI1 62.8 57.0 18803 7.95 8.00 \n",
"... ... ... ... ... ... ... ... ... ... ... \n",
"4 5 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 \n",
"3 4 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 \n",
"2 3 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 \n",
"1 2 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 \n",
"0 1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 \n",
"\n",
" z \n",
"27749 5.16 \n",
"27748 5.04 \n",
"27747 4.56 \n",
"27746 5.11 \n",
"27745 5.01 \n",
"... ... \n",
"4 2.75 \n",
"3 2.63 \n",
"2 2.31 \n",
"1 2.31 \n",
"0 2.43 \n",
"\n",
"[53943 rows x 11 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sort_values(\"price\", ascending=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"2. Сортировка по нескольким столбцам (вес по возрастанию, цена по убыванию)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>carat</th>\n",
" <th>cut</th>\n",
" <th>color</th>\n",
" <th>clarity</th>\n",
" <th>depth</th>\n",
" <th>table</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>31591</th>\n",
" <td>31592</td>\n",
" <td>0.20</td>\n",
" <td>Premium</td>\n",
" <td>E</td>\n",
" <td>VS2</td>\n",
" <td>59.8</td>\n",
" <td>62.0</td>\n",
" <td>367</td>\n",
" <td>3.79</td>\n",
" <td>3.77</td>\n",
" <td>2.26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31592</th>\n",
" <td>31593</td>\n",
" <td>0.20</td>\n",
" <td>Premium</td>\n",
" <td>E</td>\n",
" <td>VS2</td>\n",
" <td>59.0</td>\n",
" <td>60.0</td>\n",
" <td>367</td>\n",
" <td>3.81</td>\n",
" <td>3.78</td>\n",
" <td>2.24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31593</th>\n",
" <td>31594</td>\n",
" <td>0.20</td>\n",
" <td>Premium</td>\n",
" <td>E</td>\n",
" <td>VS2</td>\n",
" <td>61.1</td>\n",
" <td>59.0</td>\n",
" <td>367</td>\n",
" <td>3.81</td>\n",
" <td>3.78</td>\n",
" <td>2.32</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31594</th>\n",
" <td>31595</td>\n",
" <td>0.20</td>\n",
" <td>Premium</td>\n",
" <td>E</td>\n",
" <td>VS2</td>\n",
" <td>59.7</td>\n",
" <td>62.0</td>\n",
" <td>367</td>\n",
" <td>3.84</td>\n",
" <td>3.80</td>\n",
" <td>2.28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31595</th>\n",
" <td>31596</td>\n",
" <td>0.20</td>\n",
" <td>Ideal</td>\n",
" <td>E</td>\n",
" <td>VS2</td>\n",
" <td>59.7</td>\n",
" <td>55.0</td>\n",
" <td>367</td>\n",
" <td>3.86</td>\n",
" <td>3.84</td>\n",
" <td>2.30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25998</th>\n",
" <td>25999</td>\n",
" <td>4.01</td>\n",
" <td>Premium</td>\n",
" <td>I</td>\n",
" <td>I1</td>\n",
" <td>61.0</td>\n",
" <td>61.0</td>\n",
" <td>15223</td>\n",
" <td>10.14</td>\n",
" <td>10.10</td>\n",
" <td>6.17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25999</th>\n",
" <td>26000</td>\n",
" <td>4.01</td>\n",
" <td>Premium</td>\n",
" <td>J</td>\n",
" <td>I1</td>\n",
" <td>62.5</td>\n",
" <td>62.0</td>\n",
" <td>15223</td>\n",
" <td>10.02</td>\n",
" <td>9.94</td>\n",
" <td>6.24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27130</th>\n",
" <td>27131</td>\n",
" <td>4.13</td>\n",
" <td>Fair</td>\n",
" <td>H</td>\n",
" <td>I1</td>\n",
" <td>64.8</td>\n",
" <td>61.0</td>\n",
" <td>17329</td>\n",
" <td>10.00</td>\n",
" <td>9.85</td>\n",
" <td>6.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27630</th>\n",
" <td>27631</td>\n",
" <td>4.50</td>\n",
" <td>Fair</td>\n",
" <td>J</td>\n",
" <td>I1</td>\n",
" <td>65.8</td>\n",
" <td>58.0</td>\n",
" <td>18531</td>\n",
" <td>10.23</td>\n",
" <td>10.16</td>\n",
" <td>6.72</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27415</th>\n",
" <td>27416</td>\n",
" <td>5.01</td>\n",
" <td>Fair</td>\n",
" <td>J</td>\n",
" <td>I1</td>\n",
" <td>65.5</td>\n",
" <td>59.0</td>\n",
" <td>18018</td>\n",
" <td>10.74</td>\n",
" <td>10.54</td>\n",
" <td>6.98</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>53943 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" id carat cut color clarity depth table price x y \\\n",
"31591 31592 0.20 Premium E VS2 59.8 62.0 367 3.79 3.77 \n",
"31592 31593 0.20 Premium E VS2 59.0 60.0 367 3.81 3.78 \n",
"31593 31594 0.20 Premium E VS2 61.1 59.0 367 3.81 3.78 \n",
"31594 31595 0.20 Premium E VS2 59.7 62.0 367 3.84 3.80 \n",
"31595 31596 0.20 Ideal E VS2 59.7 55.0 367 3.86 3.84 \n",
"... ... ... ... ... ... ... ... ... ... ... \n",
"25998 25999 4.01 Premium I I1 61.0 61.0 15223 10.14 10.10 \n",
"25999 26000 4.01 Premium J I1 62.5 62.0 15223 10.02 9.94 \n",
"27130 27131 4.13 Fair H I1 64.8 61.0 17329 10.00 9.85 \n",
"27630 27631 4.50 Fair J I1 65.8 58.0 18531 10.23 10.16 \n",
"27415 27416 5.01 Fair J I1 65.5 59.0 18018 10.74 10.54 \n",
"\n",
" z \n",
"31591 2.26 \n",
"31592 2.24 \n",
"31593 2.32 \n",
"31594 2.28 \n",
"31595 2.30 \n",
"... ... \n",
"25998 6.17 \n",
"25999 6.24 \n",
"27130 6.43 \n",
"27630 6.72 \n",
"27415 6.98 \n",
"\n",
"[53943 rows x 11 columns]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sort_values([\"carat\", \"price\"], ascending=[True, False])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Удаление строк/столбцов"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. Удаление столбца"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>carat</th>\n",
" <th>cut</th>\n",
" <th>color</th>\n",
" <th>clarity</th>\n",
" <th>table</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.23</td>\n",
" <td>Ideal</td>\n",
" <td>E</td>\n",
" <td>SI2</td>\n",
" <td>55.0</td>\n",
" <td>326</td>\n",
" <td>3.95</td>\n",
" <td>3.98</td>\n",
" <td>2.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.21</td>\n",
" <td>Premium</td>\n",
" <td>E</td>\n",
" <td>SI1</td>\n",
" <td>61.0</td>\n",
" <td>326</td>\n",
" <td>3.89</td>\n",
" <td>3.84</td>\n",
" <td>2.31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.23</td>\n",
" <td>Good</td>\n",
" <td>E</td>\n",
" <td>VS1</td>\n",
" <td>65.0</td>\n",
" <td>327</td>\n",
" <td>4.05</td>\n",
" <td>4.07</td>\n",
" <td>2.31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.29</td>\n",
" <td>Premium</td>\n",
" <td>I</td>\n",
" <td>VS2</td>\n",
" <td>58.0</td>\n",
" <td>334</td>\n",
" <td>4.20</td>\n",
" <td>4.23</td>\n",
" <td>2.63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0.31</td>\n",
" <td>Good</td>\n",
" <td>J</td>\n",
" <td>SI2</td>\n",
" <td>58.0</td>\n",
" <td>335</td>\n",
" <td>4.34</td>\n",
" <td>4.35</td>\n",
" <td>2.75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53938</th>\n",
" <td>53939</td>\n",
" <td>0.86</td>\n",
" <td>Premium</td>\n",
" <td>H</td>\n",
" <td>SI2</td>\n",
" <td>58.0</td>\n",
" <td>2757</td>\n",
" <td>6.15</td>\n",
" <td>6.12</td>\n",
" <td>3.74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53939</th>\n",
" <td>53940</td>\n",
" <td>0.75</td>\n",
" <td>Ideal</td>\n",
" <td>D</td>\n",
" <td>SI2</td>\n",
" <td>55.0</td>\n",
" <td>2757</td>\n",
" <td>5.83</td>\n",
" <td>5.87</td>\n",
" <td>3.64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53940</th>\n",
" <td>53941</td>\n",
" <td>0.71</td>\n",
" <td>Premium</td>\n",
" <td>E</td>\n",
" <td>SI1</td>\n",
" <td>55.0</td>\n",
" <td>2756</td>\n",
" <td>5.79</td>\n",
" <td>5.74</td>\n",
" <td>3.49</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53941</th>\n",
" <td>53942</td>\n",
" <td>0.71</td>\n",
" <td>Premium</td>\n",
" <td>F</td>\n",
" <td>SI1</td>\n",
" <td>62.0</td>\n",
" <td>2756</td>\n",
" <td>5.74</td>\n",
" <td>5.73</td>\n",
" <td>3.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53942</th>\n",
" <td>53943</td>\n",
" <td>0.70</td>\n",
" <td>Very Good</td>\n",
" <td>E</td>\n",
" <td>VS2</td>\n",
" <td>59.0</td>\n",
" <td>2757</td>\n",
" <td>5.71</td>\n",
" <td>5.76</td>\n",
" <td>3.47</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>53943 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" id carat cut color clarity table price x y z\n",
"0 1 0.23 Ideal E SI2 55.0 326 3.95 3.98 2.43\n",
"1 2 0.21 Premium E SI1 61.0 326 3.89 3.84 2.31\n",
"2 3 0.23 Good E VS1 65.0 327 4.05 4.07 2.31\n",
"3 4 0.29 Premium I VS2 58.0 334 4.20 4.23 2.63\n",
"4 5 0.31 Good J SI2 58.0 335 4.34 4.35 2.75\n",
"... ... ... ... ... ... ... ... ... ... ...\n",
"53938 53939 0.86 Premium H SI2 58.0 2757 6.15 6.12 3.74\n",
"53939 53940 0.75 Ideal D SI2 55.0 2757 5.83 5.87 3.64\n",
"53940 53941 0.71 Premium E SI1 55.0 2756 5.79 5.74 3.49\n",
"53941 53942 0.71 Premium F SI1 62.0 2756 5.74 5.73 3.43\n",
"53942 53943 0.70 Very Good E VS2 59.0 2757 5.71 5.76 3.47\n",
"\n",
"[53943 rows x 10 columns]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.drop(\"depth\", axis=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Удаление строки"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>carat</th>\n",
" <th>cut</th>\n",
" <th>color</th>\n",
" <th>clarity</th>\n",
" <th>depth</th>\n",
" <th>table</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.21</td>\n",
" <td>Premium</td>\n",
" <td>E</td>\n",
" <td>SI1</td>\n",
" <td>59.8</td>\n",
" <td>61.0</td>\n",
" <td>326</td>\n",
" <td>3.89</td>\n",
" <td>3.84</td>\n",
" <td>2.31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.23</td>\n",
" <td>Good</td>\n",
" <td>E</td>\n",
" <td>VS1</td>\n",
" <td>56.9</td>\n",
" <td>65.0</td>\n",
" <td>327</td>\n",
" <td>4.05</td>\n",
" <td>4.07</td>\n",
" <td>2.31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.29</td>\n",
" <td>Premium</td>\n",
" <td>I</td>\n",
" <td>VS2</td>\n",
" <td>62.4</td>\n",
" <td>58.0</td>\n",
" <td>334</td>\n",
" <td>4.20</td>\n",
" <td>4.23</td>\n",
" <td>2.63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0.31</td>\n",
" <td>Good</td>\n",
" <td>J</td>\n",
" <td>SI2</td>\n",
" <td>63.3</td>\n",
" <td>58.0</td>\n",
" <td>335</td>\n",
" <td>4.34</td>\n",
" <td>4.35</td>\n",
" <td>2.75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>6</td>\n",
" <td>0.24</td>\n",
" <td>Very Good</td>\n",
" <td>J</td>\n",
" <td>VVS2</td>\n",
" <td>62.8</td>\n",
" <td>57.0</td>\n",
" <td>336</td>\n",
" <td>3.94</td>\n",
" <td>3.96</td>\n",
" <td>2.48</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53938</th>\n",
" <td>53939</td>\n",
" <td>0.86</td>\n",
" <td>Premium</td>\n",
" <td>H</td>\n",
" <td>SI2</td>\n",
" <td>61.0</td>\n",
" <td>58.0</td>\n",
" <td>2757</td>\n",
" <td>6.15</td>\n",
" <td>6.12</td>\n",
" <td>3.74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53939</th>\n",
" <td>53940</td>\n",
" <td>0.75</td>\n",
" <td>Ideal</td>\n",
" <td>D</td>\n",
" <td>SI2</td>\n",
" <td>62.2</td>\n",
" <td>55.0</td>\n",
" <td>2757</td>\n",
" <td>5.83</td>\n",
" <td>5.87</td>\n",
" <td>3.64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53940</th>\n",
" <td>53941</td>\n",
" <td>0.71</td>\n",
" <td>Premium</td>\n",
" <td>E</td>\n",
" <td>SI1</td>\n",
" <td>60.5</td>\n",
" <td>55.0</td>\n",
" <td>2756</td>\n",
" <td>5.79</td>\n",
" <td>5.74</td>\n",
" <td>3.49</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53941</th>\n",
" <td>53942</td>\n",
" <td>0.71</td>\n",
" <td>Premium</td>\n",
" <td>F</td>\n",
" <td>SI1</td>\n",
" <td>59.8</td>\n",
" <td>62.0</td>\n",
" <td>2756</td>\n",
" <td>5.74</td>\n",
" <td>5.73</td>\n",
" <td>3.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53942</th>\n",
" <td>53943</td>\n",
" <td>0.70</td>\n",
" <td>Very Good</td>\n",
" <td>E</td>\n",
" <td>VS2</td>\n",
" <td>60.5</td>\n",
" <td>59.0</td>\n",
" <td>2757</td>\n",
" <td>5.71</td>\n",
" <td>5.76</td>\n",
" <td>3.47</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>53942 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" id carat cut color clarity depth table price x y \\\n",
"1 2 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 \n",
"2 3 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 \n",
"3 4 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 \n",
"4 5 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 \n",
"5 6 0.24 Very Good J VVS2 62.8 57.0 336 3.94 3.96 \n",
"... ... ... ... ... ... ... ... ... ... ... \n",
"53938 53939 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 \n",
"53939 53940 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 \n",
"53940 53941 0.71 Premium E SI1 60.5 55.0 2756 5.79 5.74 \n",
"53941 53942 0.71 Premium F SI1 59.8 62.0 2756 5.74 5.73 \n",
"53942 53943 0.70 Very Good E VS2 60.5 59.0 2757 5.71 5.76 \n",
"\n",
" z \n",
"1 2.31 \n",
"2 2.31 \n",
"3 2.63 \n",
"4 2.75 \n",
"5 2.48 \n",
"... ... \n",
"53938 3.74 \n",
"53939 3.64 \n",
"53940 3.49 \n",
"53941 3.43 \n",
"53942 3.47 \n",
"\n",
"[53942 rows x 11 columns]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.drop(0, axis=0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Создание новых столбцов"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. Создание нового столбца \"стоимость 1 карата\""
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>price_carat</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1417.391304</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1552.380952</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1421.739130</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1151.724138</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1080.645161</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53938</th>\n",
" <td>3205.813953</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53939</th>\n",
" <td>3676.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53940</th>\n",
" <td>3881.690141</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53941</th>\n",
" <td>3881.690141</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53942</th>\n",
" <td>3938.571429</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>53943 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" price_carat\n",
"0 1417.391304\n",
"1 1552.380952\n",
"2 1421.739130\n",
"3 1151.724138\n",
"4 1080.645161\n",
"... ...\n",
"53938 3205.813953\n",
"53939 3676.000000\n",
"53940 3881.690141\n",
"53941 3881.690141\n",
"53942 3938.571429\n",
"\n",
"[53943 rows x 1 columns]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"price_carat\"] = df[\"price\"] / df[\"carat\"]\n",
"df[[\"price_carat\"]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Удаление строк с пустыми значениями"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. Удаление строк с NaN"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>carat</th>\n",
" <th>cut</th>\n",
" <th>color</th>\n",
" <th>clarity</th>\n",
" <th>depth</th>\n",
" <th>table</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" <th>price_carat</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.23</td>\n",
" <td>Ideal</td>\n",
" <td>E</td>\n",
" <td>SI2</td>\n",
" <td>61.5</td>\n",
" <td>55.0</td>\n",
" <td>326</td>\n",
" <td>3.95</td>\n",
" <td>3.98</td>\n",
" <td>2.43</td>\n",
" <td>1417.391304</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>0.21</td>\n",
" <td>Premium</td>\n",
" <td>E</td>\n",
" <td>SI1</td>\n",
" <td>59.8</td>\n",
" <td>61.0</td>\n",
" <td>326</td>\n",
" <td>3.89</td>\n",
" <td>3.84</td>\n",
" <td>2.31</td>\n",
" <td>1552.380952</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>0.23</td>\n",
" <td>Good</td>\n",
" <td>E</td>\n",
" <td>VS1</td>\n",
" <td>56.9</td>\n",
" <td>65.0</td>\n",
" <td>327</td>\n",
" <td>4.05</td>\n",
" <td>4.07</td>\n",
" <td>2.31</td>\n",
" <td>1421.739130</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0.29</td>\n",
" <td>Premium</td>\n",
" <td>I</td>\n",
" <td>VS2</td>\n",
" <td>62.4</td>\n",
" <td>58.0</td>\n",
" <td>334</td>\n",
" <td>4.20</td>\n",
" <td>4.23</td>\n",
" <td>2.63</td>\n",
" <td>1151.724138</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0.31</td>\n",
" <td>Good</td>\n",
" <td>J</td>\n",
" <td>SI2</td>\n",
" <td>63.3</td>\n",
" <td>58.0</td>\n",
" <td>335</td>\n",
" <td>4.34</td>\n",
" <td>4.35</td>\n",
" <td>2.75</td>\n",
" <td>1080.645161</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53938</th>\n",
" <td>53939</td>\n",
" <td>0.86</td>\n",
" <td>Premium</td>\n",
" <td>H</td>\n",
" <td>SI2</td>\n",
" <td>61.0</td>\n",
" <td>58.0</td>\n",
" <td>2757</td>\n",
" <td>6.15</td>\n",
" <td>6.12</td>\n",
" <td>3.74</td>\n",
" <td>3205.813953</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53939</th>\n",
" <td>53940</td>\n",
" <td>0.75</td>\n",
" <td>Ideal</td>\n",
" <td>D</td>\n",
" <td>SI2</td>\n",
" <td>62.2</td>\n",
" <td>55.0</td>\n",
" <td>2757</td>\n",
" <td>5.83</td>\n",
" <td>5.87</td>\n",
" <td>3.64</td>\n",
" <td>3676.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53940</th>\n",
" <td>53941</td>\n",
" <td>0.71</td>\n",
" <td>Premium</td>\n",
" <td>E</td>\n",
" <td>SI1</td>\n",
" <td>60.5</td>\n",
" <td>55.0</td>\n",
" <td>2756</td>\n",
" <td>5.79</td>\n",
" <td>5.74</td>\n",
" <td>3.49</td>\n",
" <td>3881.690141</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53941</th>\n",
" <td>53942</td>\n",
" <td>0.71</td>\n",
" <td>Premium</td>\n",
" <td>F</td>\n",
" <td>SI1</td>\n",
" <td>59.8</td>\n",
" <td>62.0</td>\n",
" <td>2756</td>\n",
" <td>5.74</td>\n",
" <td>5.73</td>\n",
" <td>3.43</td>\n",
" <td>3881.690141</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53942</th>\n",
" <td>53943</td>\n",
" <td>0.70</td>\n",
" <td>Very Good</td>\n",
" <td>E</td>\n",
" <td>VS2</td>\n",
" <td>60.5</td>\n",
" <td>59.0</td>\n",
" <td>2757</td>\n",
" <td>5.71</td>\n",
" <td>5.76</td>\n",
" <td>3.47</td>\n",
" <td>3938.571429</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>53943 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" id carat cut color clarity depth table price x y \\\n",
"0 1 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 \n",
"1 2 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 \n",
"2 3 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 \n",
"3 4 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 \n",
"4 5 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 \n",
"... ... ... ... ... ... ... ... ... ... ... \n",
"53938 53939 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 \n",
"53939 53940 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 \n",
"53940 53941 0.71 Premium E SI1 60.5 55.0 2756 5.79 5.74 \n",
"53941 53942 0.71 Premium F SI1 59.8 62.0 2756 5.74 5.73 \n",
"53942 53943 0.70 Very Good E VS2 60.5 59.0 2757 5.71 5.76 \n",
"\n",
" z price_carat \n",
"0 2.43 1417.391304 \n",
"1 2.31 1552.380952 \n",
"2 2.31 1421.739130 \n",
"3 2.63 1151.724138 \n",
"4 2.75 1080.645161 \n",
"... ... ... \n",
"53938 3.74 3205.813953 \n",
"53939 3.64 3676.000000 \n",
"53940 3.49 3881.690141 \n",
"53941 3.43 3881.690141 \n",
"53942 3.47 3938.571429 \n",
"\n",
"[53943 rows x 12 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.dropna()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"2. Заполнение пустых значений определённого столбца средним значением"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Replace missing prices with the mean price; other columns are not touched.\n",
"df.fillna(\n",
"    value={\"price\": df[\"price\"].mean()},\n",
"    inplace=True,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Заполнение пустых значений"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. Кодирование категориальных столбцов и заполнение пропусков средним значением"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# Integer codes for the three categorical columns.\n",
"cut_mapping = {'Fair': 0, 'Good': 1, 'Very Good': 2, 'Premium': 3, 'Ideal': 4}\n",
"color_mapping = {'J': 0, 'I': 1, 'H': 2, 'G': 3, 'F': 4, 'E': 5, 'D': 6}\n",
"clarity_mapping = {'I1': 0, 'SI2': 1, 'SI1': 2, 'VS2': 3, 'VS1': 4, 'VVS2': 5, 'VVS1': 6, 'IF': 7}\n",
"\n",
"for column, mapping in (\n",
"    (\"cut\", cut_mapping),\n",
"    (\"color\", color_mapping),\n",
"    (\"clarity\", clarity_mapping),\n",
"):\n",
"    # Encode only while the column still holds labels: mapping the integer\n",
"    # codes again on a re-run of this cell would turn every value into NaN.\n",
"    if not pd.api.types.is_numeric_dtype(df[column]):\n",
"        df[column] = df[column].map(mapping)\n",
"\n",
"# Fill the remaining gaps in every numeric column with that column's mean.\n",
"df.fillna(df.mean(numeric_only=True), inplace=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Визуализация данных с Pandas и Matplotlib"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. Линейная диаграмма (plot). Вес бриллиантов"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAHHCAYAAACRAnNyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABi3UlEQVR4nO3dd3hT1f8H8He6W+imhZZRyix7lCFbZYmIgBtBEQeiOEBRwQX4FeGnoogiIoioqMUBiOxZkJZZKFAKhQKFAoWWQvdOzu+P0pA0STOa5N6079fz9CG599xzTy5J7idnKoQQAkREREQy5CR1AYiIiIgMYaBCREREssVAhYiIiGSLgQoRERHJFgMVIiIiki0GKkRERCRbDFSIiIhIthioEBERkWwxUCEiIiLZYqBC5MCeeeYZNG3a1OJj69ata90Cmen69et45JFHEBgYCIVCgQULFph1fEpKChQKBVasWKHeNmvWLCgUCusWVIYUCgVmzZoldTGIbI6BCpGV/fHHH1AoFFizZo3Ovk6dOkGhUGDXrl06+5o0aYLevXvbo4hmKSgowKxZsxAdHW31vKdOnYotW7ZgxowZ+OWXX3DfffdZ/RxE5NgYqBBZWd++fQEAe/fu1dqek5ODhIQEuLi4ICYmRmtfamoqUlNT1ceaaunSpUhKSqpegY0oKCjA7NmzbRKo7Ny5EyNHjsS0adMwbtw4REREVDvP999/H4WFhVYoHRHJgYvUBSCqaUJDQxEeHq4TqOzbtw9CCDz66KM6+yqemxuouLq6Vq+wEktPT4efn59V83RxcYGLC7/aiGoK1qgQ2UDfvn1x9OhRrV/2MTExaNeuHYYNG4b9+/dDpVJp7VMoFOjTp49628qVKxEZGQlPT08EBATgiSeeQGpqqtZ59PVRyczMxFNPPQUfHx/4+flh/PjxOHbsmE5fjgpXrlzBqFGjULduXQQFBWHatGlQKpUAyvuABAUFAQBmz54NhUJhUt+I8+fP49FHH0VAQAC8vLxw1113YcOGDer9K1asgEKhgBACixYtUudblaysLDzzzDPw9fVVv66srCyddPr6qPz444+49957ERwcDHd3d7Rt2xaLFy/WObZp06Z44IEHEB0djW7dusHT0xMdOnRQ1yatXr0aHTp0gIeHByIjI3H06FGdPHbu3Il+/fqhTp068PPzw8iRI3Hq1Cm9ZUxOTsYzzzwDPz8/+Pr6YsKECSgoKNBKW1xcjKlTpyIoKAje3t548MEHcfnyZZ3z5ubmYsqUKWjatCnc3d0RHByMwYMH48iRI1VeVyK5Y6BCZAN9+/ZFaWkpDhw4oN4WExOD3r17o3fv3sjOzkZCQoLWvoiICAQGBgIA5syZg6effhotW7bEF198gSlTpmDHjh3o37+/3ptzBZVKhREjRuD333/H+PHjMWfOHKSlpWH8+PF60yuVSgwdOhSBgYH4/PPPMWDAAMyfPx/ff/89ACAoKEh9Qx89ejR++eUX/PLLL3jooYcMluH69evo3bs3tmzZgpdffhlz5sxBUVERHnzwQXW/nf79++OXX34BAAwePFidryFCCIwcORK//PILxo0bh48//hiXL182+LoqW7x4McLCwvDuu+9i/vz5aNy4MV5++WUsWrRIJ21ycjKefPJJjBgxAnPnzsWtW7cwYsQI/Prrr5g6dSrGjRuH2bNn49y5c3jssce0As7t27dj6NChSE9Px6xZs/DGG28gNjYWffr0QUpKis65HnvsMeTm5mLu3Ll47LHHsGLFCsyePVsrzfPPP48FCxZgyJAhmDdvHlxdXTF8+HCdvCZNmoTFixfj4Ycfxrfffotp06bB09NTJ0gicjiCiKzu5MmTAoD43//+J4QQorS0VNSpU0f89NNPQggh6tevLxYtWiSEECInJ0c4OzuLF154QQghREpKinB2dhZz5szRyvPEiRPCxcVFa/v48eNFWFiY+vnff/8tAIgFCxaotymVSn
HvvfcKAOLHH3/UOhaA+Oijj7TO06VLFxEZGal+npGRIQCImTNnmvTap0yZIgCI//77T70tNzdXhIeHi6ZNmwqlUqneDkBMnjzZaJ5r164VAMSnn36q3lZWVib69eun87pmzpwpKn+1FRQU6OQ5dOhQ0axZM61tYWFhAoCIjY1Vb9uyZYsAIDw9PcXFixfV25csWSIAiF27dqm3de7cWQQHB4vMzEz1tmPHjgknJyfx9NNP65Tx2Wef1Tr/6NGjRWBgoPp5fHy8ACBefvllrXRPPvmkzv+Jr6+vSdeSyNGwRoXIBtq0aYPAwEB135Njx44hPz9fPaqnd+/e6g61+/btg1KpVPdPWb16NVQqFR577DHcuHFD/degQQO0bNlS74ihCps3b4arqyteeOEF9TYnJydMnjzZ4DGTJk3Set6vXz+cP3/eshcOYOPGjejRo4dWf5u6deti4sSJSElJQWJiokV5uri44KWXXlJvc3Z2xquvvmrS8Z6enurH2dnZuHHjBgYMGIDz588jOztbK23btm3Rq1cv9fOePXsCAO699140adJEZ3vFtUpLS0N8fDyeeeYZBAQEqNN17NgRgwcPxsaNG3XKpe/aZ2ZmIicnR/26AeC1117TSjdlyhSdvPz8/HDgwAFcvXrVwFUgckwMVIhsQKFQoHfv3uq+KDExMQgODkaLFi0AaAcqFf9W3NjPnj0LIQRatmyJoKAgrb9Tp04hPT3d4HkvXryIkJAQeHl5aW2vOG9lHh4e6j4oFfz9/XHr1i3LXvjtMrRu3Vpne5s2bdT7LckzJCREZ94XfefRJyYmBoMGDVL3GwkKCsK7774LADqBimYwAgC+vr4AgMaNG+vdXnGtKl6Xodd+48YN5OfnV3kuf39/nTydnJzQvHlzrXT6zvHpp58iISEBjRs3Ro8ePTBr1qxqBZxEcsGu8UQ20rdvX/z77784ceKEun9Khd69e+Ott97ClStXsHfvXoSGhqJZs2YAyvuZKBQKbNq0Cc7Ozjr5WnOSNn351zTnzp3DwIEDERERgS+++AKNGzeGm5sbNm7ciC+//FKrjwlg+JoY2i6EsLhs1szzscceQ79+/bBmzRps3boVn332Gf7v//4Pq1evxrBhwywuI5HUGKgQ2YjmfCoxMTFa1fWRkZFwd3dHdHQ0Dhw4gPvvv1+9r3nz5hBCIDw8HK1atTLrnGFhYdi1axcKCgq0alWSk5Mtfh3mzvIaFhamd26X06dPq/ebKywsDDt27EBeXp5WoGbKHDL//vsviouLsW7dOq0ajKqa0CxR8boMvfZ69eqhTp06ZuepUqlw7tw5rVoUQ687JCQEL7/8Ml5++WWkp6eja9eumDNnDgMVcmhs+iGykW7dusHDwwO//vorrly5olWj4u7ujq5du2LRokXIz8/X6s/x0EMPwdnZGbNnz9b5ZS2EQGZmpsFzDh06FKWlpVi6dKl6m0ql0ju6xVQVAU9Vo4003X///Th48CD27dun3pafn4/vv/8eTZs2Rdu2bc0uw/3334+ysjKtIcVKpRJff/210WMrai00r2V2djZ+/PFHs8tRlZCQEHTu3Bk//fST1rVKSEjA1q1btYJRU1UEGAsXLtTaXnmpAaVSqdOEFRwcjNDQUBQXF5t9XiI5YY0KkY24ubmhe/fu+O+//+Du7o7IyEit/b1798b8+fMBaE/01rx5c3z88ceYMWMGUlJSMGrUKHh7e+PChQtYs2YNJk6ciGnTpuk956hRo9CjRw+8+eabSE5ORkREBNatW4ebN28CML92BCjviNq2bVusWrUKrVq1QkBAANq3b4/27dvrTT99+nT8/vvvGDZsGF577TUEBATgp59+woULF/D333/Dycn830cjRoxAnz59MH36dKSkpKBt27ZYvXq1zs1ZnyFDhsDNzQ0jRozAiy++iLy8PCxduhTBwcFIS0szuyxV+eyzzzBs2DD06tULzz33HAoLC/H111/D19fXonV5OnfujD
FjxuDbb79FdnY2evfujR07dujUkOXm5qJRo0Z45JFH0KlTJ9StWxfbt2/HoUOH1O8xIkfFGhUiG6oIQCqaejRVTO7
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"df.plot(x=\"id\", y=\"carat\", kind=\"line\")\n",
"\n",
"plt.xlabel(\"id\") \n",
"plt.ylabel(\"weight\")\n",
"plt.title(\"Weight of diamonds\")\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"2. Столбчатая диаграмма (bar). Соотношение цены и веса"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlQAAAHYCAYAAACLAnYXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABBGUlEQVR4nO3deVhWdf7/8dcNyCYCIrIVKu4rrpNhLliMaI5F+bM0U2tQxy6t1DJ1MnOpdDK30nTM1BZtm8pKHQ33DW00ceurowbZAli53OKCLPfvj76cL7eAIge87xufj+s613DOeZ9z3odRefU5H85tsdlsNgEAAKDM3BzdAAAAgKsjUAEAAJhEoAIAADCJQAUAAGASgQoAAMAkAhUAAIBJBCoAAACTCFQAAAAmEagAAABMIlABcGqPPfaY6tSp4+g2KoSZe3vsscfk5+dXvg0BKDMCFYAKtWzZMlksFmPx9vZWw4YNNWLECGVmZjq6vSI+/vhjWSwWff7550X2tWzZUhaLRZs2bSqyr1atWurQocPNaPGGXLx4UZMmTdLmzZsd3QpQqRGoANwUU6ZM0Xvvvad58+apQ4cOWrBggWJiYnTx4sVrHvfWW2/p6NGjN6lLqWPHjpKk7du32223Wq06dOiQPDw8tGPHDrt9P/74o3788Ufj2NK6Gfd28eJFTZ48mUAFVDAPRzcA4NbQo0cPtWvXTpI0ePBg1ahRQ7NmzdIXX3yhfv36Fam/cOGCqlatqipVqtzUPiMiIhQVFVUkUCUnJ8tms6lPnz5F9hWs32igutn3BqDiMEIFwCHuvvtuSVJqaqoxH+jEiRO69957Va1aNfXv319S8fOM8vPzNXfuXLVo0ULe3t6qWbOmunfvrj179tjVvf/++2rbtq18fHwUFBSkvn376scff7xubx07dtS+fft06dIlY9uOHTvUrFkz9ejRQ7t27VJ+fr7dPovForvuuuuGrl3cvf3+++8aMGCA/P39FRgYqEGDBmn//v2yWCxatmxZkV5//vlnJSQkyM/PTzVr1tSzzz6rvLw8SVJaWppq1qwpSZo8ebLx2HXSpEnX/R4AuDEEKgAOceLECUlSjRo1JEm5ubmKj49XSEiIXnvtNfXu3bvEYxMTEzVy5EhFRkbqH//4h8aNGydvb2/t2rXLqHn55Zc1cOBANWjQQLNmzdLIkSO1YcMGde7cWWfPnr1mbx07dlROTo52795tbNuxY4c6dOigDh066Ny5czp06JDdvsaNGxv3UtZr5+fnq1evXvrggw80aNAgvfzyy0pPT9egQYOKrc/Ly1N8fLxq1Kih1157TV26dNHMmTO1aNEiSVLNmjW1YMECSdIDDzyg9957T++9954efPDBa94/gDKwAUAFWrp0qU2Sbf369bZff/3V9uOPP9o+/PBDW40aNWw+Pj62n376yTZo0CCbJNu4ceOKHD9o0CBb7dq1jfWNGzfaJNmeeuqpIrX5+fk2m81mS0tLs7m7u9tefvllu/0HDx60eXh4FNl+tcOHD9sk2aZOnWqz2Wy2nJwcW9WqVW3vvPOOzWaz2UJDQ23z58+32Ww2m9Vqtbm7u9uGDBlyw9e++t4+/fRTmyTbnDlzjG15eXm2u+++2ybJtnTpUrtjJdmmTJlid53WrVvb2rZta6z/+uuvNkm2F1988Zr3DMAcRqgA3BRxcXGqWbOmIiMj1bdvX/n5+enzzz/XbbfdZtQ88cQT1z3Pp59+KovFohdffLHIPovFIkn67LPPlJ+fr4ceeki//fabsYSFhalBgwbF/pZeYU2aNFGNGjWMuVH79+/XhQsXjN/i69ChgzExPTk5WXl5ecb8KTPXXrt2rapUqaIhQ4YY29zc3DR8+PASjxk2bJjdeqdOnfT9999f8/4AlD8mpQO4KebPn6+GDRvKw8NDoaGhatSokdzc/u+/6Tw8PHT77bdf9zwnTpxQRESEgoKCSqw5duyYbDabGjRoUOz+gs
ngWVlZysrKMra7u7urZs2aslgs6tChg7Zu3ar8/Hzt2LFDISEhql+/vqQ/AtW8efMkyQhWBYGqtNcuzg8//KDw8HD5+vrabS+47tUK5o8VVr16dZ05c6bEawCoGAQqADfFHXfcYfyWX3G8vLzsApYZ+fn5slgs+ve//y13d/ci+wteiPnaa69p8uTJxvbatWsrLS1N0h8B6auvvtLBgweN+VMFOnTooDFjxujnn3/W9u3bFRERobp1697QtctDcecH4BgEKgAupV69elq3bp1Onz5d4ihVvXr1ZLPZFBUVpYYNG5Z4roEDB9q96sDHx8f4uvD7qHbs2KGRI0ca+9q2bSsvLy9t3rxZu3fv1r333nvD1y5O7dq1tWnTJl28eNFulOr48eM3dJ7CCh6DAqhYzKEC4FJ69+4tm81mN7JUwGazSZIefPBBubu7a/Lkyca2wjW///67JKlu3bqKi4szlsKvPWjXrp28vb21fPly/fzzz3YjVF5eXmrTpo3mz5+vCxcu2IWy0l67OPHx8crJydFbb71lbMvPz9f8+fNL860pVkEwu95vNgIwhxEqAC6la9euGjBggF5//XUdO3ZM3bt3V35+vrZt26auXbtqxIgRqlevnl566SWNHz9eaWlpSkhIULVq1ZSamqrPP/9cQ4cO1bPPPnvN63h6eupPf/qTtm3bJi8vL7Vt29Zuf4cOHTRz5kxJ9i/0NHPthIQE3XHHHXrmmWd0/PhxNW7cWF9++aVOnz4tqWyjTT4+PmratKk++ugjNWzYUEFBQWrevLmaN29+w+cCUDJGqAC4nKVLl2rGjBlKTU3VmDFj9Morr+jSpUt2o0jjxo3Tp59+Kjc3N02ePFnPPvusvvzyS3Xr1k333Xdfqa5TEJQKHvEVVjCaVa1aNbVs2dJuX1mv7e7urtWrV+vhhx/WO++8o+eff14RERHGCJW3t3ep+r7a4sWLddttt2nUqFHq16+f/vWvf5XpPABKZrFdPSYNAHAqK1eu1AMPPKDt27fbPZYE4DwIVADgRC5dumQ3OT4vL0/dunXTnj17lJGRYbcPgPNgDhUAOJEnn3xSly5dUkxMjLKzs/XZZ59p586deuWVVwhTgBNjhAoAnMiKFSs0c+ZMHT9+XJcvX1b9+vX1xBNPaMSIEY5uDcA1EKgAAABM4rf8AAAATCJQAQAAmMSk9HKSn5+vX375RdWqVeOjHgAAcBE2m03nz59XRESEqc8TJVCVk19++UWRkZGObgMAAJTBjz/+qNtvv73MxxOoykm1atUk/fF/iL+/v4O7AQAApWG1WhUZGWn8HC8rAlU5KXjM5+/vT6ACAMDFmJ2uw6R0AAAAkwhUAAAAJhGoAAAATGIOFQAATio/P19XrlxxdBsurUqVKnJ3d6/w6xCoAABwQleuXFFqaqry8/Md3YrLCwwMVFhYWIW+J5JABQCAk7HZbEpPT5e7u7siIyNNvXDyVmaz2XTx4kWdOnVKkhQeHl5h1yJQAQDgZHJzc3Xx4kVFRETI19fX0e24NB8fH0nSqVOnFBISUmGP/4i8AAA4mby8PEmSp6engzupHApCaU5OToVdg0AFAICT4rNhy8fN+D4SqAAAAEwiUAEAAIdJS0uTxWJRSkqKo1sxhUnpAAC4iDrjVt/U66VN71nh14iMjFR6erqCg4Mr/FoViUAFAAAc4sqVK/L09FRYWJijWzGNR34AAKBcxMbGasSIERoxYoQCAgIUHBysF154QTabTZJUp04dTZ06VQMHDpS/v7+GDh1a7CO/w4cP6y9/+Yv8/f1VrVo1derUSSdOnDD2L168WE2aNJG3t7caN26sN99882bfahGMUAEAgHLzzjvvKDExUd9884327NmjoUOHqlatWhoyZIgk6bXXXtPEiRP14osvFnv8zz//rM6dOys2NlYbN26Uv7+/duzYodzcXEnS8uXLNXHiRM2bN0+tW7fWvn37NGTIEFWtWlWDBg26afd5NQIVAAAoN5
GRkZo9e7YsFosaNWqkgwcPavbs2Uaguvvuu/XMM88Y9WlpaXbHz58/XwEBAfrwww9VpUoVSVLDhg2N/S+++KJmzpy
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df.plot(x=\"carat\", y=\"price\", kind=\"bar\")\n",
"\n",
"plt.xlabel(\"weight\") \n",
"plt.ylabel(\"price\")\n",
"plt.title(\"Price-Weight\")\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"3. Гистограмма (hist). Частота встречаемости по глубине"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAHHCAYAAACiOWx7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABKUklEQVR4nO3deXyNd97/8fdJOCE4sWZRQSxFaqsgMsVUpYK0U6VTSis02h8TBmltU7W001LutnRs7XQqOqXFTKutDEpsU1JLNLW0UjRttGRRkkMQJNfvj965bqcJLmk4Ea/n43E9btd1fc51Pt9zzel539cWm2EYhgAAAHBVHu5uAAAA4FZAaAIAALCA0AQAAGABoQkAAMACQhMAAIAFhCYAAAALCE0AAAAWEJoAAAAsIDQBAABYQGgCADc7dOiQevToIR8fH9lsNq1ateq6Xr9582bZbDZt3rz5hvR3LdOmTZPNZtOJEyfc8v7AzUJoAm4jcXFxstlsxU4TJ050d3u3raioKO3bt08vvfSS/vnPf6p9+/bubqlYL7/88nUHOqA8qeDuBgDcfC+88IKCgoJclrVs2dJN3dzezp07p8TERD333HMaOXKku9u5qpdfflmPPPKI+vTp4+5WALcgNAG3oV69elk+mnH+/HnZ7XZ5eHBg+kbIysqSJFWvXt29jQC4Jv4rCMBUeG3MBx98oMmTJ+uOO+6Qt7e3nE6nJGnHjh3q2bOnfHx85O3trd///vfatm1bke18/vnn6tChgypVqqTGjRvrzTffNK97KfT999/LZrMpLi6uyOttNpumTZvmsuynn37Sk08+KT8/P3l5eemuu+7SO++8U2z/K1as0EsvvaR69eqpUqVK6t69uw4fPlzkfXbs2KHevXurRo0aqlKlilq3bq25c+dKkhYvXiybzaYvv/yyyOtefvlleXp66qeffrrq5/nll1+qV69ecjgcqlq1qrp3764vvvjCXD9t2jQ1aNBAkjRu3DjZbDY1bNjwqtv88ccf1adPH1WpUkW+vr4aO3as8vLyiq21sr8K98vBgwf16KOPyuFwqFatWho9erTOnz9v1tlsNuXm5mrJkiXmKd0hQ4a4bCs7O1tDhgxR9erV5ePjo6FDh+rs2bNXHQ9wK+FIE3AbysnJKXLRbu3atc1/v/jii7Lb7Xr22WeVl5cnu92ujRs3qlevXgoJCdHUqVPl4eGhxYsX67777tN///tfdezYUZK0b98+9ejRQ3Xq1NG0adN06dIlTZ06VX5+fiXuNyMjQ506dZLNZtPIkSNVp04drVmzRtHR0XI6nRozZoxL/cyZM+Xh4aFnn31WOTk5mjVrlgYNGqQdO3aYNevXr9cDDzyggIAAjR49Wv7+/vrmm2+0evVqjR49Wo888ohiYmK0dOlS3X333S7bX7p0qe69917dcccdV+z5wIED6tKlixwOh8aPH6+KFSvqzTff1L333qstW7YoNDRUffv2VfXq1TV27Fg99thj6t27t6pWrXrFbZ47d07du3dXWlqa/vznP6tu3br65z//qY0bNxaptbq/Cj366KNq2LChZsyYoS+++EJvvPGGTp06pXfffVeS9M9//lPDhg1Tx44d9fTTT0uSGjduXGQbQUFBmjFjhvbs2aO3335bvr6+euWVV644JuCWYgC4bSxevNiQVOxkGIaxadMmQ5LRqFEj4+zZs+brCgoKjKZNmxoRERFGQUGBufzs2bNGUFCQcf/995vL+vTpY1SqVMn44YcfzGVff/214enpaVz+n5zU1FRDkrF48eIifUoypk6das5HR0cbAQEBxokTJ1zqBgwYYPj4+Ji9FvbfokULIy8vz6ybO3euIcnYt2+fYRiGcenSJSMoKMho0KCBcerUKZdtXj6+xx57zKhbt66Rn59vLtuzZ88V+75cnz59DLvdbhw5csRcduzYMaNatWpG165di3wOs2fPvur2DMMw5syZY0gyVqxYYS7Lzc
01mjRpYkgyNm3aZI7B6v6aOnWqIcn4wx/+4PJef/rTnwxJxldffWUuq1KlihEVFVWkr8JtPPnkky7LH374YaNWrVrXHBdwq+D0HHAbmj9/vtavX+8yXS4qKkqVK1c255OTk3Xo0CENHDhQP//8s06cOKETJ04oNzdX3bt319atW1VQUKD8/HytW7dOffr0Uf369c3Xt2jRQhERESXq1TAM/fvf/9aDDz4owzDM9z5x4oQiIiKUk5OjPXv2uLxm6NChstvt5nyXLl0kSd99952kX06bpaamasyYMUWuJbr8FOLgwYN17Ngxbdq0yVy2dOlSVa5cWf369btiz/n5+frss8/Up08fNWrUyFweEBCggQMH6vPPPzdPeV6P//znPwoICNAjjzxiLvP29jaP/BSyur8uFxMT4zI/atQo8z2tGj58uMt8ly5d9PPPP5dorEBZxOk54DbUsWPHq14I/us76w4dOiTplzB1JTk5OcrLy9O5c+fUtGnTIuubNWt2XT/AhbKyspSdna233npLb731VrE1mZmZLvOXBzZJqlGjhiTp1KlTkqQjR45IuvYdg/fff78CAgK0dOlSde/eXQUFBXr//ff10EMPqVq1alft+ezZs2rWrFmRdS1atFBBQYGOHj2qu+6666rv/2s//PCDmjRp4hLsJBV5H6v7q/BzkVRknzVu3FgeHh76/vvvLfd3tc/d4XBY3g5QVhGaABRx+VEmSeZRidmzZ6tt27bFvqZq1apXvCC5OL/+4S+Un59f7Hs//vjjVwwBrVu3dpn39PQsts4wDMv9FW5n4MCB+vvf/64FCxZo27ZtOnbsmB5//PHr2s7NZnV/Xc2V9s/VlNbnDpRVhCYA11R4wa/D4VB4ePgV6+rUqaPKlSubRzoul5KS4jJfeBQiOzvbZfkPP/xQZJvVqlVTfn7+Vd/7ehSOZ//+/dfc5uDBg/Xqq6/q008/1Zo1a1SnTp1rnmqsU6eOvL29i4xZkg4ePCgPDw8FBgZed98NGjTQ/v37ZRiGS6j59ftY3V+XO3TokMsRxsOHD6ugoMDlbr6SBCmgPOGaJgDXFBISosaNG+t//ud/dObMmSLrC5815OnpqYiICK1atUppaWnm+m+++Ubr1q1zeY3D4VDt2rW1detWl+ULFixwmff09FS/fv3073//W/v377/ie1+Pdu3aKSgoSHPmzCkS2n59VKR169Zq3bq13n77bf373//WgAEDVKHC1f//TU9PT/Xo0UMff/yxy+mtjIwMLVu2TJ07dy7R6arevXvr2LFj+te//mUuO3v2bJHTllb31+Xmz5/vMv+3v/1N0i/P9CpUpUqVIp8XcDvhSBOAa/Lw8NDbb7+tXr166a677tLQoUN1xx136KefftKmTZvkcDj06aefSpKmT5+utWvXqkuXLvrTn/6kS5cu6W9/+5vuuusu7d2712W7w4YN08yZMzVs2DC1b99eW7du1bffflvk/WfOnKlNmzYpNDRUTz31lIKDg3Xy5Ent2bNHGzZs0MmTJ697PAsXLtSDDz6otm3baujQoQoICNDBgwd14MCBIgFv8ODBevbZZyXJ8qm5v/71r1q/fr06d+6sP/3pT6pQoYLefPNN5eXladasWdfVb6GnnnpK8+bN0+DBg5WUlKSAgAD985//lLe3d5HxWd1fhVJTU/WHP/xBPXv2VGJiot577z0NHDhQbdq0MWtCQkK0YcMGvfbaa6pbt66CgoIUGhpaorEAtyR33roH4OYqfOTArl27il1feMv+ypUri13/5ZdfGn379jVq1apleHl5GQ0aNDAeffRRIyEhwaVuy5YtRkhIiGG3241GjRoZixYtMm9Lv9zZs2eN6Ohow8fHx6hWrZrx6KOPGpmZmUUeOWAYhpGRkWHExMQYgYGBRsWKFQ1/f3+je/fuxltvvXXN/q/0eIPPP//cuP/++41q1aoZVapUMVq3bm387W9/KzLu48ePG56ensadd95Z7OdyJXv27D
EiIiKMqlWrGt7e3ka3bt2M7du3F9ublUcOGIZh/PDDD8Yf/vAHw9vb26hdu7YxevRoY+3atS6PHChkZX8V7pevv/7
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df[\"depth\"].plot(kind=\"hist\")\n",
"\n",
"plt.xlabel(\"depth\") \n",
"plt.ylabel(\"Frequency\") \n",
"plt.title(\"Frequency of depth\") \n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"4. Ящик с усами (box). Вес"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAGzCAYAAAABsTylAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA6mElEQVR4nO3de1yUZf7/8feAchAFT6DgCRRPKWaZmQdUsq01TYnY2qz1XJuVpVkmHVSstE1dNSvL2rLtpKWorce0UjEty9SkrMBAyUgRk5OKMnP//ujHfJvAZGzwvmVez8eDx+5c93XPfAaMeXPd13XdNsMwDAEAAFiQj9kFAAAAnA1BBQAAWBZBBQAAWBZBBQAAWBZBBQAAWBZBBQAAWBZBBQAAWBZBBQAAWBZBBQAAWBZBBbCYrKws2Ww2LVq0yOxSXKxbt06dO3dWQECAbDabjh8/7tb5U6dOlc1mc2mLjIzU8OHDPVekBS1atEg2m01ZWVmm1jF8+HBFRkae97m1a9f2bEFAJRFUUG2VfUD89issLExxcXFau3btBa9n06ZNLrXUrFlTLVu21NChQ/XDDz945DW2bdumqVOnuh0iziUvL08333yzAgMD9fzzz+uNN95QUFCQR18DF78TJ05o6tSp2rRpk9mloBqpYXYBQFWbNm2aoqKiZBiGDh8+rEWLFun666/X//73Pw0cOPCC13Pfffepa9euOnPmjL788kstXLhQq1ev1t69exUREfGnnnvbtm1KTk7W8OHDVbduXc8ULOnzzz9XYWGhnnjiCV1zzTUee97vvvtOPj78vXQhvPzyy3I4HFX6GidOnFBycrIkqW/fvlX6WvAeBBVUe/3799cVV1zhfDxq1Cg1atRI77zzjilBJTY2VomJiZKkESNGqE2bNrrvvvv0+uuvKykp6YLXUxlHjhyRJI+GH0ny9/f36PPh7GrWrGl2CcB54U8ZeJ26desqMDBQNWq45vTi4mJNmDBBzZo1k7+/v9q2batZs2ap7AbjJ0+eVLt27dSuXTudPHnSed6xY8cUHh6uHj16yG63u13P1VdfLUnKzMz8w34fffSRYmNjFRQUpLp162rw4MHat2+f8/jUqVP10EMPSZKioqKcl5jONTfivffeU5cuXRQYGKiGDRvq9ttv16FDh5zH+/btq2HDhkmSunbtKpvNds55JVu3blXXrl0VEBCgVq1a6aWXXqqw3+/nqBw7dkwPPvigYmJiVLt2bQUHB6t///7as2ePy3lll9HeffddJScnq0mTJqpTp44SExOVn5+vkpISjRs3TmFhYapdu7ZGjBihkpISl+coLS3VE088oVatWsnf31+RkZF65JFHyvWLjIzUwIEDtXXrVl155ZUKCAhQy5Yt9d///rfc+/n666919dVXKzAwUE2bNtWTTz5ZqVGM999/XzabTV999ZWzbdmyZbLZbEpISHDp2759e91yyy0ubW+++abzZ1i/fn39/e9/V3Z2tkufiuao5OXl6R//+IeCg4NVt25dDRs2THv27DnrHKlDhw4pPj5etWvXVmhoqB588EHnv/msrCyFhoZKkpKTk53//qZOnXrO9w/8EUZUUO3l5+fr6NGjMgxDR44c0fz581VUVKTbb7/d2ccwDA0aNEgff/yxRo0apc6dO2v9+vV66KGHdOjQIc2ZM0eBgYF6/fXX1bNnTz366KP697//LUm65557lJ+fr0WLFsnX19ft+vbv3y9JatCgwVn7bNy4Uf3791fLli01depUnTx5UvPnz1fPnj315ZdfKjIyUgkJCfr+++/1zjvvaM6cOWrYsKEkOT88KrJo0SKNGDFCXbt21YwZM3T48GHNmzdPn3zyiXbt2qW6devq0UcfVdu2bbVw4ULnZbRWrVqd9Tn37t2ra6+9VqGhoZo6dapKS0s1ZcoUNWrU6Jzfix9++EErVqzQ3/72N0VFRenw4cN66aWX1KdPH33zzTflLo3NmDFDgY
GBmjRpkjIyMjR//nzVrFlTPj4++uWXXzR16lR9+umnWrRokaKiojR58mTnuaNHj9brr7+uxMRETZgwQZ999plmzJihffv2afny5S6vk5GRocTERI0aNUrDhg3Tq6++quHDh6tLly7q0KGDJOnnn39WXFycSktLNWnSJAUFBWnhwoUKDAw85/vu1auXbDabtmzZok6dOkmSUlNT5ePjo61btzr75ebm6ttvv9W9997rbHvqqaf0+OOP6+abb9bo0aOVm5ur+fPnq3fv3s6fYUUcDoduuOEG7dixQ2PGjFG7du20cuVKZyj9Pbvdruuuu07dunXTrFmztHHjRs2ePVutWrXSmDFjFBoaqgULFmjMmDG68cYbnQGr7P0A580AqqnXXnvNkFTuy9/f31i0aJFL3xUrVhiSjCeffNKlPTEx0bDZbEZGRoazLSkpyfDx8TG2bNlivPfee4YkY+7cuees5+OPPzYkGa+++qqRm5tr/PTTT8bq1auNyMhIw2azGZ9//rlhGIaRmZlpSDJee+0157mdO3c2wsLCjLy8PGfbnj17DB8fH2Po0KHOtpkzZxqSjMzMzHPWc/r0aSMsLMzo2LGjcfLkSWf7qlWrDEnG5MmTnW1l38uyGv9IfHy8ERAQYBw4cMDZ9s033xi+vr7G73/ltGjRwhg2bJjz8alTpwy73e7SJzMz0/D39zemTZvmbCv7Xnbs2NE4ffq0s/3WW281bDab0b9/f5fn6N69u9GiRQvn4927dxuSjNGjR7v0e/DBBw1JxkcffeRSoyRjy5YtzrYjR44Y/v7+xoQJE5xt48aNMyQZn332mUu/kJCQSv1MOnToYNx8883Ox5dffrnxt7/9zZBk7Nu3zzAMw0hJSTEkGXv27DEMwzCysrIMX19f46mnnnJ5rr179xo1atRwaR82bJjL92DZsmXl/u3a7Xbj6quvLvfvb9iwYYYkl5+BYRjGZZddZnTp0sX5ODc315BkTJky5Q/fK+AOLv2g2nv++ee1YcMGbdiwQW+++abi4uI0evRopaSkOPusWbNGvr6+uu+++1zOnTBhggzDcFklNHXqVHXo0EHDhg3T3XffrT59+pQ774+MHDlSoaGhioiI0IABA1RcXKzXX3/dZR7Nb+Xk5Gj37t0aPny46tev72zv1KmT/vKXv2jNmjWVfu3f+uKLL3TkyBHdfffdCggIcLYPGDBA7dq10+rVq91+TrvdrvXr1ys+Pl7Nmzd3trdv317XXXfdOc/39/d3Tq612+3Ky8tT7dq11bZtW3355Zfl+g8dOtRl7kW3bt1kGIZGjhzp0q9bt27Kzs5WaWmpJDm/Zw888IBLvwkTJkhSufd+ySWXKDY21vk4NDRUbdu2dVmttWbNGl111VW68sorXfrddttt53zf0q9zl1JTUyVJhYWF2rNnj+688041bNjQ2Z6amqq6deuqY8eOkqSUlBQ5HA7dfPPNOnr0qPOrcePGat26tT7++OOzvt66detUs2ZN3XHHHc42Hx8f3XPPPWc956677ipXs6dWrAFnw6UfVHtXXnmlSwi49dZbddlll+nee+/VwIED5efnpwMHDigiIkJ16tRxObd9+/aSpAMHDjjb/Pz89OqrrzrnYLz22mvl9gf5I5MnT1ZsbKx8fX3VsGFDtW/fvtx8md8qe+22bduWO9a+fXutX79excXFbi8X/qPnbdeuncslh8rKzc3VyZMn1bp163LH2rZte85Q5XA4NG/ePL3wwgvKzMx0mfNT0aWx34YhSQoJCZEkNWvWrFy7w+FQfn6+GjRooAMHDsjHx0fR0dEu/Ro3bqy6deu6/Lwreh1Jqlevnn755Rfn4wMHDqhbt27l+lX0/a1IbGysXnzxRWVkZGj//v2y2Wzq3r27M8DccccdSk1NVc+ePZ1hLj09XYZhVPj9lv54Au2BAwcUHh6uWrVqubT//ntSJiAgoNxlxN9/D4CqQFCB1/Hx8VFcXJzmzZun9PR05xwDd6
xfv16SdOrUKaWnpysqKqrS58bExHh0iW91Mn36dD3++OMaOXKknnjiCdWvX18+Pj4aN25chZNSzzYn6Gztxv+fGF2
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df[\"carat\"].plot(kind=\"box\")\n",
"\n",
"plt.ylabel(\"weight (carat)\") \n",
"plt.title(\"Box Plot of diamond weight\") \n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"5. Диаграмма с областями (area). "
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAHHCAYAAACiOWx7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABT2UlEQVR4nO3deVhU9eIG8HfYQQVUZCtCUkPNLTWRSs3kisutuNmvUlNL1Cw1FTO1a64VXs0tM81M6ZbmdtVKTUXcFVFRREARkEWUARdg2GFmvr8/upzrCMqAwJkZ3s/zzPMw53znzHtO3MvrOWe+oxBCCBARERHRI5nJHYCIiIjIGLA0EREREemBpYmIiIhIDyxNRERERHpgaSIiIiLSA0sTERERkR5YmoiIiIj0wNJEREREpAeWJiIiIiI9sDQRUQXvvfceWrZsWa/vOW/ePCgUCty5c6dO3+fll1/Gyy+/XOW4o0ePQqFQ4OjRozV6n/L9uV/Lli3x3nvv1Wh7xiIkJAQKhQIpKSlyRyGqdSxNRCam/I9W+cPGxgbPPPMMJk6ciMzMTLnjEREZLQu5AxBR3ViwYAG8vLxQXFyMkydPYs2aNdi3bx9iYmJgZ2f3yNf+8MMP0Gq19ZS0fh08eFC2946Pj4eZGf+tSmSsWJqITNTAgQPRvXt3AMCYMWPQvHlzLFu2DL/99huGDh1a6WsKCgrQqFEjWFpa1mfUemVlZSXbe1tbW8v23kT0+PhPHqIG4pVXXgEAJCcnA/jrvqXGjRsjKSkJgwYNQpMmTTB8+HBp3YP3NGm1WqxcuRIdO3aEjY0NWrRogQEDBuD8+fM643755Rd069YNtra2aNasGd555x3cuHFD75x37tzBW2+9BXt7ezRv3hyTJ09GcXGxtL5Pnz7o3Llzpa/19vaGv7//I7df2T1N6enpCAgIQKNGjeDs7IypU6eipKRE78wnT57E888/DxsbG7Rq1Qrff/99peMevKfp3r17+OSTT9CxY0c0btwY9vb2GDhwIC5duqTzuvL7q7Zt24b58+fjiSeeQJMmTfDmm28iNzcXJSUlmDJlCpydndG4cWO8//77FfKr1WosXLgQrVq1grW1NVq2bInPPvuswriWLVvi73//O06ePIkePXrAxsYGTz/9NP79739X2J/Y2Fi88sorsLW1xZNPPokvvvii0jOU58+fh7+/P5ycnGBrawsvLy+MHj1a38NLZDB4pomogUhKSgIANG/eXFqmVqvh7++Pl156CV9//fUjL9sFBgYiJCQEAwcOxJgxY6BWq3HixAmcOXNGOqP15Zdf4vPPP8dbb72FMWPG4Pbt21i1ahV69+6NixcvwtHRscqcb731Flq2bIng4GCcOXMG33zzDbKzs6U/2iNGjMDYsWMRExODDh06SK87d+4crl27htmzZ1fruBQVFaFfv35IS0vDxx9/DHd3d/z88884fPiwXq+/fPky+vfvjxYtWmDevHlQq9WYO3cuXFxcqnzt9evXsXv3bvzf//0fvLy8kJmZie+//x59+vRBXFwc3N3ddcYHBwfD1tYWM2fORGJiIlatWgVLS0uYmZkhOzsb8+bNw5kzZxASEgIvLy/MmTNHeu2YMWPw008/4c0338S0adMQERGB4OBgXLlyBbt27dJ5n8TERLz55psIDAzEqFGjsGHDBrz33nvo1q0bnn32WQCAUqlE3759oVarMXPmTDRq1Ajr1q2Dra2tzraysrKk4zNz5kw4OjoiJSUFO3fu1Ov4EhkUQUQmZePGjQKAOHTokLh9+7a4ceOG2LJli2jevLmwtbUV6enpQgghRo0aJQCImTNnVtjGqFGjhKenp/T88OHDAoD4+OOPK4zVarVCCCFSUlKEubm5+PLLL3XWX758WVhYWFRY/qC5c+cKAOK1117TWf7RRx8JAOLSpUtCCCFycnKEjY2NmDFjhs64jz/+WDRq1Ejk5+c/8n369Okj+vTpIz1fsW
KFACC2bdsmLSsoKBCtW7cWAMSRI0ceub2AgABhY2MjUlNTpWVxcXHC3NxcPPh/sZ6enmLUqFHS8+LiYqHRaHTGJCcnC2tra7FgwQJp2ZEjRwQA0aFDB1FaWiotHzp0qFAoFGLgwIE62/D19dX57xcVFSUAiDFjxuiM++STTwQAcfjwYZ2MAMTx48elZVlZWcLa2lpMmzZNWjZlyhQBQEREROiMc3BwEABEcnKyEEKIXbt2CQDi3LlzFY4dkbHh5TkiE+Xn54cWLVrAw8MD77zzDho3boxdu3bhiSee0Bn34YcfVrmt//znP1AoFJg7d26FdeUfq9+5cye0Wi3eeust3LlzR3q4urqiTZs2OHLkiF65J0yYoPN80qRJAIB9+/YBABwcHPD666/j119/hRACAKDRaLB161bpElt17Nu3D25ubnjzzTelZXZ2dhg3blyVr9VoNDhw4AACAgLw1FNPScvbtWtX5WVC4K97nMpvDNdoNLh79y4aN24Mb29vXLhwocL4kSNH6txv5uPjAyFEhUtdPj4+uHHjBtRqtbSPABAUFKQzbtq0aQCAvXv36ixv3749evXqJT1v0aIFvL29cf36dWnZvn370LNnT/To0UNnXPkl3nLlZxf37NmDsrKyRxwNIsPH0kRkolavXo3Q0FAcOXIEcXFxuH79eoU/5BYWFnjyySer3FZSUhLc3d3RrFmzh45JSEiAEAJt2rRBixYtdB5XrlxBVlaWXrnbtGmj87xVq1YwMzPTmfdn5MiRSEtLw4kTJwAAhw4dQmZmJkaMGKHXe9wvNTUVrVu3rjCnkre3d5WvvX37NoqKiipk1vf1Wq0Wy5cvR5s2bWBtbQ0nJye0aNEC0dHRyM3NrTD+/mIG/FUgAcDDw6PCcq1WK20jNTUVZmZmaN26tc44V1dXODo6IjU19ZHvAwBNmzZFdna29Dw1NVWv/e7Tpw+GDBmC+fPnw8nJCa+//jo2btxYrXvGiAwF72kiMlE9evSQ7jV6mPvPdDwurVYLhUKBP//8E+bm5hXWN27cuEbbfbDMAIC/vz9cXFzwyy+/oHfv3vjll1/g6uoKPz+/Gr2HXL766it8/vnnGD16NBYuXIhmzZrBzMwMU6ZMqfSG6sqO66OWl5+JK1fZsXyc7elDoVBgx44dOHPmDP744w8cOHAAo0ePxtKlS3HmzJka/14QyYGliYiq1KpVKxw4cAD37t176NmmVq1aQQgBLy8vPPPMMzV+r4SEBHh5eUnPExMTodVqdT7NZ25ujmHDhiEkJAT/+te/sHv3bowdO/ahf+wfxdPTEzExMRBC6JSK+Pj4Kl/bokUL2NraIiEhocI6fV6/Y8cO9O3bFz/++KPO8pycHDg5OemRXj+enp7QarVISEhAu3btpOWZmZnIycmBp6dnjbZZnf3u2bMnevbsiS+//BKbN2/G8OHDsWXLFowZM6ba700kF16eI6IqDRkyBEIIzJ8/v8K68rMPb7zxBszNzTF//vwKZySEELh7965e77V69Wqd56tWrQLw17xT9xsxYgSys7PxwQcfID8/H++++67e+3O/QYMG4datW9ixY4e0rLCwEOvWravytebm5vD398fu3buRlpYmLb9y5QoOHDig1+sfPFbbt2/HzZs3q7EHVRs0aBAAYMWKFTrLly1bBgAYPHhwjbZ55swZnD17Vlp2+/ZtbNq0SWdcdnZ2hX3s0qULAPASHRkdnmkioir17dsXI0aMwDfffIOEhAQMGDAAWq0WJ06cQN++fTFx4kS0atUKX3zxBWbNmoWUlBQEBASgSZMmSE5Oxq5duzBu3Dh88sknVb5XcnIyXnvtNQwYMADh4eH45ZdfMGzYsApzMz333HPo0KEDtm/fjnbt2qFr16412rexY8fi22+/xciRIxEZGQk3Nzf8/PPPVc6aXm7+/PnYv38/evXqhY8++ghqtRqrVq3Cs88+i+jo6Ee+9u9//zsWLFiA999/Hy+88AIuX76MTZ
s24emnn67RvjxM586dMWrUKKxbtw45OTno06cPzp49i59++gkBAQHo27dvtbf56aef4ueff8aAAQMwefJkacoBT09
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df.plot(x=\"id\", y=\"price\", kind=\"area\")\n",
"\n",
"plt.xlabel(\"id\") \n",
"plt.ylabel(\"price\")\n",
"plt.title(\"Price by id diamonds\")\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"6. Диаграмма рассеяния (scatter). Зависимость цены от веса"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAHHCAYAAACiOWx7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACUAUlEQVR4nO3deVxVZf4H8M8F2S7rZRNRZFE09xUVFcUy0ZyKbFq0cklzLJfSnMzJKW2zyZw0Nf1VpmOjVs6UNdpmhiuouKCiRSjgjiACV0CR5fz+oHu9+z13X/i8Xy9edc8995znHJDz5Xm+z/eRCIIggIiIiIgM8nB0A4iIiIhcAYMmIiIiIhEYNBERERGJwKCJiIiISAQGTUREREQiMGgiIiIiEoFBExEREZEIDJqIiIiIRGDQRERERCQCgyaiP+zatQsSiQT/+c9/HHr+Xbt2OeT81uZu1yPG1atX8ec//xlhYWGQSCRYtmyZTc83ceJExMXF2fQc7sqSezdx4kQEBARYt0HkElo4ugFEtiSRSETtl5GRYeOWUHMwe/Zs/Pjjj3jttdcQFRWFvn37OrpJ5EA1NTV49913kZqaitTUVEc3h6yAQRO5tc8++0zt9YYNG7Bjxw6t7Z06dcKvv/5qz6aRG/rll1/w4IMPYu7cuXY538cff4zGxka7nMvd2OPe1dTUYNGiRQDAoMlNMGgit/bkk0+qvT5w4AB27NihtR0AgyayWElJCUJCQmx+nurqavj7+8PLy8vm53JXvHdkDuY0EWlobGzEW2+9hTZt2sDX1xf33HMPzpw5o7XfwYMHMXLkSAQHB0MqlWLo0KHYv3+/qHNcvHgR6enp8Pf3R2RkJGbPno3a2lqd+4o5z8KFCyGRSPDbb7/h0UcfRVBQEMLCwvD888/j1q1bWsf897//jT59+sDPzw+hoaF4/PHHceHCBbV9UlNT0bVrV5w+fRrDhg2DVCpF69at8e6779rtes6cOYOJEyciJCQEwcHBmDRpEmpqanReT79+/SCVSiGTyTBkyBD89NNPavt8//33SElJgb+/PwIDAzF69GicOnVKZxs1FRQU4JFHHkFoaCikUikGDBiA7du3K99fv349JBIJBEHAqlWrIJFIDA4NFxUVQSKR4L333sP777+P2NhY+Pn5YejQocjNzVXbV5E/c/bsWdx3330IDAzEE088oXxPMy+nsbERy5cvR7du3eDr64uIiAiMHDkShw8f1rpnxn4G9Dl27BhGjRqFoKAgBAQE4J577sGBAwfU9lHck/3792POnDmIiIiAv78/HnroIZSWlho8/rfffguJRIITJ04ot/33v/+FRCLBmDFj1Pbt1KkTHnvsMZOvTde9Kysrw1NPPYWgoCCEhIRgwoQJOH78OCQSCdavX6/VzkuXLiE9PR0BAQGIiIjA3Llz0dDQAKDpexwREQEAWLRokfJnYuHChQavnZycQNSMTJ8+XdD3Y5+RkSEAEHr16iX06dNHeP/994WFCxcKUqlU6Nevn9q+O3fuFLy9vYXk5GRh6dKlwvvvvy90795d8Pb2Fg4ePGiwDTU1NUKHDh0EX19f4aWXXhKWLVsm9OnTR+jevbsAQMjIyDD5PK+99poAQOjWrZtw//33CytXrhSefPJJAYDw1FNPqZ3/zTffFCQSifDYY48JH374obBo0SIhPDxciIuLE8rLy5X7DR06VIiOjhZiYmKE559/Xvjwww+Fu+++WwAgfPfdd3a5nl69egljxowRPvzwQ2HKlCkCAOGll15Su56FCxcKAISBAwcKS5YsEZYvXy6MGzdOmDdvnnKfDRs2CBKJRBg5cqSwYsUK4R//+IcQFxcnhISECIWFhQa/X8XFxULLli2FwMBA4ZVXXhH++c9/Cj169BA8PDyEr776ShAEQTh79qzw2WefCQCEe++9V/jss8+Ezz77TO8xCwsLld+vuLg44R//+IewaNEiITQ0VIiIiBCKi4uV+0
6YMEHw8fER2rVrJ0yYMEFYs2aNsGHDBuV7sbGxaseeOHGiAEAYNWqUsGzZMuG9994THnzwQWHFihXKfcT+DOiSm5sr+Pv7C61atRLeeOMN4Z133hHi4+MFHx8f4cCBA8r91q1bp/we3n333cKKFSuEF198UfD09BQeffRRg+coKysTJBKJWpuff/55wcPDQ4iIiFBuKykpEQAIK1euNPnaNO9dQ0ODkJycLHh6egozZswQVq5cKdx7771Cjx49BADCunXr1D7r6+srdOnSRXj66aeF1atXCw8//LAAQPjwww8FQRCEqqoqYfXq1QIA4aGHHlL+TBw/ftzgtZNzY9BEzYqYoKlTp05CbW2tcvvy5csFAMLJkycFQRCExsZGITExUUhLSxMaGxuV+9XU1Ajx8fHCvffea7ANy5YtEwAIX375pXJbdXW10L59e7Ugw5TzKIKMBx54QO1czz33nABA+Yu6qKhI8PT0FN566y21/U6ePCm0aNFCbfvQoUMFAMoHtCAIQm1trRAVFSU8/PDDdrmep59+Wq2dDz30kBAWFqZ8nZ+fL3h4eAgPPfSQ0NDQoLav4hw3btwQQkJChGeeeUbt/eLiYiE4OFhru6YXXnhBACDs3btXue3GjRtCfHy8EBcXp3ZeAML06dMNHk8Q7gRNfn5+wsWLF5XbDx48KAAQZs+erdw2YcIEAYDw8ssvax1H88H/yy+/CACEWbNmae2ruB+m/Azokp6eLnh7ewtnz55Vbrt8+bIQGBgoDBkyRLlNETQNHz5c7fs9e/ZswdPTU6ioqDB4ni5duqgFV7179xYeeeQRAYDw66+/CoIgCF999ZXZP9+a9+6///2vAEBYtmyZcltDQ4PyDwXNoAmA8Prrr6udR/EHl0JpaakAQHjttdcMXiu5Dg7PEWmYNGkSvL29la9TUlIANA3RAEBOTg7y8/Mxbtw4lJWV4dq1a7h27Rqqq6txzz33YM+ePQYTTL/77ju0atUKf/7zn5XbpFIppk6dqrafOeeZPn262uuZM2cqzwkAX331FRobG/Hoo48qj3ft2jVERUUhMTFRaxZhQECAWv6Xt7c3+vXrp7wXtr6eadOmqb1OSUlBWVkZ5HI5AGDr1q1obGzEq6++Cg8P9V9niuGxHTt2oKKiAmPHjlW7Zk9PT/Tv39/ozMnvvvsO/fr1w+DBg9Xuy9SpU1FUVITTp08b/Lwh6enpaN26tfJ1v3790L9/f+X3S9Wzzz5r9HiKIazXXntN6z3F/TD1Z0BVQ0MDfvrpJ6SnpyMhIUG5vVWrVhg3bhz27dun/N4oTJ06VW2oMiUlBQ0NDTh37pzBa0lJScHevXsBADdu3MDx48cxdepUhIeHK7fv3bsXISEh6Nq1q8XX9sMPP8DLywvPPPOMcpuHh4fWvylVun4+Vf9tkPthIjiRhrZt26q9lslkAIDy8nIAQH5+PgBgwoQJeo9RWVmp/Jymc+fOoX379lo5Lx07dlR7bc55EhMT1d5v164dPDw8UFRUpDymIAha+yloJse2adNGq50ymUwt18SW12PoexEUFISzZ8/Cw8MDnTt31ntMxXnvvvtune8HBQXp/SzQdH39+/fX2t6pUyfl+4qHtql0fR86dOiAL7/8Um1bixYt0KZNG6PHO3v2LKKjoxEaGqp3H1N/BlSVlpaipqZG63sLNN2PxsZGXLhwAV26dFFuN/bvSZ+UlBSsWbMGZ86cwdmzZyGRSJCcnKwMpp555hns3bsXgwYNUgbMllzbuXPn0KpVK0ilUrXt7du317m/Il9M89qMXRe5NgZNRBo8PT11bhcEAQCUvSFLlixBz549de5rjcJ31jiPZiDT2NgIiUSC77//Xud1ah7P2L0whTnXY43zK8772WefISoqSuv9Fi2c/9egj4+PVk+auUz9GbCUud9DRc/enj17UFBQgN69e8Pf3x8pKSn44IMPUFVVhWPHjuGtt95Sfsae16bvusi9Of9vCyIn065dOw
BNPRTDhw83+fOxsbHIzc2FIAhqQU1eXp7F58nPz0d8fLzy9ZkzZ9DY2KicJdSuXTsIgoD4+Hh06NDB5LbrYsvrMaZ
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df.plot(kind=\"scatter\", x=\"carat\", y=\"price\")\n",
"\n",
"plt.xlabel(\"weight\") \n",
"plt.ylabel(\"price\")\n",
"plt.title(\"The dependence of price on weight\") \n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"7. Круговая диаграмма (pie). Прозрачность"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAgcAAAEqCAYAAACBe/NOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACQkklEQVR4nOzddVgV2RvA8e+lG0VBQBQQA1sUEwUbu8VaC2tt7LVrdXXV1bU71+7uwu5uVFQUBJFuuPP7g59390oIClyE83keHmXizDvDjXfOnJBJkiQhCIIgCILwf2qqDkAQBEEQhOxFJAeCIAiCICgRyYEgCIIgCEpEciAIgiAIghKRHAiCIAiCoEQkB4IgCIIgKBHJgSAIgiAISkRyIAiCIAiCEpEcCIIgCIKgJNOTgylTpiCTyTL7MADUrl2b2rVrK34/d+4cMpmMXbt2Zcnxe/TogY2NTZYc63uFh4fTu3dvzM3NkclkeHh4qCQOGxsbevToofj9y9/q3LlzKolHyN28vb2RyWSsX79e1aH8lMLDwzEzM2Pz5s2qDiXXSu41/Ntvv1G1atXvKi9dycH69euRyWSKHx0dHSwtLXF1dWXhwoWEhYV9VxBf+/DhA1OmTOHu3bsZUl5Gys6xpcXMmTNZv349/fv3Z9OmTXTt2jXFbW1sbBR/azU1NfLkyUPZsmXp27cv165dy8Kov9/du3f55ZdfKFSoENra2piYmFC/fn3WrVtHQkJCphxz6dKlOf5LRhXXNb1mzpzJvn37VHZ8uVzOxo0bqVq1KiYmJhgaGlK8eHG6devG1atXFds9fvyYKVOm4O3trbJYf9Tff/+NoaEhHTt2THb96NGjkclkdOjQIYsjy36y8vPBw8ODe/fuceDAgfTvLKXDunXrJECaNm2atGnTJmnt2rXSzJkzpYYNG0oymUyytraW7t27p7RPXFycFBUVlZ7DSDdu3JAAad26denaLyYmRoqJiVH8fvbsWQmQdu7cma5yvje22NhYKTo6OsOOlRmqVq0qOTk5pWlba2trqUKFCtKmTZukTZs2SUuXLpUGDx4smZubS4A0bNiw744jOjpaio2NVfz+5W919uzZ7y7za6tWrZLU1dUlS0tLacyYMdLq1aul+fPnS82aNZNkMpk0Y8aMDDvWf5UuXVpycXHJlLKzA1Vd1/TS19eXunfvnqZt5XK5FBUVJcXHx2fY8QcOHCgBUsuWLaW///5bWrJkiTRkyBCpWLFi0uTJkxXb7dy5M8Nf+1kpNjZWMjU1lWbOnJnserlcLllZWUk2NjaSrq6uFBoamsURZi+Z9fnw+vXrZL+b3NzcpFq1aqW7PI3vyUYaN26Mo6Oj4vexY8dy5swZmjVrRosWLXjy5Am6uroAaGhooKHxXYdJs8jISPT09NDS0srU43yLpqamSo+fFv7+/pQqVSrN2xcsWJBffvlFadns2bPp3Lkz8+fPp1ixYvTv3z/dcWhra6d7n/S4evUqv/76K9WrV+fIkSMYGhoq1nl4eHDz5k0ePnyYqTH8rL68n5KTU6/rl5rQjPLx40eWLl1Knz59WLlypdK6BQsWEBAQkGHHSklqf8eMdOjQIQICAnBzc0t2/blz5/Dx8eHMmTO4urqyZ88eunfvnulxJSc6OhotLS3U1HJPczs3Nzfat2/Pq1evKFKkSNp3TE8m8aXm4MaNG8munzlzpgRIK1euVCybPHmy9PVhTpw4ITk5OUnGxsaSvr6+VLx4cWns2LGSJP17B/n1z5dsyMXFRSpdurR08+ZNqVatWpKurq40dOhQxbr/ZmRfytq2bZs0duxYqUCBApKenp7UvHlz6e3bt0oxWVtbJ3uX8d8yvxVb9+7dJWtra6X9w8PDpeHDh0tWVlaSlpaWVLx4cWnOnDmSXC5X2g6QBg4cKO3du1cqXbq0pKWlJZUqVUo6evRostf6ax8/fpTc3d0lMzMzSVtbWypXrpy0fv36JNfi65/Xr1
+nWKa1tbXUtGnTZNeFhYVJJiYmUsGCBZXOZc6cOVL16tUlExMTSUdHR6pYsWKyNTdfX++vaw4mTZokaWhoSP7+/kn27dOnj2RsbJxqjVSjRo0kDQ0N6c2bNyluk9Kxv0guE/f19ZV69OghFSxYUNLS0pLMzc2lFi1aKK6jtbV1kmv839fky5cvpXbt2kl58+aVdHV1papVq0qHDh1KNp7t27dLU6ZMkSwtLSUDAwOpbdu2UnBwsBQdHS0NHTpUMjU1lfT19aUePXokW2O1adMmqWLFipKOjo6UN29eqUOHDkle96m9n370ukpS2l7/Kd3xSFLi++K/d9lfPk9evHghde/eXTI2NpaMjIykHj16SBEREUr7ff2TWi1CcjF0795d0tfXl3x8fKSWLVtK+vr6Uv78+aURI0Z8s4bhypUrEqD0HkzOl8/Ur3/++1pcsmSJVKpUKUlLS0uysLCQBgwYIAUFBSmVk9rfMTo6Wpo0aZJkZ2cnaWlpSVZWVtKoUaOSvGZS+1xOTbdu3SQbG5sU1/fq1UsqVaqUJEmS1LhxY6lBgwbJbuft7S01b95c0tPTk0xNTSUPDw/p2LFjyb43Fy9eLNna2ko6OjpS5cqVJU9PzxQ//7du3SqNHz9esrS0lGQymeLaXb16VXJ1dZWMjIwkXV1dydnZWbp48WKSuM6ePStVqlRJ0tbWlooUKSItX7482e+1tWvXSnXq1JFMTU0lLS0tqWTJktLSpUuVtvnW50NQUJA0dOhQxfvFzs5OmjVrlpSQkKBUTlBQkNS9e3fJyMhIMjY2lrp16ybduXMn2fdRcHCwJJPJpL/++ivZ656SDL2l79q1K+PGjePEiRP06dMn2W0ePXpEs2bNKFeuHNOmTUNbWxsvLy8uXboEQMmSJZk2bRqTJk2ib9++1KpVC4AaNWooyggMDKRx48Z07NiRX375hQIFCqQa14wZM5DJZIwZMwZ/f38WLFhA/fr1uXv3rqKGIy3SEtt/SZJEixYtOHv2LL169aJChQocP36cUaNG8f79e+bPn6+0/cWLF9mzZw8DBgzA0NCQhQsX0rZtW96+fUu+fPlSjCsqKoratWvj5eXFoEGDsLW1ZefOnfTo0YPg4GCGDh1KyZIl2bRpE8OGDcPKyooRI0YAYGpqmubz/y8DAwNat27NmjVrePz4MaVLlwYSnz22aNGCLl26EBsby7Zt22jfvj2HDh2iadOmaS6/a9euTJs2je3btzNo0CDF8tjYWHbt2kXbtm1TvNOLjIzk9OnTODs7U7hw4e86v5S0bduWR48eMXjwYGxsbPD39+fkyZO8ffsWGxsbFixYwODBgzEwMGD8+PEAitfnx48fqVGjBpGRkQwZMoR8+fKxYcMGWrRowa5du2jdurXSsf744w90dXX57bff8PLyYtGiRWhqaqKmpkZQUBBTpkzh6tWrrF+/HltbWyZNmqTYd8aMGUycOBE3Nzd69+5NQEAAixYtwtnZmTt37pAnTx7Ftml9P6X3uqb39Z8ebm5u2Nra8scff3D79m1Wr16NmZkZs2fPBmDTpk307t2bKlWq0LdvXwDs7OzSfZyEhARcXV2pWrUqc+fO5dSpU8ybNw87O7tUa8ysra0B2LlzJ+3bt0/xDt7Z2ZkhQ4awcOFCxo0bR8mSJQEU/06ZMoWpU6dSv359+vfvz7Nnz1i2bBk3btzg0qVLSrWVyf0d5XI5LVq04OLFi/Tt25eSJUvy4MED5s+fz/PnzxVtMr71uZyay5cvU7FixWTXxcTEsHv3bsXnTadOnejZsyd+fn6Ym5srtouIiKBu3br4+voydOhQzM3N2bJlC2fPnk1S5rJlyxg0aBC1atVi2LBheHt706pVK/LmzYuVlVWS7adPn46WlhYjR44kJiYGLS0tzpw5Q+PGjalUqRKTJ09GTU2NdevWUbduXS5cuECVKlUAuHPnDo0aNcLCwoKpU6eSkJDAtGnTkv3cXLZsGaVLl6ZFixZoaG
hw8OBBBgwYgFwuZ+DAgQCpfj5ERkbi4uLC+/fv6devH4ULF+by5cuMHTsWX19fFixYACS+r1q2bMnFixf59ddfKVm
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Определение порога для объединения редких значений\n",
"threshold = 0.02 # Порог 2%\n",
"\n",
"# Подсчёт количества уникальных значений и расчёт частот\n",
"value_counts = df[\"clarity\"].value_counts()\n",
"total_count = value_counts.sum()\n",
"\n",
"# Условие для агрегации значений ниже порога\n",
"other_values = value_counts[value_counts / total_count < threshold].sum()\n",
"main_values = value_counts[value_counts / total_count >= threshold]\n",
"\n",
"# Добавление категории \"Other\"\n",
"main_values[\"Other\"] = other_values\n",
"\n",
"# Построение диаграммы\n",
"main_values.plot(kind=\"pie\", \n",
" autopct='%1.1f%%', # Проценты\n",
" startangle=90, # Начальный угол\n",
" counterclock=False, # По часовой стрелке\n",
" cmap=\"Set3\", # Цветовая схема\n",
" wedgeprops={'edgecolor': 'black'}) # Границы сегментов\n",
"\n",
"plt.title(\"Distribution of Daily Customer Count in Stores (Aggregated)\")\n",
"plt.subplots_adjust(left=0.3, right=0.7, top=0.9, bottom=0.1)\n",
"plt.show()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}