639 lines
152 KiB
Plaintext
639 lines
152 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Лабораторная работа №3\n",
|
|||
|
"\n",
|
|||
|
"## Набор данных Students Performance in Exams (Успеваемость студентов на экзаменах)\n",
|
|||
|
"\n",
|
|||
|
"Выгрузка данных из CSV файла в датафрейм"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 9,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Index(['gender', 'race/ethnicity', 'parental level of education', 'lunch',\n",
|
|||
|
" 'test preparation course', 'math score', 'reading score',\n",
|
|||
|
" 'writing score'],\n",
|
|||
|
" dtype='object')\n",
|
|||
|
"\n",
|
|||
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|||
|
"RangeIndex: 1000 entries, 0 to 999\n",
|
|||
|
"Data columns (total 8 columns):\n",
|
|||
|
" # Column Non-Null Count Dtype \n",
|
|||
|
"--- ------ -------------- ----- \n",
|
|||
|
" 0 gender 1000 non-null object\n",
|
|||
|
" 1 race/ethnicity 1000 non-null object\n",
|
|||
|
" 2 parental level of education 1000 non-null object\n",
|
|||
|
" 3 lunch 1000 non-null object\n",
|
|||
|
" 4 test preparation course 1000 non-null object\n",
|
|||
|
" 5 math score 1000 non-null int64 \n",
|
|||
|
" 6 reading score 1000 non-null int64 \n",
|
|||
|
" 7 writing score 1000 non-null int64 \n",
|
|||
|
"dtypes: int64(3), object(5)\n",
|
|||
|
"memory usage: 62.6+ KB\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"\n",
|
|||
|
"# Загрузка данных\n",
|
|||
|
"df = pd.read_csv(\"..//..//static//csv//StudentsPerformance.csv\")\n",
|
|||
|
"\n",
|
|||
|
"# Вывод колонок\n",
|
|||
|
"print(df.columns)\n",
|
|||
|
"\n",
|
|||
|
"print()\n",
|
|||
|
"\n",
|
|||
|
"df.info()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Описание набора \n",
|
|||
|
"\n",
|
|||
|
"Контекст\n",
|
|||
|
"Оценки, полученные студентами\n",
|
|||
|
"\n",
|
|||
|
"Содержание\n",
|
|||
|
"Этот набор данных состоит из оценок, полученных учащимися по различным предметам.\n",
|
|||
|
"\n",
|
|||
|
"Вдохновение\n",
|
|||
|
"Понять влияние происхождения родителей, подготовки к тестированию и т. д. на успеваемость учащихся."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 10,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>gender</th>\n",
|
|||
|
" <th>race/ethnicity</th>\n",
|
|||
|
" <th>parental level of education</th>\n",
|
|||
|
" <th>lunch</th>\n",
|
|||
|
" <th>test preparation course</th>\n",
|
|||
|
" <th>math score</th>\n",
|
|||
|
" <th>reading score</th>\n",
|
|||
|
" <th>writing score</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>group B</td>\n",
|
|||
|
" <td>bachelor's degree</td>\n",
|
|||
|
" <td>standard</td>\n",
|
|||
|
" <td>none</td>\n",
|
|||
|
" <td>72</td>\n",
|
|||
|
" <td>72</td>\n",
|
|||
|
" <td>74</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>group C</td>\n",
|
|||
|
" <td>some college</td>\n",
|
|||
|
" <td>standard</td>\n",
|
|||
|
" <td>completed</td>\n",
|
|||
|
" <td>69</td>\n",
|
|||
|
" <td>90</td>\n",
|
|||
|
" <td>88</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>group B</td>\n",
|
|||
|
" <td>master's degree</td>\n",
|
|||
|
" <td>standard</td>\n",
|
|||
|
" <td>none</td>\n",
|
|||
|
" <td>90</td>\n",
|
|||
|
" <td>95</td>\n",
|
|||
|
" <td>93</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>group A</td>\n",
|
|||
|
" <td>associate's degree</td>\n",
|
|||
|
" <td>free/reduced</td>\n",
|
|||
|
" <td>none</td>\n",
|
|||
|
" <td>47</td>\n",
|
|||
|
" <td>57</td>\n",
|
|||
|
" <td>44</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>group C</td>\n",
|
|||
|
" <td>some college</td>\n",
|
|||
|
" <td>standard</td>\n",
|
|||
|
" <td>none</td>\n",
|
|||
|
" <td>76</td>\n",
|
|||
|
" <td>78</td>\n",
|
|||
|
" <td>75</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" gender race/ethnicity parental level of education lunch \\\n",
|
|||
|
"0 female group B bachelor's degree standard \n",
|
|||
|
"1 female group C some college standard \n",
|
|||
|
"2 female group B master's degree standard \n",
|
|||
|
"3 male group A associate's degree free/reduced \n",
|
|||
|
"4 male group C some college standard \n",
|
|||
|
"\n",
|
|||
|
" test preparation course math score reading score writing score \n",
|
|||
|
"0 none 72 72 74 \n",
|
|||
|
"1 completed 69 90 88 \n",
|
|||
|
"2 none 90 95 93 \n",
|
|||
|
"3 none 47 57 44 \n",
|
|||
|
"4 none 76 78 75 "
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 10,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Вывод столбцов\n",
|
|||
|
"df.head()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 11,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>math score</th>\n",
|
|||
|
" <th>reading score</th>\n",
|
|||
|
" <th>writing score</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>count</th>\n",
|
|||
|
" <td>1000.00000</td>\n",
|
|||
|
" <td>1000.000000</td>\n",
|
|||
|
" <td>1000.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>mean</th>\n",
|
|||
|
" <td>66.08900</td>\n",
|
|||
|
" <td>69.169000</td>\n",
|
|||
|
" <td>68.054000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>std</th>\n",
|
|||
|
" <td>15.16308</td>\n",
|
|||
|
" <td>14.600192</td>\n",
|
|||
|
" <td>15.195657</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>min</th>\n",
|
|||
|
" <td>0.00000</td>\n",
|
|||
|
" <td>17.000000</td>\n",
|
|||
|
" <td>10.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>25%</th>\n",
|
|||
|
" <td>57.00000</td>\n",
|
|||
|
" <td>59.000000</td>\n",
|
|||
|
" <td>57.750000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>50%</th>\n",
|
|||
|
" <td>66.00000</td>\n",
|
|||
|
" <td>70.000000</td>\n",
|
|||
|
" <td>69.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>75%</th>\n",
|
|||
|
" <td>77.00000</td>\n",
|
|||
|
" <td>79.000000</td>\n",
|
|||
|
" <td>79.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>max</th>\n",
|
|||
|
" <td>100.00000</td>\n",
|
|||
|
" <td>100.000000</td>\n",
|
|||
|
" <td>100.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" math score reading score writing score\n",
|
|||
|
"count 1000.00000 1000.000000 1000.000000\n",
|
|||
|
"mean 66.08900 69.169000 68.054000\n",
|
|||
|
"std 15.16308 14.600192 15.195657\n",
|
|||
|
"min 0.00000 17.000000 10.000000\n",
|
|||
|
"25% 57.00000 59.000000 57.750000\n",
|
|||
|
"50% 66.00000 70.000000 69.000000\n",
|
|||
|
"75% 77.00000 79.000000 79.000000\n",
|
|||
|
"max 100.00000 100.000000 100.000000"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 11,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Краткая статистическая сводка для данных:\n",
|
|||
|
"df.describe()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Анализ содержимого\n",
|
|||
|
"\n",
|
|||
|
"*Объекты наблюдения:* студенты, участвующие в экзаменах.\n",
|
|||
|
"\n",
|
|||
|
"*Атрибуты объектов:* \n",
|
|||
|
"\n",
|
|||
|
"gender — пол: определяет гендерную принадлежность студента (мужской, женский). \n",
|
|||
|
"race/ethnicity — этническая принадлежность: группа, к которой относится студент (например, различные расовые/этнические категории). \n",
|
|||
|
"parental level of education — уровень образования родителей(например, среднее образование, высшее образование и т.д.). \n",
|
|||
|
"lunch — тип обеда: информация о том, получает ли студент бесплатный или платный обед. \n",
|
|||
|
"test preparation course — курс подготовки к тесту\n",
|
|||
|
"math score — результаты экзаменов по математике.\n",
|
|||
|
"reading score — результаты экзаменов по чтению.\n",
|
|||
|
"writing score — результаты экзаменов по письму.\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"### Бизнес-цель\n",
|
|||
|
"1. Анализ факторов, влияющих на успеваемость студентов:\n",
|
|||
|
"\n",
|
|||
|
" **Цель:** Исследовать, как различные факторы, такие как пол, этническая принадлежность, уровень образования родителей, тип обеда и наличие курса подготовки к тесту, влияют на оценки студентов по математике, чтению и письму.\n",
|
|||
|
"\n",
|
|||
|
" **Эффект:** Это поможет образовательным учреждениям и политикам лучше понять, какие аспекты могут быть улучшены для повышения успеваемости студентов, а также выявить возможные неравенства в образовательных возможностях.\n",
|
|||
|
"\n",
|
|||
|
"2. Прогнозирование успеваемости студентов\n",
|
|||
|
"\n",
|
|||
|
" **Цель:** Разработать модель прогнозирования успеваемости студентов на основе имеющихся данных, таких как пол, раса/этническая принадлежность, уровень образования родителей, тип обеда и участие в подготовительных курсах.\n",
|
|||
|
"\n",
|
|||
|
" **Эффект:** Это позволит предсказать, какие студенты могут столкнуться с трудностями в обучении, и принять меры для их поддержки. Например, образовательные учреждения могут инициировать дополнительные занятия или индивидуальные консультации для студентов, у которых ожидаются низкие результаты на экзаменах, тем самым повышая их шансы на успешную сдачу экзаменов.\n",
|
|||
|
"\n",
|
|||
|
"### Техническая цель\n",
|
|||
|
"1. Разработка системы анализа факторов успеваемости студентов:\n",
|
|||
|
"\n",
|
|||
|
" **Цель:** Создать аналитическую платформу, которая будет собирать, обрабатывать и визуализировать данные о студентах, включая их оценки и соответствующие факторы (пол, этническая принадлежность, уровень образования родителей, тип обеда, наличие подготовительных курсов).\n",
|
|||
|
"\n",
|
|||
|
"2. Создание модели прогнозирования успеваемости студентов:\n",
|
|||
|
"\n",
|
|||
|
" **Цель:** Разработать и внедрить предсказательную модель, которая будет оценивать вероятную успеваемость студентов на основании их характеристик и данных.\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Анализ данных "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 12,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"gender 0\n",
|
|||
|
"race/ethnicity 0\n",
|
|||
|
"parental level of education 0\n",
|
|||
|
"lunch 0\n",
|
|||
|
"test preparation course 0\n",
|
|||
|
"math score 0\n",
|
|||
|
"reading score 0\n",
|
|||
|
"writing score 0\n",
|
|||
|
"dtype: int64"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 12,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Проверка на пропущенные данные\n",
|
|||
|
"df.isnull().sum()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Нет пропущенных данных"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"gender — пол: определяет гендерную принадлежность студента (мужской, женский). \n",
|
|||
|
"race/ethnicity — этническая принадлежность: группа, к которой относится студент (например, различные расовые/этнические категории). \n",
|
|||
|
"parental level of education — уровень образования родителей(например, среднее образование, высшее образование и т.д.). \n",
|
|||
|
"lunch — тип обеда: информация о том, получает ли студент бесплатный или платный обед. \n",
|
|||
|
"test preparation course — курс подготовки к тесту\n",
|
|||
|
"math score — результаты экзаменов по математике.\n",
|
|||
|
"reading score — результаты экзаменов по чтению.\n",
|
|||
|
"writing score — результаты экзаменов по письму."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 18,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA9wAAAGGCAYAAACJ2omlAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAADfG0lEQVR4nOzdd1gU19cH8O8usMsCu0sREBQBsYAVxYoiduwtlqhRUFFj70ZjImCJP3uLGksUa2KMLfYWjSW2qNhBRCyxISogICDsef/w3QnjUhVcyvk8D4/uzJ07Z2Znzs6dckdCRATGGGOMMcYYY4zlKam+A2CMMcYYY4wxxooibnAzxhhjjDHGGGP5gBvcjDHGGGOMMcZYPuAGN2OMMcYYY4wxlg+4wc0YY4wxxhhjjOUDbnAzxhhjjDHGGGP5gBvcjDHGGGOMMcZYPuAGN2OMMcYYY4wxlg+4wc0YY4wxxhhjjOUDbnAXARKJBIGBgfoOg6XD3wljGeN9o3gIDAyERCJBdHR0ntUZHBwMiUSC+/fv52o6Pz8/ODk55VkcTL84hxRvOd2fGzdujMaNG+d7PDmRH7HMnTsXZcuWhYGBAdzd3fO07vS0ubwgKww5nhvcWdD+uKf/s7GxQZMmTXDgwAF9h/fJbt26hcDAwFwfvDDGCh7OV+xT7NmzB1KpFM+ePcvVdD/88AN27dqVP0Hlk8TERAQGBuLEiRP6DqVA4RzC8ht/B3nj8OHDmDhxIho0aIB169bhhx9+0HdI+e7JkycIDAxESEiIvkP5KIb6DqAwmDZtGpydnUFEeP78OYKDg9GmTRvs2bMH7dq103d4H+3WrVsICgpC48aNC/yZIcZYznC+Yh9j37598PDwQMmSJXM13Q8//ICuXbuiU6dO+RNYOn369MGXX34JuVyeq+lWr14NjUYjfE5MTERQUBAAFJgrYAUJ5xCWX/g7yBt//vknpFIpfv75Z8hkMn2H81k8efIEQUFBcHJy0rmi/2GOL4i4wZ0DrVu3Rq1atYTPAwYMgK2tLX755ZdC/ePDCoekpCTIZDJIpXxDCsse56vCpaDs3/v370f//v31GkN2DAwMYGBgkOvpjIyM8iGaootzCEtISICpqam+w2CZiIqKgkKhKDaN7ewUhhzPR/AfwdzcHAqFAoaG4vMVCQkJGDduHBwcHCCXy1GxYkXMmzcPRAQAePv2LVxdXeHq6oq3b98K07169Qp2dnbw9PREWloagPfPI5iZmeHevXvw8fGBqakp7O3tMW3aNKG+rFy5cgWtW7eGSqWCmZkZmjVrhnPnzgnjg4OD0a1bNwBAkyZNhNvHcnKLnZOTk85tZxKJBMHBwTpltc9+fPjn5+cnKnf8+HF4eXnBwsJCVG748OHZxvPgwQN06NABpqamsLGxwZgxY3Do0KEMl+f8+fNo1aoV1Go1TExM4O3tjTNnzmQY8927d+Hn5wdzc3Oo1Wr069cPiYmJorLJyckYM2YMrK2toVQq0aFDB/z7778Zxvn48WP0798ftra2kMvlqFy5MtauXSsqc+LECUgkEvz666/47rvvUKpUKZiYmCAuLi7b9cBYRjhfFZx8ldX+/erVK4wfPx5Vq1aFmZkZVCoVWrdujatXr+rUk5SUhMDAQFSoUAHGxsaws7NDly5dEBERIZTRaDRYtGgRKleuDGNjY9ja2mLw4MF4/fq1Tn3Xr1/Ho0eP0LZtW2FYcnIyAgICUK5cOcjlcjg4OGDixIlITk4WykgkEiQkJGD9+vWZrquYmJhs86h23e3atQtVqlQR8uPBgwdF5TJ7hvvAgQPw9vaGUqmESqVC7dq1sWXLFmF8+uf77t+/D2trawBAUFCQEHdgYCDWrVsHiUSCK1eu6KyjH374AQYGBnj8+LHOuKKOc0jByyFbt27Ft99+i5IlS8LU1BQdOnTAo0ePRGVPnTqFbt26oUyZMsI+PGbMGNF3Afy37iMiItCmTRsolUr07t0bQM7ziJOTE9q1a4fTp0+jTp06MDY2RtmyZbFhwwahTHbfwe7du9G2bVvY29tDLpfDxcUF06dPF7aRvJCTvFalShU0adJEZ1qNRoNSpUqha9euomE5zbM5kZqaiunTp8PFxQVyuRxOTk749ttvdfLuunXrkJCQkOW2mF5Ojn0B4PTp06hduzaMjY3h4uKClStX6pS5f/9+pvPMqH+Fx48fY8CAAcL36uzsjCFDhiAlJQUAcvTbd+LECdSuXRsA0K9fP53lzugZ7uzyU/qYc/L786n4CncOxMbGIjo6GkSEqKgoLF26FPHx8fjqq6+EMkSEDh064Pjx4xgwYADc3d1x6NAhTJgwAY8fP8bChQuhUCiwfv16NGjQAFOmTMGCBQsAAMOGDUNsbCyCg4NFZ+/T0tLQqlUr1KtXD3PmzMHBgwcREBCA1NRUTJs2LdN4b968CS8vL6hUKkycOBFGRkZYuXIlGjdujL/++gt169ZFo0aNMHLkSCxZsgTffvst3NzcAED4Nzvu7u4YN24cACAyMhJTp07NsvzGjRuF/48ZM0Y0LjIyEm3btoWdnR2mTp0qHAz16dMn2zgSEhLQtGlTPH36FKNGjULJkiWxZcsWHD9+XKfsn3/+idatW8PDwwMBAQGQSqVYt24dmjZtilOnTqFOnTqi8t27d4ezszNmzZqFy5cvY82aNbCxscHs2bOFMv7+/ti0aRN69eoFT09P/Pnnn6KDVq3nz5+jXr16wo5tbW2NAwcOYMCAAYiLi8Po0aNF5adPnw6ZTIbx48cjOTmZz2KyHON8paug5CutjPbvW7duYdeuXejWrRucnZ3x/PlzrFy5Et7e3rh16xbs7e0BvF/P7dq1w7Fjx/Dll19i1KhRePPmDY4cOYIbN27AxcUFADB48GAEBwejX79+GDlyJCIjI/Hjjz/iypUrOHPmjOiKwP79+2FjYyNc1dRoNOjQoQNOnz6NQYMGwc3NDdevX8fChQtx584d4ZntjRs3wt/fH3Xq1MGgQYMAQJi/Vk7yKPD+QG/Hjh0YOnQolEollixZgi+++AIPHz6ElZVVpusyODgY/fv3R+XKlTF58mSYm5vjypUrOHjwIHr16qVT3traGitWrMCQIUPQuXNndOnSBQBQrVo1ODs7Y9iwYdi8eTNq1Kghmm7z5s1o3LgxSpUqle33W9hxDtFV0HLIzJkzIZFI8M033yAqKgqLFi1C8+bNERISAoVCAQDYtm0bEhMTMWTIEFhZWeHChQtYunQp/v33X2zbtk1UX2pqKnx8fNCwYUPMmzcPJiYmAHKXR+7evYuuXbtiwIAB8PX1xdq1a+Hn5wcPDw9Urlw52+8gODgYZmZmGDt2LMzMzPDnn39i6tSpiIuLw9y5c3O8bjKT07zWo0cPBAYG4tmzZ6JHbE6fPo0nT57gyy+/FIblZv3khL+/P9avX4+uXbti3LhxOH/+PGbNmoXbt29j586dAN5vW6tWrcKFCxewZs0aAICnp2emdeb02Pf69eto2bIlrK2tERgYiNTUVAQEBMDW1jZXy5DekydPUKdOHcTExGDQoEFwdXXF48eP8fvvvyMxMREymQz37t3L9rfPzc0N06ZNw9SpUzFo0CB4eXlludw5yU/pfezvT64Qy9S6desIgM6fXC6n4OBgUdldu3YRAJoxY4ZoeNeuXUkikdDdu3eFYZMnTyapVEonT56kbdu2EQBatGiRaDpfX18CQCNGjBCGaTQaatu2LclkMnrx4oUwHAAFBAQInzt16kQymYwiIiKEYU+ePCGlUkmNGjUShmnnffz48VytF3t7e2rXrp3w+eLFiwSA1q1bp1N2ypQpJJFIRMMcHR3J19dX+Lxy5UoCQGfPnhWVA0DDhg3LMpb58+cTANq1a5cw7O3bt+Tq6ipaNo1GQ+XLlycfHx/SaDRC2cTERHJ2dqYWLVoIwwICAggA9e/fXzSvzp07k5WVlfA5JCSEANDQoUNF5Xr16qXznQwYMIDs7OwoOjpaVPbLL78ktVpNiYmJRER0/PhxAkBly5YVhjGWE5yvMlaQ8lVW+3dSUhKlpaWJhkVGRpJcLqdp06YJw9a
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x1000 with 8 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Построим графики boxplot для обнаружения выбросов по каждой характеристике\n",
|
|||
|
"plt.figure(figsize=(10, 10))\n",
|
|||
|
"\n",
|
|||
|
"# Создание boxplot\n",
|
|||
|
"for i, column in enumerate(['gender', 'race/ethnicity','parental level of education','lunch','test preparation course','math score','reading score','writing score'], 1):\n",
|
|||
|
" plt.subplot(8, 3, i)\n",
|
|||
|
" sns.boxplot(x=df[column])\n",
|
|||
|
" plt.title(f\"Boxplot для {column}\")\n",
|
|||
|
" \n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Попробуем решить устранить проблему выбросов для writing score и reading score и math score. Используется метод усреднения данных для устранения выбросов. "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": []
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Связи между объектами:\n",
|
|||
|
"1) Влияние атрибутов на успеваемость: Анализ данных покажет, как каждый из атрибутов (пол, этническая принадлежность, уровень образования родителей, тип обеда, курс подготовки) влияет на оценки студентов, что поможет выявить ключевые факторы, способствующие или препятствующие успеваемости."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": []
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"2) Корреляция между результатами экзаменов: Можно исследовать взаимосвязь между оценками по математике, чтению и письму, чтобы понять, например, влияет ли высокая успеваемость в одном предмете на другие."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": []
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"3) Группировка по характеристикам: Студенты могут быть сгруппированы по различным признакам (например, пол, уровень образования родителей) для анализа различий в успеваемости и выявления возможных неравенств в образовательных возможностях."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": []
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"4) Предсказательные связи: Используя модель прогнозирования успеваемости, можно будет установить, какие комбинации атрибутов (например, пол + уровень образования родителей + участие в курсах подготовки) наиболее предрасполагают к высокому или низкому результату, что позволит образовательным учреждениям эффективно направлять ресурсы на поддержку студентов с высоким риском неуспеха."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Данная гистограмма в диапазоне с 10 по 51 строки отображает:\n",
|
|||
|
"На оси X значения оценок по математике, разбитые на 100 интервалов.\n",
|
|||
|
"На оси Y будет указано количество записей (частота) в каждом из этих интервалов. \n",
|
|||
|
"Анализируя гистограмму \"math score\", можно сделать выводы о том, как распределяются оценки.\n",
|
|||
|
"Например, оценку 70 имеет 4 человека, а оценку 18 всего 1 человек из этого диапазона."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 14,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<Axes: ylabel='Frequency'>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 14,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAj8AAAGdCAYAAAD9kBJPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAzXElEQVR4nO3de1RVdf7/8deRyxET8MpFRaHEWyreyg41aaPlheVX6rvMcSxMyTWNWDhkJVY65TQ401KzVXmZVGocIy0vTWVGKDYqpSioNI2XNFEDdDJBKNE4+/dHP8+3o2B4PHDg7Odjrb1W+7M/e+/3h02Ll/t89tkWwzAMAQAAmEQTTxcAAABQnwg/AADAVAg/AADAVAg/AADAVAg/AADAVAg/AADAVAg/AADAVAg/AADAVHw9XUB9s9vt+uabbxQYGCiLxeLpcgAAQC0YhqFz586pXbt2atLk+u7dmC78fPPNN4qIiPB0GQAAwAXHjx9Xhw4drusYpgs/gYGBkn764QUFBXm4GgAAUBtlZWWKiIhw/B2/HqYLP5c+6goKCiL8AADQyLhjygoTngEAgKkQfgAAgKkQfgAAgKmYbs4PAKDhMwxDP/74o6qqqjxdCuqRn5+ffHx86vw8hB8AQINy4cIFFRUV6fvvv/d0KahnFotFHTp0UPPmzev0PIQfAECDYbfbdfToUfn4+Khdu3by9/fnC2lNwjAMnT59WidOnFB0dHSd3gEi/AAAGowLFy7IbrcrIiJCzZo183Q5qGdt27bV119/rYsXL9Zp+GHCMwCgwbne1xegcaqvu3z8dgEAAFMh/AAAAFNpMHN+5s6dq9TUVCUnJ+ull16qsd+aNWv07LPP6uuvv1Z0dLT+8pe/aOTIkfVXKADAIyJnfFCv5/t6bly9nu9afP3114qKilJeXp769Onj6XIanQZx52fXrl1asmSJevfufdV+O3bs0Lhx45SYmKi8vDzFx8crPj5eBQUF9VQpAAD166GHHlJ8fLyny/AqHg8/5eXlGj9+vP72t7+pZcuWV+27cOFCDR8+XE888YS6d++uOXPmqF+/fnrllVfqqVoAAFAbFy5c8HQJNfJ4+ElKSlJcXJyGDh36i31zcnKu6Dds2DDl5OTUuE9lZaXKysqcFgAA3G3w4MF69NFHNW3aNLVs2VKhoaH629/+poqKCk2cOFGBgYHq3LmzNm7c6NinqqpKiYmJioqKUkBAgLp27aqFCxc6tv/xj3/UG2+8oQ0bNshischisSg7O9ux/ciRI7rrrrvUrFkzxcTEXPXvoWEY+uMf/6iOHTvKarWqXbt2euyxxxzbKysr9dRTTykiIkJWq1WdO3fWsmXLHNu3bt2qW2+9VVarVeHh4ZoxY4Z+/PFHp/FPnTpV06ZNU5s2bTRs2DBJUkFBgUaMGKHmzZsrNDRUDz74oP773/9e18/6enk0/GRkZGjPnj1KS0urVf/i4mKFhoY6tYWGhqq4uLjGfdLS0hQcHOxYIiIirqtmAGioImd8cMWC+vXGG2+oTZs22rlzpx599FH9/ve/15gxYxQbG6s9e/bonnvu0YMPPuj49mq73a4OHTpozZo1+ve//61Zs2Zp5syZWr16tSRp+vTpuv/++zV8+HAVFRWpqKhIsbGxjvM9/fTTmj59uvLz89WlSxeNGzfOKZD83LvvvqsFCxZoyZIlOnTokNavX69evXo5tickJOitt97Syy+/rC+//FJLlixxfNPyyZMnNXLkSN1yyy3au3evFi1apGXLlulPf/rTFeP39/fX9u3btXjxYp09e1a//vWv1bdvX+Xm5uqjjz5SSUmJ7r//frf+3K+VxyY8Hz9+XMnJycrMzFTTpk3r7DypqalKSUlxrJeVlRGAAAB1IiYmRs8884ykn/7+zJ07V23atNHkyZMlSbNmzdKiRYu0b98+3XbbbfLz89Nzzz3n2D8qKko5OTlavXq17r//fjVv3lwBAQGqrKxUWFjYFeebPn264uJ+mpj93HPP6eabb9bhw4fVrVu3K/oWFhYqLCxMQ4cOlZ+fnzp27Khbb71VknTw4EGtXr1amZmZjk9YbrzxRse+r732miIiIvTKK6/IYrGoW7du+uabb/TUU09p1qxZju9lio6O1l//+lfHfn/605/Ut29f/fnPf3a0LV++XBERETp48KC6dOni2g/6Onnszs/u3bt16tQp9evXT76+vvL19dXWrVv18ssvy9fXt9qX2YWFhamkpMSpraSkpNpfiEusVquCgoKcFgAA6sLPH9zx8fFR69atne6uXPr04tSpU462V199Vf3791fbtm3VvHlzLV26VIWFhdd8vvDw8CuO/XNjxozRDz/8oBtvvFGTJ0/WunXrHHeJ8vPz5ePjo0GDBlW775dffimbzeb0JYS33367ysvLdeLECUdb//79nfbbu3evtmzZoubNmzuWS8Hsq6++qtUY64LHws+QIUO0f/9+5efnO5YBAwZo/PjxjotwOZvNpqysLKe2zMxM2Wy2+iobAIAa+fn5Oa1bLBantkvhwW63S/pp+sf06dOVmJiojz/+WPn5+Zo4cWKtJwtf7diXi4iI0IEDB/Taa68pICBAU6ZM0Z133qmLFy8qICCg9oO8ihtuuMFpvby8XKNGjXL6W5+fn69Dhw7pzjvvdMs5XeGxj70CAwPVs2dPp7YbbrhBrVu3drQnJCSoffv2jjlBycnJGjRokObNm6e4uDhlZGQoNzdXS5curff6AQC4Xtu3b1dsbKymTJniaLv8joi/v3+1n4a4IiAgQKNGjdKoUaOUlJSkbt26af/+/erVq5fsdru2bt1a7QNI3bt317vvvivDMBwha/v27QoMDFSHDh1qPF+/fv307rvvKjIyUr6+DearBT3/tNfVFBYWqqioyLEeGxurVatWaenSpYqJidE777yj9evXXxGiAABoDKKjo5Wbm6tNmzbp4MGDevbZZ7Vr1y6nPpGRkdq3b58OHDig//73v7p48aJL50pPT9eyZctUUFCgI0eOaOXKlQoICFCnTp0UGRmpCRMmaNKkSVq/fr2OHj2q7Oxsx8TrKVOm6Pjx43r00Uf1n//8Rxs2bNDs2bOVkpJy1fewJSUl6cyZMxo3bpx27dqlr776Sps2bdLEiRPdFuhc0XBimOT0+F5169JPn1mOGTOmfgoCADQYDfkbl131u9/9Tnl5eRo7dqwsFovGjRunKVOmOD0OP3nyZGVnZ2vAgAEqLy/Xli1bFBkZec3natGihebOnauUlBRVVVWpV69e+uc//6nWrVtLkhYtWqSZM2dqypQp+vbbb9WxY0fNnDlTktS+fXt9+OGHeuKJJxQTE6NWrVopMTHRMbm7Ju3atdP27dv11FNP6Z577lFlZaU6deqk4cOHe/TltRbDMAyPnd0DysrKFBwcrNLSUiY/A/Aq1T3a3tgCw/nz53X06FFFRUXV6ZPAaJiudv3d+fe7QX/sBQAA4G6EHwAAYCqEHwAAYCqEHwAAYCqEHwBAg2OyZ3Hw/9XXdSf8AAAajEvfWHzpxZ8wl0vfbF3dWx7cqUF9zw8AwNx8fHzUokULx/upmjVr5vQ+KXgvu92u06dPq1mzZnX+bdCEHwBAg3LpZdU1vaAT3qtJkybq2LFjnQdewg8AoEGxWCwKDw9XSEiIy69yQOPk7+9fL9/8TPgBADRIPj4+dT73A+bEhGcAAGAqhB8AAGAqhB8AAGAqhB8AAGAqhB8AAGAqhB8AAGAqhB8AAGAqhB8AAGAqhB8AAGAqhB8AAGAqhB8AAGAqhB8AAGAqhB8AAGAqhB8AAGAqhB8AAGAqhB8AAGAqhB8AAGAqhB8AAGAqhB8AAGAqhB8AAGAqhB8AAGAqhB8AAGAqHg0/ixYtUu/evRUUFKSgoCDZbDZt3Lixxv7p6emyWCxOS9OmTeuxYgAA0Nj5evLkHTp00Ny5cxUdHS3DMPTGG29o9OjRysvL080331ztPkFBQTpw4IBj3WKx1Fe5AADAC3g0/IwaNcpp/YUXXtCiRYv02Wef1Rh+LBaLwsLC6qM8AADghRrMnJ+qqiplZGSooqJCNputxn7
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"\n",
|
|||
|
"df.iloc[10:51].plot.hist(column=[\"math score\"], bins=100)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Данная гистограмма отображает прцоентное соотношение мужчин и женщин.\n",
|
|||
|
"Что позволяет сделать вывод о том, что женщин среди студентов больше, чем мужчин. "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 15,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAGFCAYAAAASI+9IAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAv6ElEQVR4nO3dd3hUZcL+8e/MpBcICYRQQjOELkUFCygIduzK2kCXxVUXfXXVdVd3ddVX1rI/677q7soCuqigK0VFXSsgFhQQKaFJCwmppJdJJjPz+2N0JIKUtGfOmftzXbniTGYm9yR47pzznPM8Dr/f70dERARwmg4gIiKhQ6UgIiJBKgUREQlSKYiISJBKQUREglQKIiISpFIQEZEglYKIiASpFEREJEilICIiQSoFEREJUimIiEiQSkFERIJUCiIiEqRSEBGRIJWCiIgEqRRERCRIpSAiIkEqBRERCVIpiIhIkEpBRESCVAoiIhKkUhARkSCVgoiIBKkUREQkSKUgIiJBKgUL27p1K507d+amm25qdP/SpUtxOByUlZUF73O73YwbN47hw4c3ul9EZH8RpgNI02VmZvLee+8xbtw4kpKSePjhhw/6uIaGBi6//HL27t3Lp59+SlJSUtsGFRHL0J6CxQ0fPpwlS5bwzDPP8Nhjjx3wdZ/Px7XXXsu3337LBx98QGpqKgB1dXXceeeddOvWjfj4eEaNGsXSpUuDz5szZ84B5bFr1y4cDgdr164FDr5HMmzYMO6///6ffZ2DPWfy5Mk4HA4WLVp00O/zg169evHUU08Fb+//nJ/6aQ4ROTLaU7CBU045hQULFnDBBRfQvn17+vXrF/za9OnT+eCDD1ixYgU9evQI3n/zzTeTlZXFvHnz6Nq1KwsXLuTss89m/fr19O3bt82yr169mjfffLPNvp+IHJr2FGyiffv2+Hw+brnlFubNmwfA3XffzcyZM/F6vY3+Ws/Ozmb27Nm8/vrrjBkzhmOOOYY777yT0aNHM3v27DbNffvtt/O73/2uTb+niPw8lYIN1NfXM23aNG688Uaee+45/vGPfwDw4osv8tFHHzFo0CBuvvnm4OPXr1+P1+slMzOThISE4MeyZcvYvn178HHl5eWNvj5o0KAWzb1o0SJ27NjBHXfccdCvn3zyyY2+f3Z29gGPufLKK0lISKBLly6cd955ZGVltWhGkXCjw0c2MGPGDKqqqnj44YdJSEggLy+P++67j/nz53Pqqacyc+ZMhg4dyuLFi7nwwgupqqrC5XKxevVqXC5Xo9dKSEgI/ndiYiJr1qwJ3s7NzWXs2LEtktnj8XDXXXcxY8YMYmNjD/qY+fPnM2DAgODtg33vJ598kgkTJlBWVsY999zDpEmT2LBhQ4tkFAlHKgWL27BhA4888ghvvvlmcIM+ZsyYRp8zMzP585//zG9+8xvGjh3L8OHD8Xq9FBYWBh9zME6nk4yMjODtiIiW++fy/PPPk5CQwOTJk3/2Menp6Yf9/mlpacHH3HrrrZx//vl4PJ4WyykSbnT4yMJ8Ph/Tpk3jiiuu4KyzzjrkY++88046d+7MnXfeSWZmJldffTVTpkxhwYIF7Ny5k6+++oqHH36YJUuWHHWOuro63G43brcbv99PQ0ND8PYPG+i6urpGz3nsscd4/PHHcTgcR/399ufxeHC73eTn5zN37lwyMzOJjIxs1muKhDPtKVjY008/za5du3jnnXcO+9iIiAhmzZrFyJEjueqqq5g9ezYPPfQQd9xxB7m5uXTs2JETTzyRiRMnHnWOtLS0RrfXrVvHjBkzGt3Xr18/du3aFbw9btw4xo0bd9Tf66cmTZoEBA51jRgxgvnz5zf7NUXCmcPv9/tNhxB7KysrY9iwYY1KQURCkw4fSatzOBxER0ebjiEiR0B7CiIiEqQ9BRERCVIpiIhIkEpBRESCVAoiIhKkUhARkSCVgoiIBOmKZrEdt8dLQYWbkup6KtwNlNd6qHR7qK5roKrOS01dA+4GLz4/+P3g9/sDn/Hj+/4E7egIJ3FRLuKiIgKfoyOIi3QRH+2iXUwknRKj6ZQYTVJclNk3K9LCVApiOSXV9XxXWMWu4mryyt3kV9QGPpe7ya9wU1bTdhPiRUU46ZQQKIjUxGhS20XTIzmO3h0T6N0xnp4pcUS6tEMu1qGL1yRkFVS42ZRXwXeFVWwvquK7wsBHaRtu9JvL5XTQvUMsvVLi6d0xnszOiQzp1p5+aYlERagsJPSoFCQkVNU1sG5PGWtzylibXca3OWUUVNQd/okWFeVy0i8tkSHd2zOkW/tgUWivQkxTKYgRhZVuPvuumC+27+Ob7DK2F1UFj+eHq+gIJ8f17MBJfVI46ZgUhqYnqSSkzakUpE1U1zWwcuc+Pt1WzGffFbO1oMp0pJAXF+XiuJ4dOLFPCicfk8LQ7kk4nc1bf0LkcFQK0mp276vm3Q35fLypkG/2lOLx6p9ac6TER3F6/1TOGNiZUzM7ERPpOvyTRI6SSkFa1Ob8Ct7bkM97G/LZnF9pOo5txUQ6GZ3RiTMHdmb8gFRSEjQ1ubQMlYI026a8Chav3ct/N+azs7jadJyw43TAqN4pXDS8K+cM6UK7GC1HKk2nUpAmKa2uZ/HaXF5fncPGvRWm48j3oiKcjO+fyqUjujO2XyciNFAtR0mlIEfM6/OzbGsh/1mdw4dZhdR7faYjySF0TIjmomFduWJkDzJSE0zHEYtQKchhFVa6mftlNvO+yqaw0r7XDtiVwwGnHNORa0/uxfj+qTqDSQ5JpSA/a0NuObNW7OTtdXnaK7CJ7h1imXxiT35xQrrmbZKDUilIIz6fn/ez8pm1Yhdf7SoxHUdaSUykk4uGdWPamN5kpCaajiMhRKUgANQ1eHltVQ7/XL6dPSW1puNIG3E64JzBXbj59AwGdGlnOo6EAJVCmHN7vLyyMpt/LN9u67mG5NAcDhjfP5VbTu/L0PQk03HEIJVCmKprCJTBc0u3U6TBY9nPmL4d+Z/xfTmhV7LpKGKASiHMNHh9zF+1h//7+Dvyyt2m40gIO71/Knef05++nTXmEE5UCmHkky2FzFiyie8KNRmdHBmX08Gk47vz2zMySU2MMR1H2oBKIQxsLajkoSWbWL61yHQUsai4KBfTxvThhlP7EB+tBRvtTKVgY/uq6njig63M+3oP3nBfrEBaRKfEaO48M5NJx6fjcOgiODtSKdiQ1+dnzue7eOrDrVS6G0zHERs6vmcHZlw8hH5pGm+wG5WCzWTtreAPC9axLqfcdBSxuQing1+N7s1tEzKJjdLaDnahUrAJt8fLkx9u5V+f7qRBh4qkDXVLiuXBCwcxfkBn01GkBagUbGDFtmLuWbie7JIa01EkjJ01qDMPXTSETola8MfKVAoWVun28MBbWfxndY7pKCIAJMdH8ZeLh3D24DTTUaSJVAoWtWpXCbfNX0tOqeYpktBz6Yju3H/BQBK1CpzlqBQspsHr4+mPtvHc0u06zVRCWrekWP7f5UM56ZgU01HkKKgULGRXcTW3zl/Lt3vKTEcROSIOB/zqlN7cdXZ/oiK0NKgVqBQs4rWv93D/WxupqfeajiJy1IamJ/Hc1SPolhRrOoochkohxLk9Xu5dtIHXNZgsFpcUF8mTvxjGuH6ppqPIIagUQlhOaQ03zV3D+lxdiCb24HDAzeMy+O2ETK0VHaJUCiFqxbZibnl1DaU1HtNRRFrc6IyOPH3FMFISdE1DqFEphKDnln7H4+9v1dlFYmtd2sfwwpTjGdytvekosh+VQghxe7zc/tpa3lmfbzqKSJuIi3Lx9BXDOWOgpsgIFSqFELGvqo5pL63im+wy01FE2pTTAfecO4BpY/qYjiKoFELCzuJqrpv9Fbv3ae4iCV9Xj+rBAxcMIsKl6xlMUikYtnp3CdNeXKUBZRHg1MxOPHvVcE2PYZBKwaB31ufx2/lrqWvwmY4iEjL6pyXy0tSRpLbTmtAmqBQMefHzXdz/1kb00xc5UI/kOOb+ahQ9UuJMRwk7KgUD/rFsOw+/u9l0DJGQlpoYzb9/NUpLfrYxlUIbe/rDbTz54VbTMUQ
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import matplotlib.pyplot as plt \n",
|
|||
|
"\n",
|
|||
|
"labels = 'Женщины', 'Мужчины'\n",
|
|||
|
"sizes = [len(df[df['gender']== 'female']),\n",
|
|||
|
" len(df[df['gender']== 'male'])]\n",
|
|||
|
"\n",
|
|||
|
"plt.pie(sizes, labels=labels, autopct='%1.1f%%')\n",
|
|||
|
"plt.show()\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Данная диаграмма отображает соотношение студентов, которые прошли курс подготовки к тестированию по группам.\n",
|
|||
|
"Что позволяет сделать вывод о том, что, например, больше всего неподготовленных студентов в группе С."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 16,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.legend.Legend at 0x20a9f46a900>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 16,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAHaCAYAAAAqv7IKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA6TElEQVR4nO3de3RNd/7/8ddJIpGQSxOXJNMgdY26hZamWgQl0aoOo6V03KrqTrS1MtO6tKPRFr0aZloS+qW0U7ea74+6lboTE4ZRJUMpEVSTNPEVkezfH13OzGlChZxzPpLnY629Vvb+fM7e75PPxsu+2izLsgQAAGAQD3cXAAAA8EsEFAAAYBwCCgAAMA4BBQAAGIeAAgAAjENAAQAAxiGgAAAA43i5u4BbUVRUpDNnzsjf3182m83d5QAAgJtgWZZ++uknhYeHy8PjxsdI7siAcubMGUVERLi7DAAAcAtOnTqlu++++4Z97siA4u/vL+nnLxgQEODmagAAwM3IyclRRESE/d/xG7kjA8q10zoBAQEEFAAA7jA3c3kGF8kCAADjEFAAAIBxCCgAAMA4d+Q1KAAAsxUWFqqgoMDdZcDFKlWqJE9PzzJZFwEFAFBmLMvS2bNnlZWV5e5S4CZBQUEKDQ297eeUEVAAAGXmWjipUaOG/Pz8eJhmBWJZli5duqRz585JksLCwm5rfQQUAECZKCwstIeTkJAQd5cDN/D19ZUknTt3TjVq1Lit0z1cJAsAKBPXrjnx8/NzcyVwp2vjf7vXIBFQAABlitM6FVtZjT8BBQAAGIeAAgDAHaBq1apavXq1Fi1a9Ksv2isPuEgWAOB8m/e6blvt7yv1RwYOHKgFCxZct/3HH39UUFDQbRR1+9LS0hQWFqaioiI9+OCDbq3FFTiCAgCApLi4OGVkZDhMn3/+ubvLsqtXr56qVKkif39/RUZGurscpyOgAAAgycfHR6GhoQ5TcHCwvT0lJUVBQUFasWKF6tevr8qVK6tr1646deqUw3rmzJmjunXrytvbWw0bNtTHH3/s0G6z2bRixQr7/Lhx49ShQwf7/IkTJ2Sz2ZSWlmZfdvDgQcXHx6tq1aqqWbOmnnnmGV24cMFhvSkpKbLZbA5TixYtbvv34i4EFAAAbtKlS5c0bdo0LVy4UNu2bVNWVpb69Oljb1++fLnGjh2rCRMm6ODBgxo2bJgGDRqkTZs23fI2s7Ky1LFjR0VHR2vv3r1as2aNMjMz9eSTTzr0syxLAQEB9qM/EyZMuOVtmqBU16AkJSVp2bJl+uabb+Tr66sHH3xQb7zxhho2bGjvc/nyZU2YMEFLlixRfn6+unbtqj//+c+qWbOmvc/Jkyc1fPhwbdq0SVWrVtWAAQOUlJQkLy8uiQEg2aY6/zZVa7Ll9G2g/CkoKNAHH3ygNm3aSJIWLFigqKgo7d69W61bt9aMGTM0cOBAjRgxQpKUkJCgnTt3asaMGYqNjb2lbX7wwQeKjo7W66+/bl82f/58RURE6Ntvv1WDBg3stXl7eys0NFTSzxfV3slKdQRl8+bNGjlypHbu3Kl169apoKBAXbp0UV5enr3P+PHj9cUXX+izzz7T5s2bdebMGfXs2dPeXlhYqEcffVRXrlzR9u3btWDBAqWkpGjSpEll960AAHACLy8v3X///fb5Ro0aKSgoSIcPH5YkHT58WG3btnX4TNu2be3tt2L//v32/9Bfmxo1aiRJSk9Pt/fLyclRlSpVbnk7pinVIYs1a9Y4zKekpKhGjRpKTU1Vu3btlJ2drXnz5mnx4sXq2LGjJCk5OVlRUVHauXOnHnjgAX355Zf617/+pfXr16tmzZpq0aKFXnvtNU2cOFFTpkyRt7d32X07AADucLm5uerevbveeOONYm3//b6bM2fOKDw83JWlOdVtXYOSnZ0tSfaLiFJTU1VQUKDOnTvb+zRq1Ei1atXSjh07JEk7duxQ06ZNHU75dO3aVTk5OTp06FCJ28nPz1dOTo7DBACAq129elV79/7nlukjR44oKytLUVFRkqSoqCht27bN4TPbtm1T48aNb3mbLVu21KFDh1SnTh3Vq1fPYfrvIyZ79uxRdHT0LW/HNLccUIqKijRu3Di1bdtWTZo0kfTzWyy9vb2L3Stes2ZNnT171t7nv8PJtfZrbSVJSkpSYGCgfYqIiLjVsgEAuGWVKlXS6NGjtWvXLqWmpmrgwIF64IEH1Lp1a0nSiy++qJSUFM2ZM0dHjx7VrFmztGzZMr3wwgsO6ykoKNDly5d1+fJlFRYWqqioyD6fn5/v0HfkyJG6ePGi+vbtqz179ig9PV1r167VoEGDVFhYqAsXLuiPf/yjtm3bpgEDBrjsd+Fst3xV6siRI3Xw4EFt3bq1LOspUWJiohISEuzzOTk5hBQAgMv5+flp4sSJevrpp3X69Gk9/PDDmjdvnr39iSee0LvvvqsZM2Zo7NixioyMVHJyssNtxJKK3YEj/edNwL8UHh6ubdu2aeLEierSpYvy8/NVu3ZtxcXFycPDQ4sWLdLatWu1fPlye1AqD24poIwaNUqrV6/Wli1bHB63GxoaqitXrigrK8vhKEpmZqb9quLQ0FDt3r3bYX2ZmZn2tpL4+PjIx8fnVkoFAJjgFp7u6kopKSklLu/QoYMsy/GOr549ezrc/PFLw4cP1/Dhw6/b/sv13Yz69etr2bJlJbaNHTtWY8eOLbZ8ypQpmjJlSqm3ZYpSneKxLEujRo3S8uXLtXHjxmJPsmvVqpUqVaqkDRs22JcdOXJEJ0+eVExMjCQpJiZG//znP3Xu3Dl7n3Xr1ikgIOC2ztEBAIDyo1RHUEaOHKnFixdr5cqV8vf3t18zEhgYKF9fXwUGBmrIkCFKSEhQcHCwAgICNHr0aMXExOiBBx6QJHXp0kWNGzfWM888ozfffFNnz57Vyy+/rJEjR3KUBAAASCrlEZQ5c+YoOztbHTp0UFhYmH1aunSpvc/bb7+txx57TL169VK7du0UGhrqcFjK09NTq1evlqenp2JiYtS/f3/9/ve/16uvvlp23woAgDI2cOBAZWVlubuMCqNUR1Bu5rxZ5cqVNXv2bM2ePfu6fWrXrq3//d//Lc2mAQBABcK7eAAAgHEIKAAAwDgEFAAAYBwCCgAAMA4BBQAAGIeAAgBABVe1alWtXr1aixYtcnhCvDvd8rt4AAC4WbapNpdty5pc+kfJX3vGyYoVKxyWf/XVV4qNjdWPP/5Y7EW45UlaWprCwsJUVFSkBx980N3lSCKgAABQ4dWrV8/+s7+/vxsr+Q9O8QAAUApbt27Vww8/LF9fX0VERGjMmDHKy8u7bv8pU6bIZrOVOF17Mm1KSoqCgoK0YsUK1a9fX5UrV1bXrl116tQph3XNmTNHdevWlbe3txo2bKiPP/7Yod1mszkcBRo3bpzDm5RPnDghm82mtLQ0+7KDBw8qPj5eVatWVc2aNfXMM8/owoULDutNSUkpVnuLFi1K9XsrLQIKAAA3KT09XXFxcerVq5cOHDigpUuXauvWrRo1atQNP3fvvfcqIyPDPn3++efF+ly6dEnTpk3TwoULtW3bNmVlZalPnz729uXLl2vs2LGaMGGCDh48qGHDhmnQoEHatGnTLX+frKwsdezYUdHR0dq7d6/WrFmjzMxMPfnkkw79LMtSQECAvf4JEybc8jZvFqd4AACQtHr1alWtWtVhWWFhocN8UlKS+vXrp3HjxkmS6tevr/fee0/t27fXnDlzVLly5RLX7eXlpdDQUPt8cHBwsT4FBQX64IMP1KZNG0nSggULFBUVpd27d6t169aaMWOGBg4cqBEjRkiSEhIStHPnTs2YMUOxsbG39J0/+OADRUdH6/XXX7cvmz9/viIiIvTtt9+qQYMG9tq8vb3t3+GXvydn4AgKAACSYmNjlZaW5jB99NFHDn3279+vlJQUVa1a1T517dpVRUVFOn78+G1t38vLS/fff799vlGjRgoKCtLhw4clSYcPH1bbtm0dPtO2bVt7+63Yv3+/Nm3a5PB9GjVqJOnno0XX5OTkqEqVKre8nVvBERQAACRVqVLF4WJRSfr
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"plot = df.groupby([\"race/ethnicity\", \"test preparation course\"]).size().unstack().plot.bar(color=[\"pink\", \"green\"])\n",
|
|||
|
"plot.legend([\"Прошёл\", \"Не прошёл\"])"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "aimvenv",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.6"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|