{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Начало лабораторной работы №1\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Набор данных \"Наблюдения НЛО в США\"."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Загрузка и сохранение данных"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" summary | \n",
" city | \n",
" state | \n",
" date_time | \n",
" shape | \n",
" duration | \n",
" stats | \n",
" report_link | \n",
" text | \n",
" posted | \n",
" city_latitude | \n",
" city_longitude | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Viewed some red lights in the sky appearing to... | \n",
" Visalia | \n",
" CA | \n",
" 2021-12-15T21:45:00 | \n",
" light | \n",
" 2 minutes | \n",
" Occurred : 12/15/2021 21:45 (Entered as : 12/... | \n",
" http://www.nuforc.org/webreports/165/S165881.html | \n",
" Viewed some red lights in the sky appearing to... | \n",
" 2021-12-19T00:00:00 | \n",
" 36.356650 | \n",
" -119.347937 | \n",
"
\n",
" \n",
" 1 | \n",
" Look like 1 or 3 crafts from North traveling s... | \n",
" Cincinnati | \n",
" OH | \n",
" 2021-12-16T09:45:00 | \n",
" triangle | \n",
" 14 seconds | \n",
" Occurred : 12/16/2021 09:45 (Entered as : 12/... | \n",
" http://www.nuforc.org/webreports/165/S165888.html | \n",
" Look like 1 or 3 crafts from North traveling s... | \n",
" 2021-12-19T00:00:00 | \n",
" 39.174503 | \n",
" -84.481363 | \n",
"
\n",
" \n",
" 2 | \n",
" seen dark rectangle moving slowly thru the sky... | \n",
" Tecopa | \n",
" CA | \n",
" 2021-12-10T00:00:00 | \n",
" rectangle | \n",
" Several minutes | \n",
" Occurred : 12/10/2021 00:00 (Entered as : 12/... | \n",
" http://www.nuforc.org/webreports/165/S165810.html | \n",
" seen dark rectangle moving slowly thru the sky... | \n",
" 2021-12-19T00:00:00 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 3 | \n",
" One red light moving switly west to east, beco... | \n",
" Knoxville | \n",
" TN | \n",
" 2021-12-10T19:30:00 | \n",
" triangle | \n",
" 20-30 seconds | \n",
" Occurred : 12/10/2021 19:30 (Entered as : 12/... | \n",
" http://www.nuforc.org/webreports/165/S165825.html | \n",
" One red light moving switly west to east, beco... | \n",
" 2021-12-19T00:00:00 | \n",
" 35.961561 | \n",
" -83.980115 | \n",
"
\n",
" \n",
" 4 | \n",
" Bright, circular Fresnel-lens shaped light sev... | \n",
" Alexandria | \n",
" VA | \n",
" 2021-12-07T08:00:00 | \n",
" circle | \n",
" NaN | \n",
" Occurred : 12/7/2021 08:00 (Entered as : 12/0... | \n",
" http://www.nuforc.org/webreports/165/S165754.html | \n",
" Bright, circular Fresnel-lens shaped light sev... | \n",
" 2021-12-19T00:00:00 | \n",
" 38.798958 | \n",
" -77.095133 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" summary city state \\\n",
"0 Viewed some red lights in the sky appearing to... Visalia CA \n",
"1 Look like 1 or 3 crafts from North traveling s... Cincinnati OH \n",
"2 seen dark rectangle moving slowly thru the sky... Tecopa CA \n",
"3 One red light moving switly west to east, beco... Knoxville TN \n",
"4 Bright, circular Fresnel-lens shaped light sev... Alexandria VA \n",
"\n",
" date_time shape duration \\\n",
"0 2021-12-15T21:45:00 light 2 minutes \n",
"1 2021-12-16T09:45:00 triangle 14 seconds \n",
"2 2021-12-10T00:00:00 rectangle Several minutes \n",
"3 2021-12-10T19:30:00 triangle 20-30 seconds \n",
"4 2021-12-07T08:00:00 circle NaN \n",
"\n",
" stats \\\n",
"0 Occurred : 12/15/2021 21:45 (Entered as : 12/... \n",
"1 Occurred : 12/16/2021 09:45 (Entered as : 12/... \n",
"2 Occurred : 12/10/2021 00:00 (Entered as : 12/... \n",
"3 Occurred : 12/10/2021 19:30 (Entered as : 12/... \n",
"4 Occurred : 12/7/2021 08:00 (Entered as : 12/0... \n",
"\n",
" report_link \\\n",
"0 http://www.nuforc.org/webreports/165/S165881.html \n",
"1 http://www.nuforc.org/webreports/165/S165888.html \n",
"2 http://www.nuforc.org/webreports/165/S165810.html \n",
"3 http://www.nuforc.org/webreports/165/S165825.html \n",
"4 http://www.nuforc.org/webreports/165/S165754.html \n",
"\n",
" text posted \\\n",
"0 Viewed some red lights in the sky appearing to... 2021-12-19T00:00:00 \n",
"1 Look like 1 or 3 crafts from North traveling s... 2021-12-19T00:00:00 \n",
"2 seen dark rectangle moving slowly thru the sky... 2021-12-19T00:00:00 \n",
"3 One red light moving switly west to east, beco... 2021-12-19T00:00:00 \n",
"4 Bright, circular Fresnel-lens shaped light sev... 2021-12-19T00:00:00 \n",
"\n",
" city_latitude city_longitude \n",
"0 36.356650 -119.347937 \n",
"1 39.174503 -84.481363 \n",
"2 NaN NaN \n",
"3 35.961561 -83.980115 \n",
"4 38.798958 -77.095133 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Чтобы загрузить данные из CSV файла:\n",
"\n",
"df = pd.read_csv('datasets/nuforc_reports.csv')\n",
"df.head()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Получение сведений о датафрейме с данными"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 136940 entries, 0 to 136939\n",
"Data columns (total 12 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 summary 136866 non-null object \n",
" 1 city 136558 non-null object \n",
" 2 state 127595 non-null object \n",
" 3 date_time 134272 non-null object \n",
" 4 shape 131018 non-null object \n",
" 5 duration 130448 non-null object \n",
" 6 stats 136940 non-null object \n",
" 7 report_link 136940 non-null object \n",
" 8 text 136902 non-null object \n",
" 9 posted 134272 non-null object \n",
" 10 city_latitude 110136 non-null float64\n",
" 11 city_longitude 110136 non-null float64\n",
"dtypes: float64(2), object(10)\n",
"memory usage: 12.5+ MB\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" city_latitude | \n",
" city_longitude | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 110136.000000 | \n",
" 110136.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 38.704608 | \n",
" -95.185792 | \n",
"
\n",
" \n",
" std | \n",
" 5.752186 | \n",
" 18.310088 | \n",
"
\n",
" \n",
" min | \n",
" -32.055500 | \n",
" -170.494000 | \n",
"
\n",
" \n",
" 25% | \n",
" 34.238375 | \n",
" -113.901810 | \n",
"
\n",
" \n",
" 50% | \n",
" 39.257500 | \n",
" -89.161450 | \n",
"
\n",
" \n",
" 75% | \n",
" 42.317739 | \n",
" -80.363444 | \n",
"
\n",
" \n",
" max | \n",
" 64.845276 | \n",
" 130.850580 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" city_latitude city_longitude\n",
"count 110136.000000 110136.000000\n",
"mean 38.704608 -95.185792\n",
"std 5.752186 18.310088\n",
"min -32.055500 -170.494000\n",
"25% 34.238375 -113.901810\n",
"50% 39.257500 -89.161450\n",
"75% 42.317739 -80.363444\n",
"max 64.845276 130.850580"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Получить общую информацию о датафрейме можно с помощью:\n",
"df.info()\n",
"#Для получения статистического описания числовых колонок:\n",
"df.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Получение сведений о колонках датафрейма"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['summary', 'city', 'state', 'date_time', 'shape', 'duration', 'stats',\n",
" 'report_link', 'text', 'posted', 'city_latitude', 'city_longitude'],\n",
" dtype='object')\n"
]
}
],
"source": [
"# Distinct values found in the 'city' column.\n",
"unique_values = df['city'].unique()\n",
"\n",
"# Column labels of the DataFrame, written to the cell's stdout.\n",
"print(df.columns)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Вывод отдельных строк и столбцов из датафрейма"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Row and column selection with the positional (`iloc`) and label-based (`loc`) indexers.\n",
"\n",
"# Single column, selected by its name.\n",
"specific_column = df['city']\n",
"\n",
"# Row whose index label is 0.\n",
"row_by_label = df.loc[0]\n",
"\n",
"# Rows at positions 0 through 4 (the stop position 5 is excluded).\n",
"first_five_rows = df.iloc[:5]\n",
"\n",
"# Row at position 0.\n",
"first_row = df.iloc[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Группировка и агрегация данных в датафрейме"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Group the reports by city and count the non-null 'state' entries in each group.\n",
"# 'count' is used instead of 'sum': 'state' holds text codes, and summing text\n",
"# concatenates all codes of a group into one long string.\n",
"grouped = df.groupby('city').agg({'state': 'count'})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Сортировка данных в датафрейме"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Order the rows by the 'city' column; ascending order is the default direction.\n",
"sorted_df = df.sort_values('city')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Удаление строк и столбцов"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Row removal. `errors='ignore'` keeps the cell re-runnable: labels that a\n",
"# previous execution already dropped are skipped instead of raising KeyError.\n",
"\n",
"# Drop a single row by its index label.\n",
"df = df.drop(index=24, errors='ignore')\n",
"\n",
"# Drop several rows at once.\n",
"df = df.drop(index=[1, 2, 3], errors='ignore')\n",
"\n",
"\n",
"# Column removal (same re-run protection as above).\n",
"\n",
"# Drop a single column.\n",
"df = df.drop(columns='summary', errors='ignore')\n",
"\n",
"# Drop several columns at once.\n",
"df = df.drop(columns=['shape', 'duration'], errors='ignore')\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Создание новых столбцов на основе данных из существующих столбцов датафрейма"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Derive a new column by joining the 'state' value and the 'city' value of each row.\n",
"df = df.assign(new_columnStateCity=df['state'].add(df['city']))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Удаление строк с пустыми значениями"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the rows in which every column has a value\n",
"# (a row is discarded as soon as any of its cells is missing).\n",
"df = df.dropna(axis=0, how='any')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Заполнение пустых значений на основе существующих данных"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Rows with missing values were removed by the preceding `dropna()` cell, so in\n",
"# this run order both calls leave the data unchanged and only show the technique.\n",
"\n",
"# Fill gaps in a numeric column with the column mean.\n",
"df['city_latitude'] = df['city_latitude'].fillna(df['city_latitude'].mean())\n",
"\n",
"# Fill gaps in a text column with a text placeholder, so the column does not\n",
"# end up mixing strings with the integer 0.\n",
"df['state'] = df['state'].fillna('Unknown')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Линейная диаграмма (plot)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"