lab_1 #1
3
.gitignore
vendored
3
.gitignore
vendored
@ -12,3 +12,6 @@ ipython_config.py
|
||||
# Remove previous ipynb_checkpoints
|
||||
# git rm -r .ipynb_checkpoints/
|
||||
|
||||
# virtual
|
||||
aimenv/
|
||||
static/
|
||||
|
@ -1,3 +1,8 @@
|
||||
# AIM-PIbd-32-Puchkina-A-A
|
||||
|
||||
МИИ
|
||||
## МИИ
|
||||
|
||||
ФИО: Пучкина Анна\
|
||||
Группа: ПИбд-32\
|
||||
Вариант: 18 (Цена на мобильные устройства)\
|
||||
Ссылка на dataset: https://www.kaggle.com/datasets/dewangmoghe/mobile-phone-price-prediction
|
BIN
lab_1/image.png
Normal file
BIN
lab_1/image.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 71 KiB |
162
lab_1/lab1.ipynb
Normal file
162
lab_1/lab1.ipynb
Normal file
@ -0,0 +1,162 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Начало лабораторной \n",
|
||||
"\n",
|
||||
"Выгрузка данных из csv файла в датафрейм"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 63,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Index(['Unnamed: 0', 'Name', 'Rating', 'Spec_score', 'No_of_sim', 'Ram',\n",
|
||||
" 'Battery', 'Display', 'Camera', 'External_Memory', 'Android_version',\n",
|
||||
" 'Price', 'company', 'Inbuilt_memory', 'fast_charging',\n",
|
||||
" 'Screen_resolution', 'Processor', 'Processor_name'],\n",
|
||||
" dtype='object')\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd \n",
|
||||
"df = pd.read_csv(\"..//static//csv//mobile phone price prediction.csv\")\n",
|
||||
"print(df.columns)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 68,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<pandas.core.groupby.generic.SeriesGroupBy object at 0x000001BFC924FE60>\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "TypeError",
|
||||
"evalue": "unsupported operand type(s) for +=: 'int' and 'str'",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[1;32mIn[68], line 12\u001b[0m\n\u001b[0;32m 10\u001b[0m price \u001b[38;5;241m=\u001b[39m df[df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcompany\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m==\u001b[39m c_value][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPrice\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39msum()\n\u001b[0;32m 11\u001b[0m c_total \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m count\n\u001b[1;32m---> 12\u001b[0m \u001b[43mp_total\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mprice\u001b[49m\n\u001b[0;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(c_value, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcount =\u001b[39m\u001b[38;5;124m\"\u001b[39m, count, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m price =\u001b[39m\u001b[38;5;124m\"\u001b[39m, price)\n\u001b[0;32m 14\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTotal count = \u001b[39m\u001b[38;5;124m\"\u001b[39m, c_total)\n",
|
||||
"\u001b[1;31mTypeError\u001b[0m: unsupported operand type(s) for +=: 'int' and 'str'"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"average_prices = df.groupby('company')['Price']\n",
|
||||
"print(average_prices)\n",
|
||||
"\n",
|
||||
"c_values = df[\"company\"].unique()\n",
|
||||
"\n",
|
||||
"c_total = 0\n",
|
||||
"p_total = 0\n",
|
||||
"for c_value in c_values:\n",
|
||||
" count = df[df[\"company\"] == c_value].shape[0]\n",
|
||||
" price = df[df[\"company\"] == c_value][\"Price\"].sum()\n",
|
||||
" countrys = df1.groupby(\"Country\").size().reset_index(name=\"Count\")\n",
|
||||
" c_total += count\n",
|
||||
" p_total += price\n",
|
||||
" print(c_value, \"count =\", count, \" price =\", price)\n",
|
||||
"print(\"Total count = \", c_total)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 65,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" price = 89 6,990\n",
|
||||
"90 6,999\n",
|
||||
"91 7,499\n",
|
||||
"92 7,999\n",
|
||||
"93 8,033\n",
|
||||
" ... \n",
|
||||
"854 36,990\n",
|
||||
"855 45,215\n",
|
||||
"856 69,999\n",
|
||||
"857 68,899\n",
|
||||
"858 63,490\n",
|
||||
"Name: Price, Length: 186, dtype: object\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"count = df[df[\"company\"] == \"Vivo\"].shape[0]\n",
|
||||
"price = df[df[\"company\"] == \"Vivo\"][\"Price\"].replace(\",\", \"\")\n",
|
||||
"print(\" price =\", price)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 61,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "ModuleNotFoundError",
|
||||
"evalue": "No module named 'matplotlib'",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[1;32mIn[61], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[0;32m 3\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcompany\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcompany\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mstr\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m; \u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 4\u001b[0m df1 \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mexplode(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcompany\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
|
||||
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'matplotlib'"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"df['company'] = df['company'].str.split('; ')\n",
|
||||
"df1 = df.explode('company')\n",
|
||||
"companys = df1.groupby(\"company\").size().reset_index(name=\"Count\") # type: ignore\n",
|
||||
"company_counts_sorted = companys.sort_values(by='Count', ascending=False)\n",
|
||||
"top_countries = company_counts_sorted.head(50)\n",
|
||||
"\n",
|
||||
"top_countries.plot.bar(x='company', y='Count', color=['green'])\n",
|
||||
"plt.title('Top Countries by count of people')\n",
|
||||
"plt.xlabel('Country')\n",
|
||||
"plt.ylabel('Number of People')\n",
|
||||
"plt.show()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "aimenv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
BIN
lab_1/requirements.txt
Normal file
BIN
lab_1/requirements.txt
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user