diff --git a/.gitignore b/.gitignore index 207d123..27f0999 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,6 @@ ipython_config.py # Remove previous ipynb_checkpoints # git rm -r .ipynb_checkpoints/ +# virtual +aimenv/ +static/ diff --git a/README.md b/README.md index 724296a..90c8c3e 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,8 @@ # AIM-PIbd-32-Puchkina-A-A -МИИ \ No newline at end of file +## МИИ + +ФИО: Пучкина Анна\ +Группа: ПИбд-32\ +Вариант: 18 (Цена на мобильные устройства)\ +Ссылка на dataset: https://www.kaggle.com/datasets/dewangmoghe/mobile-phone-price-prediction \ No newline at end of file diff --git a/lab_1/image.png b/lab_1/image.png new file mode 100644 index 0000000..fc022a3 Binary files /dev/null and b/lab_1/image.png differ diff --git a/lab_1/lab1.ipynb b/lab_1/lab1.ipynb new file mode 100644 index 0000000..9b73b55 --- /dev/null +++ b/lab_1/lab1.ipynb @@ -0,0 +1,162 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Начало лабораторной \n", + "\n", + "Выгрузка данных из csv файла в датафрейм" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['Unnamed: 0', 'Name', 'Rating', 'Spec_score', 'No_of_sim', 'Ram',\n", + " 'Battery', 'Display', 'Camera', 'External_Memory', 'Android_version',\n", + " 'Price', 'company', 'Inbuilt_memory', 'fast_charging',\n", + " 'Screen_resolution', 'Processor', 'Processor_name'],\n", + " dtype='object')\n" + ] + } + ], + "source": [ + "import pandas as pd \n", + "df = pd.read_csv(\"..//static//csv//mobile phone price prediction.csv\")\n", + "print(df.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "ename": "TypeError", + "evalue": "unsupported operand type(s) for +=: 'int' and 'str'", + "output_type": "error", + 
"traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[68], line 12\u001b[0m\n\u001b[0;32m 10\u001b[0m price \u001b[38;5;241m=\u001b[39m df[df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcompany\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m==\u001b[39m c_value][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPrice\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39msum()\n\u001b[0;32m 11\u001b[0m c_total \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m count\n\u001b[1;32m---> 12\u001b[0m \u001b[43mp_total\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mprice\u001b[49m\n\u001b[0;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(c_value, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcount =\u001b[39m\u001b[38;5;124m\"\u001b[39m, count, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m price =\u001b[39m\u001b[38;5;124m\"\u001b[39m, price)\n\u001b[0;32m 14\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTotal count = \u001b[39m\u001b[38;5;124m\"\u001b[39m, c_total)\n", + "\u001b[1;31mTypeError\u001b[0m: unsupported operand type(s) for +=: 'int' and 'str'" + ] + } + ], + "source": [ + "average_prices = df.groupby('company')['Price']\n", + "print(average_prices)\n", + "\n", + "c_values = df[\"company\"].unique()\n", + "\n", + "c_total = 0\n", + "p_total = 0\n", + "for c_value in c_values:\n", + " count = df[df[\"company\"] == c_value].shape[0]\n", + " price = df[df[\"company\"] == c_value][\"Price\"].sum()\n", + " countrys = df1.groupby(\"Country\").size().reset_index(name=\"Count\")\n", + " c_total += count\n", + " p_total += price\n", + " print(c_value, \"count =\", count, \" price =\", price)\n", + "print(\"Total count = \", c_total)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 65, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " price = 89 6,990\n", + "90 6,999\n", + "91 7,499\n", + "92 7,999\n", + "93 8,033\n", + " ... \n", + "854 36,990\n", + "855 45,215\n", + "856 69,999\n", + "857 68,899\n", + "858 63,490\n", + "Name: Price, Length: 186, dtype: object\n" + ] + } + ], + "source": [ + "count = df[df[\"company\"] == \"Vivo\"].shape[0]\n", + "price = df[df[\"company\"] == \"Vivo\"][\"Price\"].replace(\",\", \"\")\n", + "print(\" price =\", price)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'matplotlib'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[61], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[0;32m 3\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcompany\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcompany\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mstr\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m; \u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 4\u001b[0m df1 \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mexplode(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcompany\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", + "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'matplotlib'" + ] + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", 
+ "\n", + "df['company'] = df['company'].str.split('; ')\n", + "df1 = df.explode('company')\n", + "companys = df1.groupby(\"company\").size().reset_index(name=\"Count\") # type: ignore\n", + "company_counts_sorted = companys.sort_values(by='Count', ascending=False)\n", + "top_countries = company_counts_sorted.head(50)\n", + "\n", + "top_countries.plot.bar(x='company', y='Count', color=['green'])\n", + "plt.title('Top Countries by count of people')\n", + "plt.xlabel('Country')\n", + "plt.ylabel('Number of People')\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "aimenv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/lab_1/requirements.txt b/lab_1/requirements.txt new file mode 100644 index 0000000..cd07185 Binary files /dev/null and b/lab_1/requirements.txt differ