2024-09-14 12:19:33 +04:00
5 changed files with 171 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@ -12,3 +12,6 @@ ipython_config.py
 # Remove previous ipynb_checkpoints
 #   git rm -r .ipynb_checkpoints/

+# virtual
+aimenv/
+static/
--- a/README.md
+++ b/README.md
@ -1,3 +1,8 @@
 # AIM-PIbd-32-Puchkina-A-A

-МИИ
+## МИИ
+
+ФИО: Пучкина Анна\
+Группа: ПИбд-32\
+Вариант: 18 (Цена на мобильные устройства)\
+Ссылка на dataset: https://www.kaggle.com/datasets/dewangmoghe/mobile-phone-price-prediction
--- a/lab_1/image.png
+++ b/lab_1/image.png
--- a/lab_1/lab1.ipynb
+++ b/lab_1/lab1.ipynb
@ -0,0 +1,162 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Начало лабораторной \n",
+    "\n",
+    "Выгрузка данных из csv файла в датафрейм"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Index(['Unnamed: 0', 'Name', 'Rating', 'Spec_score', 'No_of_sim', 'Ram',\n",
+      "       'Battery', 'Display', 'Camera', 'External_Memory', 'Android_version',\n",
+      "       'Price', 'company', 'Inbuilt_memory', 'fast_charging',\n",
+      "       'Screen_resolution', 'Processor', 'Processor_name'],\n",
+      "      dtype='object')\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd \n",
+    "df = pd.read_csv(\"..//static//csv//mobile phone price prediction.csv\")\n",
+    "print(df.columns)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<pandas.core.groupby.generic.SeriesGroupBy object at 0x000001BFC924FE60>\n"
+     ]
+    },
+    {
+     "ename": "TypeError",
+     "evalue": "unsupported operand type(s) for +=: 'int' and 'str'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[68], line 12\u001b[0m\n\u001b[0;32m     10\u001b[0m     price \u001b[38;5;241m=\u001b[39m df[df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcompany\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m==\u001b[39m c_value][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPrice\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39msum()\n\u001b[0;32m     11\u001b[0m     c_total \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m count\n\u001b[1;32m---> 12\u001b[0m     \u001b[43mp_total\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mprice\u001b[49m\n\u001b[0;32m     13\u001b[0m     \u001b[38;5;28mprint\u001b[39m(c_value, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcount =\u001b[39m\u001b[38;5;124m\"\u001b[39m, count, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m price =\u001b[39m\u001b[38;5;124m\"\u001b[39m, price)\n\u001b[0;32m     14\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTotal count = \u001b[39m\u001b[38;5;124m\"\u001b[39m, c_total)\n",
+      "\u001b[1;31mTypeError\u001b[0m: unsupported operand type(s) for +=: 'int' and 'str'"
+     ]
+    }
+   ],
+   "source": [
+    "# Price is stored as text with thousands separators (e.g. \"6,990\"),\n",
+    "# so convert it to numbers once; summing the raw strings concatenates them.\n",
+    "prices = pd.to_numeric(df[\"Price\"].str.replace(\",\", \"\", regex=False))\n",
+    "\n",
+    "# Mean price per company (a bare groupby object only prints its repr).\n",
+    "average_prices = prices.groupby(df[\"company\"]).mean()\n",
+    "print(average_prices)\n",
+    "\n",
+    "c_values = df[\"company\"].unique()\n",
+    "\n",
+    "c_total = 0\n",
+    "p_total = 0\n",
+    "for c_value in c_values:\n",
+    "    # Build the row mask once and reuse it for both the count and the sum.\n",
+    "    company_mask = df[\"company\"] == c_value\n",
+    "    count = df[company_mask].shape[0]\n",
+    "    price = prices[company_mask].sum()\n",
+    "    c_total += count\n",
+    "    p_total += price\n",
+    "    print(c_value, \"count =\", count, \" price =\", price)\n",
+    "print(\"Total count = \", c_total)\n",
+    "print(\"Total price = \", p_total)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " price = 89      6,990\n",
+      "90      6,999\n",
+      "91      7,499\n",
+      "92      7,999\n",
+      "93      8,033\n",
+      "        ...  \n",
+      "854    36,990\n",
+      "855    45,215\n",
+      "856    69,999\n",
+      "857    68,899\n",
+      "858    63,490\n",
+      "Name: Price, Length: 186, dtype: object\n"
+     ]
+    }
+   ],
+   "source": [
+    "count = df[df[\"company\"] == \"Vivo\"].shape[0]\n",
+    "# Series.replace only swaps whole cell values; .str.replace edits the text\n",
+    "# inside each cell, which is what removing the thousands separator needs.\n",
+    "price = df[df[\"company\"] == \"Vivo\"][\"Price\"].str.replace(\",\", \"\", regex=False)\n",
+    "print(\" price =\", price)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'matplotlib'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[61], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[0;32m      3\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcompany\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcompany\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mstr\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m; \u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m      4\u001b[0m df1 \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mexplode(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcompany\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
+      "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'matplotlib'"
+     ]
+    }
+   ],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# Split multi-company entries on a copy: overwriting df['company'] with lists\n",
+    "# would break re-running the cells that compare df[\"company\"] to strings.\n",
+    "df1 = df.assign(company=df['company'].str.split('; ')).explode('company')\n",
+    "companys = df1.groupby(\"company\").size().reset_index(name=\"Count\") # type: ignore\n",
+    "company_counts_sorted = companys.sort_values(by='Count', ascending=False)\n",
+    "top_companies = company_counts_sorted.head(50)\n",
+    "\n",
+    "# Labels describe what is actually plotted: dataset rows (phones) per company.\n",
+    "top_companies.plot.bar(x='company', y='Count', color=['green'])\n",
+    "plt.title('Top companies by number of phones')\n",
+    "plt.xlabel('Company')\n",
+    "plt.ylabel('Number of phones')\n",
+    "plt.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "aimenv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/lab_1/requirements.txt
+++ b/lab_1/requirements.txt