diff --git a/lab_1/lab1.ipynb b/lab_1/lab1.ipynb index c6ab2f7..9b73b55 100644 --- a/lab_1/lab1.ipynb +++ b/lab_1/lab1.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 63, "metadata": {}, "outputs": [ { @@ -31,6 +31,111 @@ "df = pd.read_csv(\"..//static//csv//mobile phone price prediction.csv\")\n", "print(df.columns)" ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "ename": "TypeError", + "evalue": "unsupported operand type(s) for +=: 'int' and 'str'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[68], line 12\u001b[0m\n\u001b[0;32m 10\u001b[0m price \u001b[38;5;241m=\u001b[39m df[df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcompany\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m==\u001b[39m c_value][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPrice\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39msum()\n\u001b[0;32m 11\u001b[0m c_total \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m count\n\u001b[1;32m---> 12\u001b[0m \u001b[43mp_total\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mprice\u001b[49m\n\u001b[0;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(c_value, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcount =\u001b[39m\u001b[38;5;124m\"\u001b[39m, count, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m price =\u001b[39m\u001b[38;5;124m\"\u001b[39m, price)\n\u001b[0;32m 14\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTotal count = \u001b[39m\u001b[38;5;124m\"\u001b[39m, c_total)\n", + "\u001b[1;31mTypeError\u001b[0m: unsupported operand type(s) for +=: 
# %% Per-company phone counts and price totals


def _price_to_number(raw_prices):
    """Convert price strings such as "6,990" into numbers.

    The "Price" column is loaded as text with thousands separators, so it
    cannot be summed or averaged directly (summing strings concatenates
    them, which is what made ``p_total += price`` fail with a TypeError).

    Args:
        raw_prices: Series of prices as read from the CSV.

    Returns:
        A numeric Series with the same index. Values that still cannot be
        parsed after removing the commas become NaN.
    """
    without_separators = raw_prices.astype(str).str.replace(",", "", regex=False)
    return pd.to_numeric(without_separators, errors="coerce")


numeric_price = _price_to_number(df["Price"])

# Mean price per company. sort=False keeps companies in order of first
# appearance, matching the order of the loop below. NaN prices are skipped.
average_prices = numeric_price.groupby(df["company"], sort=False).mean()
print(average_prices)

c_values = df["company"].unique()

c_total = 0
p_total = 0
for c_value in c_values:
    # Build the row filter once and reuse it for both the count and the sum.
    company_rows = df["company"] == c_value
    count = int(company_rows.sum())
    price = numeric_price[company_rows].sum()
    c_total += count
    p_total += price
    print(c_value, "count =", count, " price =", price)
print("Total count = ", c_total)
print("Total price = ", p_total)

# %% Prices of a single company

vivo_rows = df["company"] == "Vivo"
count = int(vivo_rows.sum())
# Series.replace(",", "") only replaces cells whose whole value is ",";
# removing the separator inside each value needs the .str accessor, which
# _price_to_number uses before converting to numbers.
price = _price_to_number(df.loc[vivo_rows, "Price"])
print(" price =", price)
# %% Bar chart of the companies with the most rows in the data set

# NOTE: matplotlib must be installed in the notebook's environment; the
# previous run of this cell stopped here with ModuleNotFoundError.
import matplotlib.pyplot as plt

# Split "; "-separated company entries into one row per company. This is
# done on a copy (assign + explode) so that df["company"] keeps its original
# string values: overwriting it with lists makes this cell fail when re-run
# and breaks other cells that compare or de-duplicate df["company"].
df1 = df.assign(company=df["company"].str.split("; ")).explode("company")

# Number of rows per company, largest first.
companys = df1.groupby("company").size().reset_index(name="Count")  # type: ignore
company_counts_sorted = companys.sort_values(by="Count", ascending=False)
top_companies = company_counts_sorted.head(50)
top_countries = top_companies  # legacy name, kept for cells that may still use it

# Labels describe what is actually plotted (the "company" column).
# Assumes each row of the CSV is one phone listing - TODO confirm.
top_companies.plot.bar(x="company", y="Count", color=["green"])
plt.title("Top companies by number of phones")
plt.xlabel("Company")
plt.ylabel("Number of phones")
plt.show()