AIM-PIbd-31-Kouvshinoff-T-A/lab_1/laba1.ipynb

337 lines
200 KiB
Plaintext
Raw Normal View History

2024-09-20 20:12:39 +04:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
2024-09-21 01:14:16 +04:00
"Устанавливаем matplotlib прямо из ноутбука, потому что иначе он не хотел устанавливаться"
2024-09-20 20:12:39 +04:00
]
},
{
"cell_type": "code",
2024-09-21 01:14:16 +04:00
"execution_count": 141,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: matplotlib in d:\\мии\\aim-pibd-31-kouvshinoff-t-a\\laba\\lib\\site-packages (3.9.2)\n",
"Requirement already satisfied: contourpy>=1.0.1 in d:\\мии\\aim-pibd-31-kouvshinoff-t-a\\laba\\lib\\site-packages (from matplotlib) (1.3.0)\n",
"Requirement already satisfied: cycler>=0.10 in d:\\мии\\aim-pibd-31-kouvshinoff-t-a\\laba\\lib\\site-packages (from matplotlib) (0.12.1)\n",
"Requirement already satisfied: fonttools>=4.22.0 in d:\\мии\\aim-pibd-31-kouvshinoff-t-a\\laba\\lib\\site-packages (from matplotlib) (4.53.1)\n",
"Requirement already satisfied: kiwisolver>=1.3.1 in d:\\мии\\aim-pibd-31-kouvshinoff-t-a\\laba\\lib\\site-packages (from matplotlib) (1.4.7)\n",
"Requirement already satisfied: numpy>=1.23 in d:\\мии\\aim-pibd-31-kouvshinoff-t-a\\laba\\lib\\site-packages (from matplotlib) (2.1.1)\n",
"Requirement already satisfied: packaging>=20.0 in d:\\мии\\aim-pibd-31-kouvshinoff-t-a\\laba\\lib\\site-packages (from matplotlib) (24.1)\n",
"Requirement already satisfied: pillow>=8 in d:\\мии\\aim-pibd-31-kouvshinoff-t-a\\laba\\lib\\site-packages (from matplotlib) (10.4.0)\n",
"Requirement already satisfied: pyparsing>=2.3.1 in d:\\мии\\aim-pibd-31-kouvshinoff-t-a\\laba\\lib\\site-packages (from matplotlib) (3.1.4)\n",
"Requirement already satisfied: python-dateutil>=2.7 in d:\\мии\\aim-pibd-31-kouvshinoff-t-a\\laba\\lib\\site-packages (from matplotlib) (2.9.0.post0)\n",
"Requirement already satisfied: six>=1.5 in d:\\мии\\aim-pibd-31-kouvshinoff-t-a\\laba\\lib\\site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"# Use the %pip magic explicitly so the package is installed into the running kernel's environment\n",
"%pip install matplotlib"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## просто вывод колонок датасета"
]
},
{
"cell_type": "code",
"execution_count": 142,
2024-09-20 20:12:39 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2024-09-21 01:14:16 +04:00
"Index(['ID', 'Price', 'Levy', 'Manufacturer', 'Model', 'Prod. year',\n",
" 'Category', 'Leather interior', 'Fuel type', 'Engine volume', 'Mileage',\n",
" 'Cylinders', 'Gear box type', 'Drive wheels', 'Doors', 'Wheel', 'Color',\n",
" 'Airbags'],\n",
" dtype='object')"
2024-09-20 20:12:39 +04:00
]
},
2024-09-21 01:14:16 +04:00
"execution_count": 142,
2024-09-20 20:12:39 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Read the car price dataset; the relative path is resolved against the kernel's working directory.\n",
"df = pd.read_csv(\n",
"    \"..//static//csv//car_price_prediction.csv\",\n",
"    sep=\",\",\n",
")\n",
2024-09-21 01:14:16 +04:00
"df.columns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## преобразуем пробег в число"
]
},
{
"cell_type": "code",
"execution_count": 143,
"metadata": {},
"outputs": [],
"source": [
"# Convert 'Mileage' from text to float by deleting every non-digit character.\n",
"# The dtype guard keeps the cell idempotent: re-running it on the already numeric\n",
"# column is a no-op instead of failing on the `.str` accessor.\n",
"if not pd.api.types.is_numeric_dtype(df['Mileage']):\n",
"    df['Mileage'] = df['Mileage'].str.replace(r'\\D+', '', regex=True).astype(float)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## отрисовываю круговую диаграмму по которой можно сделать вывод о распределении типов двигателей"
]
},
{
"cell_type": "code",
"execution_count": 144,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Fuel type\n",
"Petrol 10150\n",
"Diesel 4036\n",
"Hybrid 3578\n",
"LPG 892\n",
"CNG 494\n",
"Plug-in Hybrid 86\n",
"Hydrogen 1\n",
"Name: count, dtype: int64"
]
},
"execution_count": 144,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAfoAAAGbCAYAAAAsvVK2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABzfklEQVR4nO3ddXxV9RvA8c/NdfeoAaO7SxoBBRULFBFQwf4pSlkoYiCiYmMhIXYhFqiUgCjS3Rsxxgbs3vV26/z+mFy5Llifu7vn7WsvuSefc7d7n/P9nm9oFEVREEIIIYRH0qodgBBCCCGqjiR6IYQQwoNJohdCCCE8mCR6IYQQwoNJohdCCCE8mCR6IYQQwoNJohdCCCE8mCR6IYQQwoNJohdCCCE8mCR6IYRwE0uXLiUxMdH5etGiRSQlJakXkPAIkuiFEMJNrF+/nmnTppGYmMjKlSu577770Grla1pUjF7tAIQQQhR46KGH6NevHw0bNgTg4YcfJiYmRuWoRE2nkUlthBDCfWRnZ7Nnzx7Cw8Np3Lix2uEIDyB1QsDMmTPRaDQcOHCAkSNHEhgYSFhYGA8++CB5eXku2y5cuJABAwYQGRmJl5cXLVu2ZP78+UUe9+eff6Zv374EBAQQGBhIly5d+OSTT5zr+/Xrh0ajKfbn4md1cXFxDB8+nF9++YX27dvj7e1Ny5Yt+eabbwqd12w2M2nSJOrVq4eXlxfx8fHMmTMHh8NRaNtFixYVee64uLhC2x44cIAbbriB0NBQvL296dy5M8uXLy/y2ou7tkWLFrls99dffzF06FCCgoLw9fWlb9++bNy40WWbC7+fc+fOuSzfsmVLoWOOHz++UOwnT57Ex8en0HsKBb+j3r174+fnR0BAAMOGDWPv3r1FXtOl3rOirrMs8cTFxZV4zIvZbDaeeeYZGjdujJeXF3FxcTz22GPk5+e7bHfxMbVaLdHR0YwaNYoTJ06UeI0XJCYmlurv87/via+vL23atOGDDz5wOV5lvB8TJkxwbvPSSy/Rs2dPwsLC8PHxoVOnTnz11Vcux7/U76pfv37ObfPz83nqqaeIj4/Hy8uLevXqMW3atELva1HXXNTxoHSfxwvv86JFi/Dz86Nbt240btyY++67D41Gw/jx40v+RQlRAqm6v8jIkSOJi4tj9uzZ/Pnnn7z++uuYTCaWLFni3Gb+/Pm0atWKq6++Gr1ez/fff8+9996Lw+Hgvvvuc263aNEibr/9dlq1asWjjz5KcHAw27dvZ8WKFYwePdq5Xd26dZk9e7ZLHD/99BOffvppofgOHz7MqFGjuPvuuxk3bhwLFy7kxhtvZMWKFVx++eUA5OTk0LdvX5KSkrjrrruoX78+f/zxB48++ijJycm8+uqrRV77rFmznNWFL7/8MiaTyWX93r176dWrF3Xq1OGRRx7Bz8+PL774ghEjRvD1119z7bXXFjpm8+bNefzxxwE4d+4cDz30kMv61atXc8UVV9CpUyeeeuoptFqt80Zq/fr1dO3atchYy+rJJ58sdMMG8NFHHzFu3DiGDBnCnDlzyMnJYf78+Vx22WVs3769yJsdgD59+vDRRx85Xz/33HMAzmsF6NmzZ5njAWjfvj2TJ092WbZkyRJ+/fVXl2UTJkxg8eLF3HDDDUyePJm//vqL2bNns3//fr799luXbXv37s2dd96Jw+Fgz549vPrqq5w+fZr169cXG+N/3XzzzVx55ZVA8X+fAPPmzSM8PJyMjAw+/PBDJk6cSFxcHIMGDSr22GV9P+Lj453/fu2117j66qu55ZZbsFgsfPbZZ9x444388MMPDBs2DMDld7V+/Xree+89Z5wAUVFRADgcDq6++mo2bNjAnXfeSYsWLdi9ezfz5s3j0KFDLFu2rMRrhn//Fi4o7+cR4MiRI7z//vvFrhei1BShPPXUUwqgXH311S7L7733XgVQdu7c6VyWk5NTaP8hQ4YojRo1cr42m81KQE
CA0q1bNyU3N9dlW4fD4fx33759lVatWhU63ty5cxVASUhIcC5r0KCBAihff/21c1l6eroSExOjdOjQwbnsmWeeUfz8/JRDhw65HPORRx5RdDqdcuLECZfl7733ngIoW7ZscS4bNmyY0qBBA5ftBg4cqLRp00bJy8tzuZaePXsqTZo0KXQNvXr1Uvr37+98nZCQoADKwoULnfs2adJEGTJkiMt7kpOTozRs2FC5/PLLncsu/H7Onj3rco6///7b5ZiKoijjxo1ziX3Pnj2KVqtVrrjiCpf3NDMzUwkODlYmTpzocswzZ84oQUFBhZaXpG/fvkrfvn2LXFfaeBSl4Hc8bNiwQse47777lIs/qjt27FAAZcKECS7bTZkyRQGU1atXuxxz3LhxLtuNHj1a8fX1LdW1HTp0SAGUl156ybmsqL/PhQsXFlp2Yd8XX3zRuawy3o+L/ffzaLFYlNatWysDBgwocvui4rzgo48+UrRarbJ+/XqX5e+8844CKBs3bnRZ/v777yuAcvz4ceey//4tlPbz+N/Ph6IoysiRI5XWrVsr9erVK/Q7FKIspOr+IheXyAH+97//AQUlmAt8fHyc/05PT+fcuXP07duXY8eOkZ6eDsCvv/5KZmYmjzzyCN7e3i7H/G8VbFnExsa6lJwDAwMZO3Ys27dv58yZMwB8+eWX9O7dm5CQEM6dO+f8GTRoEHa7nd9//93lmBdKUv+N82JpaWmsXr2akSNHkpmZ6Tzm+fPnGTJkCIcPHy7UBchiseDl5VXsMXfs2MHhw4cZPXo058+fdx4zOzubgQMH8vvvvxd61JCWluZyTRfe75I8+uijdOzYkRtvvNFl+a+//orZbObmm292OaZOp6Nbt26sWbPmkscuj+LiKYsLf48PP/ywy/ILJd8ff/zRZXl+fj7nzp0jNTWVX3/9ldWrVzNw4MBSnas0fx8XM5lMnDt3jmPHjjFv3jx0Oh19+/YtdvuKvh8Xfx5NJhPp6en07t2bbdu2lflYX375JS1atKB58+YufxMDBgwAKPQ3YbFYAEr8Oy/r5/GCrVu38uWXXzJ79mxpdS8qTKruL9KkSROX140bN0ar1bo8N9y4cSNPPfUUmzZtIicnx2X79PR0goKCOHr0KACtW7eu1Pji4+ML3Sg0bdoUKHjGFx0dzeHDh9m1axcRERFFHiM1NdXl9YXn3kFBQcWe98iRIyiKwowZM5gxY0axx61Tp47ztdlspkGDBsUe8/DhwwCMGzeu2G3S09MJCQlxvm7WrFmx2xZlw4YNfP/996xatarQM+kL57/wJf5fgYGBZTpXReMpi+PHj6PVal2qsAGio6MJDg7m+PHjLss/++wzPvvsM+frLl26FHp2XpzS/H1crGPHjs5/e3l58eabbxb7CKYy3o8ffviBZ599lh07drg8Ry/PDfXhw4fZv39/qT87ZrMZAH9//xKPWZbP4wWPPPIIvXv3Zvjw4dx///2liF6I4kmiL8F/vyyOHj3KwIEDad68Oa+88gr16tXDaDTy008/MW/evCIbu1U3h8PB5ZdfzrRp04pcf+HG4ILExEQMBgOxsbElHhNgypQpDBkypMht/pt0zpw5U+y2Fx9z7ty5tG/fvsht/vsF+vXXX7sk4EOHDhWqhbnY9OnTGTJkCAMGDCjUCPDC+T/66COio6ML7avXV/5Ho6R4yqO0yWzw4MFMnToVgFOnTjFnzhz69+/Pli1bXErERblwk1tce4X/Wrp0KVFRUeTl5bF69Wruu+8+vL29i2xMVtH3Y/369Vx99dX06dOHt99+m5iYGAwGAwsXLnRp9FpaDoeDNm3a8MorrxS5vl69ei6vz5w5g7+/P35+fiUesyyfR4BffvmF3377jU2bNpUheiGKJ4n+IocPH3Y2SIOCkqzD4XB+yX3//ffk5+ezfPly6tev79zuv1V6F7rE7Nmzp1ACrIgLJeuLv+APHToE/PtF3LhxY7Kyskps/HSxLVu20LFjxx
KrBxs1agSAwWAo1XFPnTpFZmYmLVq0KHabC+9RYGBgqWPt06ePs9ETQHBwcLHbLlu2jE2bNhVbhXvh/JGRkaU+f0V
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Count the rows per fuel type once and reuse the result for the chart and the table.\n",
"fuel_type_counts = df['Fuel type'].value_counts()\n",
"\n",
"# Pie chart of the fuel type shares. The former `y=''` argument was dropped:\n",
"# pandas only uses `y` when the plotted object is a DataFrame, not a Series.\n",
"fuel_type_counts.plot(\n",
"    kind='pie',\n",
"    autopct='%1.1f%%',\n",
"    legend=False,\n",
"    title='распределение типов двигателей',\n",
"    pctdistance=0.85,    # percentage labels are placed inside the circle\n",
"    labeldistance=1.25,  # category labels are placed outside the circle\n",
")\n",
"\n",
"# Last expression of the cell: show the raw counts below the chart.\n",
"fuel_type_counts"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"вывод: бензиновых двигателей больше чем всех остальных вместе взятых, а дизель и гибрид делят остальную половину почти пополам \n",
"З.Ы. \n",
"&nbsp;&nbsp;&nbsp;&nbsp;CNG(Природный газ) \n",
"&nbsp;&nbsp;&nbsp;&nbsp;LPG (Сжиженный нефтяной газ)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## график распределения цен. Так как среди записей есть машины за 1\\$ и за 26'307'500\\$, пришлось делать фильтрацию для отсеивания ненормальных значений, иначе на графике вообще ничего не было видно."
]
},
{
"cell_type": "code",
"execution_count": 145,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"минимальная цена:1\n",
"максимальная цена:26307500\n"
]
},
{
"data": {
"text/plain": [
"<Axes: title={'center': 'Распределение цен (в пределах нормы)'}, xlabel='Цена', ylabel='Количество'>"
]
},
"execution_count": 145,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2YAAAHWCAYAAAAcgJqiAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABSYUlEQVR4nO3dfXzO9f////thbMYcm2FnDEPOT8oKUyHGaIXoLaWc5+Q9lRTyfhei95uoUEr17c060QmdUCmac2XIspwmNDndnG4zJzt9/v7ot+PjsBOzZq+x2/VyOS4Xx/P1PF6vx+t5vDbHfa/X63nYjDFGAAAAAADLlLG6AAAAAAAo7QhmAAAAAGAxghkAAAAAWIxgBgAAAAAWI5gBAAAAgMUIZgAAAABgMYIZAAAAAFiMYAYAAAAAFiOYAQAAAIDFCGYAAPwNKSkp8vHx0cKFC60uBXA4ffq0KlasqO+++87qUgAUEMEMQLGLjIyUzWZzPMqXL6/69etr1KhRSkhIsLo84JrMmTNHlSpVUt++fa0uBXCoUqWKhg4dqhdeeMHqUgAUEMEMgGWmTJmiDz/8UHPnzlXbtm01b948hYSE6MKFC1aXBhRIenq65syZo6FDh8rFxcXqcgAnI0aM0C+//KLVq1dbXQqAAiCYAbBMt27d9Oijj2ro0KGKjIzU6NGjFRcXp6VLl1pdGlAg3377rU6ePKk+ffpYXQqQQ6NGjdS0aVNFRkZaXQqAAiCYASgxOnbsKEmKi4uTJJ05c0bPPvusmjVrJg8PD9ntdnXr1k2//vprjtdeunRJkydPVv369VW+fHn5+/urV69eOnDggCTp4MGDTpdPXvno0KGDY11r166VzWbTZ599pn/961/y8/NTxYoV1b17dx0+fDjHtjdv3qyuXbvK09NTFSpUUPv27fXTTz/luo8dOnTIdfuTJ0/O0fejjz5ScHCw3N3d5e3trb59++a6/fz27XJZWVmaPXu2mjRpovLly8vX11fDhw/X2bNnnfrVrl1b9913X47tjBo1Ksc6c6t95syZOcZUklJTUzVp0iTVq1dPbm5uCgwM1Lhx45SamprrWF2uQ4cOatq0aY72V155RTabTQcPHnRqT0xM1OjRoxUYGCg3NzfVq1dPL7/8srKyshx9ssftlVdeybHepk2b5qg/N0uWLFHt2rVVt25dp/aBAwc6vQ+VK1dWhw4dtGHDhquu88rXXvlYu3ato2/2uMTExKht27Zyd3dXUFCQ3n777RzrvZbxz/4ZuNoxJUlHjx7V4MGD5evrKzc3NzVp0kTz58+/pn27/Bjavn27Bg4cqDp16qh8+fLy8/PT4MGDdfr0aUefixcvqmHDhmrYsKEuXrzoaD9z5oz8/f3Vtm1bZWZm5jnG2ZdTb9261an91KlTuR7T27ZtU7du3WS32+Xh4aFOnTpp06ZNua5z/fr1Gj58uKpUqSK73a7+/fvn+jNms9k0evToHLWFhYXJZrM5/Qxmvx+ff/55nvs0cOBA1a5dO0d7586d9c0338gYk+drAZQMZa0uAACyZYeoKlWqSJL++OMPLVmyRP/4xz8UFBSkhIQEvfPOO2rfvr12796tgIAASVJmZqbuu+8+rVq1Sn379tVTTz2lc+fOKSoqSjt37nT60Pzwww/r3nvvddruhAkTcq3nP//5j2w2m8aPH68TJ05o9uzZCg0NVWxsrNzd3SVJq1evVrdu3RQcHKxJkyapTJkyWrBggTp27KgNGzaoVatWOdZbo0YNTZs2TdJfE0eMHDky122/8MIL6tOnj4YOHaqTJ0/qjTfeULt27bRt2zZ5eXnleM2wYcN09913S5K+/PJLffXVV07Lhw8frsjISA0aNEhPPvmk4uLiNHfuXG3btk0//fSTypUrl+s4XIvExETHvl0uKytL3bt3148//qhhw4apUaNG2rFjh2bNmqXff/9dS5Ys+dvbznbhwgW1b99eR48e1fDhw1WzZk
1t3LhREyZM0PHjxzV79uwi29bGjRvVsmXLXJdVrVpVs2bNkiQdOXJEc+bM0b333qvDhw/n+v5dzs3NTe+9955T288//6zXX389R9+zZ8/q3nvvVZ8+ffTwww9r0aJFGjlypFxdXTV48GBJhR//J598UnfccYck6YMPPlBUVJTT8oSEBLVp00Y2m02jRo1StWrV9P3332vIkCFKTk7ONXhcPi6S9Nhjjzktj4qK0h9//KFBgwbJz89Pu3bt0rvvvqtdu3Zp06ZNstlscnd31/vvv68777xT//73v/Xaa69JkiIiIpSUlKTIyMgiu7R0165duvvuu2W32zVu3DiVK1dO77zzjjp06KB169apdevWTv1HjRolLy8vTZ48WXv37tW8efP0559/OsJVtvLly2vhwoWaOXOm42fvyJEjWrVqlcqXL18ktUtScHCwZs2apV27duX6xw0AJYgBgGK2YMECI8msXLnSnDx50hw+fNh8+umnpkqVKsbd3d0cOXLEGGPMpUuXTGZmptNr4+LijJubm5kyZYqjbf78+UaSee2113JsKysry/E6SWbmzJk5+jRp0sS0b9/e8XzNmjVGkqlevbpJTk52tC9atMhIMnPmzHGs+5ZbbjFhYWGO7RhjzIULF0xQUJDp3Llzjm21bdvWNG3a1PH85MmTRpKZNGmSo+3gwYPGxcXF/Oc//3F67Y4dO0zZsmVztO/bt89IMu+//76jbdKkSebyX/EbNmwwkszChQudXrt8+fIc7bVq1TLh4eE5ao+IiDBX/rdxZe3jxo0zPj4+Jjg42GlMP/zwQ1OmTBmzYcMGp9e//fbbRpL56aefcmzvcu3btzdNmjTJ0T5z5kwjycTFxTnapk6daipWrGh+//13p77PPfeccXFxMYcOHTLGXNsxkZv09HRjs9nMM888k2PZgAEDTK1atZza3n33XSPJbNmyJd/1DhgwwFSsWDFH++LFi40ks2bNGkdb+/btjSTz6quvOtpSU1PNrbfeanx8fExaWpox5trH/4cffjCSzOeff+5oy+39HzJkiPH39zenTp1yau/bt6/x9PQ0Fy5ccGrv16+fCQoKcmq78hi68jXGGPPJJ58YSWb9+vVO7RMmTDBlypQx69evd4zP7Nmzc7z+Stm/g37++Wen9tx+Hnv27GlcXV3NgQMHHG3Hjh0zlSpVMu3atcuxzuDgYMe4G2PMjBkzjCSzdOlSR1utWrVM586dTdWqVZ3GeOrUqaZt27Y5fgazfyctXrw4z33K7ZgzxpiNGzcaSeazzz7Lf1AAWI5LGQFYJjQ0VNWqVVNgYKD69u0rDw8PffXVV6pevbqkv84alCnz16+pzMxMnT59Wh4eHmrQoIF++eUXx3q++OILVa1aVU888USObeR26VVB9e/fX5UqVXI8f/DBB+Xv7++Yfjo2Nlb79u3TI488otOnT+vUqVM6deqUzp8/r06dOmn9+vVOl85Jf11yebW/hn/55ZfKyspSnz59HOs8deqU/Pz8dMstt2jNmjVO/dPS0iT9NV55Wbx4sTw9PdW5c2endQYHB8vDwyPHOtPT0536nTp1SpcuXcq37qNHj+qNN97QCy+8IA8Pjxzbb9SokRo2bOi0zuzLV6/c/t+xePFi3X333apcubLTtkJDQ5WZman169c79b9w4UKOfc3vMrhsZ86ckTFGlStXznV5VlaWY32xsbH64IMP5O/vr0aNGhXJfmYrW7ashg8f7nju6uqq4cOH68SJE4qJiZF07eOf/V7nd6waY/TFF1/o/vvvlzHGab1hYWFKSkpy+jmV/jpW8ztOJTnORmfXcerUKbVp00aScqxv8uTJatKkiQYMGKB//vOfat++vZ588sl813+5pKQkp7rPnDnjtDwzM1M//PCDevbsqTp16jja/f399cgjj+jHH39UcnKy02uGDRvmdPZ55MiRKlu2bI5p611dXdWvXz8tWLDA0ZZ9Rjsv586d06lTp5SYmFjgfcw+Pk
+dOlXg1wCwBpcyArDMm2++qfr166ts2bLy9fVVgwYNHEFM+uuD7Zw5c/TWW28pLi7O6cNy9uWO0l+XQDZo0EBlyxb
"text/plain": [
"<Figure size 1000x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"price = df['Price']\n",
"print('минимальная цена:' + str(price.min()))\n",
"print('максимальная цена:' + str(price.max()))\n",
"\n",
"# The band is centred on the median (not the mean) and is one standard deviation wide on each side.\n",
"price_center = price.median()\n",
"price_spread = price.std()\n",
"lower_price = price_center - price_spread\n",
"upper_price = price_center + price_spread\n",
"\n",
"# Keep only the rows whose price falls inside the band (both bounds inclusive).\n",
"df_filtered = df[(price >= lower_price) & (price <= upper_price)]\n",
"\n",
"# Histogram of the filtered prices\n",
"df_filtered['Price'].plot(\n",
"    kind='hist',\n",
"    bins=100,\n",
"    figsize=(10, 5),\n",
"    title='Распределение цен (в пределах нормы)',\n",
"    xlabel='Цена',\n",
"    ylabel='Количество',\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"вывод: большинство машин было продано дешевле 25000$"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## график зависимости цены от пробега для первых 100 элементов"
]
},
{
"cell_type": "code",
"execution_count": 146,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Axes: title={'center': 'Зависимость цены от пробега'}, xlabel='Пробег', ylabel='Цена'>"
]
},
"execution_count": 146,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABOcAAAHWCAYAAAA8fO0eAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABlCklEQVR4nO3deVzVVeL/8feVTVHBhUUpFRTSLEvKVCKxhYnKmrGslJxxydZxyWxRp1JrpmzapqTSaSrNb0XllC1almMqReaWplYSBmhToSABAiUK5/dHP+5wZfGCFz53eT0fDx6POOfczz33cz/3k/fNWWzGGCMAAAAAAAAAra6N1R0AAAAAAAAAfBXhHAAAAAAAAGARwjkAAAAAAADAIoRzAAAAAAAAgEUI5wAAAAAAAACLEM4BAAAAAAAAFiGcAwAAAAAAACxCOAcAAAAAAABYxN/qDgAAAABoGZWVlSoqKlJ1dbWioqKs7g4AAKgHI+cAAAAAL7JlyxZdd911CgsLU1BQkLp3765Ro0ZZ3S0AANAARs4BAACXWLRokZYvX67t27erqKhIXbt2Vd++fTVp0iT98Y9/VJs2/E0QaGnvvPOORo8erX79+unBBx9Unz59JEkREREW9wwAADTEZowxVncCAAB4voSEBHXv3l0XXnihQkJCVFxcrM8//1yvvfaaRo8erfT0dKu7CHi1oqIi9e3bV+eee66WLVumwMBAq7sEAACcQDgHAABc4siRIwoICKhTPnXqVD399NPKzc1VdHR063cM8BGPP/645s2bp3379qlz585WdwcAADiJ+SUAAMAl6gvmJNkDudrTWt955x2NGDFCUVFRCgoKUp8+ffTXv/5VVVVVDo89//zzZbPZ7D9hYWEaMWKEdu3a5dDOZrNp3rx5DmWPPvqobDabzj//fIfyX3/9VfPmzdMpp5yitm3bqnv37rrqqqv03XffSZLy8vJks9m0ZMkSh8dNnjxZNptNEyZMsJctWbJENptNgYGBKigocGi/YcMGe7+3bNniULds2TKdffbZateuncLCwvTHP/5RP/zwQ51zt3v3bl177bUKDw9Xu3bt1LdvX91zzz2SpHnz5jmcm/p+1q1bZz+Pp59+ep3jO6Ohxz722GOy2WzKy8tzKC8uLtb06dPVo0cPBQUFKTY2Vn//+99VXV1tb1Nzjh977LE6xz399NMd3rN169bJZrPp3//+d4N9nDBhgtPB77PPPqvTTjtNQUFBioqK0uTJk1VcXOzweo93XhtzvMfXPl/R0dG6/PLL9dFHH2ngwIFq27at+vfvr7feeqvOcXNycnTNNdeoS5cuCg4O1tChQ7Vy5UqHNp9//rkGDhyohx56yH7+4+Li9PDDDzuc/xovv/yy/Trs0qWLxowZo++//96p15OcnGxvs3jxYl144YWKiIhQUFCQ+vfvr4ULFzZ6ngAAwP+w5hwAAHCp4uJiHT16VIcOHdLWrVv12GOPacyYMerZs6e9zZIlS9ShQwfNmDFDHTp00Mcff6w5c+aotLRUjz76qMPx+vXrp3vuuUfGGH333Xd64okndNlll2nfvn2N9mH+/Pl1yquqqnT55ZdrzZo1GjNmjG677TYdOnRIq1ev1q5du+zrcx1rz549+te//tXg8/n5+enll1/W7bffbi9bvHix2rZtq19//dWh7ZIlSzRx4kSdc845mj9/vvbv36+nnnpKmZmZ2rZtmzp16iRJ2rFjh4YNG6aAgADddNNNio6O1nfffaf33ntPDz74oK666irFxsbaj3v77bfr1FNP1U033WQvO/XUUxvsc0uoqKjQ8OHD9cMPP+jmm29Wz5499dlnn2n27Nn66aef9OSTT7Zqf441b9483X///UpOTtatt96qrKwsLVy4UJs3b1ZmZqYCAgJ0zz336IYbbpAkFRYW6vbbb9dNN92kYcOGOf08J598cp3r7/333693and2drZGjx6tW265RePHj9fixY
t1zTXXaNWqVfrd734nSdq/f7/OPfdcVVRUaNq0aeratateeukl/f73v9e///1vXXnllZKkgwcP6tNPP9Wnn36q66+/XmeffbbWrFmj2bNnKy8vT4sWLbI/74MPPqj77rtP1157rW644QYVFBQoLS1NSUlJDtdhQ6+ne/fu9v9euHChTjvtNP3+97+Xv7+/3nvvPf35z39WdXW1Jk+e7PR5AwDAZxkAAAAX6tu3r5Fk/xk3bpw5cuSIQ5uKioo6j7v55ptNcHCw+fXXX+1lw4cPN8OHD3do95e//MVIMgcOHLCXSTJz5861/3733XebiIgIc/bZZzs8/sUXXzSSzBNPPFHn+aurq40xxuTm5hpJZvHixfa6a6+91px++ummR48eZvz48fbyxYsXG0kmNTXVDBgwwF5eXl5uQkJCzHXXXWckmc2bNxtjjKmsrDQRERHm9NNPN7/88ou9/YoVK4wkM2fOHHtZUlKS6dixo9m7d2+9/TxWr169HPpW2/Dhw81pp51Wb93xNPTYRx991Egyubm59rK//vWvpn379ubbb791aDtr1izj5+dn9u3bZ4z53zl+9NFH6xz3tNNOc3jP1q5daySZZcuWNdjH8ePHm169ejX6Og4cOGACAwPNxRdfbKqqquzlTz/9tJFkXnzxxTqPqe9aOJ6mnK9evXoZSebNN9+0l5WUlJju3bub+Ph4e9n06dONJPPJJ5/Yyw4dOmRiYmJMdHS0/fUMHz7cSDLz5s1zeO4JEyYYSWbnzp3GGGPy8vKMn5+fefDBBx3a7dy50/j7+zuUO3Pt1Pd5TklJMb179270cQAA4DdMawUAAC61ePFirV69Wq+88oomTZqkV155xWE0lyS1a9fO/t+HDh1SYWGhhg0bpoqKCu3evduh7ZEjR1RYWKiCggJt2LBBy5cv1xlnnKGwsLB6n/+HH35QWlqa7rvvPnXo0MGh7s0331RYWJimTp1a53ENTVfcunWrli1bpvnz5ze44+yf/vQn7d692z599c0331RoaKguuugih3ZbtmzRgQMH9Oc//1lt27a1l48YMUL9+vWzT1MsKChQRkaGrr/+eocRh43183iqqqpUWFiowsJCVVZWNusYx7Ns2TINGzZMnTt3tj9XYWGhkpOTVVVVpYyMDIf2FRUVDu0KCwvrTG2uUXOd1J6C2hT/+c9/VFlZqenTpzu8jzfeeKNCQkLqTBFtLVFRUfaRb5IUEhKicePGadu2bcrPz5f026i7wYMH67zzzrO369Chg2666Sbl5eXp66+/tpf7+fk5jOCUpDvuuEOS7K/xrbfeUnV1ta699lqHc9+tWzfFxcVp7dq1TXoNtT/PJSUlKiws1PDhw5WTk6OSkpImHQsAAF9EOAcAAFwqISFBycnJuu666/T888/rgQce0OLFi5WZmWlv89VXX+nKK69UaGioQkJCFB4erj/+8Y+SVOfL/Geffabw8HBFRETo3HPP1dGjR7Vs2bIGQ6q5c+cqKipKN998c5267777Tn379pW/v/Mre8yaNUvDhg3T5Zdf3mCb8PBwjRgxQi+++KIk6cUXX9T48ePrhHl79+6VJPXt27fOMfr162evz8nJkaRmrxNXn927dys8PNxh/bpXX33VZceXfpuiuWrVKvvz1PzUrE924MABh/Zz586t0/bYcLbG9ddfr/DwcHXu3FkdO3bUddddp/379zvdt4bOfWBgoHr37m2vb22xsbF1ruVTTjlFkuzr0+3du7fea6Zm2nJN3202m6KiohQSEuLQrm/fvmrTpo39eNnZ2TLGKC4urs75/+abb+q8T8eTmZmp5ORktW/fXp06dVJ4eLj+8pe/SKr7eQYAAHWx5hwAAGhRV199te655x5t3LhRiYmJKi4u1vDhwxUSEqIHHnhAffr0Udu2bfXFF19o5syZdRauP+OMM/T4449L+m1E2YIFC3T++efriy++ULdu3RzafvPNN1qyZIlefvnlBjeoaIqPPvpI//
nPf7Rhw4bjtr3++us1btw4TZ06VRkZGXr++ef1ySefnHAfXCU6Otq+bt7Bgwe1YMEC/elPf1Lv3r01dOhQlzxHdXW
"text/plain": [
"<Figure size 1500x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Scatter plot of price against mileage, limited to the first 100 rows of the dataset\n",
"first_rows = df.head(100)\n",
"first_rows.plot.scatter(\n",
"    x=\"Mileage\",\n",
"    y=\"Price\",\n",
"    title='Зависимость цены от пробега',\n",
"    xlabel='Пробег',\n",
"    ylabel='Цена',\n",
"    figsize=(15, 5),\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"вывод: \n",
"1. даже уже в первых 100 есть аномалии, где пробег почти 1М \n",
"2. чем больше пробег, тем сложнее продать машину дорого"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## повторим этот график, но отфильтруем все значения вне нормы и возьмём все записи, а не только первые 100"
]
},
{
"cell_type": "code",
"execution_count": 147,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"минимальный пробег:0.0\n",
"максимальный пробег:2147483647.0\n"
]
},
{
"data": {
"text/plain": [
"<Axes: title={'center': 'Зависимость цены от пробега(в пределах нормы)'}, xlabel='Пробег', ylabel='Цена'>"
]
},
"execution_count": 147,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABPAAAAHWCAYAAAD5DnePAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeVxU9foH8M+A7MqALCqKgkKaW6K5K2qZZFbaLi2YUVqZZmapaWq22M1sU8vbL8Uos64ttlhezVBSSdy4ihaiLJqGDAiMgMp2fn/QTHNmzsycGWaFz/v14nXjnO+c851zDlzn4Xm+j0IQBAFERERERERERETkkjycPQEiIiIiIiIiIiIyjgE8IiIiIiIiIiIiF8YAHhERERERERERkQtjAI+IiIiIiIiIiMiFMYBHRERERERERETkwhjAIyIiIiIiIiIicmEM4BEREREREREREbkwBvCIiIiIiIiIiIhcWCtnT4CIiIiIyJyamhpcvHgRDQ0NiIiIcPZ0iIiIiByKGXhERERE5JIOHjyI+++/H6GhofDx8UGHDh1w1113OXtaAICGhgb07t0br776qrOnQqRVW1uLyMhIvP/++86eChER2RgDeERERHrWrl2LhIQEtGvXDl5eXmjfvj1GjRqF1NRUNDQ0OHt6RC3Ct99+ixEjRuDEiRN49dVXsWPHDuzYsQP//ve/nT01AMCmTZtw9uxZPPXUU86eCpGWl5cX5syZg1dffRVXrlxx9nSIiMiGFIIgCM6eBBERkSsZOnQoOnTogBtuuAGBgYEoLy/Hb7/9hs8//xz33XcfNm3a5OwpEjVrFy9eRPfu3TFs2DBs3rwZ3t7ezp6SgX79+mHw4MEuE1Ak0igvL0e7du3wwQcf4JFHHnH2dIiIyEYYwCMiItJTW1sLLy8vg+0zZ87E6tWrkZ+fj6ioKMdPjKiFWLlyJZYuXYozZ84gODjY2dMxcOTIEfTv3x8///wzbrzxRmdPh8jAbbfdhoqKCqSnpzt7KkREZCMsoSUiItIjFbwDoA3aeXj883+f3377LSZMmICIiAj4+PigW7duePnll1FfXy967ejRo6FQKLRfoaGhmDBhArKzs0XjFAoFli5dKtq2YsUKKBQKjB49WrT9ypUrWLp0Ka655hr4+vqiQ4cOuPPOO3H69GkAQEFBARQKBTZs2CB63YwZM6BQKPDwww9rt23YsAEKhQLe3t5QqVSi8RkZGdp5Hzx4ULRv8+bNGDBgAPz8/BAaGooHH3wQ586dM7h2f/zxB+69916EhYXBz88P3bt3x8KFCwEAS5cuFV0bqa9du3Zpr2Pv3r0Nji+Hsde++eabUCgUKCgoEG0vLy/H7NmzERkZCR8fH8TExOBf//qXqIxac43ffPNNg+P27t1bdM927doFhUKBL7/80ugcH374YdnB4ffffx+9evWCj48PIiIiMGPGDJSXl4ver7nraoq51+ter6ioKNx6663Yvn07+vXrB19fX/Ts2RNff/21wXHz8vJwzz33oG3btvD398eQIUOwdetW0ZjffvsN/fr1w2uvvaa9/rGxsXj99dcly9g//fRT7XPYtm1bTJ48GWfPnpX1fsaOHasdk5KSghtuuAHh4eHw8fFBz5498cEHHxicb8uWLfD29kZ8fLxou/6z3KZNGwwaNAhbtmwxea2lXqv/pftz/PDDD6N169bIy8tDQkICAgICEBERgWXLlkH/b/MNDQ1455130KtXL/j6+qJdu3aYPn06ysrKDOageZ7N3W9A3s+Hufem+zuosLAQTz75JLp37w4/Pz+EhITgnnvuEZ1XEASMGTMGYWFhKC4u1m6vqalBnz590K1bN1RVVRm9xqZ+Blu3bi2aDyDvWdUc84svvsALL7yA9u3bIyAgALfffrvRZ3DSpEkG558+fToUCoXod5Sp3y8ammur76abbsKePXtw8eJFo68lIiL3wi60RERERpSXl6Ourg6XLl3CoU
OH8Oabb2Ly5Mno3LmzdsyGDRvQunVrzJkzB61bt8Yvv/yCxYsXQ61WY8WKFaLj9ejRAwsXLoQgCDh9+jTeeust3HLLLThz5ozJOSxfvtxge319PW699Vbs3LkTkydPxtNPP41Lly5hx44dyM7ORrdu3SSPd+rUKfzf//2f0fN5enri008/xTPPPKPdlpKSAl9fX4P1lDZs2ICpU6di4MCBWL58OS5cuIB3330Xe/fuxZEjRxAUFAQAOHr0KEaOHAkvLy9MmzYNUVFROH36NL7//nu8+uqruPPOOxETE6M97jPPPINrr70W06ZN02679tprjc7ZHqqrqzFq1CicO3cO06dPR+fOnbFv3z4sWLAAf/31F9555x2Hzkff0qVL8dJLL2Hs2LF44oknkJOTgw8++AAHDhzA3r174eXlhYULF+LRRx8FAJSUlOCZZ57BtGnTMHLkSNnn6dSpk8Hz9+OPP0qWkefm5uK+++7D448/jilTpiAlJQX33HMPtm3bhptuugkAcOHCBQwbNgzV1dWYNWsWQkJC8PHHH+P222/Hl19+iTvuuAMAUFpaij179mDPnj145JFHMGDAAOzcuRMLFixAQUEB1q5dqz3vq6++ihdffBH33nsvHn30UahUKqxatQrx8fGi59DY++nQoYP2vz/44AP06tULt99+O1q1aoXvv/8eTz75JBoaGjBjxgztuH379qF3795Gg/2ffPIJgMbr/v777+Oee+5BdnY2unfvbvaaf/DBB2jdurX2+/z8fCxevNhgXH19PW6++WYMGTIEb7zxBrZt24YlS5agrq4Oy5Yt046bPn269md11qxZyM/Px+rVq3HkyBHts6IvMTERt9xyCwDp+23tz4fmugAQ/Y4BgAMHDmDfvn2YPHkyOnXqhIKCAnzwwQcYPXo0Tpw4AX9/fygUCqxfvx59+/bF448/rg0QL1myBMePH8euXbsQEBBg5grLI/dZ1Xj11VehUCgwb948FBcX45133sHYsWORlZUFPz8/7ThfX19s3boVxcXFCA8PBwBcvnwZX3zxBXx9fW0ydwAYMGAABEHAvn37cOutt9rsuERE5EQCERERSerevbsAQPuVlJQk1NbWisZUV1cbvG769OmCv7+/cOXKFe22UaNGCaNGjRKNe+GFFwQAQnFxsXYbAGHJkiXa759//nkhPDxcGDBggOj169evFwAIb731lsH5GxoaBEEQhPz8fAGAkJKSot137733Cr179xYiIyOFKVOmaLenpKQIAITExEShT58+2u1VVVVCYGCgcP/99wsAhAMHDgiCIAg1NTVCeHi40Lt3b+Hy5cva8T/88IMAQFi8eLF2W3x8vNCmTRuhsLBQcp76unTpIpqbrlGjRgm9evWS3GeOsdeuWLFCACDk5+drt7388stCQECAcPLkSdHY+fPnC56ensKZM2cEQfjnGq9YscLguL169RLds7S0NAGAsHnzZqNznDJlitClSxeT76O4uFjw9vYWxo0bJ9TX12u3r169WgAgrF+/3uA1Us+COZZcry5duggAhK+++kq7raKiQujQoYMQFxen3TZ79mwBgPDrr79qt126dEmIjo4WoqKitO9n1KhRAgBh6dKlonM//PDDAgDh2LFjgiAIQkFBgeDp6Sm8+uqronHHjh0TWrVqJdou59mR+nlOSEgQunbtKtrWqVMn4a677jIYu2TJEkH/n9fbt28XAAj/+c9/TJ5b81qVSiXafuDAAYN7N2XKFAGAMHPmTO22hoYGYcKECYK3t7f2GL/++qsAQNi4caPomNu2bZPcfvLkSQGA8Oabb2q3NeXnQ2PhwoWCQqEQbdP/OZe69hkZGQIAITU1VbT93//+twBA+PTTT4XffvtN8PT0FGbPnm3wen2mfgYDAgJE85H7rGqO2bFjR0GtVmvH/uc//xEACO+++652m+YZ7Nu3r+gaf/LJJ0KnTp2EkSNHip5RU79fNKSeOUEQhPPnzwsAhH/9619mrg
oREbkLltASEREZkZKSgh07dmDjxo1ITk7Gxo0bRVlhAESZFZcuXUJJSQlGjhyJ6upq/PHHH6KxtbW1KCkpgUqlQkZ
"text/plain": [
"<Figure size 1500x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"print('минимальный пробег:' + str(df['Mileage'].min()))\n",
"print('максимальный пробег:' + str(df['Mileage'].max()))\n",
"\n",
"# The band is centred on the median mileage of the price-filtered rows; its half-width is\n",
"# 1/100 of their standard deviation (not a full standard deviation).\n",
"# NOTE(review): the /100 factor looks hand-tuned to counter extreme mileage outliers - confirm.\n",
"filtered_mileage = df_filtered['Mileage']\n",
"mileage_center = filtered_mileage.median()\n",
"mileage_spread = filtered_mileage.std() / 100\n",
"lower_mileage = mileage_center - mileage_spread\n",
"upper_mileage = mileage_center + mileage_spread\n",
"\n",
"# Keep only the rows whose mileage falls inside the band (both bounds inclusive).\n",
"df_double_filtered = df_filtered[(filtered_mileage >= lower_mileage) & (filtered_mileage <= upper_mileage)]\n",
"\n",
"df_double_filtered.plot.scatter(\n",
"    x=\"Mileage\",\n",
"    y=\"Price\",\n",
"    title='Зависимость цены от пробега(в пределах нормы)',\n",
"    xlabel='Пробег',\n",
"    ylabel='Цена',\n",
"    figsize=(15, 5),\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"так-то приятнее глазу. И нагляднее видно, что при пробеге за 300к машину дороже 25к сложно продать"
2024-09-20 20:12:39 +04:00
]
}
],
"metadata": {
"kernelspec": {
"display_name": "laba",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}