From ed7c7f729887f2d8c65f2ee67a43f3fea07eca82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=9C=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=AF=D0=BA=D0=BE?=
 =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=B2?= <shotboll16@gmail.com>
Date: Fri, 18 Oct 2024 18:46:34 +0400
Subject: [PATCH] =?UTF-8?q?=D1=82=D0=B5=D0=BF=D0=B5=D1=80=D1=8C=20=D1=82?=
 =?UTF-8?q?=D0=BE=D1=87=D0=BD=D0=BE=20=D0=B2=D1=81=D0=B5?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 lab_2/lab_2.ipynb      |  65 ++++++++++++++++++++++++-----------------
 lab_2/requirements.txt | Bin 0 -> 1620 bytes
 2 files changed, 39 insertions(+), 26 deletions(-)
 create mode 100644 lab_2/requirements.txt

diff --git a/lab_2/lab_2.ipynb b/lab_2/lab_2.ipynb
index 11cab85..dc64ebb 100644
--- a/lab_2/lab_2.ipynb
+++ b/lab_2/lab_2.ipynb
@@ -10,7 +10,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 110,
+   "execution_count": 124,
    "metadata": {},
    "outputs": [
     {
@@ -136,7 +136,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 111,
+   "execution_count": 125,
    "metadata": {},
    "outputs": [
     {
@@ -215,7 +215,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 112,
+   "execution_count": 126,
    "metadata": {},
    "outputs": [
     {
@@ -262,7 +262,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 113,
+   "execution_count": 127,
    "metadata": {},
    "outputs": [
     {
@@ -471,7 +471,7 @@
        "531            8     DDR4               0.0       other  "
       ]
      },
-     "execution_count": 113,
+     "execution_count": 127,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -492,15 +492,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 114,
+   "execution_count": 128,
    "metadata": {},
    "outputs": [],
    "source": [
     "#У первого дата сета добавим новый столбец с рейтингом от 1 до 5 на основе столбца от 1 до 100.\n",
+    "\n",
     "df['new_rating'] = pd.cut(df['rating'], bins=[0,20,40,60,80,100], labels=[1,2,3,4,5], include_lowest=True)\n",
+    "\n",
     "#У второго добавим столбец с наибольшей ценой от 1 до 10, на основе столбца от 1 до 127.\n",
+    "\n",
     "df2['new_high'] = pd.cut(df2['High'], bins=[0,13,26,39,52,65,78,91,104,117,130], labels=[1,2,3,4,5,6,7,8,9,10], include_lowest=True)\n",
+    "\n",
     "#У третьего удалим слишком большие значения обслуживания и слишком маленькие и добавим новый столбец с категориями цен от 1 до 5.\n",
+    "\n",
     "df3_filtered = df3[df3['Price'] >= 10000]\n",
     "df3_filtered = df3_filtered[df3_filtered['Price'] <= 100000]\n",
     "df3_filtered['new_price'] = pd.cut(df3_filtered['Price'], bins=[10000,28000,46000,64000,82000,100000], labels=[1,2,3,4,5], include_lowest=True)"
@@ -508,7 +513,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 115,
+   "execution_count": 129,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -565,7 +570,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 116,
+   "execution_count": 130,
    "metadata": {},
    "outputs": [
     {
@@ -606,7 +611,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 121,
+   "execution_count": 131,
    "metadata": {},
    "outputs": [
     {
@@ -621,11 +626,11 @@
       "1      0\n",
       "2      0\n",
       "Name: count, dtype: int64\n",
-      "Выборка после oversampling:  (748, 6)\n",
+      "Выборка после oversampling:  (750, 6)\n",
       "new_rating\n",
-      "5    252\n",
+      "5    251\n",
+      "3    250\n",
       "4    249\n",
-      "3    247\n",
       "1      0\n",
       "2      0\n",
       "Name: count, dtype: int64\n",
@@ -666,14 +671,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 122,
+   "execution_count": 132,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Выборка до oversampling: (4821, 7)\n",
+      "Выборка до oversampling и undersampling: (4821, 7)\n",
       "new_high\n",
       "1     2326\n",
       "2      704\n",
@@ -686,18 +691,18 @@
       "6      146\n",
       "10      31\n",
       "Name: count, dtype: int64\n",
-      "Выборка после oversampling:  (22990, 7)\n",
+      "Выборка после oversampling:  (23144, 7)\n",
       "new_high\n",
-      "6     2375\n",
-      "2     2353\n",
-      "10    2327\n",
+      "8     2374\n",
+      "6     2368\n",
+      "2     2351\n",
+      "4     2335\n",
       "1     2326\n",
-      "9     2306\n",
-      "8     2296\n",
-      "4     2293\n",
-      "3     2265\n",
-      "7     2254\n",
-      "5     2195\n",
+      "9     2317\n",
+      "10    2312\n",
+      "5     2256\n",
+      "7     2256\n",
+      "3     2249\n",
       "Name: count, dtype: int64\n",
       "Выборка после undersampling:  (310, 7)\n",
       "new_high\n",
@@ -735,14 +740,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 120,
+   "execution_count": 133,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Выборка до oversampling: (6931, 5)\n",
+      "Выборка до oversampling и undersampling: (6931, 5)\n",
       "new_price\n",
       "1    5008\n",
       "2    1281\n",
@@ -757,6 +762,14 @@
       "3    5008\n",
       "4    5008\n",
       "5    5008\n",
+      "Name: count, dtype: int64\n",
+      "Выборка после undersampling:  (285, 5)\n",
+      "new_price\n",
+      "1    57\n",
+      "2    57\n",
+      "3    57\n",
+      "4    57\n",
+      "5    57\n",
       "Name: count, dtype: int64\n"
      ]
     }
diff --git a/lab_2/requirements.txt b/lab_2/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f17ad4b524faac00942277a654e70a3dbf2196e6
GIT binary patch
literal 1620
zcmZvcQF7Wq5JdMoRXGYNOBg%($Q^P6mjDF|V_5-Vj1Qlr-?SD9Cq;>}*_rO1o?iX=
zU8O_n^?6B8sZOo_`>s!!zNA|{Mf#jRrIk*1diJSG&w8l187l5B70)`$C-j$>lQ!^=
z>hIDyY6_jt_1p|KXL0PrzrP?9p~3uXoh;KaZLa#is(%svoTCwaOk$LU&fzgeo!+AA
zsES5&Zh{G&)L6$)^`aFFCbk#&un4tAYBc9kOglZTc<Z!Ivru1&x&1J~Z76gP6JFPt
zw~4v%pZ;1{_;@GYGJOknw&1EhJ%&muaG*^0`Zmrsv@{b1bX)aST}vT1`lgqunao?-
z%3XHThlYpPMx(!RBQseKO8%|J^ExVV9Yw}{65mPBA^dy|^eOv&xI?CT@sqKBqLG9!
zfwBHqH4}Cl9ry+^xnwpY^DFtDiQ%C6*jFQ#tbv*AT`LEkFZs&3uy>uhSGpgSQZvoY
zctfaYl>=D7cK)++PWRzKR><a@j82HVnEW)T$~Vipa0PeB=3WZdss{@s`0>d8bo%`z
zX6Vy{I-FrP;2rwD!E0|iq4+k8d4oIia$jp9)#+6VT*EoD&0}vr<mg6>&dE4=5y{Lr
zXSwS6NAJ+$1d=Bw&$*t3i{x;s4Nj5(f2YaV>4@K__wb|CZ)LcW_jZo-wbENg#atDB
zW5M|@?Z1Xjr#nx7h@9BFk9Qm0q};B%611GDVxr(6_unQbW8Rp+smc?N?^UYoFt?p8
zRC0y6c`L2d>22U5O6-K+d~MV^;oyuJ?S)iD8h8)&Lx%j8bduNwUXo+fImy?ap2W|M
OUaR6UDsm=@LGxdg`s~*L

literal 0
HcmV?d00001