From ed7c7f729887f2d8c65f2ee67a43f3fea07eca82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=AF=D0=BA=D0=BE?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=B2?= Date: Fri, 18 Oct 2024 18:46:34 +0400 Subject: [PATCH] =?UTF-8?q?=D1=82=D0=B5=D0=BF=D0=B5=D1=80=D1=8C=20=D1=82?= =?UTF-8?q?=D0=BE=D1=87=D0=BD=D0=BE=20=D0=B2=D1=81=D0=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lab_2/lab_2.ipynb | 65 ++++++++++++++++++++++++----------------- lab_2/requirements.txt | Bin 0 -> 1620 bytes 2 files changed, 39 insertions(+), 26 deletions(-) create mode 100644 lab_2/requirements.txt diff --git a/lab_2/lab_2.ipynb b/lab_2/lab_2.ipynb index 11cab85..dc64ebb 100644 --- a/lab_2/lab_2.ipynb +++ b/lab_2/lab_2.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 110, + "execution_count": 124, "metadata": {}, "outputs": [ { @@ -136,7 +136,7 @@ }, { "cell_type": "code", - "execution_count": 111, + "execution_count": 125, "metadata": {}, "outputs": [ { @@ -215,7 +215,7 @@ }, { "cell_type": "code", - "execution_count": 112, + "execution_count": 126, "metadata": {}, "outputs": [ { @@ -262,7 +262,7 @@ }, { "cell_type": "code", - "execution_count": 113, + "execution_count": 127, "metadata": {}, "outputs": [ { @@ -471,7 +471,7 @@ "531 8 DDR4 0.0 other " ] }, - "execution_count": 113, + "execution_count": 127, "metadata": {}, "output_type": "execute_result" } @@ -492,15 +492,20 @@ }, { "cell_type": "code", - "execution_count": 114, + "execution_count": 128, "metadata": {}, "outputs": [], "source": [ "#У первого дата сета добавим новый столбец с рейтингом от 1 до 5 на основе столбца от 1 до 100.\n", + "\n", "df['new_rating'] = pd.cut(df['rating'], bins=[0,20,40,60,80,100], labels=[1,2,3,4,5], include_lowest=True)\n", + "\n", "#У второго добавим столбец с наибольшей ценой от 1 до 10, на основе столбца от 1 до 127.\n", + "\n", "df2['new_high'] = pd.cut(df2['High'], bins=[0,13,26,39,52,65,78,91,104,117,130], labels=[1,2,3,4,5,6,7,8,9,10], include_lowest=True)\n", + "\n", "#У третьего удалим слишком большие значения обслуживания и слишком маленькие и добавим новый столбец с категориями цен от 1 до 5.\n", + "\n", "df3_filtered = df3[df3['Price'] >= 10000]\n", "df3_filtered = df3_filtered[df3_filtered['Price'] <= 100000]\n", "df3_filtered['new_price'] = pd.cut(df3_filtered['Price'], bins=[10000,28000,46000,64000,82000,100000], labels=[1,2,3,4,5], include_lowest=True)" @@ -508,7 +513,7 @@ }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 129, "metadata": {}, "outputs": [], "source": [ @@ -565,7 +570,7 @@ }, { "cell_type": "code", - "execution_count": 116, + "execution_count": 130, "metadata": {}, "outputs": [ { @@ -606,7 +611,7 @@ }, { "cell_type": "code", - "execution_count": 121, + "execution_count": 131, "metadata": {}, "outputs": [ { @@ -621,11 +626,11 @@ "1 0\n", "2 0\n", "Name: count, dtype: int64\n", - "Выборка после oversampling: (748, 6)\n", + "Выборка после oversampling: (750, 6)\n", "new_rating\n", - "5 252\n", + "5 251\n", + "3 250\n", "4 249\n", - "3 247\n", "1 0\n", "2 0\n", "Name: count, dtype: int64\n", @@ -666,14 +671,14 @@ }, { "cell_type": "code", - "execution_count": 122, + "execution_count": 132, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Выборка до oversampling: (4821, 7)\n", + "Выборка до oversampling и undersampling: (4821, 7)\n", "new_high\n", "1 2326\n", "2 704\n", @@ -686,18 +691,18 @@ "6 146\n", "10 31\n", "Name: count, dtype: int64\n", - "Выборка после oversampling: (22990, 7)\n", + "Выборка после oversampling: (23144, 7)\n", "new_high\n", - "6 2375\n", - "2 2353\n", - "10 2327\n", + "8 2374\n", + "6 2368\n", + "2 2351\n", + "4 2335\n", "1 2326\n", - "9 2306\n", - "8 2296\n", - "4 2293\n", - "3 2265\n", - "7 2254\n", - "5 2195\n", + "9 2317\n", + "10 2312\n", + "5 2256\n", + "7 2256\n", + "3 2249\n", "Name: count, dtype: int64\n", "Выборка после undersampling: (310, 7)\n", "new_high\n", @@ -735,14 +740,14 @@ }, { "cell_type": "code", - "execution_count": 120, + "execution_count": 133, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Выборка до oversampling: (6931, 5)\n", + "Выборка до oversampling и undersampling: (6931, 5)\n", "new_price\n", "1 5008\n", "2 1281\n", @@ -757,6 +762,14 @@ "3 5008\n", "4 5008\n", "5 5008\n", + "Name: count, dtype: int64\n", + "Выборка после undersampling: (285, 5)\n", + "new_price\n", + "1 57\n", + "2 57\n", + "3 57\n", + "4 57\n", + "5 57\n", "Name: count, dtype: int64\n" ] } diff --git a/lab_2/requirements.txt b/lab_2/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f17ad4b524faac00942277a654e70a3dbf2196e6 GIT binary patch literal 1620 zcmZvcQF7Wq5JdMoRXGYNOBg%($Q^P6mjDF|V_5-Vj1Qlr-?SD9Cq;>}*_rO1o?iX= zU8O_n^?6B8sZOo_`>s!!zNA|{Mf#jRrIk*1diJSG&w8l187l5B70)`$C-j$>lQ!^= z>hIDyY6_jt_1p|KXL0PrzrP?9p~3uXoh;KaZLa#is(%svoTCwaOk$LU&fzgeo!+AA zsES5&Zh{G&)L6$)^`aFFCbk#&un4tAYBc9kOglZTcuhSGpgSQZvoY zctfaYl>=D7cK)++PWRzKR>d8bo%`z zX6Vy{I-FrP;2rwD!E0|iq4+k8d4oIia$jp9)#+6VT*EoD&0}vr&dE4=5y{Lr zXSwS6NAJ+$1d=Bw&$*t3i{x;s4Nj5(f2YaV>4@K__wb|CZ)LcW_jZo-wbENg#atDB zW5M|@?Z1Xjr#nx7h@9BFk9Qm0q};B%611GDVxr(6_unQbW8Rp+smc?N?^UYoFt?p8 zRC0y6c`L2d>22U5O6-K+d~MV^;oyuJ?S)iD8h8)&Lx%j8bduNwUXo+fImy?ap2W|M OUaR6UDsm=@LGxdg`s~*L literal 0 HcmV?d00001