теперь точно все

2024-10-18 18:46:34 +04:00 · 2024-10-18 18:46:34 +04:00 · ed7c7f7298
commit ed7c7f7298
parent d19b7e0793
2 changed files with 39 additions and 26 deletions
--- a/lab_2/lab_2.ipynb
+++ b/lab_2/lab_2.ipynb
@ -10,7 +10,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 110,
+   "execution_count": 124,
   "metadata": {},
   "outputs": [
    {
@ -136,7 +136,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 111,
+   "execution_count": 125,
   "metadata": {},
   "outputs": [
    {
@ -215,7 +215,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 112,
+   "execution_count": 126,
   "metadata": {},
   "outputs": [
    {
@ -262,7 +262,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 113,
+   "execution_count": 127,
   "metadata": {},
   "outputs": [
    {
@ -471,7 +471,7 @@
       "531            8     DDR4               0.0       other  "
      ]
     },
-     "execution_count": 113,
+     "execution_count": 127,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -492,15 +492,20 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 114,
+   "execution_count": 128,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset 1: add a 1-5 'new_rating' column by binning 'rating' into five equal-width bins over 0-100.\n",
+    "\n",
    "df['new_rating'] = pd.cut(df['rating'], bins=[0,20,40,60,80,100], labels=[1,2,3,4,5], include_lowest=True)\n",
+    "\n",
    "# Dataset 2: add a 1-10 'new_high' column by binning the 'High' price into ten equal-width bins over 0-130.\n",
+    "\n",
    "df2['new_high'] = pd.cut(df2['High'], bins=[0,13,26,39,52,65,78,91,104,117,130], labels=[1,2,3,4,5,6,7,8,9,10], include_lowest=True)\n",
+    "\n",
    "# Dataset 3: keep only rows with 10000 <= Price <= 100000, then add a 1-5 'new_price' category column.\n",
+    "\n",
    "# between() is inclusive on both ends; .copy() makes the result independent of df3 so the column assignment below does not write to a slice.\n",
    "df3_filtered = df3[df3['Price'].between(10000, 100000)].copy()\n",
    "df3_filtered['new_price'] = pd.cut(df3_filtered['Price'], bins=[10000,28000,46000,64000,82000,100000], labels=[1,2,3,4,5], include_lowest=True)"
@ -508,7 +513,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 115,
+   "execution_count": 129,
   "metadata": {},
   "outputs": [],
   "source": [
@ -565,7 +570,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 116,
+   "execution_count": 130,
   "metadata": {},
   "outputs": [
    {
@ -606,7 +611,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 121,
+   "execution_count": 131,
   "metadata": {},
   "outputs": [
    {
@ -621,11 +626,11 @@
      "1      0\n",
      "2      0\n",
      "Name: count, dtype: int64\n",
-      "Выборка после oversampling:  (748, 6)\n",
+      "Выборка после oversampling:  (750, 6)\n",
      "new_rating\n",
-      "5    252\n",
+      "5    251\n",
+      "3    250\n",
      "4    249\n",
-      "3    247\n",
      "1      0\n",
      "2      0\n",
      "Name: count, dtype: int64\n",
@ -666,14 +671,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 122,
+   "execution_count": 132,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Выборка до oversampling: (4821, 7)\n",
+      "Выборка до oversampling и undersampling: (4821, 7)\n",
      "new_high\n",
      "1     2326\n",
      "2      704\n",
@ -686,18 +691,18 @@
      "6      146\n",
      "10      31\n",
      "Name: count, dtype: int64\n",
-      "Выборка после oversampling:  (22990, 7)\n",
+      "Выборка после oversampling:  (23144, 7)\n",
      "new_high\n",
-      "6     2375\n",
-      "2     2353\n",
-      "10    2327\n",
+      "8     2374\n",
+      "6     2368\n",
+      "2     2351\n",
+      "4     2335\n",
      "1     2326\n",
-      "9     2306\n",
-      "8     2296\n",
-      "4     2293\n",
-      "3     2265\n",
-      "7     2254\n",
-      "5     2195\n",
+      "9     2317\n",
+      "10    2312\n",
+      "5     2256\n",
+      "7     2256\n",
+      "3     2249\n",
      "Name: count, dtype: int64\n",
      "Выборка после undersampling:  (310, 7)\n",
      "new_high\n",
@ -735,14 +740,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 120,
+   "execution_count": 133,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Выборка до oversampling: (6931, 5)\n",
+      "Выборка до oversampling и undersampling: (6931, 5)\n",
      "new_price\n",
      "1    5008\n",
      "2    1281\n",
@ -757,6 +762,14 @@
      "3    5008\n",
      "4    5008\n",
      "5    5008\n",
+      "Name: count, dtype: int64\n",
+      "Выборка после undersampling:  (285, 5)\n",
+      "new_price\n",
+      "1    57\n",
+      "2    57\n",
+      "3    57\n",
+      "4    57\n",
+      "5    57\n",
      "Name: count, dtype: int64\n"
     ]
    }
--- a/lab_2/requirements.txt
+++ b/lab_2/requirements.txt