теперь точно все

This commit is contained in:
Максим Яковлев 2024-10-18 18:46:34 +04:00
parent d19b7e0793
commit ed7c7f7298
2 changed files with 39 additions and 26 deletions

View File

@ -10,7 +10,7 @@
},
{
"cell_type": "code",
"execution_count": 110,
"execution_count": 124,
"metadata": {},
"outputs": [
{
@ -136,7 +136,7 @@
},
{
"cell_type": "code",
"execution_count": 111,
"execution_count": 125,
"metadata": {},
"outputs": [
{
@ -215,7 +215,7 @@
},
{
"cell_type": "code",
"execution_count": 112,
"execution_count": 126,
"metadata": {},
"outputs": [
{
@ -262,7 +262,7 @@
},
{
"cell_type": "code",
"execution_count": 113,
"execution_count": 127,
"metadata": {},
"outputs": [
{
@ -471,7 +471,7 @@
"531 8 DDR4 0.0 other "
]
},
"execution_count": 113,
"execution_count": 127,
"metadata": {},
"output_type": "execute_result"
}
@ -492,15 +492,20 @@
},
{
"cell_type": "code",
"execution_count": 114,
"execution_count": 128,
"metadata": {},
"outputs": [],
"source": [
"#У первого датасета добавим новый столбец с рейтингом от 1 до 5 на основе столбца rating со значениями от 1 до 100.\n",
"\n",
"df['new_rating'] = pd.cut(df['rating'], bins=[0,20,40,60,80,100], labels=[1,2,3,4,5], include_lowest=True)\n",
"\n",
"#У второго датасета добавим столбец с категорией наибольшей цены от 1 до 10 на основе столбца High (значения от 1 до 127, границы интервалов — от 0 до 130 с шагом 13).\n",
"\n",
"df2['new_high'] = pd.cut(df2['High'], bins=[0,13,26,39,52,65,78,91,104,117,130], labels=[1,2,3,4,5,6,7,8,9,10], include_lowest=True)\n",
"\n",
"#У третьего датасета удалим слишком маленькие (меньше 10000) и слишком большие (больше 100000) значения обслуживания (столбец Price) и добавим новый столбец с категориями цен от 1 до 5.\n",
"\n",
"df3_filtered = df3[df3['Price'] >= 10000]\n",
"df3_filtered = df3_filtered[df3_filtered['Price'] <= 100000]\n",
"df3_filtered['new_price'] = pd.cut(df3_filtered['Price'], bins=[10000,28000,46000,64000,82000,100000], labels=[1,2,3,4,5], include_lowest=True)"
@ -508,7 +513,7 @@
},
{
"cell_type": "code",
"execution_count": 115,
"execution_count": 129,
"metadata": {},
"outputs": [],
"source": [
@ -565,7 +570,7 @@
},
{
"cell_type": "code",
"execution_count": 116,
"execution_count": 130,
"metadata": {},
"outputs": [
{
@ -606,7 +611,7 @@
},
{
"cell_type": "code",
"execution_count": 121,
"execution_count": 131,
"metadata": {},
"outputs": [
{
@ -621,11 +626,11 @@
"1 0\n",
"2 0\n",
"Name: count, dtype: int64\n",
"Выборка после oversampling: (748, 6)\n",
"Выборка после oversampling: (750, 6)\n",
"new_rating\n",
"5 252\n",
"5 251\n",
"3 250\n",
"4 249\n",
"3 247\n",
"1 0\n",
"2 0\n",
"Name: count, dtype: int64\n",
@ -666,14 +671,14 @@
},
{
"cell_type": "code",
"execution_count": 122,
"execution_count": 132,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Выборка до oversampling: (4821, 7)\n",
"Выборка до oversampling и undersampling: (4821, 7)\n",
"new_high\n",
"1 2326\n",
"2 704\n",
@ -686,18 +691,18 @@
"6 146\n",
"10 31\n",
"Name: count, dtype: int64\n",
"Выборка после oversampling: (22990, 7)\n",
"Выборка после oversampling: (23144, 7)\n",
"new_high\n",
"6 2375\n",
"2 2353\n",
"10 2327\n",
"8 2374\n",
"6 2368\n",
"2 2351\n",
"4 2335\n",
"1 2326\n",
"9 2306\n",
"8 2296\n",
"4 2293\n",
"3 2265\n",
"7 2254\n",
"5 2195\n",
"9 2317\n",
"10 2312\n",
"5 2256\n",
"7 2256\n",
"3 2249\n",
"Name: count, dtype: int64\n",
"Выборка после undersampling: (310, 7)\n",
"new_high\n",
@ -735,14 +740,14 @@
},
{
"cell_type": "code",
"execution_count": 120,
"execution_count": 133,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Выборка до oversampling: (6931, 5)\n",
"Выборка до oversampling и undersampling: (6931, 5)\n",
"new_price\n",
"1 5008\n",
"2 1281\n",
@ -757,6 +762,14 @@
"3 5008\n",
"4 5008\n",
"5 5008\n",
"Name: count, dtype: int64\n",
"Выборка после undersampling: (285, 5)\n",
"new_price\n",
"1 57\n",
"2 57\n",
"3 57\n",
"4 57\n",
"5 57\n",
"Name: count, dtype: int64\n"
]
}

BIN
lab_2/requirements.txt Normal file

Binary file not shown.