2388 lines
111 KiB
Plaintext
2388 lines
111 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"attachments": {},
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Регрессия.\n",
|
||
"\n",
|
||
" - Прогнозирование вероятности IT-направления:\n",
|
||
" Цель: предсказать вероятность того, что студент обучается на IT-направлении, используя такие параметры, как уровень образования, тип учебного заведения, финансовое положение, возраст и уровень гибкости.\n",
|
||
"\n",
|
||
"Классификация.\n",
|
||
"\n",
|
||
" - Распределение студентов по типам учебных заведений:\n",
|
||
" Цель: отнести каждого студента к одному из типов учебных заведений (например, государственное или частное), используя данные о его образовании, возрасте, месте проживания и финансовом положении.\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Education Level</th>\n",
|
||
" <th>Institution Type</th>\n",
|
||
" <th>Gender</th>\n",
|
||
" <th>Age</th>\n",
|
||
" <th>Device</th>\n",
|
||
" <th>IT Student</th>\n",
|
||
" <th>Location</th>\n",
|
||
" <th>Financial Condition</th>\n",
|
||
" <th>Internet Type</th>\n",
|
||
" <th>Network Type</th>\n",
|
||
" <th>Flexibility Level</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>University</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Male</td>\n",
|
||
" <td>23</td>\n",
|
||
" <td>Tab</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>University</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>23</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Mobile Data</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>College</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>School</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Mobile Data</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>School</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Poor</td>\n",
|
||
" <td>Mobile Data</td>\n",
|
||
" <td>3G</td>\n",
|
||
" <td>Low</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1200</th>\n",
|
||
" <td>College</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Low</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1201</th>\n",
|
||
" <td>College</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Rural</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1202</th>\n",
|
||
" <td>School</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Male</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Mobile Data</td>\n",
|
||
" <td>3G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1203</th>\n",
|
||
" <td>College</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Rural</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Low</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1204</th>\n",
|
||
" <td>School</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Poor</td>\n",
|
||
" <td>Mobile Data</td>\n",
|
||
" <td>3G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>1205 rows × 11 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Education Level Institution Type Gender Age Device IT Student \\\n",
|
||
"0 University 1 Male 23 Tab 0 \n",
|
||
"1 University 1 Female 23 Mobile 0 \n",
|
||
"2 College 0 Female 18 Mobile 0 \n",
|
||
"3 School 1 Female 11 Mobile 0 \n",
|
||
"4 School 1 Female 18 Mobile 0 \n",
|
||
"... ... ... ... ... ... ... \n",
|
||
"1200 College 1 Female 18 Mobile 0 \n",
|
||
"1201 College 1 Female 18 Mobile 0 \n",
|
||
"1202 School 1 Male 11 Mobile 0 \n",
|
||
"1203 College 1 Female 18 Mobile 0 \n",
|
||
"1204 School 1 Female 11 Mobile 0 \n",
|
||
"\n",
|
||
" Location Financial Condition Internet Type Network Type Flexibility Level \n",
|
||
"0 Town Mid Wifi 4G Moderate \n",
|
||
"1 Town Mid Mobile Data 4G Moderate \n",
|
||
"2 Town Mid Wifi 4G Moderate \n",
|
||
"3 Town Mid Mobile Data 4G Moderate \n",
|
||
"4 Town Poor Mobile Data 3G Low \n",
|
||
"... ... ... ... ... ... \n",
|
||
"1200 Town Mid Wifi 4G Low \n",
|
||
"1201 Rural Mid Wifi 4G Moderate \n",
|
||
"1202 Town Mid Mobile Data 3G Moderate \n",
|
||
"1203 Rural Mid Wifi 4G Low \n",
|
||
"1204 Town Poor Mobile Data 3G Moderate \n",
|
||
"\n",
|
||
"[1205 rows x 11 columns]"
|
||
]
|
||
},
|
||
"execution_count": 31,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"from sklearn import set_config\n",
|
||
"\n",
|
||
"set_config(transform_output=\"pandas\")\n",
|
||
"\n",
|
||
"random_state=9\n",
|
||
"\n",
|
||
"df = pd.read_csv(\"students_education.csv\")\n",
|
||
"def Institution_Type(value):\n",
|
||
" if value == \"Private\":\n",
|
||
" return 1\n",
|
||
" elif value == \"Public\":\n",
|
||
" return 0\n",
|
||
"\n",
|
||
"df['Institution Type'] = df['Institution Type'].map(Institution_Type)\n",
|
||
"\n",
|
||
"df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Education Level</th>\n",
|
||
" <th>Institution Type</th>\n",
|
||
" <th>Gender</th>\n",
|
||
" <th>Age</th>\n",
|
||
" <th>Device</th>\n",
|
||
" <th>IT Student</th>\n",
|
||
" <th>Location</th>\n",
|
||
" <th>Financial Condition</th>\n",
|
||
" <th>Internet Type</th>\n",
|
||
" <th>Network Type</th>\n",
|
||
" <th>Flexibility Level</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>University</td>\n",
|
||
" <td>Private</td>\n",
|
||
" <td>Male</td>\n",
|
||
" <td>23</td>\n",
|
||
" <td>Tab</td>\n",
|
||
" <td>No</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>University</td>\n",
|
||
" <td>Private</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>23</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>No</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Mobile Data</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>College</td>\n",
|
||
" <td>Public</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>No</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>School</td>\n",
|
||
" <td>Private</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>No</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Mobile Data</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>School</td>\n",
|
||
" <td>Private</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>No</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Poor</td>\n",
|
||
" <td>Mobile Data</td>\n",
|
||
" <td>3G</td>\n",
|
||
" <td>Low</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1200</th>\n",
|
||
" <td>College</td>\n",
|
||
" <td>Private</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>No</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Low</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1201</th>\n",
|
||
" <td>College</td>\n",
|
||
" <td>Private</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>No</td>\n",
|
||
" <td>Rural</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1202</th>\n",
|
||
" <td>School</td>\n",
|
||
" <td>Private</td>\n",
|
||
" <td>Male</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>No</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Mobile Data</td>\n",
|
||
" <td>3G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1203</th>\n",
|
||
" <td>College</td>\n",
|
||
" <td>Private</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>No</td>\n",
|
||
" <td>Rural</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Low</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1204</th>\n",
|
||
" <td>School</td>\n",
|
||
" <td>Private</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>No</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Poor</td>\n",
|
||
" <td>Mobile Data</td>\n",
|
||
" <td>3G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>1205 rows × 11 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Education Level Institution Type Gender Age Device IT Student \\\n",
|
||
"0 University Private Male 23 Tab No \n",
|
||
"1 University Private Female 23 Mobile No \n",
|
||
"2 College Public Female 18 Mobile No \n",
|
||
"3 School Private Female 11 Mobile No \n",
|
||
"4 School Private Female 18 Mobile No \n",
|
||
"... ... ... ... ... ... ... \n",
|
||
"1200 College Private Female 18 Mobile No \n",
|
||
"1201 College Private Female 18 Mobile No \n",
|
||
"1202 School Private Male 11 Mobile No \n",
|
||
"1203 College Private Female 18 Mobile No \n",
|
||
"1204 School Private Female 11 Mobile No \n",
|
||
"\n",
|
||
" Location Financial Condition Internet Type Network Type Flexibility Level \n",
|
||
"0 Town Mid Wifi 4G Moderate \n",
|
||
"1 Town Mid Mobile Data 4G Moderate \n",
|
||
"2 Town Mid Wifi 4G Moderate \n",
|
||
"3 Town Mid Mobile Data 4G Moderate \n",
|
||
"4 Town Poor Mobile Data 3G Low \n",
|
||
"... ... ... ... ... ... \n",
|
||
"1200 Town Mid Wifi 4G Low \n",
|
||
"1201 Rural Mid Wifi 4G Moderate \n",
|
||
"1202 Town Mid Mobile Data 3G Moderate \n",
|
||
"1203 Rural Mid Wifi 4G Low \n",
|
||
"1204 Town Poor Mobile Data 3G Moderate \n",
|
||
"\n",
|
||
"[1205 rows x 11 columns]"
|
||
]
|
||
},
|
||
"execution_count": 27,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"pd.read_csv(\"students_education.csv\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"'X_train'"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Education Level</th>\n",
|
||
" <th>Gender</th>\n",
|
||
" <th>Age</th>\n",
|
||
" <th>Device</th>\n",
|
||
" <th>IT Student</th>\n",
|
||
" <th>Location</th>\n",
|
||
" <th>Financial Condition</th>\n",
|
||
" <th>Internet Type</th>\n",
|
||
" <th>Network Type</th>\n",
|
||
" <th>Flexibility Level</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>294</th>\n",
|
||
" <td>School</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Rich</td>\n",
|
||
" <td>Mobile Data</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Low</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>876</th>\n",
|
||
" <td>School</td>\n",
|
||
" <td>Male</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Mobile Data</td>\n",
|
||
" <td>3G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>382</th>\n",
|
||
" <td>School</td>\n",
|
||
" <td>Male</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Mobile Data</td>\n",
|
||
" <td>3G</td>\n",
|
||
" <td>Low</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>634</th>\n",
|
||
" <td>University</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>23</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>3G</td>\n",
|
||
" <td>Low</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>906</th>\n",
|
||
" <td>School</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>3G</td>\n",
|
||
" <td>Low</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1044</th>\n",
|
||
" <td>College</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1095</th>\n",
|
||
" <td>University</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>23</td>\n",
|
||
" <td>Computer</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Rich</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>High</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1130</th>\n",
|
||
" <td>School</td>\n",
|
||
" <td>Male</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Poor</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Low</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>860</th>\n",
|
||
" <td>University</td>\n",
|
||
" <td>Male</td>\n",
|
||
" <td>23</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Mobile Data</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Low</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1126</th>\n",
|
||
" <td>University</td>\n",
|
||
" <td>Male</td>\n",
|
||
" <td>23</td>\n",
|
||
" <td>Computer</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Rural</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Mobile Data</td>\n",
|
||
" <td>3G</td>\n",
|
||
" <td>Low</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>964 rows × 10 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Education Level Gender Age Device IT Student Location \\\n",
|
||
"294 School Female 9 Mobile 0 Town \n",
|
||
"876 School Male 11 Mobile 0 Town \n",
|
||
"382 School Male 11 Mobile 0 Town \n",
|
||
"634 University Female 23 Mobile 0 Town \n",
|
||
"906 School Female 11 Mobile 0 Town \n",
|
||
"... ... ... ... ... ... ... \n",
|
||
"1044 College Female 18 Mobile 0 Town \n",
|
||
"1095 University Female 23 Computer 1 Town \n",
|
||
"1130 School Male 11 Mobile 0 Town \n",
|
||
"860 University Male 23 Mobile 0 Town \n",
|
||
"1126 University Male 23 Computer 1 Rural \n",
|
||
"\n",
|
||
" Financial Condition Internet Type Network Type Flexibility Level \n",
|
||
"294 Rich Mobile Data 4G Low \n",
|
||
"876 Mid Mobile Data 3G Moderate \n",
|
||
"382 Mid Mobile Data 3G Low \n",
|
||
"634 Mid Wifi 3G Low \n",
|
||
"906 Mid Wifi 3G Low \n",
|
||
"... ... ... ... ... \n",
|
||
"1044 Mid Wifi 4G Moderate \n",
|
||
"1095 Rich Wifi 4G High \n",
|
||
"1130 Poor Wifi 4G Low \n",
|
||
"860 Mid Mobile Data 4G Low \n",
|
||
"1126 Mid Mobile Data 3G Low \n",
|
||
"\n",
|
||
"[964 rows x 10 columns]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"'y_class_train'"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"294 0\n",
|
||
"876 1\n",
|
||
"382 1\n",
|
||
"634 0\n",
|
||
"906 0\n",
|
||
" ..\n",
|
||
"1044 1\n",
|
||
"1095 1\n",
|
||
"1130 1\n",
|
||
"860 1\n",
|
||
"1126 1\n",
|
||
"Name: Institution Type, Length: 964, dtype: int64"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"'X_test'"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Education Level</th>\n",
|
||
" <th>Gender</th>\n",
|
||
" <th>Age</th>\n",
|
||
" <th>Device</th>\n",
|
||
" <th>IT Student</th>\n",
|
||
" <th>Location</th>\n",
|
||
" <th>Financial Condition</th>\n",
|
||
" <th>Internet Type</th>\n",
|
||
" <th>Network Type</th>\n",
|
||
" <th>Flexibility Level</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>101</th>\n",
|
||
" <td>School</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>Computer</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>946</th>\n",
|
||
" <td>College</td>\n",
|
||
" <td>Male</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>306</th>\n",
|
||
" <td>College</td>\n",
|
||
" <td>Male</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>Tab</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>109</th>\n",
|
||
" <td>University</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>23</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>3G</td>\n",
|
||
" <td>High</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1061</th>\n",
|
||
" <td>University</td>\n",
|
||
" <td>Male</td>\n",
|
||
" <td>23</td>\n",
|
||
" <td>Computer</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Rural</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Mobile Data</td>\n",
|
||
" <td>3G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>908</th>\n",
|
||
" <td>School</td>\n",
|
||
" <td>Male</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Rich</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1135</th>\n",
|
||
" <td>University</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>Computer</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>894</th>\n",
|
||
" <td>School</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Poor</td>\n",
|
||
" <td>Mobile Data</td>\n",
|
||
" <td>3G</td>\n",
|
||
" <td>Low</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>866</th>\n",
|
||
" <td>School</td>\n",
|
||
" <td>Male</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>Mobile</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Mid</td>\n",
|
||
" <td>Mobile Data</td>\n",
|
||
" <td>3G</td>\n",
|
||
" <td>Low</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1006</th>\n",
|
||
" <td>University</td>\n",
|
||
" <td>Female</td>\n",
|
||
" <td>23</td>\n",
|
||
" <td>Computer</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>Town</td>\n",
|
||
" <td>Rich</td>\n",
|
||
" <td>Wifi</td>\n",
|
||
" <td>4G</td>\n",
|
||
" <td>Moderate</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>241 rows × 10 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Education Level Gender Age Device IT Student Location \\\n",
|
||
"101 School Female 11 Computer 0 Town \n",
|
||
"946 College Male 18 Mobile 0 Town \n",
|
||
"306 College Male 18 Tab 1 Town \n",
|
||
"109 University Female 23 Mobile 0 Town \n",
|
||
"1061 University Male 23 Computer 1 Rural \n",
|
||
"... ... ... ... ... ... ... \n",
|
||
"908 School Male 10 Mobile 0 Town \n",
|
||
"1135 University Female 18 Computer 1 Town \n",
|
||
"894 School Female 10 Mobile 0 Town \n",
|
||
"866 School Male 11 Mobile 0 Town \n",
|
||
"1006 University Female 23 Computer 0 Town \n",
|
||
"\n",
|
||
" Financial Condition Internet Type Network Type Flexibility Level \n",
|
||
"101 Mid Wifi 4G Moderate \n",
|
||
"946 Mid Wifi 4G Moderate \n",
|
||
"306 Mid Wifi 4G Moderate \n",
|
||
"109 Mid Wifi 3G High \n",
|
||
"1061 Mid Mobile Data 3G Moderate \n",
|
||
"... ... ... ... ... \n",
|
||
"908 Rich Wifi 4G Moderate \n",
|
||
"1135 Mid Wifi 4G Moderate \n",
|
||
"894 Poor Mobile Data 3G Low \n",
|
||
"866 Mid Mobile Data 3G Low \n",
|
||
"1006 Rich Wifi 4G Moderate \n",
|
||
"\n",
|
||
"[241 rows x 10 columns]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"'y_class_test'"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"101 1\n",
|
||
"946 1\n",
|
||
"306 0\n",
|
||
"109 1\n",
|
||
"1061 1\n",
|
||
" ..\n",
|
||
"908 1\n",
|
||
"1135 1\n",
|
||
"894 1\n",
|
||
"866 1\n",
|
||
"1006 1\n",
|
||
"Name: Institution Type, Length: 241, dtype: int64"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"from sklearn.utils import resample\n",
|
||
"import pandas as pd\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"from sklearn.preprocessing import LabelEncoder\n",
|
||
"from sklearn import metrics\n",
|
||
"from imblearn.over_sampling import RandomOverSampler\n",
|
||
"from imblearn.under_sampling import RandomUnderSampler\n",
|
||
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
|
||
"from sklearn.metrics import ConfusionMatrixDisplay\n",
|
||
"from sklearn.compose import ColumnTransformer\n",
|
||
"from sklearn.pipeline import Pipeline\n",
|
||
"from sklearn.impute import SimpleImputer\n",
|
||
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
|
||
"from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, RandomForestClassifier, GradientBoostingClassifier\n",
|
||
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
|
||
"from sklearn.linear_model import SGDClassifier, SGDRegressor\n",
|
||
"from sklearn.metrics import (\n",
|
||
" precision_score, recall_score, accuracy_score, roc_auc_score, f1_score,\n",
|
||
" matthews_corrcoef, cohen_kappa_score, confusion_matrix\n",
|
||
")\n",
|
||
"from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error\n",
|
||
"import numpy as np\n",
|
||
"import featuretools as ft\n",
|
||
"from sklearn.metrics import accuracy_score, classification_report\n",
|
||
"\n",
|
||
"# Определение признаков и целевых переменных\n",
|
||
"X = df.drop('Institution Type', axis=1)\n",
|
||
"y_class = df['Institution Type'] # Задача классификации\n",
|
||
"y_reg = df['IT Student'] # Задача регрессии\n",
|
||
"\n",
|
||
"# Списки категориальных и числовых признаков\n",
|
||
"categorical_features = ['Education Level', 'Gender', 'Device', 'IT Student', 'Location', 'Financial Condition',\n",
|
||
" 'Internet Type', 'Network Type', 'Flexibility Level']\n",
|
||
"numerical_features = ['Age']\n",
|
||
"\n",
|
||
"# Создание ColumnTransformer с обработкой неизвестных категорий\n",
|
||
"preprocessor = ColumnTransformer(\n",
|
||
" transformers=[\n",
|
||
" ('num', StandardScaler(), numerical_features),\n",
|
||
" ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)]) # Используем handle_unknown='ignore'\n",
|
||
"\n",
|
||
"# Разделение данных на обучающую и тестовую выборки\n",
|
||
"X_train, X_test, y_class_train, y_class_test, y_reg_train, y_reg_test = train_test_split(X, y_class, y_reg, test_size=0.2, random_state=42) \n",
|
||
"\n",
|
||
"def estimate_bias_variance(model, X, y):\n",
|
||
" predictions = np.array([model.fit(X, y).predict(X) for _ in range(1000)])\n",
|
||
" bias = np.mean((y - np.mean(predictions, axis=0)) ** 2)\n",
|
||
" variance = np.mean(np.var(predictions, axis=0))\n",
|
||
" return bias, variance\n",
|
||
"\n",
|
||
"display(\"X_train\", X_train)\n",
|
||
"display(\"y_class_train\", y_class_train)\n",
|
||
"\n",
|
||
"display(\"X_test\", X_test)\n",
|
||
"display(\"y_class_test\", y_class_test)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<style>#sk-container-id-1 {\n",
|
||
" /* Definition of color scheme common for light and dark mode */\n",
|
||
" --sklearn-color-text: black;\n",
|
||
" --sklearn-color-line: gray;\n",
|
||
" /* Definition of color scheme for unfitted estimators */\n",
|
||
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
|
||
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
|
||
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
|
||
" --sklearn-color-unfitted-level-3: chocolate;\n",
|
||
" /* Definition of color scheme for fitted estimators */\n",
|
||
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
|
||
" --sklearn-color-fitted-level-1: #d4ebff;\n",
|
||
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
|
||
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
|
||
"\n",
|
||
" /* Specific color for light theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-icon: #696969;\n",
|
||
"\n",
|
||
" @media (prefers-color-scheme: dark) {\n",
|
||
" /* Redefinition of color scheme for dark theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-icon: #878787;\n",
|
||
" }\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 pre {\n",
|
||
" padding: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 input.sk-hidden--visually {\n",
|
||
" border: 0;\n",
|
||
" clip: rect(1px 1px 1px 1px);\n",
|
||
" clip: rect(1px, 1px, 1px, 1px);\n",
|
||
" height: 1px;\n",
|
||
" margin: -1px;\n",
|
||
" overflow: hidden;\n",
|
||
" padding: 0;\n",
|
||
" position: absolute;\n",
|
||
" width: 1px;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-dashed-wrapped {\n",
|
||
" border: 1px dashed var(--sklearn-color-line);\n",
|
||
" margin: 0 0.4em 0.5em 0.4em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" padding-bottom: 0.4em;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-container {\n",
|
||
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
|
||
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
|
||
" so we also need the `!important` here to be able to override the\n",
|
||
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
|
||
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
|
||
" display: inline-block !important;\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-text-repr-fallback {\n",
|
||
" display: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-parallel-item,\n",
|
||
"div.sk-serial,\n",
|
||
"div.sk-item {\n",
|
||
" /* draw centered vertical line to link estimators */\n",
|
||
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
|
||
" background-size: 2px 100%;\n",
|
||
" background-repeat: no-repeat;\n",
|
||
" background-position: center center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Parallel-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-parallel-item::after {\n",
|
||
" content: \"\";\n",
|
||
" width: 100%;\n",
|
||
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
|
||
" flex-grow: 1;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-parallel {\n",
|
||
" display: flex;\n",
|
||
" align-items: stretch;\n",
|
||
" justify-content: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-parallel-item {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
|
||
" align-self: flex-end;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
|
||
" align-self: flex-start;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
|
||
" width: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Serial-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-serial {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
" align-items: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" padding-right: 1em;\n",
|
||
" padding-left: 1em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"\n",
|
||
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
|
||
"clickable and can be expanded/collapsed.\n",
|
||
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
|
||
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
|
||
"*/\n",
|
||
"\n",
|
||
"/* Pipeline and ColumnTransformer style (default) */\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-toggleable {\n",
|
||
" /* Default theme specific background. It is overwritten whether we have a\n",
|
||
" specific estimator or a Pipeline/ColumnTransformer */\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable label */\n",
|
||
"#sk-container-id-1 label.sk-toggleable__label {\n",
|
||
" cursor: pointer;\n",
|
||
" display: block;\n",
|
||
" width: 100%;\n",
|
||
" margin-bottom: 0;\n",
|
||
" padding: 0.5em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
|
||
" /* Arrow on the left of the label */\n",
|
||
" content: \"▸\";\n",
|
||
" float: left;\n",
|
||
" margin-right: 0.25em;\n",
|
||
" color: var(--sklearn-color-icon);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable content - dropdown */\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-toggleable__content {\n",
|
||
" max-height: 0;\n",
|
||
" max-width: 0;\n",
|
||
" overflow: hidden;\n",
|
||
" text-align: left;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-toggleable__content pre {\n",
|
||
" margin: 0.2em;\n",
|
||
" border-radius: 0.25em;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
||
" /* Expand drop-down */\n",
|
||
" max-height: 200px;\n",
|
||
" max-width: 100%;\n",
|
||
" overflow: auto;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
||
" content: \"▾\";\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Pipeline/ColumnTransformer-specific style */\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific style */\n",
|
||
"\n",
|
||
"/* Colorize estimator box */\n",
|
||
"#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
|
||
"#sk-container-id-1 div.sk-label label {\n",
|
||
" /* The background is the default theme color */\n",
|
||
" color: var(--sklearn-color-text-on-default-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover, darken the color of the background */\n",
|
||
"#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Label box, darken color on hover, fitted */\n",
|
||
"#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator label */\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-label label {\n",
|
||
" font-family: monospace;\n",
|
||
" font-weight: bold;\n",
|
||
" display: inline-block;\n",
|
||
" line-height: 1.2em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-label-container {\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific */\n",
|
||
"#sk-container-id-1 div.sk-estimator {\n",
|
||
" font-family: monospace;\n",
|
||
" border: 1px dotted var(--sklearn-color-border-box);\n",
|
||
" border-radius: 0.25em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" margin-bottom: 0.5em;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-estimator.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* on hover */\n",
|
||
"#sk-container-id-1 div.sk-estimator:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
|
||
"\n",
|
||
"/* Common style for \"i\" and \"?\" */\n",
|
||
"\n",
|
||
".sk-estimator-doc-link,\n",
|
||
"a:link.sk-estimator-doc-link,\n",
|
||
"a:visited.sk-estimator-doc-link {\n",
|
||
" float: right;\n",
|
||
" font-size: smaller;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1em;\n",
|
||
" height: 1em;\n",
|
||
" width: 1em;\n",
|
||
" text-decoration: none !important;\n",
|
||
" margin-left: 1ex;\n",
|
||
" /* unfitted */\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted,\n",
|
||
"a:link.sk-estimator-doc-link.fitted,\n",
|
||
"a:visited.sk-estimator-doc-link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Span, style for the box shown on hovering the info icon */\n",
|
||
".sk-estimator-doc-link span {\n",
|
||
" display: none;\n",
|
||
" z-index: 9999;\n",
|
||
" position: relative;\n",
|
||
" font-weight: normal;\n",
|
||
" right: .2ex;\n",
|
||
" padding: .5ex;\n",
|
||
" margin: .5ex;\n",
|
||
" width: min-content;\n",
|
||
" min-width: 20ex;\n",
|
||
" max-width: 50ex;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" box-shadow: 2pt 2pt 4pt #999;\n",
|
||
" /* unfitted */\n",
|
||
" background: var(--sklearn-color-unfitted-level-0);\n",
|
||
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted span {\n",
|
||
" /* fitted */\n",
|
||
" background: var(--sklearn-color-fitted-level-0);\n",
|
||
" border: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link:hover span {\n",
|
||
" display: block;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
|
||
"\n",
|
||
"#sk-container-id-1 a.estimator_doc_link {\n",
|
||
" float: right;\n",
|
||
" font-size: 1rem;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1rem;\n",
|
||
" height: 1rem;\n",
|
||
" width: 1rem;\n",
|
||
" text-decoration: none;\n",
|
||
" /* unfitted */\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 a.estimator_doc_link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"#sk-container-id-1 a.estimator_doc_link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>GridSearchCV(cv=5,\n",
|
||
" estimator=Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('cat',\n",
|
||
" OneHotEncoder(sparse_output=False),\n",
|
||
" ['Education '\n",
|
||
" 'Level',\n",
|
||
" 'Gender',\n",
|
||
" 'Device',\n",
|
||
" 'IT '\n",
|
||
" 'Student',\n",
|
||
" 'Location',\n",
|
||
" 'Financial '\n",
|
||
" 'Condition',\n",
|
||
" 'Internet '\n",
|
||
" 'Type',\n",
|
||
" 'Network '\n",
|
||
" 'Type',\n",
|
||
" 'Flexibility '\n",
|
||
" 'Level']),\n",
|
||
" ('num',\n",
|
||
" 'passthrough',\n",
|
||
" ['Age'])])),\n",
|
||
" ('classifier',\n",
|
||
" SGDClassifier(loss='log_loss',\n",
|
||
" max_iter=2000,\n",
|
||
" random_state=42))]),\n",
|
||
" param_grid={'classifier__alpha': [0.0001, 0.001, 0.01],\n",
|
||
" 'classifier__eta0': [0.01, 0.1],\n",
|
||
" 'classifier__learning_rate': ['constant', 'adaptive']},\n",
|
||
" scoring='accuracy')</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> GridSearchCV<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.model_selection.GridSearchCV.html\">?<span>Documentation for GridSearchCV</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>GridSearchCV(cv=5,\n",
|
||
" estimator=Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('cat',\n",
|
||
" OneHotEncoder(sparse_output=False),\n",
|
||
" ['Education '\n",
|
||
" 'Level',\n",
|
||
" 'Gender',\n",
|
||
" 'Device',\n",
|
||
" 'IT '\n",
|
||
" 'Student',\n",
|
||
" 'Location',\n",
|
||
" 'Financial '\n",
|
||
" 'Condition',\n",
|
||
" 'Internet '\n",
|
||
" 'Type',\n",
|
||
" 'Network '\n",
|
||
" 'Type',\n",
|
||
" 'Flexibility '\n",
|
||
" 'Level']),\n",
|
||
" ('num',\n",
|
||
" 'passthrough',\n",
|
||
" ['Age'])])),\n",
|
||
" ('classifier',\n",
|
||
" SGDClassifier(loss='log_loss',\n",
|
||
" max_iter=2000,\n",
|
||
" random_state=42))]),\n",
|
||
" param_grid={'classifier__alpha': [0.0001, 0.001, 0.01],\n",
|
||
" 'classifier__eta0': [0.01, 0.1],\n",
|
||
" 'classifier__learning_rate': ['constant', 'adaptive']},\n",
|
||
" scoring='accuracy')</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">best_estimator_: Pipeline</label><div class=\"sk-toggleable__content fitted\"><pre>Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('cat',\n",
|
||
" OneHotEncoder(sparse_output=False),\n",
|
||
" ['Education Level', 'Gender',\n",
|
||
" 'Device', 'IT Student',\n",
|
||
" 'Location',\n",
|
||
" 'Financial Condition',\n",
|
||
" 'Internet Type',\n",
|
||
" 'Network Type',\n",
|
||
" 'Flexibility Level']),\n",
|
||
" ('num', 'passthrough',\n",
|
||
" ['Age'])])),\n",
|
||
" ('classifier',\n",
|
||
" SGDClassifier(eta0=0.1, learning_rate='adaptive',\n",
|
||
" loss='log_loss', max_iter=2000,\n",
|
||
" random_state=42))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> preprocessor: ColumnTransformer<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for preprocessor: ColumnTransformer</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>ColumnTransformer(transformers=[('cat', OneHotEncoder(sparse_output=False),\n",
|
||
" ['Education Level', 'Gender', 'Device',\n",
|
||
" 'IT Student', 'Location',\n",
|
||
" 'Financial Condition', 'Internet Type',\n",
|
||
" 'Network Type', 'Flexibility Level']),\n",
|
||
" ('num', 'passthrough', ['Age'])])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">cat</label><div class=\"sk-toggleable__content fitted\"><pre>['Education Level', 'Gender', 'Device', 'IT Student', 'Location', 'Financial Condition', 'Internet Type', 'Network Type', 'Flexibility Level']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> OneHotEncoder<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.preprocessing.OneHotEncoder.html\">?<span>Documentation for OneHotEncoder</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>OneHotEncoder(sparse_output=False)</pre></div> </div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">num</label><div class=\"sk-toggleable__content fitted\"><pre>['Age']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label 
for=\"sk-estimator-id-7\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">passthrough</label><div class=\"sk-toggleable__content fitted\"><pre>passthrough</pre></div> </div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" ><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> SGDClassifier<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.linear_model.SGDClassifier.html\">?<span>Documentation for SGDClassifier</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>SGDClassifier(eta0=0.1, learning_rate='adaptive', loss='log_loss',\n",
|
||
" max_iter=2000, random_state=42)</pre></div> </div></div></div></div></div></div></div></div></div></div></div>"
|
||
],
|
||
"text/plain": [
|
||
"GridSearchCV(cv=5,\n",
|
||
" estimator=Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('cat',\n",
|
||
" OneHotEncoder(sparse_output=False),\n",
|
||
" ['Education '\n",
|
||
" 'Level',\n",
|
||
" 'Gender',\n",
|
||
" 'Device',\n",
|
||
" 'IT '\n",
|
||
" 'Student',\n",
|
||
" 'Location',\n",
|
||
" 'Financial '\n",
|
||
" 'Condition',\n",
|
||
" 'Internet '\n",
|
||
" 'Type',\n",
|
||
" 'Network '\n",
|
||
" 'Type',\n",
|
||
" 'Flexibility '\n",
|
||
" 'Level']),\n",
|
||
" ('num',\n",
|
||
" 'passthrough',\n",
|
||
" ['Age'])])),\n",
|
||
" ('classifier',\n",
|
||
" SGDClassifier(loss='log_loss',\n",
|
||
" max_iter=2000,\n",
|
||
" random_state=42))]),\n",
|
||
" param_grid={'classifier__alpha': [0.0001, 0.001, 0.01],\n",
|
||
" 'classifier__eta0': [0.01, 0.1],\n",
|
||
" 'classifier__learning_rate': ['constant', 'adaptive']},\n",
|
||
" scoring='accuracy')"
|
||
]
|
||
},
|
||
"execution_count": 33,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Задача классификации\n",
|
||
"categorical_preprocessor = OneHotEncoder(sparse_output=False)\n",
|
||
"\n",
|
||
"# Создаем общий preprocessor\n",
|
||
"preprocessor = ColumnTransformer([\n",
|
||
" (\"cat\", categorical_preprocessor, categorical_features),\n",
|
||
" (\"num\", \"passthrough\", numerical_features), \n",
|
||
"], remainder=\"drop\") \n",
|
||
"\n",
|
||
"# Построение пайплайнов\n",
|
||
"class_pipeline_rf = Pipeline(steps=[\n",
|
||
" ('preprocessor', preprocessor),\n",
|
||
" ('classifier', RandomForestClassifier(random_state=42))])\n",
|
||
"\n",
|
||
"class_pipeline_sgd = Pipeline(steps=[\n",
|
||
" ('preprocessor', preprocessor),\n",
|
||
" ('classifier', SGDClassifier(loss='log_loss', penalty='l2', random_state=42, max_iter=2000))])\n",
|
||
"\n",
|
||
"# Настройки гиперпараметров\n",
|
||
"param_grid_class_rf = {\n",
|
||
" 'classifier__n_estimators': [100, 200],\n",
|
||
" 'classifier__max_depth': [None, 10, 20]}\n",
|
||
"\n",
|
||
"param_grid_class_sgd = {\n",
|
||
" 'classifier__alpha': [0.0001, 0.001, 0.01],\n",
|
||
" 'classifier__learning_rate': ['constant', 'adaptive'],\n",
|
||
" 'classifier__eta0': [0.01, 0.1]}\n",
|
||
"\n",
|
||
"# Поиск гиперпараметров\n",
|
||
"grid_search_class_rf = GridSearchCV(class_pipeline_rf, param_grid_class_rf, cv=5, scoring='accuracy')\n",
|
||
"grid_search_class_rf.fit(X_train, y_class_train)\n",
|
||
"\n",
|
||
"grid_search_class_sgd = GridSearchCV(class_pipeline_sgd, param_grid_class_sgd, cv=5, scoring='accuracy')\n",
|
||
"grid_search_class_sgd.fit(X_train, y_class_train)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 34,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Classification Report for Random Forest:\n",
|
||
" precision recall f1-score support\n",
|
||
"\n",
|
||
" 0 0.87 0.86 0.86 76\n",
|
||
" 1 0.93 0.94 0.94 165\n",
|
||
"\n",
|
||
" accuracy 0.91 241\n",
|
||
" macro avg 0.90 0.90 0.90 241\n",
|
||
"weighted avg 0.91 0.91 0.91 241\n",
|
||
"\n",
|
||
"Confusion Matrix for Random Forest:\n",
|
||
"[[ 65 11]\n",
|
||
" [ 10 155]]\n",
|
||
"Classification Report for SGD:\n",
|
||
" precision recall f1-score support\n",
|
||
"\n",
|
||
" 0 0.72 0.58 0.64 76\n",
|
||
" 1 0.82 0.90 0.86 165\n",
|
||
"\n",
|
||
" accuracy 0.80 241\n",
|
||
" macro avg 0.77 0.74 0.75 241\n",
|
||
"weighted avg 0.79 0.80 0.79 241\n",
|
||
"\n",
|
||
"Confusion Matrix for SGD:\n",
|
||
"[[ 44 32]\n",
|
||
" [ 17 148]]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Оценка моделей\n",
|
||
"y_class_pred_rf = grid_search_class_rf.predict(X_test)\n",
|
||
"y_class_pred_sgd = grid_search_class_sgd.predict(X_test)\n",
|
||
"\n",
|
||
"print(\"Classification Report for Random Forest:\")\n",
|
||
"print(classification_report(y_class_test, y_class_pred_rf))\n",
|
||
"print(\"Confusion Matrix for Random Forest:\")\n",
|
||
"print(confusion_matrix(y_class_test, y_class_pred_rf))\n",
|
||
"\n",
|
||
"print(\"Classification Report for SGD:\")\n",
|
||
"print(classification_report(y_class_test, y_class_pred_sgd))\n",
|
||
"print(\"Confusion Matrix for SGD:\")\n",
|
||
"print(confusion_matrix(y_class_test, y_class_pred_sgd))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 35,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Classification Bias (Random Forest): 0.05186721991701245\n",
|
||
"Classification Variance (Random Forest): 0.0\n",
|
||
"Classification Bias (SGD): 0.19398340248962656\n",
|
||
"Classification Variance (SGD): 0.0\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Bias and variance estimation for the best pipeline found by each grid search\n",
|
||
"# NOTE(review): both estimates are computed on X_train / y_class_train, and the stored output of this cell\n",
|
||
"# shows a variance of exactly 0.0 for both models -- presumably estimate_bias_variance (defined elsewhere)\n",
|
||
"# refits a fixed-seed estimator on identical data; verify the helper before interpreting these numbers.\n",
|
||
"bias_class_rf, variance_class_rf = estimate_bias_variance(grid_search_class_rf.best_estimator_, X_train, y_class_train)\n",
|
||
"bias_class_sgd, variance_class_sgd = estimate_bias_variance(grid_search_class_sgd.best_estimator_, X_train, y_class_train)\n",
|
||
"\n",
|
||
"print(\"Classification Bias (Random Forest):\", bias_class_rf)\n",
|
||
"print(\"Classification Variance (Random Forest):\", variance_class_rf)\n",
|
||
"print(\"Classification Bias (SGD):\", bias_class_sgd)\n",
|
||
"print(\"Classification Variance (SGD):\", variance_class_sgd)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 36,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"c:\\Users\\novop\\Downloads\\ckmai-main\\ckmai\\.venv\\Lib\\site-packages\\sklearn\\linear_model\\_stochastic_gradient.py:1616: ConvergenceWarning: Maximum number of iteration reached before convergence. Consider increasing max_iter to improve the fit.\n",
|
||
" warnings.warn(\n",
|
||
"c:\\Users\\novop\\Downloads\\ckmai-main\\ckmai\\.venv\\Lib\\site-packages\\sklearn\\linear_model\\_stochastic_gradient.py:1616: ConvergenceWarning: Maximum number of iteration reached before convergence. Consider increasing max_iter to improve the fit.\n",
|
||
" warnings.warn(\n",
|
||
"c:\\Users\\novop\\Downloads\\ckmai-main\\ckmai\\.venv\\Lib\\site-packages\\sklearn\\linear_model\\_stochastic_gradient.py:1616: ConvergenceWarning: Maximum number of iteration reached before convergence. Consider increasing max_iter to improve the fit.\n",
|
||
" warnings.warn(\n",
|
||
"c:\\Users\\novop\\Downloads\\ckmai-main\\ckmai\\.venv\\Lib\\site-packages\\sklearn\\linear_model\\_stochastic_gradient.py:1616: ConvergenceWarning: Maximum number of iteration reached before convergence. Consider increasing max_iter to improve the fit.\n",
|
||
" warnings.warn(\n",
|
||
"c:\\Users\\novop\\Downloads\\ckmai-main\\ckmai\\.venv\\Lib\\site-packages\\sklearn\\linear_model\\_stochastic_gradient.py:1616: ConvergenceWarning: Maximum number of iteration reached before convergence. Consider increasing max_iter to improve the fit.\n",
|
||
" warnings.warn(\n",
|
||
"c:\\Users\\novop\\Downloads\\ckmai-main\\ckmai\\.venv\\Lib\\site-packages\\sklearn\\linear_model\\_stochastic_gradient.py:1616: ConvergenceWarning: Maximum number of iteration reached before convergence. Consider increasing max_iter to improve the fit.\n",
|
||
" warnings.warn(\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<style>#sk-container-id-2 {\n",
|
||
" /* Definition of color scheme common for light and dark mode */\n",
|
||
" --sklearn-color-text: black;\n",
|
||
" --sklearn-color-line: gray;\n",
|
||
" /* Definition of color scheme for unfitted estimators */\n",
|
||
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
|
||
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
|
||
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
|
||
" --sklearn-color-unfitted-level-3: chocolate;\n",
|
||
" /* Definition of color scheme for fitted estimators */\n",
|
||
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
|
||
" --sklearn-color-fitted-level-1: #d4ebff;\n",
|
||
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
|
||
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
|
||
"\n",
|
||
" /* Specific color for light theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
|
||
" --sklearn-color-icon: #696969;\n",
|
||
"\n",
|
||
" @media (prefers-color-scheme: dark) {\n",
|
||
" /* Redefinition of color scheme for dark theme */\n",
|
||
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
|
||
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
|
||
" --sklearn-color-icon: #878787;\n",
|
||
" }\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 pre {\n",
|
||
" padding: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 input.sk-hidden--visually {\n",
|
||
" border: 0;\n",
|
||
" clip: rect(1px 1px 1px 1px);\n",
|
||
" clip: rect(1px, 1px, 1px, 1px);\n",
|
||
" height: 1px;\n",
|
||
" margin: -1px;\n",
|
||
" overflow: hidden;\n",
|
||
" padding: 0;\n",
|
||
" position: absolute;\n",
|
||
" width: 1px;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-dashed-wrapped {\n",
|
||
" border: 1px dashed var(--sklearn-color-line);\n",
|
||
" margin: 0 0.4em 0.5em 0.4em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" padding-bottom: 0.4em;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-container {\n",
|
||
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
|
||
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
|
||
" so we also need the `!important` here to be able to override the\n",
|
||
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
|
||
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
|
||
" display: inline-block !important;\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-text-repr-fallback {\n",
|
||
" display: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-parallel-item,\n",
|
||
"div.sk-serial,\n",
|
||
"div.sk-item {\n",
|
||
" /* draw centered vertical line to link estimators */\n",
|
||
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
|
||
" background-size: 2px 100%;\n",
|
||
" background-repeat: no-repeat;\n",
|
||
" background-position: center center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Parallel-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-parallel-item::after {\n",
|
||
" content: \"\";\n",
|
||
" width: 100%;\n",
|
||
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
|
||
" flex-grow: 1;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-parallel {\n",
|
||
" display: flex;\n",
|
||
" align-items: stretch;\n",
|
||
" justify-content: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" position: relative;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-parallel-item {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-parallel-item:first-child::after {\n",
|
||
" align-self: flex-end;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-parallel-item:last-child::after {\n",
|
||
" align-self: flex-start;\n",
|
||
" width: 50%;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-parallel-item:only-child::after {\n",
|
||
" width: 0;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Serial-specific style estimator block */\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-serial {\n",
|
||
" display: flex;\n",
|
||
" flex-direction: column;\n",
|
||
" align-items: center;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" padding-right: 1em;\n",
|
||
" padding-left: 1em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"\n",
|
||
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
|
||
"clickable and can be expanded/collapsed.\n",
|
||
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
|
||
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
|
||
"*/\n",
|
||
"\n",
|
||
"/* Pipeline and ColumnTransformer style (default) */\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-toggleable {\n",
|
||
" /* Default theme specific background. It is overwritten whether we have a\n",
|
||
" specific estimator or a Pipeline/ColumnTransformer */\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable label */\n",
|
||
"#sk-container-id-2 label.sk-toggleable__label {\n",
|
||
" cursor: pointer;\n",
|
||
" display: block;\n",
|
||
" width: 100%;\n",
|
||
" margin-bottom: 0;\n",
|
||
" padding: 0.5em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 label.sk-toggleable__label-arrow:before {\n",
|
||
" /* Arrow on the left of the label */\n",
|
||
" content: \"▸\";\n",
|
||
" float: left;\n",
|
||
" margin-right: 0.25em;\n",
|
||
" color: var(--sklearn-color-icon);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Toggleable content - dropdown */\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-toggleable__content {\n",
|
||
" max-height: 0;\n",
|
||
" max-width: 0;\n",
|
||
" overflow: hidden;\n",
|
||
" text-align: left;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-toggleable__content.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-toggleable__content pre {\n",
|
||
" margin: 0.2em;\n",
|
||
" border-radius: 0.25em;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-toggleable__content.fitted pre {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
||
" /* Expand drop-down */\n",
|
||
" max-height: 200px;\n",
|
||
" max-width: 100%;\n",
|
||
" overflow: auto;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
||
" content: \"▾\";\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Pipeline/ColumnTransformer-specific style */\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific style */\n",
|
||
"\n",
|
||
"/* Colorize estimator box */\n",
|
||
"#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-label label.sk-toggleable__label,\n",
|
||
"#sk-container-id-2 div.sk-label label {\n",
|
||
" /* The background is the default theme color */\n",
|
||
" color: var(--sklearn-color-text-on-default-background);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover, darken the color of the background */\n",
|
||
"#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Label box, darken color on hover, fitted */\n",
|
||
"#sk-container-id-2 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator label */\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-label label {\n",
|
||
" font-family: monospace;\n",
|
||
" font-weight: bold;\n",
|
||
" display: inline-block;\n",
|
||
" line-height: 1.2em;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-label-container {\n",
|
||
" text-align: center;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Estimator-specific */\n",
|
||
"#sk-container-id-2 div.sk-estimator {\n",
|
||
" font-family: monospace;\n",
|
||
" border: 1px dotted var(--sklearn-color-border-box);\n",
|
||
" border-radius: 0.25em;\n",
|
||
" box-sizing: border-box;\n",
|
||
" margin-bottom: 0.5em;\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-estimator.fitted {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* on hover */\n",
|
||
"#sk-container-id-2 div.sk-estimator:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 div.sk-estimator.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
|
||
"\n",
|
||
"/* Common style for \"i\" and \"?\" */\n",
|
||
"\n",
|
||
".sk-estimator-doc-link,\n",
|
||
"a:link.sk-estimator-doc-link,\n",
|
||
"a:visited.sk-estimator-doc-link {\n",
|
||
" float: right;\n",
|
||
" font-size: smaller;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1em;\n",
|
||
" height: 1em;\n",
|
||
" width: 1em;\n",
|
||
" text-decoration: none !important;\n",
|
||
" margin-left: 1ex;\n",
|
||
" /* unfitted */\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted,\n",
|
||
"a:link.sk-estimator-doc-link.fitted,\n",
|
||
"a:visited.sk-estimator-doc-link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
|
||
".sk-estimator-doc-link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover,\n",
|
||
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
|
||
".sk-estimator-doc-link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* Span, style for the box shown on hovering the info icon */\n",
|
||
".sk-estimator-doc-link span {\n",
|
||
" display: none;\n",
|
||
" z-index: 9999;\n",
|
||
" position: relative;\n",
|
||
" font-weight: normal;\n",
|
||
" right: .2ex;\n",
|
||
" padding: .5ex;\n",
|
||
" margin: .5ex;\n",
|
||
" width: min-content;\n",
|
||
" min-width: 20ex;\n",
|
||
" max-width: 50ex;\n",
|
||
" color: var(--sklearn-color-text);\n",
|
||
" box-shadow: 2pt 2pt 4pt #999;\n",
|
||
" /* unfitted */\n",
|
||
" background: var(--sklearn-color-unfitted-level-0);\n",
|
||
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link.fitted span {\n",
|
||
" /* fitted */\n",
|
||
" background: var(--sklearn-color-fitted-level-0);\n",
|
||
" border: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"\n",
|
||
".sk-estimator-doc-link:hover span {\n",
|
||
" display: block;\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
|
||
"\n",
|
||
"#sk-container-id-2 a.estimator_doc_link {\n",
|
||
" float: right;\n",
|
||
" font-size: 1rem;\n",
|
||
" line-height: 1em;\n",
|
||
" font-family: monospace;\n",
|
||
" background-color: var(--sklearn-color-background);\n",
|
||
" border-radius: 1rem;\n",
|
||
" height: 1rem;\n",
|
||
" width: 1rem;\n",
|
||
" text-decoration: none;\n",
|
||
" /* unfitted */\n",
|
||
" color: var(--sklearn-color-unfitted-level-1);\n",
|
||
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 a.estimator_doc_link.fitted {\n",
|
||
" /* fitted */\n",
|
||
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
||
" color: var(--sklearn-color-fitted-level-1);\n",
|
||
"}\n",
|
||
"\n",
|
||
"/* On hover */\n",
|
||
"#sk-container-id-2 a.estimator_doc_link:hover {\n",
|
||
" /* unfitted */\n",
|
||
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
||
" color: var(--sklearn-color-background);\n",
|
||
" text-decoration: none;\n",
|
||
"}\n",
|
||
"\n",
|
||
"#sk-container-id-2 a.estimator_doc_link.fitted:hover {\n",
|
||
" /* fitted */\n",
|
||
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
||
"}\n",
|
||
"</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>GridSearchCV(cv=5,\n",
|
||
" estimator=Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('cat',\n",
|
||
" OneHotEncoder(sparse_output=False),\n",
|
||
" ['Education '\n",
|
||
" 'Level',\n",
|
||
" 'Gender',\n",
|
||
" 'Device',\n",
|
||
" 'IT '\n",
|
||
" 'Student',\n",
|
||
" 'Location',\n",
|
||
" 'Financial '\n",
|
||
" 'Condition',\n",
|
||
" 'Internet '\n",
|
||
" 'Type',\n",
|
||
" 'Network '\n",
|
||
" 'Type',\n",
|
||
" 'Flexibility '\n",
|
||
" 'Level']),\n",
|
||
" ('num',\n",
|
||
" 'passthrough',\n",
|
||
" ['Age'])])),\n",
|
||
" ('regressor',\n",
|
||
" SGDRegressor(max_iter=2000,\n",
|
||
" random_state=42))]),\n",
|
||
" param_grid={'regressor__alpha': [0.0001, 0.001, 0.01],\n",
|
||
" 'regressor__eta0': [0.01, 0.1],\n",
|
||
" 'regressor__learning_rate': ['constant', 'adaptive']},\n",
|
||
" scoring='r2')</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" ><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> GridSearchCV<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.model_selection.GridSearchCV.html\">?<span>Documentation for GridSearchCV</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>GridSearchCV(cv=5,\n",
|
||
" estimator=Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('cat',\n",
|
||
" OneHotEncoder(sparse_output=False),\n",
|
||
" ['Education '\n",
|
||
" 'Level',\n",
|
||
" 'Gender',\n",
|
||
" 'Device',\n",
|
||
" 'IT '\n",
|
||
" 'Student',\n",
|
||
" 'Location',\n",
|
||
" 'Financial '\n",
|
||
" 'Condition',\n",
|
||
" 'Internet '\n",
|
||
" 'Type',\n",
|
||
" 'Network '\n",
|
||
" 'Type',\n",
|
||
" 'Flexibility '\n",
|
||
" 'Level']),\n",
|
||
" ('num',\n",
|
||
" 'passthrough',\n",
|
||
" ['Age'])])),\n",
|
||
" ('regressor',\n",
|
||
" SGDRegressor(max_iter=2000,\n",
|
||
" random_state=42))]),\n",
|
||
" param_grid={'regressor__alpha': [0.0001, 0.001, 0.01],\n",
|
||
" 'regressor__eta0': [0.01, 0.1],\n",
|
||
" 'regressor__learning_rate': ['constant', 'adaptive']},\n",
|
||
" scoring='r2')</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-10\" type=\"checkbox\" ><label for=\"sk-estimator-id-10\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">best_estimator_: Pipeline</label><div class=\"sk-toggleable__content fitted\"><pre>Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('cat',\n",
|
||
" OneHotEncoder(sparse_output=False),\n",
|
||
" ['Education Level', 'Gender',\n",
|
||
" 'Device', 'IT Student',\n",
|
||
" 'Location',\n",
|
||
" 'Financial Condition',\n",
|
||
" 'Internet Type',\n",
|
||
" 'Network Type',\n",
|
||
" 'Flexibility Level']),\n",
|
||
" ('num', 'passthrough',\n",
|
||
" ['Age'])])),\n",
|
||
" ('regressor',\n",
|
||
" SGDRegressor(alpha=0.001, learning_rate='adaptive',\n",
|
||
" max_iter=2000, random_state=42))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-11\" type=\"checkbox\" ><label for=\"sk-estimator-id-11\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> preprocessor: ColumnTransformer<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.compose.ColumnTransformer.html\">?<span>Documentation for preprocessor: ColumnTransformer</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>ColumnTransformer(transformers=[('cat', OneHotEncoder(sparse_output=False),\n",
|
||
" ['Education Level', 'Gender', 'Device',\n",
|
||
" 'IT Student', 'Location',\n",
|
||
" 'Financial Condition', 'Internet Type',\n",
|
||
" 'Network Type', 'Flexibility Level']),\n",
|
||
" ('num', 'passthrough', ['Age'])])</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-12\" type=\"checkbox\" ><label for=\"sk-estimator-id-12\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">cat</label><div class=\"sk-toggleable__content fitted\"><pre>['Education Level', 'Gender', 'Device', 'IT Student', 'Location', 'Financial Condition', 'Internet Type', 'Network Type', 'Flexibility Level']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-13\" type=\"checkbox\" ><label for=\"sk-estimator-id-13\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> OneHotEncoder<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.preprocessing.OneHotEncoder.html\">?<span>Documentation for OneHotEncoder</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>OneHotEncoder(sparse_output=False)</pre></div> </div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-14\" type=\"checkbox\" ><label for=\"sk-estimator-id-14\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">num</label><div class=\"sk-toggleable__content fitted\"><pre>['Age']</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-15\" type=\"checkbox\" ><label 
for=\"sk-estimator-id-15\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">passthrough</label><div class=\"sk-toggleable__content fitted\"><pre>passthrough</pre></div> </div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-16\" type=\"checkbox\" ><label for=\"sk-estimator-id-16\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> SGDRegressor<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.5/modules/generated/sklearn.linear_model.SGDRegressor.html\">?<span>Documentation for SGDRegressor</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>SGDRegressor(alpha=0.001, learning_rate='adaptive', max_iter=2000,\n",
|
||
" random_state=42)</pre></div> </div></div></div></div></div></div></div></div></div></div></div>"
|
||
],
|
||
"text/plain": [
|
||
"GridSearchCV(cv=5,\n",
|
||
" estimator=Pipeline(steps=[('preprocessor',\n",
|
||
" ColumnTransformer(transformers=[('cat',\n",
|
||
" OneHotEncoder(sparse_output=False),\n",
|
||
" ['Education '\n",
|
||
" 'Level',\n",
|
||
" 'Gender',\n",
|
||
" 'Device',\n",
|
||
" 'IT '\n",
|
||
" 'Student',\n",
|
||
" 'Location',\n",
|
||
" 'Financial '\n",
|
||
" 'Condition',\n",
|
||
" 'Internet '\n",
|
||
" 'Type',\n",
|
||
" 'Network '\n",
|
||
" 'Type',\n",
|
||
" 'Flexibility '\n",
|
||
" 'Level']),\n",
|
||
" ('num',\n",
|
||
" 'passthrough',\n",
|
||
" ['Age'])])),\n",
|
||
" ('regressor',\n",
|
||
" SGDRegressor(max_iter=2000,\n",
|
||
" random_state=42))]),\n",
|
||
" param_grid={'regressor__alpha': [0.0001, 0.001, 0.01],\n",
|
||
" 'regressor__eta0': [0.01, 0.1],\n",
|
||
" 'regressor__learning_rate': ['constant', 'adaptive']},\n",
|
||
" scoring='r2')"
|
||
]
|
||
},
|
||
"execution_count": 36,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Задача регрессии\n",
|
||
"reg_pipeline_rf = Pipeline(steps=[\n",
|
||
" ('preprocessor', preprocessor),\n",
|
||
" ('regressor', RandomForestRegressor(random_state=42))])\n",
|
||
"\n",
|
||
"reg_pipeline_sgd = Pipeline(steps=[\n",
|
||
" ('preprocessor', preprocessor),\n",
|
||
" ('regressor', SGDRegressor(loss='squared_error', penalty='l2', random_state=42, max_iter=2000))])\n",
|
||
"\n",
|
||
"# Настройка гиперпараметров для регрессии\n",
|
||
"param_grid_reg_rf = {\n",
|
||
" 'regressor__n_estimators': [100, 200],\n",
|
||
" 'regressor__max_depth': [None, 10, 20]}\n",
|
||
"\n",
|
||
"param_grid_reg_sgd = {\n",
|
||
" 'regressor__alpha': [0.0001, 0.001, 0.01],\n",
|
||
" 'regressor__learning_rate': ['constant', 'adaptive'],\n",
|
||
" 'regressor__eta0': [0.01, 0.1]}\n",
|
||
"\n",
|
||
"# Поиск гиперпараметров\n",
|
||
"grid_search_reg_rf = GridSearchCV(reg_pipeline_rf, param_grid_reg_rf, cv=5, scoring='r2')\n",
|
||
"grid_search_reg_rf.fit(X_train, y_reg_train)\n",
|
||
"\n",
|
||
"grid_search_reg_sgd = GridSearchCV(reg_pipeline_sgd, param_grid_reg_sgd, cv=5, scoring='r2')\n",
|
||
"grid_search_reg_sgd.fit(X_train, y_reg_train)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 37,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Regression Metrics for Random Forest:\n",
|
||
"Mean Squared Error: 0.0\n",
|
||
"R2 Score: 1.0\n",
|
||
"Regression Metrics for SGD:\n",
|
||
"Mean Squared Error: 4011897878459.718\n",
|
||
"R2 Score: -20174462396433.535\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Оценка моделей\n",
|
||
"y_reg_pred_rf = grid_search_reg_rf.predict(X_test)\n",
|
||
"y_reg_pred_sgd = grid_search_reg_sgd.predict(X_test)\n",
|
||
"\n",
|
||
"print(\"Regression Metrics for Random Forest:\")\n",
|
||
"print(\"Mean Squared Error:\", mean_squared_error(y_reg_test, y_reg_pred_rf))\n",
|
||
"print(\"R2 Score:\", r2_score(y_reg_test, y_reg_pred_rf))\n",
|
||
"\n",
|
||
"print(\"Regression Metrics for SGD:\")\n",
|
||
"print(\"Mean Squared Error:\", mean_squared_error(y_reg_test, y_reg_pred_sgd))\n",
|
||
"print(\"R2 Score:\", r2_score(y_reg_test, y_reg_pred_sgd))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 38,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Regression Bias (Random Forest): 0.0\n",
|
||
"Regression Variance (Random Forest): 0.0\n",
|
||
"Regression Bias (SGD): 4382665100501.0005\n",
|
||
"Regression Variance (SGD): 1.1089741676829076e-15\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Оценка смещения и дисперсии\n",
|
||
"bias_reg_rf, variance_reg_rf = estimate_bias_variance(grid_search_reg_rf.best_estimator_, X_train, y_reg_train)\n",
|
||
"bias_reg_sgd, variance_reg_sgd = estimate_bias_variance(grid_search_reg_sgd.best_estimator_, X_train, y_reg_train)\n",
|
||
"\n",
|
||
"print(\"Regression Bias (Random Forest):\", bias_reg_rf)\n",
|
||
"print(\"Regression Variance (Random Forest):\", variance_reg_rf)\n",
|
||
"print(\"Regression Bias (SGD):\", bias_reg_sgd)\n",
|
||
"print(\"Regression Variance (SGD):\", variance_reg_sgd)"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": ".venv",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.12.5"
|
||
},
|
||
"orig_nbformat": 4
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|