еще изменения не все залилось почему то??
This commit is contained in:
parent
760d359e84
commit
beb0ee0ab8
123
lab_4/lab4.ipynb
123
lab_4/lab4.ipynb
@ -586,39 +586,12 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Проведем деление на выборки и создание ориентира"
|
"Проведем деление на выборки"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 63,
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Размер обучающей выборки: (743, 16)\n",
|
|
||||||
"Размер тестовой выборки: (186, 16)\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"from sklearn.model_selection import train_test_split\n",
|
|
||||||
"\n",
|
|
||||||
"features = ['BMI', 'Smoking', 'AlcoholDrinking', 'Stroke', 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
|
|
||||||
"target = 'HeartDisease'\n",
|
|
||||||
"\n",
|
|
||||||
"X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.2, random_state=42)\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"Размер обучающей выборки:\", X_train.shape)\n",
|
|
||||||
"print(\"Размер тестовой выборки:\", X_test.shape)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -630,7 +603,60 @@
|
|||||||
"1 796\n",
|
"1 796\n",
|
||||||
"Name: count, dtype: int64\n",
|
"Name: count, dtype: int64\n",
|
||||||
"Размер обучающей выборки: (1273, 49)\n",
|
"Размер обучающей выборки: (1273, 49)\n",
|
||||||
"Размер тестовой выборки: (319, 49)\n",
|
"Размер тестовой выборки: (319, 49)\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
|
"\n",
|
||||||
|
"features = ['BMI', 'Smoking', 'AlcoholDrinking', 'Stroke', 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
|
||||||
|
"target = 'HeartDisease'\n",
|
||||||
|
"\n",
|
||||||
|
"label_encoder = LabelEncoder()\n",
|
||||||
|
"data[target] = label_encoder.fit_transform(data[target])\n",
|
||||||
|
"\n",
|
||||||
|
"categorical_features = ['Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
|
||||||
|
"numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth']\n",
|
||||||
|
"\n",
|
||||||
|
"categorical_transformer = Pipeline(steps=[\n",
|
||||||
|
" ('onehot', OneHotEncoder(handle_unknown='ignore'))\n",
|
||||||
|
"])\n",
|
||||||
|
"\n",
|
||||||
|
"numeric_transformer = Pipeline(steps=[\n",
|
||||||
|
" ('scaler', StandardScaler())\n",
|
||||||
|
"])\n",
|
||||||
|
"\n",
|
||||||
|
"preprocessor = ColumnTransformer(\n",
|
||||||
|
" transformers=[\n",
|
||||||
|
" ('num', numeric_transformer, numeric_features),\n",
|
||||||
|
" ('cat', categorical_transformer, categorical_features)\n",
|
||||||
|
" ])\n",
|
||||||
|
"\n",
|
||||||
|
"X = preprocessor.fit_transform(data[features])\n",
|
||||||
|
"y = data[target]\n",
|
||||||
|
"\n",
|
||||||
|
"smote = SMOTE(random_state=42)\n",
|
||||||
|
"X_resampled, y_resampled = smote.fit_resample(X, y)\n",
|
||||||
|
"\n",
|
||||||
|
"print(pd.Series(y_resampled).value_counts())\n",
|
||||||
|
"\n",
|
||||||
|
"X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Размер обучающей выборки:\", X_train.shape)\n",
|
||||||
|
"print(\"Размер тестовой выборки:\", X_test.shape)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 64,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
"Лучшие гиперпараметры для логистической регрессии:\n",
|
"Лучшие гиперпараметры для логистической регрессии:\n",
|
||||||
"{'classifier__C': np.float64(0.7272998688284025), 'classifier__penalty': 'l1', 'classifier__solver': 'liblinear'}\n",
|
"{'classifier__C': np.float64(0.7272998688284025), 'classifier__penalty': 'l1', 'classifier__solver': 'liblinear'}\n",
|
||||||
"Accuracy: 0.7398\n",
|
"Accuracy: 0.7398\n",
|
||||||
@ -639,14 +665,14 @@
|
|||||||
"F1-Score: 0.7398\n",
|
"F1-Score: 0.7398\n",
|
||||||
"ROC-AUC: 0.8338\n",
|
"ROC-AUC: 0.8338\n",
|
||||||
"Лучшие гиперпараметры для случайного леса:\n",
|
"Лучшие гиперпараметры для случайного леса:\n",
|
||||||
"{'classifier__bootstrap': True, 'classifier__max_depth': np.int64(32), 'classifier__min_samples_leaf': 1, 'classifier__min_samples_split': 6, 'classifier__n_estimators': 317}\n",
|
"{'classifier__bootstrap': True, 'classifier__max_depth': np.int64(25), 'classifier__min_samples_leaf': 1, 'classifier__min_samples_split': 6, 'classifier__n_estimators': 317}\n",
|
||||||
"Accuracy: 0.9122\n",
|
"Accuracy: 0.9122\n",
|
||||||
"Precision: 0.9571\n",
|
"Precision: 0.9571\n",
|
||||||
"Recall: 0.8590\n",
|
"Recall: 0.8590\n",
|
||||||
"F1-Score: 0.9054\n",
|
"F1-Score: 0.9054\n",
|
||||||
"ROC-AUC: 0.9773\n",
|
"ROC-AUC: 0.9773\n",
|
||||||
"Лучшие гиперпараметры для градиентного бустинга:\n",
|
"Лучшие гиперпараметры для градиентного бустинга:\n",
|
||||||
"{'classifier__learning_rate': np.float64(0.17269984907963387), 'classifier__max_depth': np.int64(96), 'classifier__min_samples_leaf': 8, 'classifier__min_samples_split': 8, 'classifier__n_estimators': 294, 'classifier__subsample': np.float64(0.8288064461501716)}\n",
|
"{'classifier__learning_rate': np.float64(0.17269984907963387), 'classifier__max_depth': np.int64(52), 'classifier__min_samples_leaf': 8, 'classifier__min_samples_split': 8, 'classifier__n_estimators': 294, 'classifier__subsample': np.float64(0.8288064461501716)}\n",
|
||||||
"Accuracy: 0.9185\n",
|
"Accuracy: 0.9185\n",
|
||||||
"Precision: 0.9577\n",
|
"Precision: 0.9577\n",
|
||||||
"Recall: 0.8718\n",
|
"Recall: 0.8718\n",
|
||||||
@ -692,39 +718,6 @@
|
|||||||
"from scipy.stats import uniform, randint\n",
|
"from scipy.stats import uniform, randint\n",
|
||||||
"from sklearn.model_selection import RandomizedSearchCV\n",
|
"from sklearn.model_selection import RandomizedSearchCV\n",
|
||||||
"\n",
|
"\n",
|
||||||
"features = ['BMI', 'Smoking', 'AlcoholDrinking', 'Stroke', 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
|
|
||||||
"target = 'HeartDisease'\n",
|
|
||||||
"\n",
|
|
||||||
"label_encoder = LabelEncoder()\n",
|
|
||||||
"data[target] = label_encoder.fit_transform(data[target])\n",
|
|
||||||
"\n",
|
|
||||||
"categorical_features = ['Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
|
|
||||||
"numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth']\n",
|
|
||||||
"\n",
|
|
||||||
"categorical_transformer = Pipeline(steps=[\n",
|
|
||||||
" ('onehot', OneHotEncoder(handle_unknown='ignore'))\n",
|
|
||||||
"])\n",
|
|
||||||
"\n",
|
|
||||||
"numeric_transformer = Pipeline(steps=[\n",
|
|
||||||
" ('scaler', StandardScaler())\n",
|
|
||||||
"])\n",
|
|
||||||
"\n",
|
|
||||||
"preprocessor = ColumnTransformer(\n",
|
|
||||||
" transformers=[\n",
|
|
||||||
" ('num', numeric_transformer, numeric_features),\n",
|
|
||||||
" ('cat', categorical_transformer, categorical_features)\n",
|
|
||||||
" ])\n",
|
|
||||||
"\n",
|
|
||||||
"X = preprocessor.fit_transform(data[features])\n",
|
|
||||||
"y = data[target]\n",
|
|
||||||
"\n",
|
|
||||||
"smote = SMOTE(random_state=42)\n",
|
|
||||||
"X_resampled, y_resampled = smote.fit_resample(X, y)\n",
|
|
||||||
"\n",
|
|
||||||
"print(pd.Series(y_resampled).value_counts())\n",
|
|
||||||
"\n",
|
|
||||||
"X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)\n",
|
|
||||||
"\n",
|
|
||||||
"def evaluate_model(model, X_test, y_test):\n",
|
"def evaluate_model(model, X_test, y_test):\n",
|
||||||
" y_pred = model.predict(X_test)\n",
|
" y_pred = model.predict(X_test)\n",
|
||||||
" y_pred_proba = model.predict_proba(X_test)[:, 1]\n",
|
" y_pred_proba = model.predict_proba(X_test)[:, 1]\n",
|
||||||
|
Loading…
Reference in New Issue
Block a user