DavidMakarov/AIM-PIbd-31-Makarov-DV

Fork 0

shirotame c5a7faf923 1 business goal of 2

2024-11-15 00:44:23 +04:00

207 KiB

Raw Blame History

Лабораторная 4¶

Датасет: Информация об онлайн обучении учеников

Бизнес-цель 1: Улучшение доступа к онлайн-образованию для учеников с низким уровнем финансового обеспечения.

In [104]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Tuple
from pandas import DataFrame
from sklearn import ensemble, linear_model, naive_bayes, neighbors, neural_network, tree, metrics, set_config
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.discriminant_analysis import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.discriminant_analysis import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import GridSearchCV

set_config(transform_output="pandas")
df = pd.read_csv("..\\static\\csv\\students_adaptability_level_online_education.csv")
print(df.columns)

map_flexibility_to_int = {'Low': 0, 'Moderate': 1, 'High': 2}

df['Flexibility Level'] = df['Flexibility Level'].map(map_flexibility_to_int).astype('int32')

Index(['Education Level', 'Institution Type', 'Gender', 'Age', 'Device',
       'IT Student', 'Location', 'Financial Condition', 'Internet Type',
       'Network Type', 'Flexibility Level'],
      dtype='object')

Предварительно создадим колонку для работы с ней (ключевой фактор)

In [105]:

fincond_mapping = {'Poor': 2, 'Mid': 1, 'Rich': 0}
internet_type_mapping = {'Mobile Data': 1, 'Wifi': 0}
device_mapping = {'Mobile': 1, 'Computer': 0}
network_type = {'2G': 2, '3G': 1, '4G': 0}

df['Financial Score'] = df['Financial Condition'].map(fincond_mapping)
df['Internet Score'] = df['Internet Type'].map(internet_type_mapping)
df['Device Score'] = df['Device'].map(device_mapping)
df['Network Score'] = df['Network Type'].map(network_type)

df['Access Difficulty Score'] = df['Financial Score'] + df['Internet Score'] + df['Device Score'] + df['Network Score']

df['Access Difficulty'] = (df['Access Difficulty Score'] >= 3).astype(int)
df.drop(columns=['Financial Score', 'Device Score', 'Internet Score', 'Network Score', 'Access Difficulty Score'], inplace=True)

Формируем выборки

In [106]:

def split_stratified_into_train_val_test(
    df_input,
    stratify_colname="y",
    frac_train=0.6,
    frac_val=0.15,
    frac_test=0.25,
    random_state=None,
) -> Tuple[DataFrame, DataFrame, DataFrame, DataFrame, DataFrame, DataFrame]:
   
    if frac_train + frac_val + frac_test != 1.0:
        raise ValueError(
            "fractions %f, %f, %f do not add up to 1.0"
            % (frac_train, frac_val, frac_test)
        )
    if stratify_colname not in df_input.columns:
        raise ValueError("%s is not a column in the dataframe" % (stratify_colname))
    X = df_input
    y = df_input[
        [stratify_colname]
    ]
    df_train, df_temp, y_train, y_temp = train_test_split(
        X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state
    )
    if frac_val <= 0:
        assert len(df_input) == len(df_train) + len(df_temp)
        return df_train, pd.DataFrame(), df_temp, y_train, pd.DataFrame(), y_temp
    
    relative_frac_test = frac_test / (frac_val + frac_test)
    df_val, df_test, y_val, y_test = train_test_split(
        df_temp,
        y_temp,
        stratify=y_temp,
        test_size=relative_frac_test,
        random_state=random_state,
    )
    assert len(df_input) == len(df_train) + len(df_val) + len(df_test)
    return df_train, df_val, df_test, y_train, y_val, y_test

X_train, X_val, X_test, y_train, y_val, y_test = split_stratified_into_train_val_test(
    df, stratify_colname="Access Difficulty", frac_train=0.80, frac_val=0, frac_test=0.20, random_state=9
)

display("X_train", X_train)
display("y_train", y_train)

display("X_test", X_test)
display("y_test", y_test)

'X_train'

	Education Level	Institution Type	Gender	Age	Device	IT Student	Location	Financial Condition	Internet Type	Network Type	Flexibility Level	Access Difficulty
649	School	Public	Male	18	Mobile	No	Town	Mid	Wifi	4G	1	0
637	School	Private	Female	9	Mobile	No	Town	Mid	Mobile Data	4G	1	1
68	School	Public	Female	11	Mobile	No	Town	Mid	Wifi	4G	0	0
276	University	Private	Female	18	Mobile	Yes	Town	Mid	Mobile Data	3G	0	1
547	School	Public	Male	11	Mobile	No	Town	Mid	Wifi	4G	1	0
...	...	...	...	...	...	...	...	...	...	...	...	...
1097	University	Private	Male	23	Mobile	Yes	Town	Rich	Wifi	4G	0	0
854	School	Public	Female	18	Mobile	No	Town	Mid	Mobile Data	4G	0	1
756	University	Public	Male	18	Computer	No	Town	Mid	Wifi	3G	1	0
133	College	Public	Male	18	Mobile	No	Town	Poor	Mobile Data	4G	0	1
53	University	Public	Male	27	Mobile	Yes	Rural	Poor	Mobile Data	4G	1	1

964 rows × 12 columns

'y_train'

	Access Difficulty
649	0
637	1
68	0
276	1
547	0
...	...
1097	0
854	1
756	0
133	1
53	1

964 rows × 1 columns

'X_test'

	Education Level	Institution Type	Gender	Age	Device	IT Student	Location	Financial Condition	Internet Type	Network Type	Flexibility Level	Access Difficulty
265	School	Private	Female	9	Mobile	No	Town	Poor	Wifi	4G	1	1
358	School	Private	Female	10	Mobile	No	Town	Mid	Mobile Data	3G	1	1
316	University	Private	Male	23	Tab	No	Town	Mid	Wifi	4G	1	0
907	School	Private	Female	9	Mobile	No	Town	Poor	Mobile Data	4G	1	1
1042	University	Private	Male	23	Mobile	No	Town	Mid	Mobile Data	3G	1	1
...	...	...	...	...	...	...	...	...	...	...	...	...
421	School	Private	Female	10	Mobile	No	Town	Mid	Mobile Data	3G	1	1
936	University	Private	Male	23	Tab	No	Town	Rich	Wifi	4G	2	0
722	University	Private	Male	23	Mobile	Yes	Rural	Poor	Mobile Data	3G	1	1
1075	University	Private	Male	23	Computer	Yes	Town	Mid	Wifi	4G	0	0
577	University	Private	Male	23	Mobile	Yes	Town	Mid	Wifi	4G	0	0

241 rows × 12 columns

'y_test'

	Access Difficulty
265	1
358	1
316	0
907	1
1042	1
...	...
421	1
936	0
722	1
1075	0
577	0

241 rows × 1 columns

In [107]:

null_values = df.isnull().sum()
print("Пропущенные значения по столбцам:")
print(null_values)

stat_summary = df.describe()
print("\nСтатистический обзор данных:")
print(stat_summary)

Пропущенные значения по столбцам:
Education Level        0
Institution Type       0
Gender                 0
Age                    0
Device                 0
IT Student             0
Location               0
Financial Condition    0
Internet Type          0
Network Type           0
Flexibility Level      0
Access Difficulty      0
dtype: int64

Статистический обзор данных:
               Age  Flexibility Level  Access Difficulty
count  1205.000000        1205.000000        1205.000000
mean     17.065560           0.684647           0.624896
std       5.830369           0.618221           0.484351
min       9.000000           0.000000           0.000000
25%      11.000000           0.000000           0.000000
50%      18.000000           1.000000           1.000000
75%      23.000000           1.000000           1.000000
max      27.000000           2.000000           1.000000

Формируем конвеер для классификации данных и проверка конвеера

In [108]:

columns_to_drop = ['Age', 'Education Level', 'Gender', 'IT Student', 'Flexibility Level']
num_columns = [
    column
    for column in df.columns
    if column not in columns_to_drop and df[column].dtype != "object"
]
cat_columns = [
    column
    for column in df.columns
    if column not in columns_to_drop and df[column].dtype == "object"
]

num_imputer = SimpleImputer(strategy="median")
num_scaler = StandardScaler()
preprocessing_num = Pipeline(
    [
        ("imputer", num_imputer),
        ("scaler", num_scaler),
    ]
)

cat_imputer = SimpleImputer(strategy="constant", fill_value="unknown")
cat_encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False, drop="first")
preprocessing_cat = Pipeline(
    [
        ("imputer", cat_imputer),
        ("encoder", cat_encoder),
    ]
)

features_preprocessing = ColumnTransformer(
    verbose_feature_names_out=False,
    transformers=[
        ("prepocessing_num", preprocessing_num, num_columns),
        ("prepocessing_cat", preprocessing_cat, cat_columns),
    ],
    remainder="passthrough"
)

drop_columns = ColumnTransformer(
    verbose_feature_names_out=False,
    transformers=[
        ("drop_columns", "drop", columns_to_drop),
    ],
    remainder="passthrough",
)


pipeline_end = Pipeline(
    [
        ("features_preprocessing", features_preprocessing),
        ("drop_columns", drop_columns),
    ]
)

preprocessing_result = pipeline_end.fit_transform(X_train)
preprocessed_df = pd.DataFrame(
    preprocessing_result,
    columns=pipeline_end.get_feature_names_out(),
)

preprocessed_df

Out[108]:

	Access Difficulty	Institution Type_Public	Device_Mobile	Device_Tab	Location_Town	Financial Condition_Poor	Financial Condition_Rich	Internet Type_Wifi	Network Type_3G	Network Type_4G
649	-1.289567	1.0	1.0	0.0	1.0	0.0	0.0	1.0	0.0	1.0
637	0.775454	0.0	1.0	0.0	1.0	0.0	0.0	0.0	0.0	1.0
68	-1.289567	1.0	1.0	0.0	1.0	0.0	0.0	1.0	0.0	1.0
276	0.775454	0.0	1.0	0.0	1.0	0.0	0.0	0.0	1.0	0.0
547	-1.289567	1.0	1.0	0.0	1.0	0.0	0.0	1.0	0.0	1.0
...	...	...	...	...	...	...	...	...	...	...
1097	-1.289567	0.0	1.0	0.0	1.0	0.0	1.0	1.0	0.0	1.0
854	0.775454	1.0	1.0	0.0	1.0	0.0	0.0	0.0	0.0	1.0
756	-1.289567	1.0	0.0	0.0	1.0	0.0	0.0	1.0	1.0	0.0
133	0.775454	1.0	1.0	0.0	1.0	1.0	0.0	0.0	0.0	1.0
53	0.775454	1.0	1.0	0.0	0.0	1.0	0.0	0.0	0.0	1.0

964 rows × 10 columns

Формируем набор моделей

In [109]:

class_models = {
    "logistic": {"model": linear_model.LogisticRegression()},
    "ridge": {"model": linear_model.LogisticRegression(penalty="l2", class_weight="balanced")},
    "decision_tree": {
        "model": tree.DecisionTreeClassifier(max_depth=7, random_state=9)
    },
    "knn": {"model": neighbors.KNeighborsClassifier(n_neighbors=7)},
    "naive_bayes": {"model": naive_bayes.GaussianNB()},
    "gradient_boosting": {
        "model": ensemble.GradientBoostingClassifier(n_estimators=210)
    },
    "random_forest": {
        "model": ensemble.RandomForestClassifier(
            max_depth=11, class_weight="balanced", random_state=9
        )
    },
    "mlp": {
        "model": neural_network.MLPClassifier(
            hidden_layer_sizes=(7,),
            max_iter=500,
            early_stopping=True,
            random_state=9,
        )
    },
}

Обучаем модели и тестируем их

In [110]:

for model_name in class_models.keys():
    print(f"Model: {model_name}")
    model = class_models[model_name]["model"]

    model_pipeline = Pipeline([("pipeline", pipeline_end), ("model", model)])
    model_pipeline = model_pipeline.fit(X_train, y_train.values.ravel())

    y_train_predict = model_pipeline.predict(X_train)
    y_test_probs = model_pipeline.predict_proba(X_test)[:, 1]
    y_test_predict = np.where(y_test_probs > 0.5, 1, 0)

    class_models[model_name]["pipeline"] = model_pipeline
    class_models[model_name]["probs"] = y_test_probs
    class_models[model_name]["preds"] = y_test_predict

    class_models[model_name]["Precision_train"] = metrics.precision_score(
        y_train, y_train_predict
    )
    class_models[model_name]["Precision_test"] = metrics.precision_score(
        y_test, y_test_predict
    )
    class_models[model_name]["Recall_train"] = metrics.recall_score(
        y_train, y_train_predict
    )
    class_models[model_name]["Recall_test"] = metrics.recall_score(
        y_test, y_test_predict
    )
    class_models[model_name]["Accuracy_train"] = metrics.accuracy_score(
        y_train, y_train_predict
    )
    class_models[model_name]["Accuracy_test"] = metrics.accuracy_score(
        y_test, y_test_predict
    )
    class_models[model_name]["ROC_AUC_test"] = metrics.roc_auc_score(
        y_test, y_test_probs
    )
    class_models[model_name]["F1_train"] = metrics.f1_score(y_train, y_train_predict, average=None)
    class_models[model_name]["F1_test"] = metrics.f1_score(y_test, y_test_predict, average=None)
    class_models[model_name]["MCC_test"] = metrics.matthews_corrcoef(
        y_test, y_test_predict
    )
    class_models[model_name]["Cohen_kappa_test"] = metrics.cohen_kappa_score(
        y_test, y_test_predict
    )
    class_models[model_name]["Confusion_matrix"] = metrics.confusion_matrix(
        y_test, y_test_predict
    )

Model: logistic
Model: ridge
Model: decision_tree
Model: knn
Model: naive_bayes
Model: gradient_boosting
Model: random_forest
Model: mlp

d:\ulstu\cr3\sem1\MAI\AIM-PIbd-31-Makarov-DV\.venv\Lib\site-packages\sklearn\metrics\_classification.py:1531: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
d:\ulstu\cr3\sem1\MAI\AIM-PIbd-31-Makarov-DV\.venv\Lib\site-packages\sklearn\metrics\_classification.py:1531: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

In [111]:

_, ax = plt.subplots(int(len(class_models) / 2), 2, figsize=(12, 10), sharex=False, sharey=False)
for index, key in enumerate(class_models.keys()):
    c_matrix = class_models[key]["Confusion_matrix"]
    disp = ConfusionMatrixDisplay(
        confusion_matrix=c_matrix, display_labels=["Low dif-ty", "High dif-ty"]
    ).plot(ax=ax.flat[index])
    disp.ax_.set_title(key)

plt.subplots_adjust(top=1, bottom=0, hspace=0.4, wspace=0.1)
plt.show()

No description has been provided for this image

Точность, полнота, верность (аккуратность), F-мера

In [112]:

class_metrics = pd.DataFrame.from_dict(class_models, "index")[
    [
        "Precision_train",
        "Precision_test",
        "Recall_train",
        "Recall_test",
        "Accuracy_train",
        "Accuracy_test",
        "F1_train",
        "F1_test",
    ]
]
class_metrics.sort_values(
    by="Accuracy_test", ascending=False
)

Out[112]:

	Precision_train	Precision_test	Recall_train	Recall_test	Accuracy_train	Accuracy_test	F1_train	F1_test
logistic	1.0	1.0	1.0	1.0	1.000000	1.000000	[1.0, 1.0]	[1.0, 1.0]
ridge	1.0	1.0	1.0	1.0	1.000000	1.000000	[1.0, 1.0]	[1.0, 1.0]
decision_tree	1.0	1.0	1.0	1.0	1.000000	1.000000	[1.0, 1.0]	[1.0, 1.0]
knn	1.0	1.0	1.0	1.0	1.000000	1.000000	[1.0, 1.0]	[1.0, 1.0]
naive_bayes	1.0	1.0	1.0	1.0	1.000000	1.000000	[1.0, 1.0]	[1.0, 1.0]
gradient_boosting	1.0	1.0	1.0	1.0	1.000000	1.000000	[1.0, 1.0]	[1.0, 1.0]
random_forest	1.0	1.0	1.0	1.0	1.000000	1.000000	[1.0, 1.0]	[1.0, 1.0]
mlp	0.0	0.0	0.0	0.0	0.375519	0.373444	[0.5460030165912518, 0.0]	[0.5438066465256798, 0.0]

ROC-кривая, каппа Коэна, коэффициент корреляции Мэтьюса

In [113]:

class_metrics = pd.DataFrame.from_dict(class_models, "index")[
    [
        "Accuracy_test",
        "F1_test",
        "ROC_AUC_test",
        "Cohen_kappa_test",
        "MCC_test",
    ]
]
class_metrics.sort_values(by="ROC_AUC_test", ascending=False)

Out[113]:

	Accuracy_test	F1_test	ROC_AUC_test	Cohen_kappa_test	MCC_test
logistic	1.000000	[1.0, 1.0]	1.000000	1.0	1.0
ridge	1.000000	[1.0, 1.0]	1.000000	1.0	1.0
decision_tree	1.000000	[1.0, 1.0]	1.000000	1.0	1.0
knn	1.000000	[1.0, 1.0]	1.000000	1.0	1.0
naive_bayes	1.000000	[1.0, 1.0]	1.000000	1.0	1.0
gradient_boosting	1.000000	[1.0, 1.0]	1.000000	1.0	1.0
random_forest	1.000000	[1.0, 1.0]	1.000000	1.0	1.0
mlp	0.373444	[0.5438066465256798, 0.0]	0.068065	0.0	0.0

Лучшая модель

In [114]:

best_model = str(class_metrics.sort_values(by="MCC_test", ascending=False).iloc[0].name)

display(best_model)

'logistic'

Находим ошибки

In [115]:

preprocessing_result = pipeline_end.transform(X_test)
preprocessed_df = pd.DataFrame(
    preprocessing_result,
    columns=pipeline_end.get_feature_names_out(),
)

y_pred = class_models[best_model]["preds"]

error_index = y_test[y_test["Access Difficulty"] != y_pred].index.tolist()
display(f"Error items count: {len(error_index)}")

error_predicted = pd.Series(y_pred, index=y_test.index).loc[error_index]
error_df = X_test.loc[error_index].copy()
error_df.insert(loc=1, column="Predicted", value=error_predicted)
error_df.sort_index()

'Error items count: 0'

Out[115]:

	Education Level	Predicted	Institution Type	Gender	Age	Device	IT Student	Location	Financial Condition	Internet Type	Network Type	Flexibility Level	Access Difficulty

Пример использования модели (конвейера) для предсказания

In [116]:

model = class_models[best_model]["pipeline"]

example_id = 450
test = pd.DataFrame(X_test.loc[example_id, :]).T
test_preprocessed = pd.DataFrame(preprocessed_df.loc[example_id, :]).T
display(test)
display(test_preprocessed)
result_proba = model.predict_proba(test)[0]
result = model.predict(test)[0]
real = int(y_test.loc[example_id].values[0])
display(f"predicted: {result} (proba: {result_proba})")
display(f"real: {real}")

	Education Level	Institution Type	Gender	Age	Device	IT Student	Location	Financial Condition	Internet Type	Network Type	Flexibility Level	Access Difficulty
450	School	Private	Female	11	Mobile	No	Town	Poor	Mobile Data	4G	1	1

	Access Difficulty	Institution Type_Public	Device_Mobile	Device_Tab	Location_Town	Financial Condition_Poor	Financial Condition_Rich	Internet Type_Wifi	Network Type_3G	Network Type_4G
450	0.775454	0.0	1.0	0.0	1.0	1.0	0.0	0.0	0.0	1.0

'predicted: 1 (proba: [0.00310819 0.99689181])'

'real: 1'

Создаем гиперпараметры методом поиска по сетке.

In [121]:

optimized_model_type = 'random_forest'
random_state = 9

random_forest_model = class_models[optimized_model_type]["pipeline"]

param_grid = {
    "model__n_estimators": [10, 20, 30, 40, 50, 100, 150, 200, 250, 500],
    "model__max_features": ["sqrt", "log2", 2],
    "model__max_depth": [2, 3, 4, 5, 6, 7, 8, 9 ,10],
    "model__criterion": ["gini", "entropy", "log_loss"],
}

gs_optomizer = GridSearchCV(
    estimator=random_forest_model, param_grid=param_grid, n_jobs=-1
)
gs_optomizer.fit(X_train, y_train.values.ravel())
gs_optomizer.best_params_

Out[121]:

{'model__criterion': 'gini',
 'model__max_depth': 2,
 'model__max_features': 'sqrt',
 'model__n_estimators': 10}

Обучение модели с новыми гиперпараметрами

In [122]:

optimized_model = ensemble.RandomForestClassifier(
    random_state=random_state,
    criterion="gini",
    max_depth=2,
    max_features="sqrt",
    n_estimators=10,
)

result = {}

result["pipeline"] = Pipeline([("pipeline", pipeline_end), ("model", optimized_model)]).fit(X_train, y_train.values.ravel())
result["train_preds"] = result["pipeline"].predict(X_train)
result["probs"] = result["pipeline"].predict_proba(X_test)[:, 1]
result["preds"] = np.where(result["probs"] > 0.5, 1, 0)

result["Precision_train"] = metrics.precision_score(y_train, result["train_preds"])
result["Precision_test"] = metrics.precision_score(y_test, result["preds"])
result["Recall_train"] = metrics.recall_score(y_train, result["train_preds"])
result["Recall_test"] = metrics.recall_score(y_test, result["preds"])
result["Accuracy_train"] = metrics.accuracy_score(y_train, result["train_preds"])
result["Accuracy_test"] = metrics.accuracy_score(y_test, result["preds"])
result["ROC_AUC_test"] = metrics.roc_auc_score(y_test, result["probs"])
result["F1_train"] = metrics.f1_score(y_train, result["train_preds"])
result["F1_test"] = metrics.f1_score(y_test, result["preds"])
result["MCC_test"] = metrics.matthews_corrcoef(y_test, result["preds"])
result["Cohen_kappa_test"] = metrics.cohen_kappa_score(y_test, result["preds"])
result["Confusion_matrix"] = metrics.confusion_matrix(y_test, result["preds"])

Формирование данных для оценки старой и новой версии модели и сама оценка данных

In [124]:

optimized_metrics = pd.DataFrame(columns=list(result.keys()))
optimized_metrics.loc[len(optimized_metrics)] = pd.Series(
    data=class_models[optimized_model_type]
)
optimized_metrics.loc[len(optimized_metrics)] = pd.Series(
    data=result
)
optimized_metrics.insert(loc=0, column="Name", value=["Old", "New"])
optimized_metrics = optimized_metrics.set_index("Name")

optimized_metrics[
    [
        "Precision_train",
        "Precision_test",
        "Recall_train",
        "Recall_test",
        "Accuracy_train",
        "Accuracy_test",
        "F1_train",
        "F1_test",
    ]
]

Out[124]:

	Precision_train	Precision_test	Recall_train	Recall_test	Accuracy_train	Accuracy_test	F1_train	F1_test
Name
Old	1.0	1.0	1.0	1.0	1.0	1.0	[1.0, 1.0]	[1.0, 1.0]
New	1.0	1.0	1.0	1.0	1.0	1.0	1.0	1.0

In [125]:

optimized_metrics[
    [
        "Accuracy_test",
        "F1_test",
        "ROC_AUC_test",
        "Cohen_kappa_test",
        "MCC_test",
    ]
]

Out[125]:

	Accuracy_test	F1_test	ROC_AUC_test	Cohen_kappa_test	MCC_test
Name
Old	1.0	[1.0, 1.0]	1.0	1.0	1.0
New	1.0	1.0	1.0	1.0	1.0

In [127]:

_, ax = plt.subplots(1, 2, figsize=(10, 4), sharex=False, sharey=False
)

for index in range(0, len(optimized_metrics)):
    c_matrix = optimized_metrics.iloc[index]["Confusion_matrix"]
    disp = ConfusionMatrixDisplay(
        confusion_matrix=c_matrix, display_labels=["Low dif-ty", "High dif-ty"]
    ).plot(ax=ax.flat[index])

plt.subplots_adjust(top=1, bottom=0, hspace=0.4, wspace=0.3)
plt.show()

Модель идеально классифицировала объекты, которые относятся к "High difficulty" и "Low difficulty".

207 KiB Raw Blame History Unescape Escape

Лабораторная 4¶

207 KiB

Raw Blame History