512 lines
19 KiB
Plaintext
512 lines
19 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"attachments": {},
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Ниже сформулированы задачи регрессии и классификации для датасета с ценами на бриллианты (Diamonds Prices 2022):\n",
|
|||
|
"\n",
|
|||
|
"**Регрессия**\n",
|
|||
|
"\n",
|
|||
|
"- Прогнозирование цены бриллианта:\n",
|
|||
|
" Цель: предсказать цену бриллианта по его параметрам: вес в каратах, огранка, цвет, чистота, глубина (depth), площадка (table) и размеры (x, y, z).\n",
|
|||
|
"\n",
|
|||
|
"**Классификация**\n",
|
|||
|
"\n",
|
|||
|
"- Распределение бриллиантов по типу огранки:\n",
|
|||
|
" Цель: Распределить бриллианты по категориям огранки (Fair, Good, Very Good, Premium, Ideal) с использованием данных о каратах, цвете, чистоте, глубине, площадке (table) и размерах."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"metadata": {
|
|||
|
"ExecuteTime": {
|
|||
|
"end_time": "2025-01-19T15:30:36.844706Z",
|
|||
|
"start_time": "2025-01-19T15:30:36.697706Z"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn import set_config\n",
|
|||
|
"\n",
|
|||
|
"set_config(transform_output=\"pandas\")\n",
|
|||
|
"\n",
|
|||
|
"random_state = 9\n",
|
|||
|
"\n",
|
|||
|
"file_path = 'data/Diamonds Prices2022.csv'\n",
|
|||
|
"df = pd.read_csv(file_path)\n",
|
|||
|
"\n",
|
|||
|
"# Функция для преобразования типа огранки (cut)\n",
|
|||
|
"def Cut_Type(value):\n",
|
|||
|
" if value == \"Fair\":\n",
|
|||
|
" return 0\n",
|
|||
|
" elif value == \"Good\":\n",
|
|||
|
" return 1\n",
|
|||
|
" elif value == \"Very Good\":\n",
|
|||
|
" return 2\n",
|
|||
|
" elif value == \"Premium\":\n",
|
|||
|
" return 3\n",
|
|||
|
" elif value == \"Ideal\":\n",
|
|||
|
" return 4\n",
|
|||
|
"\n",
|
|||
|
"df['Cut_Type'] = df['cut'].map(Cut_Type)\n",
|
|||
|
"\n",
|
|||
|
"df\n"
|
|||
|
],
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
" Unnamed: 0 carat cut color clarity depth table price x \\\n",
|
|||
|
"0 1 0.23 Ideal E SI2 61.5 55.0 326 3.95 \n",
|
|||
|
"1 2 0.21 Premium E SI1 59.8 61.0 326 3.89 \n",
|
|||
|
"2 3 0.23 Good E VS1 56.9 65.0 327 4.05 \n",
|
|||
|
"3 4 0.29 Premium I VS2 62.4 58.0 334 4.20 \n",
|
|||
|
"4 5 0.31 Good J SI2 63.3 58.0 335 4.34 \n",
|
|||
|
"... ... ... ... ... ... ... ... ... ... \n",
|
|||
|
"53938 53939 0.86 Premium H SI2 61.0 58.0 2757 6.15 \n",
|
|||
|
"53939 53940 0.75 Ideal D SI2 62.2 55.0 2757 5.83 \n",
|
|||
|
"53940 53941 0.71 Premium E SI1 60.5 55.0 2756 5.79 \n",
|
|||
|
"53941 53942 0.71 Premium F SI1 59.8 62.0 2756 5.74 \n",
|
|||
|
"53942 53943 0.70 Very Good E VS2 60.5 59.0 2757 5.71 \n",
|
|||
|
"\n",
|
|||
|
" y z Cut_Type \n",
|
|||
|
"0 3.98 2.43 4 \n",
|
|||
|
"1 3.84 2.31 3 \n",
|
|||
|
"2 4.07 2.31 1 \n",
|
|||
|
"3 4.23 2.63 3 \n",
|
|||
|
"4 4.35 2.75 1 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"53938 6.12 3.74 3 \n",
|
|||
|
"53939 5.87 3.64 4 \n",
|
|||
|
"53940 5.74 3.49 3 \n",
|
|||
|
"53941 5.73 3.43 3 \n",
|
|||
|
"53942 5.76 3.47 2 \n",
|
|||
|
"\n",
|
|||
|
"[53943 rows x 12 columns]"
|
|||
|
],
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Unnamed: 0</th>\n",
|
|||
|
" <th>carat</th>\n",
|
|||
|
" <th>cut</th>\n",
|
|||
|
" <th>color</th>\n",
|
|||
|
" <th>clarity</th>\n",
|
|||
|
" <th>depth</th>\n",
|
|||
|
" <th>table</th>\n",
|
|||
|
" <th>price</th>\n",
|
|||
|
" <th>x</th>\n",
|
|||
|
" <th>y</th>\n",
|
|||
|
" <th>z</th>\n",
|
|||
|
" <th>Cut_Type</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.23</td>\n",
|
|||
|
" <td>Ideal</td>\n",
|
|||
|
" <td>E</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>61.5</td>\n",
|
|||
|
" <td>55.0</td>\n",
|
|||
|
" <td>326</td>\n",
|
|||
|
" <td>3.95</td>\n",
|
|||
|
" <td>3.98</td>\n",
|
|||
|
" <td>2.43</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>0.21</td>\n",
|
|||
|
" <td>Premium</td>\n",
|
|||
|
" <td>E</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>59.8</td>\n",
|
|||
|
" <td>61.0</td>\n",
|
|||
|
" <td>326</td>\n",
|
|||
|
" <td>3.89</td>\n",
|
|||
|
" <td>3.84</td>\n",
|
|||
|
" <td>2.31</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>0.23</td>\n",
|
|||
|
" <td>Good</td>\n",
|
|||
|
" <td>E</td>\n",
|
|||
|
" <td>VS1</td>\n",
|
|||
|
" <td>56.9</td>\n",
|
|||
|
" <td>65.0</td>\n",
|
|||
|
" <td>327</td>\n",
|
|||
|
" <td>4.05</td>\n",
|
|||
|
" <td>4.07</td>\n",
|
|||
|
" <td>2.31</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>0.29</td>\n",
|
|||
|
" <td>Premium</td>\n",
|
|||
|
" <td>I</td>\n",
|
|||
|
" <td>VS2</td>\n",
|
|||
|
" <td>62.4</td>\n",
|
|||
|
" <td>58.0</td>\n",
|
|||
|
" <td>334</td>\n",
|
|||
|
" <td>4.20</td>\n",
|
|||
|
" <td>4.23</td>\n",
|
|||
|
" <td>2.63</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>0.31</td>\n",
|
|||
|
" <td>Good</td>\n",
|
|||
|
" <td>J</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>63.3</td>\n",
|
|||
|
" <td>58.0</td>\n",
|
|||
|
" <td>335</td>\n",
|
|||
|
" <td>4.34</td>\n",
|
|||
|
" <td>4.35</td>\n",
|
|||
|
" <td>2.75</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53938</th>\n",
|
|||
|
" <td>53939</td>\n",
|
|||
|
" <td>0.86</td>\n",
|
|||
|
" <td>Premium</td>\n",
|
|||
|
" <td>H</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>61.0</td>\n",
|
|||
|
" <td>58.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>6.15</td>\n",
|
|||
|
" <td>6.12</td>\n",
|
|||
|
" <td>3.74</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53939</th>\n",
|
|||
|
" <td>53940</td>\n",
|
|||
|
" <td>0.75</td>\n",
|
|||
|
" <td>Ideal</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>SI2</td>\n",
|
|||
|
" <td>62.2</td>\n",
|
|||
|
" <td>55.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>5.83</td>\n",
|
|||
|
" <td>5.87</td>\n",
|
|||
|
" <td>3.64</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53940</th>\n",
|
|||
|
" <td>53941</td>\n",
|
|||
|
" <td>0.71</td>\n",
|
|||
|
" <td>Premium</td>\n",
|
|||
|
" <td>E</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>60.5</td>\n",
|
|||
|
" <td>55.0</td>\n",
|
|||
|
" <td>2756</td>\n",
|
|||
|
" <td>5.79</td>\n",
|
|||
|
" <td>5.74</td>\n",
|
|||
|
" <td>3.49</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53941</th>\n",
|
|||
|
" <td>53942</td>\n",
|
|||
|
" <td>0.71</td>\n",
|
|||
|
" <td>Premium</td>\n",
|
|||
|
" <td>F</td>\n",
|
|||
|
" <td>SI1</td>\n",
|
|||
|
" <td>59.8</td>\n",
|
|||
|
" <td>62.0</td>\n",
|
|||
|
" <td>2756</td>\n",
|
|||
|
" <td>5.74</td>\n",
|
|||
|
" <td>5.73</td>\n",
|
|||
|
" <td>3.43</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>53942</th>\n",
|
|||
|
" <td>53943</td>\n",
|
|||
|
" <td>0.70</td>\n",
|
|||
|
" <td>Very Good</td>\n",
|
|||
|
" <td>E</td>\n",
|
|||
|
" <td>VS2</td>\n",
|
|||
|
" <td>60.5</td>\n",
|
|||
|
" <td>59.0</td>\n",
|
|||
|
" <td>2757</td>\n",
|
|||
|
" <td>5.71</td>\n",
|
|||
|
" <td>5.76</td>\n",
|
|||
|
" <td>3.47</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>53943 rows × 12 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 1,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"execution_count": 1
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"metadata": {
|
|||
|
"ExecuteTime": {
|
|||
|
"end_time": "2025-01-19T15:32:05.619593Z",
|
|||
|
"start_time": "2025-01-19T15:31:59.880477Z"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"from sklearn.utils import resample\n",
|
|||
|
"import pandas as pd\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"from sklearn.preprocessing import LabelEncoder\n",
|
|||
|
"from sklearn import metrics\n",
|
|||
|
"from imblearn.over_sampling import RandomOverSampler\n",
|
|||
|
"from imblearn.under_sampling import RandomUnderSampler\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
|
|||
|
"from sklearn.metrics import ConfusionMatrixDisplay\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.impute import SimpleImputer\n",
|
|||
|
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, RandomForestClassifier, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
|
|||
|
"from sklearn.linear_model import SGDClassifier, SGDRegressor\n",
|
|||
|
"from sklearn.metrics import (\n",
|
|||
|
" precision_score, recall_score, accuracy_score, roc_auc_score, f1_score,\n",
|
|||
|
" matthews_corrcoef, cohen_kappa_score, confusion_matrix\n",
|
|||
|
")\n",
|
|||
|
"from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import featuretools as ft\n",
|
|||
|
"from sklearn.metrics import accuracy_score, classification_report\n",
|
|||
|
"\n",
|
|||
|
"# Загрузка данных\n",
|
|||
|
"df = pd.read_csv(\"data/Diamonds Prices2022.csv\")\n",
|
|||
|
"\n",
|
|||
|
"# Определение целевых переменных\n",
|
|||
|
"# Для задачи классификации я буду использовать 'cut' как целевую переменную\n",
|
|||
|
"X = df.drop('cut', axis=1) # Убираем target переменную\n",
|
|||
|
"y_class = df['cut'] # Задача классификации (например, классификация по типу огранки)\n",
|
|||
|
"y_reg = df['price'] # Задача регрессии (например, предсказание цены бриллианта)\n",
|
|||
|
"\n",
|
|||
|
"# Преобразование категориальных переменных\n",
|
|||
|
"categorical_features = ['color', 'clarity']\n",
|
|||
|
"numerical_features = ['carat', 'depth', 'table', 'x', 'y', 'z']\n",
|
|||
|
"\n",
|
|||
|
"# Создание ColumnTransformer с обработкой неизвестных категорий\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_features),\n",
|
|||
|
" ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)]) # Используем handle_unknown='ignore'\n",
|
|||
|
"\n",
|
|||
|
"# Разделение данных на обучающую и тестовую выборки\n",
|
|||
|
"X_train, X_test, y_class_train, y_class_test, y_reg_train, y_reg_test = train_test_split(X, y_class, y_reg, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"def estimate_bias_variance(model, X, y):\n",
|
|||
|
" predictions = np.array([model.fit(X, y).predict(X) for _ in range(1000)])\n",
|
|||
|
" bias = np.mean((y - np.mean(predictions, axis=0)) ** 2)\n",
|
|||
|
" variance = np.mean(np.var(predictions, axis=0))\n",
|
|||
|
" return bias, variance\n",
|
|||
|
"\n",
|
|||
|
"# Просмотр обучающих и тестовых данных\n",
|
|||
|
"print(\"X_train\", X_train.head())\n",
|
|||
|
"print(\"y_class_train\", y_class_train.head())\n",
|
|||
|
"\n",
|
|||
|
"print(\"X_test\", X_test.head())\n",
|
|||
|
"print(\"y_class_test\", y_class_test.head())\n",
|
|||
|
"\n"
|
|||
|
],
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"X_train Unnamed: 0 carat color clarity depth table price x y z\n",
|
|||
|
"9159 9160 1.01 E SI2 60.0 60.0 4540 6.57 6.49 3.92\n",
|
|||
|
"14131 14132 1.10 H VS2 62.5 58.0 5729 6.59 6.54 4.10\n",
|
|||
|
"15757 15758 1.50 E SI2 61.5 65.0 6300 7.21 7.17 4.42\n",
|
|||
|
"24633 24634 1.53 E SI1 61.3 59.0 12968 7.40 7.35 4.52\n",
|
|||
|
"49831 49832 0.84 D SI2 64.5 60.0 2167 5.92 5.84 3.79\n",
|
|||
|
"y_class_train 9159 Very Good\n",
|
|||
|
"14131 Premium\n",
|
|||
|
"15757 Good\n",
|
|||
|
"24633 Premium\n",
|
|||
|
"49831 Fair\n",
|
|||
|
"Name: cut, dtype: object\n",
|
|||
|
"X_test Unnamed: 0 carat color clarity depth table price x y z\n",
|
|||
|
"1388 1389 0.24 G VVS1 62.1 56.0 559 3.97 4.00 2.47\n",
|
|||
|
"19841 19842 1.21 F VS2 62.9 54.0 8403 6.78 6.82 4.28\n",
|
|||
|
"41647 41648 0.50 E SI1 61.7 68.0 1238 5.09 5.03 3.12\n",
|
|||
|
"41741 41742 0.50 D SI2 62.8 56.0 1243 5.06 5.03 3.17\n",
|
|||
|
"17244 17245 1.55 E SI2 62.3 55.0 6901 7.44 7.37 4.61\n",
|
|||
|
"y_class_test 1388 Ideal\n",
|
|||
|
"19841 Very Good\n",
|
|||
|
"41647 Fair\n",
|
|||
|
"41741 Ideal\n",
|
|||
|
"17244 Ideal\n",
|
|||
|
"Name: cut, dtype: object\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"execution_count": 2
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"metadata": {
|
|||
|
"ExecuteTime": {
|
|||
|
"end_time": "2025-01-19T15:32:44.541170Z",
|
|||
|
"start_time": "2025-01-19T15:32:38.117434Z"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.ensemble import RandomForestClassifier\n",
|
|||
|
"from sklearn.metrics import classification_report, confusion_matrix\n",
|
|||
|
"\n",
|
|||
|
"# Загрузка данных\n",
|
|||
|
"file_path = 'data/Diamonds Prices2022.csv'\n",
|
|||
|
"df = pd.read_csv(file_path)\n",
|
|||
|
"\n",
|
|||
|
"# Очистка столбцов от пробелов\n",
|
|||
|
"df.columns = df.columns.str.strip()\n",
|
|||
|
"\n",
|
|||
|
"# Проверка столбцов\n",
|
|||
|
"print(df.columns)\n",
|
|||
|
"\n",
|
|||
|
"# Определение признаков и целевой переменной\n",
|
|||
|
"# Задача классификации: будем предсказывать 'cut' (тип огранки)\n",
|
|||
|
"X = df.drop('cut', axis=1) # Убираем целевую переменную\n",
|
|||
|
"y_class = df['cut'] # Целевая переменная для классификации\n",
|
|||
|
"\n",
|
|||
|
"# Преобразование категориальных признаков в числовые\n",
|
|||
|
"X = pd.get_dummies(X, drop_first=True) # Преобразуем категориальные признаки в числовые, исключая первую категорию\n",
|
|||
|
"\n",
|
|||
|
"# Разделение на обучающую и тестовую выборки\n",
|
|||
|
"X_train, X_test, y_train, y_test = train_test_split(X, y_class, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Масштабирование данных\n",
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"X_train_scaled = scaler.fit_transform(X_train)\n",
|
|||
|
"X_test_scaled = scaler.transform(X_test)\n",
|
|||
|
"\n",
|
|||
|
"# Обучение модели\n",
|
|||
|
"model = RandomForestClassifier(n_estimators=100, random_state=42)\n",
|
|||
|
"model.fit(X_train_scaled, y_train)\n",
|
|||
|
"\n",
|
|||
|
"# Прогнозирование\n",
|
|||
|
"y_pred = model.predict(X_test_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# Оценка модели\n",
|
|||
|
"print(classification_report(y_test, y_pred))\n",
|
|||
|
"print(confusion_matrix(y_test, y_pred))\n",
|
|||
|
"\n"
|
|||
|
],
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Index(['Unnamed: 0', 'carat', 'cut', 'color', 'clarity', 'depth', 'table',\n",
|
|||
|
" 'price', 'x', 'y', 'z'],\n",
|
|||
|
" dtype='object')\n",
|
|||
|
" precision recall f1-score support\n",
|
|||
|
"\n",
|
|||
|
" Fair 0.91 0.87 0.89 328\n",
|
|||
|
" Good 0.77 0.69 0.73 1000\n",
|
|||
|
" Ideal 0.82 0.92 0.87 4316\n",
|
|||
|
" Premium 0.73 0.81 0.77 2734\n",
|
|||
|
" Very Good 0.66 0.47 0.55 2411\n",
|
|||
|
"\n",
|
|||
|
" accuracy 0.77 10789\n",
|
|||
|
" macro avg 0.78 0.75 0.76 10789\n",
|
|||
|
"weighted avg 0.76 0.77 0.76 10789\n",
|
|||
|
"\n",
|
|||
|
"[[ 286 31 1 7 3]\n",
|
|||
|
" [ 19 686 18 58 219]\n",
|
|||
|
" [ 6 10 3982 157 161]\n",
|
|||
|
" [ 0 11 312 2219 192]\n",
|
|||
|
" [ 2 154 527 589 1139]]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"execution_count": 3
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.8"
|
|||
|
},
|
|||
|
"orig_nbformat": 4
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|