4 punkt
This commit is contained in:
parent
0a65d77a16
commit
dc5d1ac892
111
lab_4/lab4.ipynb
111
lab_4/lab4.ipynb
@ -72,6 +72,117 @@
|
|||||||
"print(df.head())\n",
|
"print(df.head())\n",
|
||||||
"print(df.columns)"
|
"print(df.columns)"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Предобработка данных"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"ID 0\n",
|
||||||
|
"Price 0\n",
|
||||||
|
"Levy 0\n",
|
||||||
|
"Manufacturer 0\n",
|
||||||
|
"Model 0\n",
|
||||||
|
"Prod. year 0\n",
|
||||||
|
"Category 0\n",
|
||||||
|
"Leather interior 0\n",
|
||||||
|
"Fuel type 0\n",
|
||||||
|
"Engine volume 0\n",
|
||||||
|
"Mileage 0\n",
|
||||||
|
"Cylinders 0\n",
|
||||||
|
"Gear box type 0\n",
|
||||||
|
"Drive wheels 0\n",
|
||||||
|
"Doors 0\n",
|
||||||
|
"Wheel 0\n",
|
||||||
|
"Color 0\n",
|
||||||
|
"Airbags 0\n",
|
||||||
|
"dtype: int64\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"C:\\Users\\Egor\\AppData\\Local\\Temp\\ipykernel_16964\\3618217033.py:9: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
|
||||||
|
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
|
||||||
|
"\n",
|
||||||
|
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
" df['Levy'].fillna(df['Levy'].median(), inplace=True)\n",
|
||||||
|
"C:\\Users\\Egor\\AppData\\Local\\Temp\\ipykernel_16964\\3618217033.py:10: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
|
||||||
|
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
|
||||||
|
"\n",
|
||||||
|
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
" df['Mileage'].fillna(df['Mileage'].median(), inplace=True)\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Проверка наличия пропущенных значений\n",
|
||||||
|
"print(df.isnull().sum())\n",
|
||||||
|
"\n",
|
||||||
|
"# Очистка столбца 'Levy' от нечисловых значений\n",
|
||||||
|
"df['Levy'] = pd.to_numeric(df['Levy'], errors='coerce')\n",
|
||||||
|
"df['Mileage'] = pd.to_numeric(df['Mileage'].astype(str).str.replace(' km', '', regex=False), errors='coerce')\n",
|
||||||
|
"\n",
|
||||||
|
"# Заполнение пропущенных значений\n",
|
||||||
|
"df['Levy'] = df['Levy'].fillna(df['Levy'].median())\n",
|
||||||
|
"df['Mileage'] = df['Mileage'].fillna(df['Mileage'].median())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Определение числовых и категориальных признаков\n",
|
||||||
|
"numeric_features = ['Levy', 'Prod. year', 'Engine volume', 'Mileage', 'Cylinders', 'Airbags']\n",
|
||||||
|
"categorical_features = ['Manufacturer', 'Model', 'Category', 'Leather interior', 'Fuel type', 'Gear box type', 'Drive wheels', 'Doors', 'Wheel', 'Color']\n",
|
||||||
|
"\n",
|
||||||
|
"# Преобразование категориальных признаков в числовые\n",
|
||||||
|
"df = pd.get_dummies(df, columns=categorical_features, drop_first=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Разделение данных на тренировочный и тестовый наборы"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Задача регрессии\n",
|
||||||
|
"X_reg = df.drop(['ID', 'Price'], axis=1)\n",
|
||||||
|
"y_reg = df['Price']\n",
|
||||||
|
"\n",
|
||||||
|
"# Задача классификации\n",
|
||||||
|
"df['Category'] = pd.cut(df['Price'], bins=[0, 10000, 20000, np.inf], labels=['Эконом', 'Средний', 'Премиум'])\n",
|
||||||
|
"X_class = df.drop(['ID', 'Price', 'Category'], axis=1)\n",
|
||||||
|
"y_class = df['Category']\n",
|
||||||
|
"\n",
|
||||||
|
"X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n",
|
||||||
|
"X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user