50 lines
1.9 KiB
Python
50 lines
1.9 KiB
Python
|
import pandas as pd
|
||
|
from sklearn.model_selection import train_test_split
|
||
|
from sklearn.linear_model import Lasso
|
||
|
from sklearn.metrics import mean_squared_error, r2_score
|
||
|
from sklearn.preprocessing import StandardScaler
|
||
|
|
||
|
# Загрузка данных
|
||
|
data = pd.read_csv('Hostel.csv')
|
||
|
|
||
|
# Выбор нужных признаков
|
||
|
features = ['price.from', 'Distance', 'atmosphere', 'cleanliness', 'facilities']
|
||
|
|
||
|
# Очистка данных и заполнение пропущенных значений
|
||
|
data['Distance'] = data['Distance'].str.replace('km from city centre', '').astype(float)
|
||
|
data['summary.score'] = data['summary.score'].fillna(data['summary.score'].mean())
|
||
|
|
||
|
# Заполнение пропущенных значений (например, средними)
|
||
|
data[features] = data[features].fillna(data[features].mean())
|
||
|
|
||
|
# Разделение данных на обучающий и тестовый наборы
|
||
|
X = data[features]
|
||
|
y = data['summary.score']
|
||
|
|
||
|
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
||
|
|
||
|
# Масштабирование данных (стандартизация)
|
||
|
scaler = StandardScaler()
|
||
|
X_train_scaled = scaler.fit_transform(X_train)
|
||
|
X_test_scaled = scaler.transform(X_test)
|
||
|
|
||
|
# Обучение модели Лассо-регрессии
|
||
|
alpha = 0.1 # параметр регуляризации
|
||
|
lasso = Lasso(alpha=alpha)
|
||
|
lasso.fit(X_train_scaled, y_train)
|
||
|
|
||
|
# Предсказание на тестовом наборе
|
||
|
y_pred = lasso.predict(X_test_scaled)
|
||
|
|
||
|
# Оценка качества модели
|
||
|
mse = mean_squared_error(y_test, y_pred)
|
||
|
accuracy = 1 - mse / y_test.var()
|
||
|
|
||
|
# Вывод точности модели
|
||
|
print(f"\nТочность модели: {accuracy * 100:.2f}%")
|
||
|
# Оценка качества модели
|
||
|
mse = mean_squared_error(y_test, y_pred)
|
||
|
r2 = r2_score(y_test, y_pred)
|
||
|
|
||
|
print(f'Mean Squared Error (MSE): {mse:.4f}')
|
||
|
print(f'R-squared: {r2:.4f}')
|