import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

# Load the data
df = pd.read_csv("Hostel.csv")
df = df.fillna(0)
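# Note: fillna(0) treats every missing rating as the lowest possible score,
# which drags those rows to the bottom of each scale; a softer alternative
# (a sketch, not what this script does) would be median imputation, e.g.
# df.fillna(df.median(numeric_only=True)).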

# Split the data (test_size=0.01 keeps only 1% of the rows for testing)
train_data, test_data = train_test_split(df, test_size=0.01, random_state=42)

# Train the model
features = ['summary.score', 'atmosphere', 'cleanliness', 'facilities', 'staff', 'valueformoney']
target = 'price.from'

X_train = train_data[features]
y_train = train_data[target]

model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train)
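
# Optional sketch: a quick 5-fold cross-validation on the training portion gives
# a rough sense of how well the tree generalizes; cross_val_score is standard
# scikit-learn API, scored here with R^2.
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(DecisionTreeRegressor(random_state=42), X_train, y_train, cv=5, scoring="r2")
print(f"Cross-validated R^2: {cv_scores.mean():.3f} ± {cv_scores.std():.3f}")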

# Evaluate the model
X_test = test_data[features]
y_test = test_data[target]

predictions = model.predict(X_test)

# Print the results
print("Test rows:")
print(X_test)

# Accuracy based on the mean squared error (MSE)
mse = mean_squared_error(y_test, predictions)
accuracy = 1 - mse / y_test.var()
print(f"\nModel accuracy: {accuracy * 100:.2f}%")
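
# The quantity 1 - MSE / variance above is essentially the R^2 score; as a sanity
# check, sklearn's r2_score should report a very close value (it can differ
# slightly because pandas' .var() uses ddof=1 by default).
from sklearn.metrics import r2_score
print(f"R^2 (r2_score): {r2_score(y_test, predictions):.4f}")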

importances = model.feature_importances_
# feature_importances_ of a fitted tree already sums to 1, so this is a safeguard
normalized_importances = importances / importances.sum()

# Pick the three most important features
top_importances = normalized_importances.argsort()[-3:][::-1]
print("\nThe three most important features:")
for idx in top_importances:
    print(f"{features[idx]}: {normalized_importances[idx]}")
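
# Optional sketch: the full importance ranking can also be viewed as a pandas
# Series, which pairs every feature name with its (normalized) score.
importance_ranking = pd.Series(normalized_importances, index=features).sort_values(ascending=False)
print("\nFull importance ranking:")
print(importance_ranking)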