64 lines
2.2 KiB
Python
64 lines
2.2 KiB
Python
|
import numpy as np
|
|||
|
import pandas as pd
|
|||
|
from sklearn.linear_model import Lasso
|
|||
|
from sklearn.feature_selection import RFE
|
|||
|
from sklearn.ensemble import RandomForestRegressor
|
|||
|
from sklearn.preprocessing import StandardScaler
|
|||
|
|
|||
|
# Build a reproducible synthetic regression problem.
# The global NumPy seed is fixed here so that X and the noise are the same
# on every run; the draw order (X first, then noise) must not change.
np.random.seed(42)
n_samples = 100
n_features = 10
X = np.random.rand(n_samples, n_features)

# Ground-truth linear weights. The zeros (4th and 9th entries) mean that
# Feature_4 and Feature_9 contribute nothing to the target.
true_coefficients = np.array([3, -1, 2, 0, 1, 2, -3, 4, 0, -2])

# Target = linear combination of the features plus Gaussian noise (std 0.1).
noise = np.random.normal(0, 0.1, n_samples)
y = X.dot(true_coefficients) + noise
|
|||
|
|
|||
|
# Normalize the feature matrix with StandardScaler (fit and transform on the
# same data, since this script has no train/test split).
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X)

# Wrap the normalized features in a DataFrame with 1-based column names
# (Feature_1 .. Feature_<n_features>) and append the target, which is left
# unscaled.
columns = [f'Feature_{i}' for i in range(1, n_features + 1)]
df = pd.DataFrame(X_normalized, columns=columns).assign(Target=y)
|
|||
|
|
|||
|
# 1. Lasso
# Fit an L1-regularized linear model (alpha=0.1) on the normalized features
# and use the magnitude of each coefficient as that feature's score; the
# sign is discarded because only the strength of the effect matters here.
lasso = Lasso(alpha=0.1).fit(X_normalized, y)
lasso_coefs = np.abs(lasso.coef_)
|
|||
|
|
|||
|
# 2. Recursive feature elimination (RFE)
# A random forest is used as the underlying estimator and elimination runs
# down to a single feature (n_features_to_select=1).
# NOTE(review): per the scikit-learn RFE docs this should make ranking_ a
# full ordering with 1 = best feature — confirm against the installed version.
rfe = RFE(estimator=RandomForestRegressor(), n_features_to_select=1)
rfe.fit(X_normalized, y)
rfe_ranks = rfe.ranking_
|
|||
|
|
|||
|
# 3. Random forest feature importances
# A separate forest (default hyper-parameters) is fitted on all features and
# its feature_importances_ are taken as the third score.
# NOTE(review): no random_state is passed, so run-to-run reproducibility
# presumably relies on the global np.random.seed(42) set at the top of the
# script and on the order of the fits — verify before reordering sections.
rf = RandomForestRegressor().fit(X_normalized, y)
rf_importances = rf.feature_importances_
|
|||
|
|
|||
|
# 4. Average score
# Combine the three methods with a plain mean. RFE ranks are inverted
# (1 / rank) so that, like the other two scores, larger means more important.
# NOTE(review): the three inputs are not on a common scale (absolute Lasso
# coefficients, reciprocal ranks, forest importances), so they do not
# necessarily contribute equally to the mean.
average_scores = (lasso_coefs + (1 / rfe_ranks) + rf_importances) / 3

# Min-max scale the averages into the range [0, 1].
score_min = np.min(average_scores)
score_range = np.max(average_scores) - score_min
if score_range > 0:
    scaled_average_scores = (average_scores - score_min) / score_range
else:
    # Every feature has the same average (e.g. only one feature): avoid the
    # 0 / 0 division, which would yield NaN, and report all zeros instead.
    scaled_average_scores = average_scores - score_min
|
|||
|
|
|||
|
# Collect every score in one table, one row per feature, rounded to two
# decimals for display.
# Note: the 'RFE Rank' column holds the reciprocal rank (1 / rank), not the
# raw rank, so higher is better in every column.
results_df = pd.DataFrame(
    {
        'Lasso': np.round(lasso_coefs, 2),
        'RFE Rank': np.round(1 / rfe_ranks, 2),
        'Random Forest': np.round(rf_importances, 2),
        'Average': np.round(scaled_average_scores, 2),
    },
    index=columns,
)
|
|||
|
|
|||
|
# Print the full score table.
print("Оценки признаков:")
print(results_df)

# Pick the four features with the highest scaled average score; only their
# names (the index labels) are kept and printed.
top_features = results_df.nlargest(4, 'Average').index
print("\nСамые важные признаки:")
print(top_features)
|