"""Compare feature-ranking methods on a synthetic regression problem.

Ranks features with three approaches (LassoCV coefficients, recursive
feature elimination on a random forest, and a permutation-based lasso
simulation), scales every ranking to [0, 1], prints the per-method table
and then the per-feature average sorted from highest to lowest.
"""
from sklearn.linear_model import LassoCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import RFE
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd


def rank_to_dict(ranks, names, order=1):
    """Scale raw importance values to [0, 1] and key them by feature name.

    Args:
        ranks: One raw score per feature (any 1-D array-like). Absolute
            values are used, so the sign of a coefficient is ignored.
        names: Feature names, in the same order as ``ranks``.
        order: ``1`` when a larger magnitude means a more important feature
            (coefficients, importances); ``-1`` when a smaller value is
            better (e.g. ``RFE.ranking_``, where 1 is the best rank).

    Returns:
        dict mapping each name to its min-max scaled score rounded to two
        decimals; 1.0 is the most important feature, 0.0 the least.
    """
    scores = order * np.abs(np.asarray(ranks, dtype=float))
    # reshape(-1, 1): MinMaxScaler works column-wise, so the scores must form
    # a single column; -1 lets the length follow the input instead of
    # assuming exactly 14 features.
    scaled = MinMaxScaler().fit_transform(scores.reshape(-1, 1)).ravel()
    return dict(zip(names, (round(x, 2) for x in scaled)))


np.random.seed(0)
size = 750
n_features = 14

# Synthetic data: Y depends only on the first five columns of X.
X = np.random.uniform(0, 1, (size, n_features))
Y = (10 * np.sin(np.pi * X[:, 0] * X[:, 1])
     + 20 * (X[:, 2] - .5) ** 2
     + 10 * X[:, 3]
     + 5 * X[:, 4] ** 5
     + np.random.normal(0, 1, size))
# The last four columns become noisy copies of the first four, i.e. strongly
# correlated duplicates of informative features.
X[:, 10:] = X[:, :4] + np.random.normal(0, .025, (size, 4))

# Regularisation grid shared by every LassoCV fit below.
alphas = np.linspace(0.001, 1, 100)

lasso_cv = LassoCV(alphas=alphas, cv=5)
lasso_cv.fit(X, Y)

rf = RandomForestRegressor(n_estimators=100)
rfe = RFE(estimator=rf, n_features_to_select=1, step=1)
rfe.fit(X, Y)

# Feature names: x1 .. xN, one per column of X.
names = ["x%s" % i for i in range(1, X.shape[1] + 1)]

# "Randomized lasso" simulation: refit LassoCV on shuffled targets and score
# each feature by the spread (std) of its coefficient across the refits.
# NOTE(review): permuting Y removes the X-Y relationship, so this may not
# measure selection stability in the usual sense - confirm the intent.
n_resampling = 200
rlasso_coefs = np.zeros((X.shape[1], n_resampling))
for i in range(n_resampling):
    Y_permuted = np.random.permutation(Y)
    rlasso = LassoCV(alphas=alphas, cv=5)
    rlasso.fit(X, Y_permuted)
    rlasso_coefs[:, i] = rlasso.coef_
rlasso_scores = np.std(rlasso_coefs, axis=1)

# Per-method rankings, each scaled to [0, 1] with 1 = most important.
ranks = {
    "Lasso": rank_to_dict(lasso_cv.coef_, names),
    # RFE assigns rank 1 to the best feature, so the scale must be flipped.
    "RFE": rank_to_dict(rfe.ranking_, names, order=-1),
    "RandomizedLassoSim": rank_to_dict(rlasso_scores, names),
}

# Sum the scores of every method for each feature.
mean = {}
for values in ranks.values():
    for feature, score in values.items():
        mean[feature] = mean.get(feature, 0) + score

df_ranks = pd.DataFrame(ranks)

# Print the ranking produced by each method.
print("ПО КАЖДОМУ МЕТОДУ:")
print(df_ranks)

# Average over the methods for each feature.
mean = {feature: round(total / len(ranks), 2)
        for feature, total in mean.items()}

# Averages sorted from highest to lowest.
mean = sorted(mean.items(), key=lambda x: x[1], reverse=True)
print("СРЕДНИЕ")
print(mean)