IIS_2023_1/kozlov_alexey_lab_2/lab2.py
2024-01-12 11:36:28 +04:00

62 lines
2.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from sklearn.ensemble import RandomForestRegressor
from RandomizedLasso import RandomizedLasso
from sklearn.feature_selection import f_regression
from sklearn.preprocessing import MinMaxScaler
import numpy as np
# Feature labels "x1" .. "x14"; rank_to_dict pairs them positionally with scores.
names = [f"x{i}" for i in range(1, 15)]
def main():
    """Rank the generated features with three methods and report the results.

    Fits a random forest regressor, computes ``f_regression`` scores and fits
    a randomized lasso on the data returned by ``generation_data``. Each score
    vector is normalised through ``rank_to_dict``; the combined rankings are
    then passed to ``get_estimation`` and ``print_sorted_data`` for printing.
    """
    features, target = generation_data()

    # Feature selection via random trees (Random Forest Regressor).
    forest = RandomForestRegressor()
    forest.fit(features, target)

    # Linear correlation model; only the first returned array is used.
    f_scores = f_regression(features, target, center=False)[0]

    # Randomized lasso.
    lasso = RandomizedLasso(alpha=.01)
    lasso.fit(features, target)

    # Insertion order is kept because it determines the printing order.
    ranks = {}
    ranks["Random Forest Regressor"] = rank_to_dict(forest.feature_importances_)
    ranks['f-Regression'] = rank_to_dict(f_scores)
    ranks["Randomize Lasso"] = rank_to_dict(lasso.coef_)

    get_estimation(ranks)
    print_sorted_data(ranks)
def generation_data(size=750, seed=0):
    """Generate the synthetic regression data set used by the experiment.

    The feature matrix has 14 columns drawn uniformly from [0, 1). The target
    depends only on the first five columns plus Gaussian noise. After the
    target is computed, columns 10-13 are overwritten with noisy copies of
    columns 0-3, so they are correlated with the informative features.

    Args:
        size: Number of samples (rows) to generate.
        seed: Seed passed to ``np.random.seed`` for reproducibility.

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape ``(size, 14)`` and ``Y`` has
        shape ``(size,)``.
    """
    np.random.seed(seed)
    X = np.random.uniform(0, 1, (size, 14))

    # Draw one noise value per sample. Calling normal() without a size returns
    # a single scalar, which would shift every target by the same constant
    # instead of adding noise.
    noise = np.random.normal(0, 1, size)
    Y = (10 * np.sin(np.pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - .5) ** 2
         + 10 * X[:, 3] + 5 * X[:, 4] ** 5 + noise)

    # Make the last four columns near-duplicates of the first four.
    X[:, 10:] = X[:, :4] + np.random.normal(0, .025, (size, 4))
    return X, Y
def rank_to_dict(ranks):
    """Map feature names to min-max normalised absolute scores.

    Args:
        ranks: One-dimensional sequence of raw scores, ordered like the
            module-level ``names`` list.

    Returns:
        Dict from feature name to the absolute score after ``MinMaxScaler``
        rescaling, rounded to two decimals. If ``ranks`` and ``names`` differ
        in length, ``zip`` stops at the shorter of the two.
    """
    magnitudes = np.abs(ranks)
    # reshape(-1, 1) builds the single-column matrix the scaler expects for
    # any number of features, instead of requiring exactly 14.
    column = np.asarray(magnitudes).reshape(-1, 1)
    scaled = MinMaxScaler().fit_transform(column).ravel()
    return {name: round(score, 2) for name, score in zip(names, scaled)}
def get_estimation(ranks: dict):
    """Print and return the mean score of every feature across all methods.

    Args:
        ranks: Mapping of method name to a ``{feature name: score}`` dict.

    Returns:
        List of ``(feature, mean)`` tuples sorted by mean in descending order
        (the same list that is printed). Means are rounded to two decimals.
        The divisor is always the number of methods, so a feature missing
        from one method is effectively scored 0 there.
    """
    totals = {}
    for scores in ranks.values():
        for feature, score in scores.items():
            totals[feature] = totals.get(feature, 0) + score

    method_count = len(ranks)
    mean = {
        feature: round(total / method_count, 2)
        for feature, total in totals.items()
    }
    mean_sorted = sorted(mean.items(), key=lambda item: item[1], reverse=True)

    print("Средние значения")
    print(mean_sorted)
    print("4 самых важных признака по среднему значению")
    for feature, value in mean_sorted[:4]:
        print(f"{feature} - {value}")
    return mean_sorted
def print_sorted_data(ranks: dict):
    """Print each method's feature scores ordered from highest to lowest.

    Args:
        ranks: Mapping of method name to a ``{feature name: score}`` dict.
            The mapping is only read; the sorted lists are kept in a local
            copy so the caller's data stays usable afterwards.
    """
    print()
    # Sort everything first, then print, so output starts only once all
    # methods have been sorted successfully.
    ordered = {
        method: sorted(scores.items(), key=lambda item: item[1], reverse=True)
        for method, scores in ranks.items()
    }
    for method, pairs in ordered.items():
        print(method)
        print(pairs)
# Run the experiment only when the file is executed as a script, so that
# importing this module does not trigger model fitting and printing.
if __name__ == "__main__":
    main()