IIS_2023_1/degtyarev_mikhail_lab_2/main.py

72 lines
2.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import f_regression
from sklearn.preprocessing import MinMaxScaler
import numpy as np
# Generate the synthetic data set (seeded for reproducibility).
np.random.seed(0)
size = 750
X = np.random.uniform(0, 1, (size, 14))
# The target depends only on the first five features. The noise term is
# drawn once per sample (note the explicit ``size``); a bare
# ``np.random.normal(0, 1)`` would yield a single scalar and merely shift
# every target by the same constant instead of adding noise.
Y = (10 * np.sin(np.pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - .5) ** 2 +
     10 * X[:, 3] + 5 * X[:, 4] ** 5 + np.random.normal(0, 1, size))
# Overwrite the last four columns with slightly perturbed copies of the
# first four, so they are strongly correlated with informative features.
X[:, 10:] = X[:, :4] + np.random.normal(0, .025, (size, 4))
# Lasso regression (L1-regularised linear model); fit() returns the estimator.
lasso = Lasso(alpha=0.05).fit(X, Y)
# Random forest regressor with a fixed seed for reproducible importances.
rf = RandomForestRegressor(n_estimators=100, random_state=0).fit(X, Y)
# Univariate linear dependence via f_regression: keep the per-feature
# scores (first element of the result) and discard the p-values.
correlation_coeffs = f_regression(X, Y)[0]
# Rank normalisation using MinMaxScaler
def rank_to_dict(ranks, names):
    """Map feature names to their min-max scaled absolute scores.

    Args:
        ranks: One score per feature (e.g. model coefficients or
            importances). Only the magnitude is used; the sign is dropped.
        names: Feature names, in the same order as ``ranks``.

    Returns:
        A dict ``{name: score}`` where each score is the absolute input
        value rescaled by ``MinMaxScaler`` and rounded to two decimals.
    """
    ranks = np.abs(ranks)
    minmax = MinMaxScaler()
    # The scaler works column-wise on 2-D input, so present the scores as a
    # single column. ``-1`` lets NumPy infer the row count, so the function
    # works for any number of features rather than exactly 14.
    ranks = minmax.fit_transform(np.array(ranks).reshape(-1, 1)).ravel()
    return {name: round(score, 2) for name, score in zip(names, ranks)}
# Normalised feature scores for each model, keyed by model name.
names = [f"x{i}" for i in range(1, 15)]
ranks = {
    "Lasso": rank_to_dict(lasso.coef_, names),
    "Random Forest": rank_to_dict(rf.feature_importances_, names),
    "Correlation": rank_to_dict(correlation_coeffs, names),
}
# Accumulate, per feature, the sum of its scores across all models.
mean = {}
for model_scores in ranks.values():
    for feature_name, score in model_scores.items():
        mean[feature_name] = mean.get(feature_name, 0) + score
# Turn each sum into an average over the models, rounded to two decimals.
# Only existing keys are reassigned, so iterating the dict here is safe.
model_count = len(ranks)
for feature_name in mean:
    mean[feature_name] = round(mean[feature_name] / model_count, 2)
# Print the averaged scores (a plain copy, in feature insertion order).
mean_dict = dict(mean)
print("MEAN")
print(mean_dict)
# Print the normalised scores produced by each individual model.
for model_name, model_scores in ranks.items():
    print(model_name)
    print(model_scores)
# Print the four features with the highest average score, best first.
top_features = sorted(
    mean.items(),
    key=lambda pair: pair[1],
    reverse=True,
)[:4]
print("Top 4 features with values:")
for feature_name, avg_score in top_features:
    print("{}: {}".format(feature_name, avg_score))