63 lines
2.3 KiB
Python
63 lines
2.3 KiB
Python
|
from sklearn.feature_selection import f_regression
|
||
|
from sklearn.linear_model import LinearRegression, Lasso, LassoCV
|
||
|
from sklearn.preprocessing import MinMaxScaler
|
||
|
import numpy as np
|
||
|
from tabulate import tabulate
|
||
|
|
||
|
# Generate the input data: 750 observation rows and 14 feature columns.
np.random.seed(1)
size = 750
X = np.random.uniform(0, 1, (size, 14))
# Target: the Friedman regression problem. Only the first five columns of X
# enter the formula. The Gaussian noise is drawn once per observation: a bare
# np.random.normal(0, 1) returns a single scalar, which would shift every
# target by the same constant instead of adding noise.
# NOTE(review): the reference Friedman #1 formula uses a linear 5*x5 term;
# the fifth power below is kept exactly as the script had it -- confirm intent.
Y = (10 * np.sin(np.pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - .5) ** 2 +
     10 * X[:, 3] + 5 * X[:, 4] ** 5 + np.random.normal(0, 1, size))
# Add dependent features: the last four columns become noisy copies of the
# first four (Gaussian noise with standard deviation 0.025).
X[:, 10:] = X[:, :4] + np.random.normal(0, .025, (size, 4))
|
||
|
|
||
|
# Linear model: ordinary least-squares regression fitted on all features.
lr = LinearRegression().fit(X, Y)
# Lasso: LassoCV estimator with its default settings.
lasso = LassoCV().fit(X, Y)
# f_regression: univariate scores (f) and p-values (pval) per feature.
f, pval = f_regression(X, Y, center=True)
|
||
|
|
||
|
def ranks_minmax(ranks):
    """Scale the absolute values of *ranks* to [0, 1], rounded to 2 decimals.

    The sign of each score is discarded first, so a large negative
    coefficient counts as much as a large positive one. The smallest
    absolute value maps to 0 and the largest to 1.

    Args:
        ranks: Array-like of numeric scores of any length (it is flattened
            to one dimension).

    Returns:
        A 1-D float ndarray with one scaled score per input element. If all
        absolute values are equal, every output is 0. An empty input yields
        an empty array. NaN entries are ignored when finding the range and
        stay NaN in the output.
    """
    scores = np.abs(np.asarray(ranks, dtype=float)).ravel()
    if scores.size == 0:
        return scores
    low = np.nanmin(scores)
    span = np.nanmax(scores) - low
    # A zero range would divide by zero; scale by 1 so constant input -> 0.
    if span == 0:
        span = 1.0
    return np.round((scores - low) / span, 2)
|
||
|
|
||
|
# Feature labels x1 .. x14.
names = ["x%s" % i for i in range(1, 15)]
# Normalised importance scores from each of the three methods.
lrCoef, lassoCoef, fCoef = (
    ranks_minmax(raw) for raw in (lr.coef_, lasso.coef_, f)
)
# Per-feature average of the three normalised scores, rounded to 2 decimals.
mean = [
    round((lin + las + fscore) / 3, 2)
    for lin, las, fscore in zip(lrCoef, lassoCoef, fCoef)
]
|
||
|
|
||
|
|
||
|
# First table: one row per method, each row being the method label followed
# by that method's score for every feature.
firstRow, secondRow, thirdRow, fourthRow = (
    np.append([label], scores)
    for label, scores in (
        ('Линейная регрессия', lrCoef),
        ('Лассо', lassoCoef),
        ('Линейная корреляция', fCoef),
        ('Среднее', mean),
    )
)
# Header row: a label column followed by the feature labels.
names = np.append(['Тип'], names)
print(tabulate([firstRow, secondRow, thirdRow, fourthRow], headers=names))
|
||
|
# Second table: for each method, the 1-based feature indices ordered from the
# highest score to the lowest.
print()
print("Индексы максимальных значений")
print()
# Header row: a label column followed by the rank positions 1 .. 14.
names = np.append(["Место по важности"], ["%s" % i for i in range(1, 15)])
firstRow, secondRow, thirdRow, fourthRow = (
    # argsort is ascending, so reverse it; + 1 turns 0-based column indices
    # into 1-based feature numbers.
    np.append([label], np.argsort(scores)[::-1] + 1)
    for label, scores in (
        ('Линейная регрессия', lrCoef),
        ('Лассо', lassoCoef),
        ('Линейная корреляция', fCoef),
        ('Среднее', mean),
    )
)
print(tabulate([firstRow, secondRow, thirdRow, fourthRow], headers=names))
|