abanin_daniil_lab_2 #50
41
abanin_daniil_lab_2/README.md
Normal file
41
abanin_daniil_lab_2/README.md
Normal file
@ -0,0 +1,41 @@
|
||||
## Лабораторная работа №2
|
||||
|
||||
### Ранжирование признаков
|
||||
|
||||
## ПИбд-41 Абанин Даниил
|
||||
|
||||
### Как запустить лабораторную работу:
|
||||
|
||||
* установить python, numpy, matplotlib, sklearn
|
||||
* запустить проект (стартовая точка lab2)
|
||||
|
||||
### Какие технологии использовались:
|
||||
|
||||
* Язык программирования `Python`, библиотеки numpy, matplotlib, sklearn
|
||||
* Среда разработки `PyCharm`
|
||||
|
||||
### Что делает лабораторная работа:
|
||||
|
||||
* Генерирует данные и обучает такие модели, как: LinearRegression, RandomizedLasso, Recursive Feature Elimination (RFE)
|
||||
* Производиться ранжирование признаков с помощью моделей LinearRegression, RandomizedLasso, Recursive Feature Elimination (RFE)
|
||||
* Отображение получившихся результатов: 4 самых важных признака по среднему значению, значения признаков для каждой модели
|
||||
|
||||
|
||||
### 4 самых важных признака по среднему значению
|
||||
* Параметр - x4, значение - 0.56
|
||||
* Параметр - x1, значение - 0.45
|
||||
* Параметр - x2, значение - 0.33
|
||||
* Параметр - x9, значение - 0.33
|
||||
|
||||
####Linear Regression
|
||||
[('x1', 1.0), ('x4', 0.69), ('x2', 0.61), ('x11', 0.59), ('x3', 0.51), ('x13', 0.48), ('x5', 0.19), ('x12', 0.19), ('x14', 0.12), ('x8', 0.03), ('x6', 0.02), ('x10', 0.01), ('x7', 0.0), ('x9', 0.0)]
|
||||
|
||||
####Recursive Feature Elimination
|
||||
[('x9', 1.0), ('x7', 0.86), ('x10', 0.71), ('x6', 0.57), ('x8', 0.43), ('x14', 0.29), ('x12', 0.14), ('x1', 0.0), ('x2', 0.0), ('x3', 0.0), ('x4', 0.0), ('x5', 0.0), ('x11', 0.0), ('x13', 0.0)]
|
||||
|
||||
####Randomize Lasso
|
||||
[('x4', 1.0), ('x2', 0.37), ('x1', 0.36), ('x5', 0.32), ('x6', 0.02), ('x8', 0.02), ('x3', 0.01), ('x7', 0.0), ('x9', 0.0), ('x10', 0.0), ('x11', 0.0), ('x12', 0.0), ('x13', 0.0), ('x14', 0.0)]
|
||||
|
||||
#### Результаты:
|
||||
|
||||
![Result](result.png)
|
76
abanin_daniil_lab_2/RadomizedLasso.py
Normal file
76
abanin_daniil_lab_2/RadomizedLasso.py
Normal file
@ -0,0 +1,76 @@
|
||||
from sklearn.utils import check_X_y, check_random_state
|
||||
from sklearn.linear_model import Lasso
|
||||
from scipy.sparse import issparse
|
||||
from scipy import sparse
|
||||
|
||||
|
||||
def _rescale_data(x, weights):
|
||||
if issparse(x):
|
||||
size = weights.shape[0]
|
||||
weight_dia = sparse.dia_matrix((1 - weights, 0), (size, size))
|
||||
x_rescaled = x * weight_dia
|
||||
else:
|
||||
x_rescaled = x * (1 - weights)
|
||||
|
||||
return x_rescaled
|
||||
|
||||
|
||||
class RandomizedLasso(Lasso):
|
||||
"""
|
||||
Randomized version of scikit-learns Lasso class.
|
||||
|
||||
Randomized LASSO is a generalization of the LASSO. The LASSO penalises
|
||||
the absolute value of the coefficients with a penalty term proportional
|
||||
to `alpha`, but the randomized LASSO changes the penalty to a randomly
|
||||
chosen value in the range `[alpha, alpha/weakness]`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
weakness : float
|
||||
Weakness value for randomized LASSO. Must be in (0, 1].
|
||||
|
||||
See also
|
||||
--------
|
||||
sklearn.linear_model.LogisticRegression : learns logistic regression models
|
||||
using the same algorithm.
|
||||
"""
|
||||
def __init__(self, weakness=0.5, alpha=1.0, fit_intercept=True,
|
||||
precompute=False, copy_X=True, max_iter=1000,
|
||||
tol=1e-4, warm_start=False, positive=False,
|
||||
random_state=None, selection='cyclic'):
|
||||
self.weakness = weakness
|
||||
super(RandomizedLasso, self).__init__(
|
||||
alpha=alpha, fit_intercept=fit_intercept, precompute=precompute, copy_X=copy_X,
|
||||
max_iter=max_iter, tol=tol, warm_start=warm_start,
|
||||
positive=positive, random_state=random_state,
|
||||
selection=selection)
|
||||
|
||||
def fit(self, X, y):
|
||||
"""Fit the model according to the given training data.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse matrix}, shape = [n_samples, n_features]
|
||||
The training input samples.
|
||||
|
||||
y : array-like, shape = [n_samples]
|
||||
The target values.
|
||||
"""
|
||||
if not isinstance(self.weakness, float) or not (0.0 < self.weakness <= 1.0):
|
||||
raise ValueError('weakness should be a float in (0, 1], got %s' % self.weakness)
|
||||
|
||||
X, y = check_X_y(X, y, accept_sparse=True)
|
||||
|
||||
n_features = X.shape[1]
|
||||
weakness = 1. - self.weakness
|
||||
random_state = check_random_state(self.random_state)
|
||||
|
||||
weights = weakness * random_state.randint(0, 1 + 1, size=(n_features,))
|
||||
|
||||
# TODO: I am afraid this will do double normalization if set to true
|
||||
#X, y, _, _ = _preprocess_data(X, y, self.fit_intercept, normalize=self.normalize, copy=False,
|
||||
# sample_weight=None, return_mean=False)
|
||||
|
||||
# TODO: Check if this is a problem if it happens before standardization
|
||||
X_rescaled = _rescale_data(X, weights)
|
||||
return super(RandomizedLasso, self).fit(X_rescaled, y)
|
BIN
abanin_daniil_lab_2/__pycache__/RadomizedLasso.cpython-39.pyc
Normal file
BIN
abanin_daniil_lab_2/__pycache__/RadomizedLasso.cpython-39.pyc
Normal file
Binary file not shown.
81
abanin_daniil_lab_2/lab2.py
Normal file
81
abanin_daniil_lab_2/lab2.py
Normal file
@ -0,0 +1,81 @@
|
||||
from matplotlib import pyplot as plt
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from RadomizedLasso import RandomizedLasso
|
||||
from sklearn.feature_selection import RFE
|
||||
from sklearn.preprocessing import MinMaxScaler
|
||||
import numpy as np
|
||||
|
||||
names = ["x%s" % i for i in range(1, 15)]
|
||||
|
||||
|
||||
def start_point():
|
||||
X,Y = generation_data()
|
||||
# Линейная модель
|
||||
lr = LinearRegression()
|
||||
lr.fit(X, Y)
|
||||
# Рекурсивное сокращение признаков
|
||||
rfe = RFE(lr)
|
||||
rfe.fit(X, Y)
|
||||
# Случайное Лассо
|
||||
randomized_lasso = RandomizedLasso(alpha=.01)
|
||||
randomized_lasso.fit(X, Y)
|
||||
|
||||
ranks = {"Linear Regression": rank_to_dict(lr.coef_), "Recursive Feature Elimination": rank_to_dict(rfe.ranking_),
|
||||
"Randomize Lasso": rank_to_dict(randomized_lasso.coef_)}
|
||||
|
||||
get_estimation(ranks)
|
||||
print_sorted_data(ranks)
|
||||
|
||||
|
||||
def generation_data():
|
||||
np.random.seed(0)
|
||||
size = 750
|
||||
X = np.random.uniform(0, 1, (size, 14))
|
||||
Y = (10 * np.sin(np.pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - .5) ** 2 +
|
||||
10 * X[:, 3] + 5 * X[:, 4] ** 5 + np.random.normal(0, 1))
|
||||
X[:, 10:] = X[:, :4] + np.random.normal(0, .025, (size, 4))
|
||||
return X, Y
|
||||
|
||||
|
||||
def rank_to_dict(ranks):
|
||||
ranks = np.abs(ranks)
|
||||
minmax = MinMaxScaler()
|
||||
ranks = minmax.fit_transform(np.array(ranks).reshape(14, 1)).ravel()
|
||||
ranks = map(lambda x: round(x, 2), ranks)
|
||||
return dict(zip(names, ranks))
|
||||
|
||||
|
||||
def get_estimation(ranks: {}):
|
||||
mean = {}
|
||||
#«Бежим» по списку ranks
|
||||
for key, value in ranks.items():
|
||||
for item in value.items():
|
||||
if(item[0] not in mean):
|
||||
mean[item[0]] = 0
|
||||
mean[item[0]] += item[1]
|
||||
|
||||
for key, value in mean.items():
|
||||
res = value/len(ranks)
|
||||
mean[key] = round(res, 2)
|
||||
|
||||
mean_sorted = sorted(mean.items(), key=lambda item: item[1], reverse=True)
|
||||
print("Средние значения")
|
||||
print(mean_sorted)
|
||||
|
||||
|
||||
print("4 самых важных признака по среднему значению")
|
||||
for item in mean_sorted[:4]:
|
||||
print('Параметр - {0}, значение - {1}'.format(item[0], item[1]))
|
||||
|
||||
|
||||
|
||||
def print_sorted_data(ranks: {}):
|
||||
print()
|
||||
for key, value in ranks.items():
|
||||
ranks[key] = sorted(value.items(), key=lambda item: item[1], reverse=True)
|
||||
for key, value in ranks.items():
|
||||
print(key)
|
||||
print(value)
|
||||
|
||||
|
||||
start_point()
|
BIN
abanin_daniil_lab_2/result.png
Normal file
BIN
abanin_daniil_lab_2/result.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 178 KiB |
Loading…
Reference in New Issue
Block a user