lab 2 is ready
This commit is contained in:
parent
5d8a090a38
commit
ae454ae9ef
@ -1,17 +1,14 @@
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from sklearn.linear_model import LinearRegression, RandomizedLasso
|
||||
from sklearn.feature_selection import RFE
|
||||
from sklearn.preprocessing import MinMaxScaler
|
||||
from sklearn.svm import SVR
|
||||
from matplotlib import pyplot as plt
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
from alexandrov_dmitrii_lab_2.rand_lasso import RandomizedLasso
|
||||
import random as rand
|
||||
|
||||
figure = plt.figure(1, figsize=(16, 9))
|
||||
axis = figure.subplots(1, 4)
|
||||
col = 0
|
||||
y = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
|
||||
y = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
|
||||
|
||||
|
||||
def rank_to_dict(ranks, names, n_features):
|
||||
@ -35,26 +32,29 @@ def createView(key, val):
|
||||
|
||||
|
||||
def start():
|
||||
np.random.seed(random.randrange(50))
|
||||
np.random.seed(rand.randint(0, 50))
|
||||
size = 750
|
||||
n_features = 10
|
||||
n_features = 14
|
||||
X = np.random.uniform(0, 1, (size, n_features))
|
||||
|
||||
Y = (10 * np.sin(np.pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - .5) ** 2 +
|
||||
10 * X[:, 3] + 5 * X[:, 4] ** 5 + np.random.normal(0, 1))
|
||||
X[:, 10:] = X[:, :4] + np.random.normal(0, .025, (size, 4))
|
||||
|
||||
lr = LinearRegression()
|
||||
rl = RandomizedLasso()
|
||||
rfe = RFE(estimator=SVR(kernel="linear"), n_features_to_select=n_features)
|
||||
rfe = RFE(estimator=LinearRegression(), n_features_to_select=1)
|
||||
lr.fit(X, Y)
|
||||
rl.fit(X, Y)
|
||||
rfe.fit(X, Y)
|
||||
|
||||
names = ["x%s" % i for i in range(1, n_features + 1)]
|
||||
|
||||
rfe_res = rfe.ranking_
|
||||
for i in range(rfe_res.size):
|
||||
rfe_res[i] = 14 - rfe_res[i]
|
||||
ranks = {"Linear regression": rank_to_dict(lr.coef_, names, n_features),
|
||||
"Random lasso": rank_to_dict(rl.scores_, names, n_features),
|
||||
"RFE": rank_to_dict(rfe.estimator_.coef_, names, n_features)}
|
||||
"RFE": rank_to_dict(rfe_res, names, n_features)}
|
||||
|
||||
mean = {}
|
||||
|
||||
|
@ -1,399 +0,0 @@
|
||||
import numbers
|
||||
import warnings
|
||||
from abc import ABCMeta, abstractmethod
|
||||
|
||||
import numpy as np
|
||||
import scipy.sparse as sp
|
||||
import six
|
||||
from joblib import Memory, Parallel, delayed
|
||||
from scipy.interpolate import interp1d
|
||||
from sklearn.preprocessing import normalize as f_normalize
|
||||
from sklearn.base import BaseEstimator
|
||||
from sklearn.exceptions import ConvergenceWarning
|
||||
from sklearn.feature_selection import SelectorMixin
|
||||
from sklearn.linear_model import lars_path, LassoLarsIC
|
||||
from sklearn.utils import check_X_y, check_random_state, safe_mask, check_array
|
||||
|
||||
__all__ = ['RandomizedLasso']
|
||||
|
||||
from sklearn.utils.sparsefuncs import inplace_column_scale, mean_variance_axis
|
||||
|
||||
from sklearn.utils.validation import check_is_fitted, as_float_array, FLOAT_DTYPES
|
||||
|
||||
|
||||
def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
                     sample_weight=None, return_mean=False):
    """Center and optionally normalize ``X`` and ``y`` before a linear fit.

    Returns ``X, y, X_offset, y_offset, X_scale``: the transformed data
    together with the statistics that were removed from it.

    * ``fit_intercept=False``: the data is returned unchanged (apart from the
      validation / dtype conversion below); offsets are zeros and the scale
      is ones.
    * ``fit_intercept=True`` and dense ``X``: the (optionally weighted)
      column means are subtracted from ``X``; with ``normalize=True`` the
      columns are additionally normalized along axis 0 and the norms are
      returned as ``X_scale``.
    * ``fit_intercept=True`` and sparse ``X``: ``X`` is not centered.  The
      column means are only reported in ``X_offset`` when
      ``return_mean=True`` (otherwise ``X_offset`` is zeroed); with
      ``normalize=True`` the columns are scaled in place.

    Whenever ``fit_intercept`` is true, the (optionally weighted) mean of
    ``y`` is subtracted from ``y``.  ``y`` is always converted to ``X``'s
    dtype.  A scalar ``sample_weight`` is treated as "no weights".
    """
    # A single number carries no per-sample information.
    if isinstance(sample_weight, numbers.Number):
        sample_weight = None

    X = check_array(X, copy=copy, accept_sparse=['csr', 'csc'],
                    dtype=FLOAT_DTYPES)
    y = np.asarray(y, dtype=X.dtype)

    # Nothing to remove when no intercept is fitted: report neutral stats.
    if not fit_intercept:
        X_offset = np.zeros(X.shape[1], dtype=X.dtype)
        X_scale = np.ones(X.shape[1], dtype=X.dtype)
        if y.ndim == 1:
            y_offset = X.dtype.type(0)
        else:
            y_offset = np.zeros(y.shape[1], dtype=X.dtype)
        return X, y, X_offset, y_offset, X_scale

    if sp.issparse(X):
        X_offset, column_var = mean_variance_axis(X, axis=0)
        if not return_mean:
            X_offset[:] = X.dtype.type(0)

        if normalize:
            # Turn the per-column variance into a norm, reusing the buffer.
            column_var *= X.shape[0]
            X_scale = np.sqrt(column_var, out=column_var)
            # Guard against division by zero for constant columns.
            X_scale[X_scale == 0] = 1
            inplace_column_scale(X, 1. / X_scale)
        else:
            X_scale = np.ones(X.shape[1], dtype=X.dtype)
    else:
        X_offset = np.average(X, axis=0, weights=sample_weight)
        X -= X_offset
        if normalize:
            X, X_scale = f_normalize(X, axis=0, copy=False,
                                     return_norm=True)
        else:
            X_scale = np.ones(X.shape[1], dtype=X.dtype)

    y_offset = np.average(y, axis=0, weights=sample_weight)
    y = y - y_offset

    return X, y, X_offset, y_offset, X_scale
|
||||
|
||||
|
||||
def _resample_model(estimator_func, X, y, scaling=.5, n_resampling=200,
                    n_jobs=1, verbose=False, pre_dispatch='3*n_jobs',
                    random_state=None, sample_fraction=.75, **params):
    """Average the output of ``estimator_func`` over random perturbations.

    ``estimator_func`` is called ``n_resampling`` times through
    ``joblib.Parallel``.  Each call receives ``X`` and ``y`` plus

    * ``weights``: a random 0/1 vector of length ``n_features`` multiplied
      by ``1 - scaling``;
    * ``mask``: a random boolean vector of length ``n_samples`` whose
      entries are true when a uniform draw falls below ``sample_fraction``;
    * ``verbose``: the caller's verbosity reduced by one (never below 0);
    * any extra ``**params``.

    The returned value is the sum of all results divided by
    ``n_resampling``.

    Raises
    ------
    ValueError
        If ``scaling`` is not strictly between 0 and 1.
    """
    rng = check_random_state(random_state)
    n_samples, n_features = X.shape

    if not (0 < scaling < 1):
        raise ValueError(
            "'scaling' should be between 0 and 1. Got %r instead." % scaling)

    # The values handed to the estimator are 1 - weights, not the weights.
    flipped_scaling = 1. - scaling
    runner = Parallel(n_jobs=n_jobs, verbose=verbose,
                      pre_dispatch=pre_dispatch)

    # Kept lazy on purpose: the random draws happen, in order (weights then
    # mask), only when the parallel runner pulls the next job.
    jobs = (
        delayed(estimator_func)(
            X, y,
            weights=flipped_scaling * rng.randint(0, 2, size=(n_features,)),
            mask=(rng.rand(n_samples) < sample_fraction),
            verbose=max(0, verbose - 1),
            **params)
        for _ in range(n_resampling)
    )

    scores_ = 0.0
    for active_set in runner(jobs):
        scores_ += active_set

    scores_ /= n_resampling
    return scores_
|
||||
|
||||
|
||||
class BaseRandomizedLinearModel(six.with_metaclass(ABCMeta, BaseEstimator,
                                                   SelectorMixin)):
    """Base class to implement randomized linear models for feature selection

    This implements the strategy by Meinshausen and Buhlmann:
    stability selection with randomized sampling, and random re-weighting of
    the penalty.

    Subclasses must implement ``_make_estimator_and_params`` and are expected
    to set the attributes read by ``fit`` and ``_get_support_mask``:
    ``fit_intercept``, ``normalize``, ``memory``, ``scaling``,
    ``n_resampling``, ``n_jobs``, ``verbose``, ``pre_dispatch``,
    ``random_state``, ``sample_fraction`` and ``selection_threshold``.
    """

    @abstractmethod
    def __init__(self):
        pass

    # Exposed as a static method so subclasses can override the preprocessing.
    _preprocess_data = staticmethod(_preprocess_data)

    def fit(self, X, y):
        """Fit the model using X, y as training data.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training data.

        y : array-like, shape = [n_samples]
            Target values. Will be cast to X's dtype if necessary

        Returns
        -------
        self : object
               Returns an instance of self.

        Raises
        ------
        ValueError
            If ``self.memory`` is neither ``None``, a string, nor a
            ``joblib.Memory`` instance.
        """
        X, y = check_X_y(X, y, ['csr', 'csc'], y_numeric=True,
                         ensure_min_samples=2, estimator=self)
        X = as_float_array(X, copy=False)

        # Only the transformed data is needed; the offsets/scale are unused.
        X, y = self._preprocess_data(
            X, y, self.fit_intercept, self.normalize)[:2]

        estimator_func, params = self._make_estimator_and_params(X, y)

        memory = self.memory
        if memory is None:
            # `location` replaces the `cachedir` keyword deprecated in
            # joblib 0.12; location=None disables caching.
            memory = Memory(location=None, verbose=0)
        elif isinstance(memory, six.string_types):
            memory = Memory(location=memory, verbose=0)
        elif not isinstance(memory, Memory):
            raise ValueError("'memory' should either be a string or"
                             " a joblib.Memory"
                             " instance, got 'memory={!r}' instead.".format(
                                 type(memory)))

        # Parallelism/verbosity settings do not change the result, so they
        # are excluded from the cache key.
        scores_ = memory.cache(
            _resample_model, ignore=['verbose', 'n_jobs', 'pre_dispatch']
        )(
            estimator_func, X, y,
            scaling=self.scaling, n_resampling=self.n_resampling,
            n_jobs=self.n_jobs, verbose=self.verbose,
            pre_dispatch=self.pre_dispatch, random_state=self.random_state,
            sample_fraction=self.sample_fraction, **params)

        # Always store scores as a 2-D array with one row per feature.
        if scores_.ndim == 1:
            scores_ = scores_[:, np.newaxis]
        self.all_scores_ = scores_
        self.scores_ = np.max(self.all_scores_, axis=1)
        return self

    def _make_estimator_and_params(self, X, y):
        """Return the estimator function and the parameters passed to it"""
        raise NotImplementedError

    def _get_support_mask(self):
        """Get the boolean mask indicating which features are selected.

        Returns
        -------
        support : boolean array of shape [# input features]
                  An element is True iff its corresponding feature is selected
                  for retention, i.e. its score is strictly greater than
                  ``selection_threshold``.
        """
        check_is_fitted(self, 'scores_')
        return self.scores_ > self.selection_threshold
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Randomized lasso: regression settings
|
||||
|
||||
def _randomized_lasso(X, y, weights, mask, alpha=1., verbose=False,
                      precompute=False, eps=np.finfo(float).eps,
                      max_iter=500):
    """Run one randomized Lasso fit and report which features are active.

    Parameters
    ----------
    X, y : training data and targets; only the rows selected by ``mask``
        are used.
    weights : per-feature values; each column of ``X`` is multiplied by
        ``1 - weights`` before the path is computed.
    mask : boolean row selector applied to ``X`` and ``y``.
    alpha : float or sequence of floats
        Regularization value(s) at which the active set is evaluated.
    verbose, precompute, eps, max_iter :
        Forwarded to ``lars_path`` (``precompute`` as ``Gram``).

    Returns
    -------
    scores : boolean array
        ``True`` where the Lasso coefficient is non-zero.  Shape is
        ``(n_features,)`` for a single ``alpha`` and
        ``(n_features, len(alpha))`` for several.
    """
    X = X[safe_mask(X, mask)]
    y = y[mask]

    # Center X and y to avoid fitting the intercept
    X -= X.mean(axis=0)
    y -= y.mean()

    alpha = np.atleast_1d(np.asarray(alpha, dtype=np.float64))

    X = (1 - weights) * X

    with warnings.catch_warnings():
        warnings.simplefilter('ignore', ConvergenceWarning)
        alphas_, _, coef_ = lars_path(X, y,
                                      Gram=precompute, copy_X=False,
                                      copy_Gram=False, alpha_min=np.min(alpha),
                                      method='lasso', verbose=verbose,
                                      max_iter=max_iter, eps=eps)

    if len(alpha) > 1:
        if len(alphas_) > 1:  # np.min(alpha) < alpha_min
            # The path is reversed before being handed to interp1d; alphas
            # outside the computed path yield a coefficient of 0.
            interpolator = interp1d(alphas_[::-1], coef_[:, ::-1],
                                    bounds_error=False, fill_value=0.)
            scores = (interpolator(alpha) != 0.0)
        else:
            # Builtin `bool`: the `np.bool` alias was removed in NumPy 1.24.
            scores = np.zeros((X.shape[1], len(alpha)), dtype=bool)
    else:
        scores = coef_[:, -1] != 0.0
    return scores
|
||||
|
||||
|
||||
class RandomizedLasso(BaseRandomizedLinearModel):
    """Randomized Lasso.

    Randomized Lasso works by subsampling the training data and
    computing a Lasso estimate where the penalty of a random subset of
    coefficients has been scaled. By performing this double
    randomization several times, the method assigns high scores to
    features that are repeatedly selected across randomizations. This
    is known as stability selection. In short, features selected more
    often are considered good features.

    Parameters
    ----------
    alpha : float, 'aic', or 'bic', optional
        The regularization parameter alpha parameter in the Lasso.
        Warning: this is not the alpha parameter in the stability selection
        article which is scaling.

    scaling : float, optional
        The s parameter used to randomly scale the penalty of different
        features.
        Should be between 0 and 1.

    sample_fraction : float, optional
        The fraction of samples to be used in each randomized design.
        Should be between 0 and 1. If 1, all samples are used.

    n_resampling : int, optional
        Number of randomized models.

    selection_threshold : float, optional
        The score above which features should be selected.

    fit_intercept : boolean, optional
        whether to calculate the intercept for this model. If set
        to false, no intercept will be used in calculations
        (e.g. data is expected to be already centered).

    verbose : boolean or integer, optional
        Sets the verbosity amount

    normalize : boolean, optional, default True
        If True, the regressors X will be normalized before regression.
        This parameter is ignored when `fit_intercept` is set to False.
        When the regressors are normalized, note that this makes the
        hyperparameters learned more robust and almost independent of
        the number of samples. The same property is not valid for
        standardized data. However, if you wish to standardize, please
        use `preprocessing.StandardScaler` before calling `fit` on an
        estimator with `normalize=False`.

    precompute : True | False | 'auto' | array-like
        Whether to use a precomputed Gram matrix to speed up calculations.
        If set to 'auto' let us decide.
        The Gram matrix can also be passed as argument, but it will be used
        only for the selection of parameter alpha, if alpha is 'aic' or 'bic'.

    max_iter : integer, optional
        Maximum number of iterations to perform in the Lars algorithm.

    eps : float, optional
        The machine-precision regularization in the computation of the
        Cholesky diagonal factors. Increase this for very ill-conditioned
        systems. Unlike the 'tol' parameter in some iterative
        optimization-based algorithms, this parameter does not control
        the tolerance of the optimization.

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    n_jobs : integer, optional
        Number of CPUs to use during the resampling. If '-1', use
        all the CPUs

    pre_dispatch : int, or string, optional
        Controls the number of jobs that get dispatched during parallel
        execution. Reducing this number can be useful to avoid an
        explosion of memory consumption when more jobs get dispatched
        than CPUs can process. This parameter can be:

            - None, in which case all the jobs are immediately
              created and spawned. Use this for lightweight and
              fast-running jobs, to avoid delays due to on-demand
              spawning of the jobs

            - An int, giving the exact number of total jobs that are
              spawned

            - A string, giving an expression as a function of n_jobs,
              as in '2*n_jobs'

    memory : None, str or object with the joblib.Memory interface, optional \
            (default=None)
        Used for internal caching. By default, no caching is done.
        If a string is given, it is the path to the caching directory.

    Attributes
    ----------
    scores_ : array, shape = [n_features]
        Feature scores between 0 and 1.

    all_scores_ : array, shape = [n_features, n_reg_parameter]
        Feature scores between 0 and 1 for all values of the regularization \
        parameter. The reference article suggests ``scores_`` is the max of \
        ``all_scores_``.

    alpha_ : float
        Regularization value chosen by ``LassoLarsIC``; only set when
        ``alpha`` is 'aic' or 'bic'.

    References
    ----------
    Stability selection
    Nicolai Meinshausen, Peter Buhlmann
    Journal of the Royal Statistical Society: Series B
    Volume 72, Issue 4, pages 417-473, September 2010
    DOI: 10.1111/j.1467-9868.2010.00740.x

    See also
    --------
    RandomizedLogisticRegression, Lasso, ElasticNet
    """
    # `np.finfo(float)`: the `np.float` alias was removed in NumPy 1.24 and
    # would make this class definition fail at import time.
    def __init__(self, alpha='aic', scaling=.5, sample_fraction=.75,
                 n_resampling=200, selection_threshold=.25,
                 fit_intercept=True, verbose=False,
                 normalize=True, precompute='auto',
                 max_iter=500,
                 eps=np.finfo(float).eps, random_state=None,
                 n_jobs=1, pre_dispatch='3*n_jobs',
                 memory=None):
        self.alpha = alpha
        self.scaling = scaling
        self.sample_fraction = sample_fraction
        self.n_resampling = n_resampling
        self.fit_intercept = fit_intercept
        self.max_iter = max_iter
        self.verbose = verbose
        self.normalize = normalize
        self.precompute = precompute
        self.eps = eps
        self.random_state = random_state
        self.n_jobs = n_jobs
        self.selection_threshold = selection_threshold
        self.pre_dispatch = pre_dispatch
        self.memory = memory

    def _make_estimator_and_params(self, X, y):
        """Return ``_randomized_lasso`` and the keyword arguments for it.

        When ``alpha`` is 'aic' or 'bic', a ``LassoLarsIC`` model is fitted
        on ``X, y`` first and its ``alpha_`` is used (and stored on
        ``self.alpha_``).
        """
        alpha = self.alpha
        if isinstance(alpha, six.string_types) and alpha in ('aic', 'bic'):
            model = LassoLarsIC(precompute=self.precompute,
                                criterion=self.alpha,
                                max_iter=self.max_iter,
                                eps=self.eps)
            model.fit(X, y)
            self.alpha_ = alpha = model.alpha_

        precompute = self.precompute
        # A precomputed Gram array is useless, since _randomized_lasso
        # changes X at each iteration
        if hasattr(precompute, '__array__'):
            precompute = 'auto'
        assert precompute in (True, False, None, 'auto')
        return _randomized_lasso, dict(alpha=alpha, max_iter=self.max_iter,
                                       eps=self.eps,
                                       precompute=precompute)
|
@ -8,45 +8,42 @@
|
||||
* Рекурсивное сокращение признаков (Recursive Feature Elimination – RFE)
|
||||
|
||||
### Запуск программы
|
||||
Файл lab2.py содержит и запускает программу, аргументов и настройки ~~вроде~~ не требует,
|
||||
Программа работает на Python 3.7, поскольку только в нём можно подключить нужную версию библиотеки scikit-learn, которая ещё содержит RandomizedLasso
|
||||
Файл lab2.py содержит и запускает программу, аргументов и настройки ~~вроде~~ не требует.
|
||||
|
||||
### Описание программы
|
||||
Файл rand_lasso.py содержит реализацию RandomizedLasso, которая была объявлена устаревшей в scikit-learn 0.19 и удалена в версии 0.21. Код взят из репозитория scikit-learn, версия 0.19.
|
||||
Пробовались готовые решения с гита, однако они были либо совсем нерабочими, либо у их результатов не прослеживалось корреляции с остальными моделями, что говорило об их низком качестве.
|
||||
|
||||
Файл lab2.py содержит непосредственно программу.
|
||||
|
||||
Программа создаёт набор данных с 14 признаками для последующего их ранжирования и обрабатывает его тремя моделями по варианту.
|
||||
Программа строит столбчатые диаграммы, которые показывают как распределились оценки важности признаков, и выводит в консоль отсортированные по убыванию важности признаки.
|
||||
Таким образом можно легко определить наиважнейшие признаки.
|
||||
|
||||
Сперва в качестве оценщика в модели RFE использовалась линейная регрессия. Однако тогда результаты были идентичны с результатами обычной модели линейной регрессии.
|
||||
Поэтому оценщик был заменён на предложенную в примерах sklearn модель SVR.
|
||||
|
||||
### Результаты тестирования
|
||||
По результатам тестирования, можно сказать следующее:
|
||||
* линейная регрессия и рекурсивное сокращение признаков показывают близкие значения, которые, тем не менее, расходятся в деталях.
|
||||
* случайное лассо показывает сильно завышенные результаты, однако они более-менее коррелируют с результатами других моделей.
|
||||
* средние значения позволяют выявить взвешенный результат.
|
||||
* однозначно определить, какая модель ближе к действительности, невозможно из-за разброса.
|
||||
* а вот какая модель (её реализация) дальше всего от действительности, наоборот, довольно очевидно.
|
||||
* линейная регрессия показывает хорошие результаты, выделяет все 9 значимых признаков.
|
||||
* случайное лассо справляется хуже других моделей, иногда выделяя шумовые признаки в значимые, а значимые - в шумовые.
|
||||
* рекурсивное сокращение признаков показывает хорошие результаты, правильно выделяя 9 самых значимых признаков.
|
||||
* хотя линейная регрессия и рекурсивное сокращение признаков правильно выделяют значимые признаки, саму значимость они оценивают по-разному.
|
||||
* среднее значение позволяет с хорошей уверенностью определять истинные значимые признаки.
|
||||
|
||||
Итого. Если необходимо только ранжирование, достаточно взять модель RFE; если же необходимо анализировать признаки, имея количественную меру их значимости (коэффициенты), то брать нужно линейную регрессию. Случайное лассо лучше не использовать.
|
||||
|
||||
Пример консольных результатов:
|
||||
|
||||
>Linear regression
|
||||
|
||||
>[('x4', 1.0), ('x1', 0.73), ('x2', 0.73), ('x5', 0.38), ('x10', 0.05), ('x6', 0.03), ('x9', 0.03), ('x3', 0.01), ('x7', 0.01), ('x8', 0.0)]
|
||||
>[('x1', 1.0), ('x4', 0.69), ('x2', 0.61), ('x11', 0.59), ('x3', 0.51), ('x13', 0.48), ('x5', 0.19), ('x12', 0.19), ('x14', 0.12), ('x8', 0.03), ('x6', 0.02), ('x10', 0.01), ('x7', 0.0), ('x9', 0.0)]
|
||||
|
||||
>Random lasso
|
||||
|
||||
>[('x1', 1.0), ('x2', 1.0), ('x4', 1.0), ('x5', 1.0), ('x10', 0.97), ('x6', 0.89), ('x9', 0.82), ('x3', 0.55), ('x7', 0.36), ('x8', 0.0)]
|
||||
>[('x5', 1.0), ('x4', 0.76), ('x2', 0.74), ('x1', 0.72), ('x14', 0.44), ('x12', 0.32), ('x11', 0.28), ('x8', 0.22), ('x6', 0.17), ('x3', 0.08), ('x7', 0.02), ('x13', 0.02), ('x9', 0.01), ('x10', 0.0)]
|
||||
|
||||
>RFE
|
||||
|
||||
>[('x4', 1.0), ('x1', 0.86), ('x2', 0.8), ('x5', 0.44), ('x10', 0.08), ('x6', 0.05), ('x7', 0.04), ('x3', 0.01), ('x8', 0.01), ('x9', 0.0)]
|
||||
>[('x4', 1.0), ('x1', 0.92), ('x11', 0.85), ('x2', 0.77), ('x3', 0.69), ('x13', 0.62), ('x5', 0.54), ('x12', 0.46), ('x14', 0.38), ('x8', 0.31), ('x6', 0.23), ('x10', 0.15), ('x7', 0.08), ('x9', 0.0)]
|
||||
|
||||
>Mean
|
||||
|
||||
>[('x4', 1.0), ('x1', 0.86), ('x2', 0.84), ('x5', 0.61), ('x10', 0.37), ('x6', 0.32), ('x9', 0.28), ('x3', 0.19), ('x7', 0.14), ('x8', 0.0)]
|
||||
>[('x1', 0.88), ('x4', 0.82), ('x2', 0.71), ('x5', 0.58), ('x11', 0.57), ('x3', 0.43), ('x13', 0.37), ('x12', 0.32), ('x14', 0.31), ('x8', 0.19), ('x6', 0.14), ('x10', 0.05), ('x7', 0.03), ('x9', 0.0)]
|
||||
|
||||
По данным результатам можно заключить, что наиболее влиятельные признаки по убыванию: x4, x1, x2, x5.
|
||||
По данным результатам можно заключить, что наиболее влиятельные признаки по убыванию: x1, x4, x2, x5.
|
Loading…
Reference in New Issue
Block a user