IIS_2023_1/abanin_daniil_lab_2/RadomizedLasso.py

from sklearn.utils import check_X_y, check_random_state
from sklearn.linear_model import Lasso
from scipy.sparse import issparse
from scipy import sparse


def _rescale_data(x, weights):
    if issparse(x):
        size = weights.shape[0]
        weight_dia = sparse.dia_matrix((1 - weights, 0), (size, size))
        x_rescaled = x * weight_dia
    else:
        x_rescaled = x * (1 - weights)

    return x_rescaled


class RandomizedLasso(Lasso):
    """
    Randomized version of scikit-learn's Lasso class.

    Randomized LASSO is a generalization of the LASSO. The LASSO penalises
    the absolute value of the coefficients with a penalty term proportional
    to `alpha`, but the randomized LASSO changes the penalty to a randomly
    chosen value in the range `[alpha, alpha/weakness]`.

    In this implementation the randomization is applied to the data rather
    than to the penalty: on every call to `fit`, each feature column is
    multiplied by either 1 or `weakness` (chosen at random per feature)
    before the ordinary Lasso fit is run.

    Parameters
    ----------
    weakness : float
        Weakness value for randomized LASSO. Must be in (0, 1].

    alpha, fit_intercept, precompute, copy_X, max_iter, tol, warm_start,
    positive, random_state, selection
        Forwarded unchanged to `sklearn.linear_model.Lasso`.
        `random_state` is additionally used by `fit` to draw the random
        per-feature scaling.

    See also
    --------
    sklearn.linear_model.Lasso : the base estimator this class extends.
    """
    def __init__(self, weakness=0.5, alpha=1.0, fit_intercept=True,
                 precompute=False, copy_X=True, max_iter=1000,
                 tol=1e-4, warm_start=False, positive=False,
                 random_state=None, selection='cyclic'):
        # Stored as-is; validation is deferred to fit().
        self.weakness = weakness
        super().__init__(
            alpha=alpha, fit_intercept=fit_intercept, precompute=precompute,
            copy_X=copy_X, max_iter=max_iter, tol=tol, warm_start=warm_start,
            positive=positive, random_state=random_state,
            selection=selection)

    def fit(self, X, y):
        """Fit the model according to the given training data.

        The feature columns of `X` are randomly rescaled first, then the
        parent `Lasso.fit` is run on the rescaled data, so the learned
        coefficients refer to the rescaled columns.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            The training input samples.

        y : array-like, shape = [n_samples]
            The target values.

        Returns
        -------
        The return value of `Lasso.fit` on the rescaled data.

        Raises
        ------
        ValueError
            If `weakness` is not a real number in (0, 1].
        """
        from numbers import Real

        weakness = self.weakness
        # Accept any real scalar (int 1, NumPy floats, ...) but not bool,
        # which is technically a Real yet never a meaningful weakness.
        is_real = isinstance(weakness, Real) and not isinstance(weakness, bool)
        if not is_real or not (0.0 < weakness <= 1.0):
            # Wrap in a 1-tuple so a tuple-valued `weakness` is reported in
            # the ValueError instead of breaking the %-formatting itself.
            raise ValueError('weakness should be a float in (0, 1], got %s' % (weakness,))

        X, y = check_X_y(X, y, accept_sparse=True)

        n_features = X.shape[1]
        random_state = check_random_state(self.random_state)

        # Draw 0 or 1 per feature. A feature drawing 1 gets weight
        # (1 - weakness), i.e. its column is later multiplied by `weakness`;
        # a feature drawing 0 gets weight 0 and is left unchanged.
        weights = (1. - weakness) * random_state.randint(0, 1 + 1, size=(n_features,))

        # TODO: no explicit preprocessing/normalization is done here; the
        # original author was concerned it would normalize the data twice.
        # TODO: check whether rescaling before the parent's standardization
        # is a problem.
        X_rescaled = _rescale_data(X, weights)
        return super().fit(X_rescaled, y)
Lab2 2023-10-15 19:33:03 +04:00			`from sklearn.utils import check_X_y, check_random_state`
			`from sklearn.linear_model import Lasso`
			`from scipy.sparse import issparse`
			`from scipy import sparse`


			`def _rescale_data(x, weights):`
			`if issparse(x):`
			`size = weights.shape[0]`
			`weight_dia = sparse.dia_matrix((1 - weights, 0), (size, size))`
			`x_rescaled = x * weight_dia`
			`else:`
			`x_rescaled = x * (1 - weights)`

			`return x_rescaled`


			`class RandomizedLasso(Lasso):`
			`"""`
			`Randomized version of scikit-learns Lasso class.`

			`Randomized LASSO is a generalization of the LASSO. The LASSO penalises`
			`the absolute value of the coefficients with a penalty term proportional`
			to `alpha`, but the randomized LASSO changes the penalty to a randomly
			chosen value in the range `[alpha, alpha/weakness]`.

			`Parameters`
			`----------`
			`weakness : float`
			`Weakness value for randomized LASSO. Must be in (0, 1].`

			`See also`
			`--------`
			`sklearn.linear_model.LogisticRegression : learns logistic regression models`
			`using the same algorithm.`
			`"""`
			`def __init__(self, weakness=0.5, alpha=1.0, fit_intercept=True,`
			`precompute=False, copy_X=True, max_iter=1000,`
			`tol=1e-4, warm_start=False, positive=False,`
			`random_state=None, selection='cyclic'):`
			`self.weakness = weakness`
			`super(RandomizedLasso, self).__init__(`
			`alpha=alpha, fit_intercept=fit_intercept, precompute=precompute, copy_X=copy_X,`
			`max_iter=max_iter, tol=tol, warm_start=warm_start,`
			`positive=positive, random_state=random_state,`
			`selection=selection)`

			`def fit(self, X, y):`
			`"""Fit the model according to the given training data.`

			`Parameters`
			`----------`
			`X : {array-like, sparse matrix}, shape = [n_samples, n_features]`
			`The training input samples.`

			`y : array-like, shape = [n_samples]`
			`The target values.`
			`"""`
			`if not isinstance(self.weakness, float) or not (0.0 < self.weakness <= 1.0):`
			`raise ValueError('weakness should be a float in (0, 1], got %s' % self.weakness)`

			`X, y = check_X_y(X, y, accept_sparse=True)`

			`n_features = X.shape[1]`
			`weakness = 1. - self.weakness`
			`random_state = check_random_state(self.random_state)`

			`weights = weakness * random_state.randint(0, 1 + 1, size=(n_features,))`

			`# TODO: I am afraid this will do double normalization if set to true`
			`#X, y, _, _ = _preprocess_data(X, y, self.fit_intercept, normalize=self.normalize, copy=False,`
			`# sample_weight=None, return_mean=False)`

			`# TODO: Check if this is a problem if it happens before standardization`
			`X_rescaled = _rescale_data(X, weights)`
			`return super(RandomizedLasso, self).fit(X_rescaled, y)`