diff --git a/lipatov_ilya_lab_2/README.md b/lipatov_ilya_lab_2/README.md
new file mode 100644
index 0000000..e2c4911
--- /dev/null
+++ b/lipatov_ilya_lab_2/README.md
@@ -0,0 +1,38 @@
+## Laboratory work No. 2
+
+### Feature ranking
+
+## Completed by Ilya Lipatov, student of group PIbd-41
+
+### How to run the lab:
+
+* install python, numpy, matplotlib, sklearn
+* run the project (the entry point is the lab2 script)
+
+### Technologies used:
+
+* The `Python` programming language with the numpy, matplotlib and sklearn libraries
+* The `PyCharm` IDE
+
+### What the lab does:
+
+* generates data and trains the RandomizedLasso, Ridge and Random Forest Regressor models.
+* ranks the features with the RandomizedLasso, Ridge and Random Forest Regressor models.
+* displays the resulting score of each feature for each method/model, together with the mean score.
+
+### Example output:
+
+#### Results:
+* RandomizedLasso: 1, 2, 4, 5
+* Ridge: 4, 11, 12, and 1 or 2 (equal scores)
+* Random Forest Regressor: 4, 1, 11, 12
+
+#### Mean: features 4, 1, 2 and 5
+
+#### Plots of the feature-ranking results for each model and the mean score:
+
+![Result](result.png)
+
+#### Mean scores of the features for each model and the mean scores across models:
+
+![Means](means.png)
\ No newline at end of file
diff --git a/lipatov_ilya_lab_2/RandomizedLasso.py b/lipatov_ilya_lab_2/RandomizedLasso.py
new file mode 100644
index 0000000..0140286
--- /dev/null
+++ b/lipatov_ilya_lab_2/RandomizedLasso.py
@@ -0,0 +1,44 @@
+from sklearn.utils import check_X_y, check_random_state
+from sklearn.linear_model import Lasso
+from scipy.sparse import issparse
+from scipy import sparse
+
+
+def _rescale_data(x, weights):
+    # Multiply every column of x by (1 - weights); use a diagonal sparse
+    # matrix when x itself is sparse so the result stays sparse.
+    if issparse(x):
+        size = weights.shape[0]
+        weight_dia = sparse.dia_matrix((1 - weights, 0), (size, size))
+        x_rescaled = x * weight_dia
+    else:
+        x_rescaled = x * (1 - weights)
+
+    return x_rescaled
+
+
+class RandomizedLasso(Lasso):
+    def __init__(self, weakness=0.5, alpha=1.0, fit_intercept=True,
+                 precompute=False, copy_x=True, max_iter=1000,
+                 tol=1e-4, warm_start=False, positive=False,
+                 random_state=None, selection='cyclic'):
+        self.weakness = weakness
+        super(RandomizedLasso, self).__init__(
+            alpha=alpha, fit_intercept=fit_intercept,
+            precompute=precompute, copy_X=copy_x,
+            max_iter=max_iter, tol=tol, warm_start=warm_start,
+            positive=positive, random_state=random_state,
+            selection=selection)
+
+    def fit(self, x, y):
+        if not isinstance(self.weakness, float) or not (0.0 < self.weakness <= 1.0):
+            raise ValueError('weakness should be a float in (0, 1], got %s' % self.weakness)
+
+        x, y = check_X_y(x, y, accept_sparse=True)
+
+        n_features = x.shape[1]
+        weakness = 1. - self.weakness
+        random_state = check_random_state(self.random_state)
+
+        # Each column is later multiplied by (1 - weights), i.e. by either 1 or self.weakness.
+        weights = weakness * random_state.randint(0, 1 + 1, size=(n_features,))
+
+        x_rescaled = _rescale_data(x, weights)
+        return super(RandomizedLasso, self).fit(x_rescaled, y)
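For reference, a minimal usage sketch of the `RandomizedLasso` class added in RandomizedLasso.py. The synthetic data, parameter values and number of repeated fits are illustrative assumptions, not part of the lab; the idea is that each fit randomly rescales a subset of columns by the `weakness` factor, so averaging the absolute coefficients over several fits gives a rough stability score for each feature.

```python
import numpy as np

from RandomizedLasso import RandomizedLasso

# Illustrative data: only the first three columns influence the target.
rng = np.random.RandomState(0)
X = rng.uniform(0, 1, (200, 6))
y = 3 * X[:, 0] + 2 * X[:, 1] - 4 * X[:, 2] + rng.normal(0, 0.1, 200)

# Repeat the randomized fit and accumulate |coef_| as a stability score.
n_runs = 20
scores = np.zeros(X.shape[1])
for seed in range(n_runs):
    model = RandomizedLasso(weakness=0.5, alpha=0.01, random_state=seed)
    model.fit(X, y)
    scores += np.abs(model.coef_)

print(scores / n_runs)  # the informative columns should score highest
```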
diff --git a/lipatov_ilya_lab_2/lab2.py b/lipatov_ilya_lab_2/lab2.py
new file mode 100644
index 0000000..59d48b4
--- /dev/null
+++ b/lipatov_ilya_lab_2/lab2.py
@@ -0,0 +1,67 @@
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.preprocessing import MinMaxScaler
+from RandomizedLasso import RandomizedLasso
+from sklearn.linear_model import Ridge
+from matplotlib import pyplot as plt
+import numpy as np
+
+# Synthetic regression problem: features 1-5 drive the target, features 11-14
+# are noisy copies of features 1-4, and the remaining features are pure noise.
+np.random.seed(0)
+size = 1000
+X = np.random.uniform(0, 1, (size, 14))
+Y = (10 * np.sin(np.pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - .5) ** 2 + 10 * X[:, 3] + 5 * X[:, 4] ** 5 + np.random.normal(0, 1))
+X[:, 10:] = X[:, :4] + np.random.normal(0, .025, (size, 4))
+
+ridge = Ridge(alpha=1.0)
+ridge.fit(X, Y)
+
+lasso = RandomizedLasso(alpha=0.007)
+lasso.fit(X, Y)
+
+randForestRegression = RandomForestRegressor(max_depth=4, min_samples_leaf=1, min_impurity_decrease=0, ccp_alpha=0)
+randForestRegression.fit(X, Y)
+
+
+def rank_to_dict(ranks, names):
+    # Normalize the absolute scores to [0, 1] so the models are comparable.
+    ranks = np.abs(ranks)
+    minmax = MinMaxScaler()
+    ranks = minmax.fit_transform(np.array(ranks).reshape(-1, 1)).ravel()
+    ranks = map(lambda x: round(x, 2), ranks)
+    return dict(zip(names, ranks))
+
+
+ranks = {'Ridge': {}, 'RandomizedLasso': {}, 'RandomForestRegressor': {}}
+names = ["x%s" % i for i in range(1, 15)]
+
+ranks["Ridge"] = rank_to_dict(ridge.coef_, names)
+ranks["RandomizedLasso"] = rank_to_dict(lasso.coef_, names)
+ranks["RandomForestRegressor"] = rank_to_dict(randForestRegression.feature_importances_, names)
+
+# Average the three per-model scores for every feature.
+mean = {}
+
+for key, value in ranks.items():
+    for item in value.items():
+        if item[0] not in mean:
+            mean[item[0]] = 0
+        mean[item[0]] += item[1]
+
+for key, value in mean.items():
+    res = value / len(ranks)
+    mean[key] = round(res, 2)
+
+print('VALUES')
+for r in ranks.items():
+    print(r)
+
+print('MEAN')
+print(mean)
+
+for i, (model_name, features) in enumerate(ranks.items()):
+    subplot = plt.subplot(2, 2, i + 1)
+    subplot.set_title(model_name)
+    subplot.bar(list(features.keys()), list(features.values()))
+
+subplot = plt.subplot(2, 2, 4)
+subplot.set_title('Mean')
+subplot.bar(list(mean.keys()), list(mean.values()))
+
+plt.show()
diff --git a/lipatov_ilya_lab_2/means.png b/lipatov_ilya_lab_2/means.png
new file mode 100644
index 0000000..632cec2
Binary files /dev/null and b/lipatov_ilya_lab_2/means.png differ
diff --git a/lipatov_ilya_lab_2/result.png b/lipatov_ilya_lab_2/result.png
new file mode 100644
index 0000000..ceeec39
Binary files /dev/null and b/lipatov_ilya_lab_2/result.png differ
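As a side note on how the "Mean: features 4, 1, 2 and 5" line in the README follows from lab2.py: a minimal sketch of sorting the averaged scores, assuming the `mean` dictionary produced by lab2.py. The `top_features` helper and the example values below are hypothetical, added only for illustration.

```python
def top_features(mean_scores, k=4):
    # Return the k feature names with the highest averaged score.
    ranked = sorted(mean_scores.items(), key=lambda item: item[1], reverse=True)
    return [name for name, score in ranked[:k]]


# Hypothetical example values; with the real `mean` dict from lab2.py this
# should reproduce the ordering quoted in the README (x4, x1, x2, x5).
example_mean = {'x1': 0.7, 'x2': 0.55, 'x3': 0.05, 'x4': 0.9, 'x5': 0.35}
print(top_features(example_mean))
```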