139 lines
4.9 KiB
Python
139 lines
4.9 KiB
Python
|
import pandas
|
||
|
import numpy as np
|
||
|
from flask import Flask
|
||
|
from sklearn.feature_selection import RFE
|
||
|
from sklearn.linear_model import Lasso, LinearRegression
|
||
|
from sklearn.preprocessing import MinMaxScaler
|
||
|
|
||
|
# Flask application object; the @app.route decorators in this module register
# their view functions on it, and the __main__ guard runs it.
app = Flask(__name__)
|
||
|
|
||
|
|
||
|
@app.route("/")
def home():
    """Render the landing page of the lab assignment.

    The page shows the author/lab headings and a single form whose submit
    button issues a GET request to the feature-ranking view.

    Returns:
        str: Static HTML markup of the page.
    """
    # The form action is a relative path, so the button keeps working
    # regardless of the host name or port the application is served from.
    return (
        "<html>"
        "<h1>Жукова Алина ПИбд-41</h1>"
        "<h1>Лабораторная работа №2</h1>"
        "<table>"
        "<td>"
        "<form Action='/k4_1_task_2' Method=get>"
        "<input type=submit value='Ранжирование признаков'>"
        "</form>"
        "</td>"
        "</table>"
        "</html>"
    )
|
||
|
|
||
|
# Feature ranking.
# Methods: linear regression, Lasso, recursive feature elimination (RFE).
# Show the score of every feature under each method plus the mean score,
# and report the 4 most important features (indices/names).


def _make_friedman_data():
    """Generate the synthetic regression data set used by the report.

    Returns:
        tuple: ``(X, Y)`` where ``X`` is a 750 x 14 array of uniform samples
        and ``Y`` is the target built from the first five columns of ``X``.
    """
    # Fixed seed so every request produces the same report.
    np.random.seed(0)
    size = 750
    X = np.random.uniform(0, 1, (size, 14))
    # Target function: the Friedman regression problem.
    # NOTE(review): the noise term is a single scalar draw, so it shifts all
    # targets by one constant rather than adding per-sample noise. Left
    # unchanged so that the reported scores stay the same - confirm intent.
    Y = (10 * np.sin(np.pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - .5)**2 +
         10*X[:, 3] + 5*X[:, 4]**5 + np.random.normal(0, 1))
    # Dependent features: the last four columns become noisy copies of the
    # first four (done after Y is computed, so Y does not depend on them).
    X[:, 10:] = X[:, :4] + np.random.normal(0, .025, (size, 4))
    return X, Y


def _top_features(scores, count=4):
    """Return the names of the ``count`` highest-scoring features, best first.

    Among equal scores the feature that appears later in ``scores`` is
    listed first.
    """
    # sorted() is stable even with reverse=True, so feeding it the items in
    # reversed insertion order puts later entries ahead of earlier equal ones.
    latest_first = reversed(list(scores.items()))
    ordered = sorted(latest_first, key=lambda item: item[1], reverse=True)
    return [name for name, _ in ordered[:count]]


@app.route("/k4_1_task_2", methods=['GET'])
def k4_1_task_2():
    """Rank the features with three methods and render the result as HTML.

    Fits linear regression, Lasso and RFE on the generated data, converts
    each method's output to per-feature scores via ``rank_to_dict``, averages
    the scores per feature and names the four features with the highest mean.

    Returns:
        str: HTML page with the top-4 feature names and a table holding the
        "Linear", "Lasso", "RFE" and "Mean" score of every feature.
    """
    X, Y = _make_friedman_data()

    linear_model = LinearRegression()
    linear_model.fit(X, Y)

    lasso = Lasso(alpha=.05)
    lasso.fit(X, Y)

    rfe = RFE(linear_model, n_features_to_select=1)
    rfe.fit(X, Y)
    # RFE assigns rank 1 to the best feature; invert the ranks so that, like
    # the coefficient magnitudes, a larger value means a more important feature.
    rfe_scores = rfe.ranking_.max() - rfe.ranking_ + 1

    names = ["x%s" % i for i in range(1, 15)]

    ranks = {
        "Linear": rank_to_dict(linear_model.coef_, names),
        "Lasso": rank_to_dict(lasso.coef_, names),
        "RFE": rank_to_dict(rfe_scores, names),
    }

    # Mean score of each feature across all methods, rounded to 2 decimals.
    mean = {
        name: round(
            sum(method_scores[name] for method_scores in ranks.values())
            / len(ranks),
            2,
        )
        for name in names
    }

    top_listing = "; ".join(
        "%d) %s" % (place, name)
        for place, name in enumerate(_top_features(mean), start=1)
    )

    # Every score dict was built from `names`, so the values line up with
    # the index.
    df = pandas.DataFrame(
        {
            "Linear": list(ranks["Linear"].values()),
            "Lasso": list(ranks["Lasso"].values()),
            "RFE": list(ranks["RFE"].values()),
            "Mean": list(mean.values()),
        },
        index=names,
    )

    return (
        "<html>"
        "<h1>Ранжирование признаков</h1>"
        "<h2>Вариант 10. Линейная регрессия, Лассо, Рекурсивное сокращение признаков</h2>"
        "<h2>Отобразить оценки/значения каждого признака каждым методом, и среднюю оценку</h2>"
        "<h2>4 самых важных признака: " + top_listing + "</h2>"
        "<div>" + df.to_html() + "</div>"
        "</html>"
    )
|
||
|
|
||
|
def rank_to_dict(ranks, names):
    """Turn raw importance values into a ``{feature name: score}`` mapping.

    The absolute values of ``ranks`` are min-max scaled and rounded to two
    decimal places.

    Args:
        ranks: Array-like of raw importance values, one per feature.
        names: Feature names in the same order as ``ranks``.

    Returns:
        dict: Feature name mapped to its scaled, rounded score.
    """
    # Only the magnitude matters, not the sign.
    magnitudes = np.asarray(np.abs(ranks))

    # MinMaxScaler scales column-wise, so the scores are passed as a single
    # column; -1 lets the row count follow the input instead of assuming
    # exactly 14 features.
    scaled = MinMaxScaler().fit_transform(magnitudes.reshape(-1, 1)).ravel()

    return {name: round(score, 2) for name, score in zip(names, scaled)}
|
||
|
|
||
|
if __name__ == "__main__":
    # Executed as a script: start the application server with debug enabled.
    app.run(debug=True)
|