IIS_2023_1/volkov_rafael_lab_2/app.py
2023-12-05 12:27:36 +04:00

97 lines
2.8 KiB
Python

from flask import Flask, request, render_template
from sklearn.linear_model import Lasso, LinearRegression
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
import numpy as np
# Flask application object; the route handler further down is registered on it.
app = Flask(__name__)
# Generate a reproducible synthetic regression dataset.
# Seeding NumPy's global RNG fixes every draw below; the order of the draws matters.
np.random.seed(0)
size = 750
# 750 samples x 14 features, each drawn uniformly from [0, 1).
X = np.random.uniform(0, 1, (size, 14))
# The target is built from columns 0-4 only, plus Gaussian noise (mean 0, std 1).
Y = (10 * np.sin(np.pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - .5) ** 2 +
10 * X[:, 3] + 5 * X[:, 4] ** 5 + np.random.normal(0, 1, size))
# Overwrite columns 10-13 with slightly noisy copies of columns 0-3, so the data
# contains features that are strongly correlated with informative ones.
# Columns 5-9 never enter Y.
X[:, 10:] = X[:, :4] + np.random.normal(0, .025, (size, 4))
# Define the models. They are only constructed here; fitting happens in the
# rank_* functions below.
lasso = Lasso(alpha=0.1)  # alpha=0.1 is an example regularization strength; tune as needed
# n_features_to_select=1 asks RFE to keep eliminating until a single feature is left.
rfe = RFE(estimator=LinearRegression(), n_features_to_select=1)
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=0)
# Number of bootstrap samples.
# NOTE(review): not referenced anywhere in the visible code - confirm whether it is still needed.
n_bootstrap = 100
# Module-level dictionary of rankings: filled by rank_features() and passed to
# the template by the index view.
feature_rankings = {}
def rank_lasso():
    """Score each feature by the magnitude of its Lasso coefficient.

    Fits the module-level ``lasso`` model on ``X`` and ``Y``, takes the
    absolute value of the learned coefficients and rescales them with
    ``min_max_scale``.

    Returns:
        The scaled scores exactly as produced by ``min_max_scale``.
    """
    fitted_model = lasso.fit(X, Y)
    magnitudes = np.abs(fitted_model.coef_)
    return min_max_scale(magnitudes)
def rank_rfe():
    """Score each feature from its recursive-feature-elimination rank.

    Fits the module-level ``rfe`` selector on ``X`` and ``Y`` and converts
    its ``ranking_`` attribute into a score where a higher value means a
    more important feature, matching the orientation of the Lasso and
    random-forest scores it is later averaged with.

    Returns:
        The scaled scores exactly as produced by ``min_max_scale``.
    """
    rfe.fit(X, Y)
    # RFE gives rank 1 to the best feature and larger ranks to worse ones.
    # Scaling the raw ranks would therefore give the best feature a score of
    # 0.0. Negating first flips the order so the best feature scales to 1.0.
    return min_max_scale(-rfe.ranking_)
def rank_random_forest_regressor():
    """Score each feature by its random-forest importance.

    Fits the module-level ``rf_regressor`` on ``X`` and ``Y`` and rescales
    its ``feature_importances_`` with ``min_max_scale``.

    Returns:
        The scaled scores exactly as produced by ``min_max_scale``.
    """
    forest = rf_regressor.fit(X, Y)
    return min_max_scale(forest.feature_importances_)
def min_max_scale(arr):
    """Rescale a sequence of scores linearly onto the range [0, 1].

    Args:
        arr: Array-like of numeric scores.

    Returns:
        The output of ``MinMaxScaler.fit_transform`` for the scores arranged
        as a single column, i.e. an array with one row per input value and
        one column.
    """
    # MinMaxScaler works per column, so present the scores as one column.
    as_column = np.array(arr).reshape(-1, 1)
    return MinMaxScaler().fit_transform(as_column)
def rank_features():
    """Rank the features with every method and pick the four best on average.

    Runs the Lasso, RFE and random-forest rankers, averages their scores
    per feature, and selects the four features with the highest mean score.

    Side effects:
        Stores the three per-method rankings and the mean under the keys
        'Lasso', 'RFE', 'Random Forest Regressor' and 'Mean Ranking' in the
        module-level ``feature_rankings`` dictionary.

    Returns:
        dict: Template context with
            'feature_rankings'    - the module-level rankings dictionary,
            'X_values'            - rows of ``X`` restricted to the top-4 columns,
            'Y_values'            - the target values as a list,
            'top_4_feature_names' - 1-based names of the top-4 features,
                                    best first.
    """
    # Keep this run's method scores in a local dict so that a 'Mean Ranking'
    # entry left in the shared dictionary by an earlier call is never
    # averaged in as if it were a fourth method.
    method_rankings = {
        'Lasso': rank_lasso(),
        'RFE': rank_rfe(),
        'Random Forest Regressor': rank_random_forest_regressor(),
    }
    mean_ranking = np.mean(list(method_rankings.values()), axis=0)

    feature_rankings.update(method_rankings)
    feature_rankings['Mean Ranking'] = mean_ranking

    # The scores may arrive as (n_features, 1) column vectors. argsort on
    # that shape sorts along the length-1 axis and yields only zeros, so
    # flatten to one score per feature before ordering.
    flat_mean = np.ravel(mean_ranking)
    # Indices of the four highest mean scores, best first.
    top_4_indices = np.argsort(flat_mean)[-4:][::-1]
    # Human-readable, 1-based feature names.
    top_4_feature_names = [f'Feature {i + 1}' for i in top_4_indices]

    return {
        'feature_rankings': feature_rankings,
        'X_values': X[:, top_4_indices].tolist(),
        'Y_values': Y.tolist(),
        'top_4_feature_names': top_4_feature_names,
    }
@app.route('/', methods=['GET', 'POST'])
def index():
    """Serve the main page.

    A POST request recomputes the feature rankings and renders the page
    with the full context returned by ``rank_features``. Any other request
    renders the page with the current contents of the module-level
    ``feature_rankings`` dictionary only.
    """
    if request.method != 'POST':
        return render_template('index.html', feature_rankings=feature_rankings)
    return render_template('index.html', **rank_features())
# Start Flask's built-in server only when this file is executed directly.
if __name__ == "__main__":
    # NOTE(review): debug=True enables the interactive debugger and the
    # reloader; keep it for local development only.
    app.run(debug=True)