115 lines
3.1 KiB
Python
115 lines
3.1 KiB
Python
|
from flask import Flask, request, render_template
|
||
|
from sklearn.linear_model import LinearRegression, Ridge, Lasso
|
||
|
from sklearn.preprocessing import MinMaxScaler
|
||
|
import pandas as pd
|
||
|
import numpy as np
|
||
|
|
||
|
# Flask application object; the route handler in this module is registered on it.
app = Flask(__name__)
|
||
|
|
||
|
# Synthetic regression data set. The global NumPy RNG is seeded so the
# generated X / Y are identical on every start of the application.
np.random.seed(0)
size = 750

# 14 uniformly distributed features in [0, 1).
X = np.random.uniform(0, 1, (size, 14))

# The target depends only on the first five features, plus unit Gaussian noise.
_signal = (10 * np.sin(np.pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - .5) ** 2 +
           10 * X[:, 3] + 5 * X[:, 4] ** 5)
Y = _signal + np.random.normal(0, 1, size)

# The last four columns are overwritten with noisy copies of the first four,
# i.e. features that are strongly correlated with the informative ones.
X[:, 10:] = X[:, :4] + np.random.normal(0, .025, (size, 4))
|
||
|
|
||
|
# Shared estimator instances. They are only created here; fitting happens
# inside the rank_* functions each time a ranking is requested.
lr, ridge = LinearRegression(), Ridge()
lasso = Lasso(alpha=0.1)  # example regularisation strength; tune as needed
|
||
|
|
||
|
# How many bootstrap resamples the randomized-lasso ranking fits.
n_bootstrap = 100

# Module-level store of the most recent scores, keyed by method name.
# It starts empty and is filled in by rank_features().
feature_rankings = dict()
|
||
|
|
||
|
|
||
|
def rank_lr():
    """Score every feature with plain linear regression.

    Fits the module-level ``lr`` estimator on ``X`` / ``Y`` and returns the
    absolute values of its coefficients after ``min_max_scale``.
    """
    magnitudes = np.abs(lr.fit(X, Y).coef_)
    return min_max_scale(magnitudes)
|
||
|
|
||
|
|
||
|
def rank_ridge():
    """Score every feature with ridge regression.

    Fits the module-level ``ridge`` estimator on ``X`` / ``Y`` and returns the
    absolute values of its coefficients after ``min_max_scale``.
    """
    magnitudes = np.abs(ridge.fit(X, Y).coef_)
    return min_max_scale(magnitudes)
|
||
|
|
||
|
def rank_lasso():
    """Score every feature with lasso regression.

    Fits the module-level ``lasso`` estimator on ``X`` / ``Y`` and returns the
    absolute values of its coefficients after ``min_max_scale``.
    """
    magnitudes = np.abs(lasso.fit(X, Y).coef_)
    return min_max_scale(magnitudes)
|
||
|
|
||
|
def rank_randomized_lasso():
    """Score features by averaging lasso coefficients over bootstrap resamples.

    Draws ``n_bootstrap`` resamples of the rows of ``X`` / ``Y`` (with
    replacement, same size as the data), refits the module-level ``lasso`` on
    each one, and averages the absolute coefficients. The averaged scores are
    returned after ``min_max_scale``.

    Uses the global NumPy random state, so results vary between calls.
    """
    coef_total = np.zeros(X.shape[1])

    for _ in range(n_bootstrap):
        # Row indices sampled with replacement define one bootstrap resample.
        rows = np.random.choice(size, size=size, replace=True)
        lasso.fit(X[rows], Y[rows])
        coef_total += np.abs(lasso.coef_)

    return min_max_scale(coef_total / n_bootstrap)
|
||
|
|
||
|
|
||
|
def min_max_scale(arr):
    """Rescale a sequence of scores to the range [0, 1].

    The input is treated as a single column of values and passed through
    scikit-learn's ``MinMaxScaler``.

    Returns:
        A 2-D array of shape ``(len(arr), 1)``; note that it is a column,
        not a flat vector.
    """
    column = np.array(arr).reshape(-1, 1)
    return MinMaxScaler().fit_transform(column)
|
||
|
|
||
|
|
||
|
def rank_features():
    """Run every ranking method, average the scores and pick the top 4 features.

    Side effect: the module-level ``feature_rankings`` dict is updated with one
    entry per method plus a ``'Mean Ranking'`` entry.

    Returns:
        dict: template context with the keys
            ``feature_rankings``    -- the shared dict of per-method scores,
            ``X_values``            -- rows of ``X`` restricted to the top 4 columns,
            ``Y_values``            -- the target values as a list,
            ``top_4_feature_names`` -- 1-based names of the top 4 features,
                                       most important first.
    """
    method_rankings = {
        'Linear Regression': rank_lr(),
        'Ridge': rank_ridge(),
        'Lasso': rank_lasso(),
        'Randomized Lasso': rank_randomized_lasso(),
    }
    feature_rankings.update(method_rankings)

    # Average only the scores computed in this call. Averaging over
    # feature_rankings.values() would also pull in the 'Mean Ranking' entry
    # left behind by a previous call and skew the result.
    mean_ranking = np.mean(list(method_rankings.values()), axis=0)
    feature_rankings['Mean Ranking'] = mean_ranking

    # The per-method scores are (n_features, 1) columns, so the mean is too.
    # argsort works along the last axis by default; on a column that axis has
    # length 1 and every index would come out as 0. Flatten before sorting.
    top_4_indices = np.argsort(np.ravel(mean_ranking))[-4:][::-1]

    # Feature names are 1-based.
    top_4_feature_names = [f'Feature {i + 1}' for i in top_4_indices]

    return {
        'feature_rankings': feature_rankings,
        'X_values': X[:, top_4_indices].tolist(),
        'Y_values': Y.tolist(),
        'top_4_feature_names': top_4_feature_names,
    }
|
||
|
|
||
|
|
||
|
@app.route('/', methods=['GET', 'POST'])
def index():
    """Serve the main page.

    A POST recomputes the feature rankings and renders the page with the full
    result context; any other request renders the page with whatever is
    currently stored in ``feature_rankings``.
    """
    if request.method != 'POST':
        return render_template('index.html', feature_rankings=feature_rankings)

    return render_template('index.html', **rank_features())
|
||
|
|
||
|
|
||
|
# Script entry point: start Flask's built-in server when run directly.
# NOTE(review): debug=True turns on the reloader and the interactive debugger;
# keep it for local development only.
if __name__ == '__main__':
    app.run(debug=True)
|