"""Flask app that serves a hierarchical-clustering heatmap of student data."""

import base64
import io
from functools import lru_cache

import pandas as pd
import seaborn as sns
from flask import Flask, render_template
from scipy.cluster.hierarchy import fcluster, linkage
from sklearn.preprocessing import LabelEncoder

app = Flask(__name__)

# Load data from student-mat.csv (relative path: resolved against the
# process working directory, exactly once, at import time).
data = pd.read_csv('student-mat.csv')

# Columns of interest for clustering.
CLUSTER_FEATURES = [
    'Pstatus', 'guardian', 'internet', 'romantic', 'famrel', 'freetime',
    'goout', 'Dalc', 'Walc', 'health', 'absences',
]

# Number of flat clusters cut from the linkage tree; adjust as needed.
N_CLUSTERS = 3


@lru_cache(maxsize=1)
def generate_clusters_image():
    """Render the clustered heatmap and return it as a base64 PNG string.

    The selected feature columns are label-encoded where they hold
    object (string) values, clustered with Ward linkage, and drawn with
    ``sns.clustermap`` (rows clustered, columns left in place).

    Side effect: stores the flat cluster label of every row in
    ``data['cluster']``.

    The result is memoised because it depends only on the module-level
    ``data`` frame, which is loaded once at import. Without the cache every
    request would repeat the clustering and open another large figure that
    is never closed. If ``data`` is modified afterwards, call
    ``generate_clusters_image.cache_clear()`` to force a re-render.

    Returns:
        str: the PNG image encoded as base64 text, ready to embed in a
        ``data:image/png;base64,...`` URI.
    """
    # Work on an explicit copy: assigning encoded columns into a slice of
    # ``data`` triggers pandas' SettingWithCopyWarning.
    features = data[CLUSTER_FEATURES].copy()

    # Convert categorical (object-dtype) columns to integer codes, using a
    # fresh encoder per column so no fitted state is shared between them.
    for column in features.select_dtypes(include='object').columns:
        features[column] = LabelEncoder().fit_transform(features[column])

    # Build the hierarchy and cut it into N_CLUSTERS flat clusters.
    linkage_matrix = linkage(features, method='ward')
    data['cluster'] = fcluster(linkage_matrix, N_CLUSTERS, criterion='maxclust')

    # Visualize clusters. The heatmap is drawn from the encoded features
    # only; the 'cluster' label column is deliberately not part of it.
    sns.set(style='white')
    grid = sns.clustermap(
        features,
        col_cluster=False,
        figsize=(24, 16),
        cmap="viridis",
    )

    # Save the image to an in-memory buffer and convert it to base64 text.
    img_buffer = io.BytesIO()
    grid.savefig(img_buffer, format='png')
    return base64.b64encode(img_buffer.getvalue()).decode()


@app.route('/')
def index():
    """Render ``index.html`` with the cluster heatmap as ``clusters_image``."""
    clusters_image = generate_clusters_image()
    return render_template('index.html', clusters_image=clusters_image)


if __name__ == '__main__':
    app.run(port=5000)