import os

import numpy as np
from sklearn.manifold import TSNE
from matplotlib import pyplot as plt
from sklearn.cluster import AgglomerativeClustering
import plotly.express as px

from constants import *  # presumably supplies NUMBER_OF_CLUSTERS used below
from data import load_data, fit_model, create_data

# Grid / zero-line / border styling applied identically to both plot axes.
_AXIS_STYLE = dict(
    showgrid=True,
    gridwidth=1,
    gridcolor='lightgrey',
    zeroline=True,
    zerolinewidth=1,
    zerolinecolor='lightgrey',
    showline=True,
    linewidth=1,
    linecolor='black',
)


def show_plots(y, x_axis, y_axis, plot_name):
    """Render a coloured scatter plot and save it as an HTML file.

    Despite the name, nothing is displayed interactively: the figure is
    written to ``images/image.html``, replacing any file already there.

    Args:
        y: Per-point values used to colour the markers. Must provide a
            ``to_list()`` method (presumably a pandas Series -- confirm
            against ``load_data``).
        x_axis: Horizontal coordinates of the points.
        y_axis: Vertical coordinates of the points.
        plot_name: Title placed above the figure.
    """
    point_colors = y.to_list()
    axis_titles = {
        "x": "Price",
        "y": "Value",
    }

    fig = px.scatter(
        None,
        x=x_axis,
        y=y_axis,
        labels=axis_titles,
        opacity=1,
        color=point_colors,
    )
    fig.update_xaxes(**_AXIS_STYLE)
    fig.update_yaxes(**_AXIS_STYLE)
    fig.update_layout(title_text=plot_name, plot_bgcolor='white')
    fig.update_traces(marker=dict(size=4))

    # The output directory is created on demand so a fresh checkout does not
    # fail with FileNotFoundError when the figure is written.
    os.makedirs('images', exist_ok=True)
    fig.write_html('images/image.html')


def _tsne_iteration_count(model: TSNE):
    """Return the iteration setting of *model* across scikit-learn versions.

    Newer scikit-learn releases name the parameter ``max_iter``; older ones
    name it ``n_iter``. ``max_iter`` is preferred when it is present and set,
    otherwise the legacy ``n_iter`` attribute is read.
    """
    max_iter = getattr(model, "max_iter", None)
    if max_iter is not None:
        return max_iter
    return model.n_iter


def build_launch_name(filename: str, model: TSNE):
    """Build a plot title listing the t-SNE hyper-parameters of *model*.

    Args:
        filename: Name of the data set; becomes the first token of the title.
        model: Estimator whose parameters are listed.

    Returns:
        A string of the form
        ``"<filename> t-SNE learning_rate=... perplexity=... ... angle=..."``
        with the ``name=value`` pairs separated by single spaces.
    """
    hyper_parameters = [
        ('learning_rate', model.learning_rate),
        ('perplexity', model.perplexity),
        ('early_exaggeration', model.early_exaggeration),
        # The label stays 'n_iter' so titles keep their established format.
        ('n_iter', _tsne_iteration_count(model)),
        ('n_iter_without_progress', model.n_iter_without_progress),
        ('min_grad_norm', model.min_grad_norm),
        ('metric', model.metric),
        ('init', model.init),
        ('method', model.method),
        ('angle', model.angle),
    ]
    described = ' '.join(f'{name}={value!s}' for name, value in hyper_parameters)
    return f'{filename} t-SNE {described}'


def launch(filename, X, Y, model: TSNE):
    """Fit *model* on *X* and save the resulting scatter plot.

    Args:
        filename: Data-set name used in the plot title.
        X: Input handed to ``fit_model`` together with *model*.
        Y: Per-point values used to colour the plot (see ``show_plots``).
        model: t-SNE estimator to fit.
    """
    # fit_model is expected to return the two plot coordinates as a pair.
    x_axis, y_axis = fit_model(model, X)
    plot_name = build_launch_name(filename, model)
    show_plots(Y, x_axis, y_axis, plot_name=plot_name)


def draw_experimental_clusters():
    """Cluster the data from ``create_data`` and show it with matplotlib.

    The data is converted to a NumPy array, grouped into
    ``NUMBER_OF_CLUSTERS`` clusters with Ward-linkage agglomerative
    clustering, and its first two columns are drawn as a scatter plot
    coloured by cluster label.
    """
    points = np.array(create_data())

    # Ward linkage only supports the Euclidean metric, which is also the
    # default, so it is not passed explicitly. The former ``affinity``
    # keyword is rejected by newer scikit-learn releases.
    clusterer = AgglomerativeClustering(
        n_clusters=NUMBER_OF_CLUSTERS,
        linkage='ward',
    )
    cluster_labels = clusterer.fit_predict(points)

    # Axis captions are user-facing Russian text: "Год" = year, "Цена" = price.
    plt.xlabel("Год", fontweight="bold")
    plt.ylabel("Цена", fontweight="bold")
    plt.scatter(points[:, 0], points[:, 1], c=cluster_labels, cmap='rainbow')
    plt.show()


def tnse_cluster():
    """Run t-SNE on the data from ``load_data`` and save the plot."""
    X, Y = load_data()
    model = TSNE(
        n_components=2,
        learning_rate=200,
        perplexity=50,
        early_exaggeration=6,
        angle=0.1,
    )
    launch(filename='true_car_listings.csv', X=X, Y=Y, model=model)