73 lines
2.5 KiB
Python
73 lines
2.5 KiB
Python
import numpy as np
|
|
from sklearn.manifold import TSNE
|
|
from matplotlib import pyplot as plt
|
|
from sklearn.cluster import AgglomerativeClustering
|
|
import plotly.express as px
|
|
from constants import *
|
|
from data import load_data, fit_model, create_data
|
|
|
|
|
|
def show_plots(y, x_axis, y_axis, plot_name, output_path='images/image.html'):
    """Build a colour-coded scatter plot and save it as an HTML file.

    Despite the name, nothing is displayed interactively: the figure is
    only written to ``output_path``.

    Args:
        y: Per-point values used to colour the markers. Must provide a
            ``to_list()`` method (presumably a pandas Series - confirm
            against the caller).
        x_axis: Horizontal coordinates of the points.
        y_axis: Vertical coordinates of the points.
        plot_name: Text used as the figure title.
        output_path: Destination of the HTML file. Defaults to
            ``images/image.html``, the location that used to be hard-coded,
            so existing callers keep their behaviour.
    """
    import os  # local import so the block does not rely on file-level imports

    only_labels = y.to_list()

    # Display names for the "x" and "y" dimensions of the scatter plot.
    labels = {
        "x": "Price",
        "y": "Value",
    }

    fig = px.scatter(None, x=x_axis, y=y_axis, labels=labels, opacity=1, color=only_labels)

    # Both axes share exactly the same grid / zero-line / border styling.
    axis_style = dict(
        showgrid=True, gridwidth=1, gridcolor='lightgrey',
        zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey',
        showline=True, linewidth=1, linecolor='black',
    )
    fig.update_xaxes(**axis_style)
    fig.update_yaxes(**axis_style)

    fig.update_layout(title_text=plot_name, plot_bgcolor='white')
    fig.update_traces(marker=dict(size=4))

    # Create the target directory up front; otherwise writing fails when
    # it does not exist yet.
    target_dir = os.path.dirname(output_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    fig.write_html(output_path)
|
|
|
|
|
|
def build_launch_name(filename: str, model: "TSNE") -> str:
    """Return a one-line description of a t-SNE run, used as a plot title.

    The result has the form
    ``"<filename> t-SNE learning_rate=... perplexity=... ... angle=..."``
    with every hyper-parameter rendered through ``str``.

    Args:
        filename: Name of the data set the model is applied to.
        model: Estimator whose hyper-parameters are read as attributes.

    Returns:
        The assembled description string.
    """
    # scikit-learn 1.5 renamed ``n_iter`` to ``max_iter`` and later removed
    # the old attribute. Prefer ``n_iter`` when it holds a real value so the
    # output is unchanged for older estimators; otherwise use ``max_iter``.
    n_iter = getattr(model, "n_iter", "deprecated")
    if isinstance(n_iter, str) and n_iter == "deprecated":
        n_iter = model.max_iter

    params = (
        ("learning_rate", model.learning_rate),
        ("perplexity", model.perplexity),
        ("early_exaggeration", model.early_exaggeration),
        ("n_iter", n_iter),
        ("n_iter_without_progress", model.n_iter_without_progress),
        ("min_grad_norm", model.min_grad_norm),
        ("metric", model.metric),
        ("init", model.init),
        ("method", model.method),
        ("angle", model.angle),
    )
    rendered = " ".join(f"{key}={value!s}" for key, value in params)
    return f"{filename} t-SNE {rendered}"
|
|
|
|
|
|
def launch(filename, X, Y, model: TSNE):
    """Fit ``model`` on ``X`` and plot the result coloured by ``Y``.

    Args:
        filename: Data set name; only used to build the plot title.
        X: Input passed to ``fit_model`` together with ``model``.
        Y: Per-point values forwarded to ``show_plots`` for colouring.
        model: Estimator to fit; its settings also go into the title.
    """
    # fit_model is expected to return a pair, unpacked into the two axes.
    embedding = fit_model(model, X)
    x_axis, y_axis = embedding

    title = build_launch_name(filename, model)
    show_plots(Y, x_axis, y_axis, plot_name=title)
|
|
|
|
|
|
def draw_experimental_clusters():
    """Cluster the data from ``create_data`` and show it as a scatter plot.

    The data is converted to a NumPy array, grouped into
    ``NUMBER_OF_CLUSTERS`` clusters with Ward-linkage agglomerative
    clustering, and its first two columns are plotted with one colour per
    cluster. A matplotlib window is opened via ``plt.show()``.
    """
    points = np.array(create_data())

    # ``affinity`` is not passed: newer scikit-learn releases removed the
    # argument, and Euclidean distance is both the default and the only
    # metric Ward linkage accepts, so the clustering is unchanged.
    clustering = AgglomerativeClustering(n_clusters=NUMBER_OF_CLUSTERS, linkage='ward')
    point_labels = clustering.fit_predict(points)

    # Axis captions are user-facing Russian text ("Year" / "Price").
    plt.xlabel("Год", fontweight="bold")
    plt.ylabel("Цена", fontweight="bold")

    plt.scatter(points[:, 0], points[:, 1], c=point_labels, cmap='rainbow')
    plt.show()
|
|
|
|
|
|
def tnse_cluster():
    """Run the t-SNE pipeline on the data returned by ``load_data``.

    Builds a two-component TSNE estimator with fixed hyper-parameters and
    hands it, together with the loaded data, to ``launch``.
    """
    samples, labels = load_data()

    tsne_params = dict(
        n_components=2,
        learning_rate=200,
        perplexity=50,
        early_exaggeration=6,
        angle=0.1,
    )
    launch(
        filename='true_car_listings.csv',
        X=samples,
        Y=labels,
        model=TSNE(**tsne_params),
    )
|