import pandas
from flask import Flask, render_template
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
# Flask application object; the @app.route decorators in this file register
# their view functions on it.
app = Flask(__name__)
@app.route("/")
def home():
    """Return the landing-page text for the root URL.

    Returns:
        str: A name/group line followed by the lab title, each terminated
        by a newline.
    """
    # Line breaks are written as explicit "\n" escapes: a quoted literal
    # cannot span several physical lines.
    return ("\n"
            "Жукова Алина ПИбд-41\n"
            "Лабораторная работа №4\n")
# Кластеризация
# 4 Решите задачу кластеризации методом k-means
def _encode_rated(value):
    """Encode the ``rated`` flag as a number.

    Returns 1 for a value equal to True, 0 for a value equal to False and
    2 for anything else.
    """
    # Equality (not identity) is used on purpose so that non-builtin boolean
    # objects that compare equal to True/False are encoded the same way.
    if value == True:  # noqa: E712
        return 1
    if value == False:  # noqa: E712
        return 0
    return 2


def _increment_code_to_number(code):
    """Convert a ``"<first>+<second>"`` time-control code to one number.

    The result is ``second * 100 + first``; any parts after the second one
    are ignored.
    """
    parts = code.split("+")
    return int(parts[1]) * 100 + int(parts[0])


def _min_max_normalize(series):
    """Scale a numeric pandas Series linearly so its values span [0, 1].

    A constant column has zero span; it is mapped to all zeros instead of
    dividing by zero.
    """
    low = series.min()
    span = series.max() - low
    if span == 0:
        return series * 0.0
    return (series - low) / span


@app.route("/k4_1_task_4", methods=['GET'])
def k4_1_task_4():
    """Cluster chess games with k-means and return a text report.

    Steps performed:
      1. Read ``Data_chess_games.csv``, drop rows whose ``created_at`` equals
         ``last_move_at``, drop duplicate rows and draw a fixed-seed sample
         of 5000 rows (with replacement).
      2. Build the features: encoded ``rated`` flag, time per turn rounded to
         two decimals, both ratings and a numeric form of ``increment_code``.
      3. Min-max scale the numeric features (the increment feature is
         additionally inverted as ``1 - value``).
      4. Fit k-means with 3 clusters and compute the silhouette score.
      5. Save a scatter plot of the clusters to ``static/k4_1_4_klaster.png``.

    Returns:
        str: Report text containing the silhouette score followed by the
        rendered ``4_1_l4_figure1.html`` template.
    """
    count_klasters = 3
    # NOTE(review): the three clusters were presumably meant to be read as
    # "very tense" / "tense" / "calm" games; nothing in the code assigns
    # those names to specific labels.
    colors_mass = ['blue', 'red', 'orange', 'green', 'black', 'gold', 'purple', 'pink', 'olive', 'gray',
                   'cyan', 'crimson', 'royalblue', 'greenyellow', 'maroon']

    # Per-feature weights applied after scaling; all are 1 (no re-weighting).
    rated_weight = 1
    time_per_turn_weight = 1
    increment_weight = 1

    data = pandas.read_csv('Data_chess_games.csv')
    data = data.loc[data['created_at'] - data['last_move_at'] != 0]
    data = data.drop_duplicates()
    data = data.sample(n=5000, replace=True, random_state=1)

    # .copy() gives an independent frame, so the column assignments below do
    # not write into a slice of `data`.
    corr = data[['rated', 'turns', 'white_rating', 'black_rating',
                 'created_at', 'last_move_at', 'increment_code']].copy()

    # Game duration and average time per turn, rounded to two decimals.
    corr['time_game'] = corr['last_move_at'] - corr['created_at']
    corr['middle_time_turn'] = (corr['time_game'] / corr['turns']).apply(
        lambda value: float('{:.2f}'.format(value)))
    corr['new_rated'] = corr['rated'].apply(_encode_rated)

    # Keep only rows with a time per turn below the cut-off (this also drops
    # inf/NaN values, which never compare as smaller).
    corr = corr.loc[corr['middle_time_turn'] < 10000000,
                    ['new_rated', 'middle_time_turn', 'white_rating',
                     'black_rating', 'increment_code']].copy()

    # Turn the textual time-control code into a number.
    corr['n_increment_code'] = corr['increment_code'].apply(_increment_code_to_number)

    # Scale the features; the increment feature is inverted after scaling.
    corr['new_rated'] = corr['new_rated'] * rated_weight
    corr['middle_time_turn'] = _min_max_normalize(corr['middle_time_turn']) * time_per_turn_weight
    corr['white_rating'] = _min_max_normalize(corr['white_rating'])
    corr['black_rating'] = _min_max_normalize(corr['black_rating'])
    corr['n_increment_code'] = 1 - _min_max_normalize(corr['n_increment_code']) * increment_weight

    # Final feature matrix: the textual increment_code column is left out.
    corr = corr[['new_rated', 'middle_time_turn', 'white_rating', 'black_rating', 'n_increment_code']]

    # Fit the model and score the clustering.
    kmeans = KMeans(n_clusters=count_klasters)
    kmeans.fit(corr)
    silhouette = silhouette_score(corr, kmeans.labels_, metric='euclidean')

    # Plot white rating (x) against the sum of the other four features (y).
    labels = kmeans.labels_
    x_values = corr['white_rating'].to_numpy()
    y_values = (corr['new_rated'].to_numpy() + corr['middle_time_turn'].to_numpy()
                + corr['n_increment_code'].to_numpy() + corr['black_rating'].to_numpy())

    # Draw on a dedicated figure and always close it: pyplot's implicit
    # current figure is shared process-wide, so reusing it would pile up
    # points from earlier requests and leak figures.
    fig, ax = plt.subplots()
    try:
        for klaster in range(count_klasters):
            in_cluster = labels == klaster
            ax.scatter(x_values[in_cluster], y_values[in_cluster],
                       alpha=0.6, s=25, c=colors_mass[klaster])
        fig.savefig('static/k4_1_4_klaster.png')
    finally:
        plt.close(fig)

    # Line breaks are written as explicit "\n" escapes: a quoted literal
    # cannot span several physical lines.
    return ("Кластеризация\n"
            "Вариант 10. Задание 4 - метод Kmeans\n"
            " Точность модели по метрике Силуэт: " + str(silhouette) + "\n"
            + render_template('4_1_l4_figure1.html') + "\n")
# Start Flask's built-in server only when this file is executed as a script.
# NOTE(review): debug=True is passed to app.run — confirm this entry point is
# used for local development only.
if __name__ == "__main__":
    app.run(debug=True)