73 lines
3.0 KiB
Python
73 lines
3.0 KiB
Python
import math

import joblib
import pandas as pd
from flask import Flask, render_template, request
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Binarizer
from sklearn.preprocessing import StandardScaler
|
|
|
|
# Flask application instance; the @app.route decorators in this file register
# their view functions on it, and the __main__ guard starts its server.
app = Flask(__name__)
|
|
|
|
# Load the dataset (semicolon-separated CSV).
data_bgg = pd.read_csv("bgg_dataset.csv", delimiter=";")

# Columns used for modelling; 'Rating Average' is turned into the target below.
selected_columns_bgg = [
    'Year Published',
    'Users Rated',
    'Rating Average',
    'BGG Rank',
    'Owned Users',
    'Complexity Average',
]

# Work on an explicit copy: assigning columns into a slice of data_bgg is
# ambiguous in pandas and triggers SettingWithCopyWarning.
features = data_bgg[selected_columns_bgg].copy()

# 'Rating Average' and 'Complexity Average' are handled as text using a
# decimal comma; replace the comma with a dot so they can be cast to float.
features['Rating Average'] = (
    features['Rating Average'].str.replace(',', '.', regex=False).astype(float)
)
features['Complexity Average'] = (
    features['Complexity Average'].str.replace(',', '.', regex=False).astype(float)
)

# Replace missing values with the mean of their column.
features = features.fillna(features.mean())

# Classification threshold: the mean rating.
threshold = features['Rating Average'].mean()

# Predictors are every selected column except the rating itself.
X_bgg = features.drop(columns='Rating Average')
# Boolean target: True when the rating is above the mean, False otherwise.
y_bgg = features['Rating Average'] > threshold

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train_bgg, X_test_bgg, y_train_bgg, y_test_bgg = train_test_split(
    X_bgg, y_bgg, test_size=0.2, random_state=42
)
|
|
|
|
# Train the logistic-regression classifier.
#
# The predictors are standardized instead of binarized. The previous
# Binarizer step used the mean *rating* as its cut-off for every predictor
# (year, rank, user counts, complexity), a value that has no meaning on those
# scales and reduced nearly every column to a constant. Standardizing keeps
# the information in each column and gives the solver comparably scaled inputs.
logistic_regression_model = Pipeline([
    ('scaler', StandardScaler()),
    ('logistic_regression', LogisticRegression()),
])
logistic_regression_model.fit(X_train_bgg, y_train_bgg)

# Persist the fitted pipeline to disk.
joblib.dump(logistic_regression_model, 'logistic_regression_model.joblib')

# Load the pipeline back from the file that was just written, so the object
# used for predictions is the persisted artifact.
logistic_regression_model = joblib.load('logistic_regression_model.joblib')
|
|
|
|
# Routes: the index page and the prediction endpoint
|
|
@app.route('/')
def index():
    """Render ``index.html`` for requests to the site root."""
    landing_page = render_template('index.html')
    return landing_page
|
|
|
|
@app.route('/predict', methods=['POST'])
def predict():
    """Classify the game described by the submitted form and render the result.

    Reads 'Year Published', 'Users Rated', 'BGG Rank' and 'Owned Users' as
    integers and 'Complexity Average' as a float from the POSTed form, runs
    them through ``logistic_regression_model`` and renders ``index.html``
    with the outcome in the ``prediction_bgg`` template variable (a plain
    ``bool``).

    Returns:
        The rendered ``index.html``. When a field is missing, is not a valid
        number, or is not finite, the page is rendered with an ``error``
        template variable and HTTP status 400 instead of raising.
    """
    # The route is registered for POST only, so request.method needs no check.
    integer_fields = ('Year Published', 'Users Rated', 'BGG Rank', 'Owned Users')
    try:
        input_data_bgg = {name: int(request.form[name]) for name in integer_fields}
        complexity = float(request.form['Complexity Average'])
    except (KeyError, ValueError):
        # KeyError: field absent from the form; ValueError: empty/non-numeric.
        return render_template(
            'index.html',
            error='All fields are required and must be valid numbers.',
        ), 400

    # float() accepts "nan" and "inf"; reject them before they reach the model.
    if not math.isfinite(complexity):
        return render_template(
            'index.html',
            error='Complexity Average must be a finite number.',
        ), 400

    # Added last so the column order stays the same as before this change.
    input_data_bgg['Complexity Average'] = complexity

    # Single-row frame whose column names are the form field names.
    input_df_bgg = pd.DataFrame([input_data_bgg])

    # predict() returns an array; convert its only element to a builtin bool
    # so the template receives a plain Python value rather than a NumPy scalar.
    prediction_bgg = bool(logistic_regression_model.predict(input_df_bgg)[0])

    return render_template('index.html', prediction_bgg=prediction_bgg)
|
|
|
|
if __name__ == '__main__':
    import os

    # Debug mode stays on by default (as before) but can now be switched off
    # without editing the code, e.g. FLASK_DEBUG=0.
    # WARNING: the interactive debugger allows arbitrary code execution;
    # never expose this server with debug enabled.
    debug_enabled = os.environ.get('FLASK_DEBUG', '1').strip().lower() not in {
        '', '0', 'false', 'no', 'off',
    }
    app.run(debug=debug_enabled)
|