Predict for TV
This commit is contained in:
@@ -1,18 +1,24 @@
|
|||||||
from fastapi import APIRouter, HTTPException
|
from fastapi import APIRouter, HTTPException
|
||||||
from schemas.schemas import LaptopCreate, LaptopResponse, PredictPriceResponse
|
from schemas.schemas import LaptopCreate, TVCreate, PredictPriceResponse
|
||||||
from services.service import LaptopService
|
from services.service import LaptopService, TVService
|
||||||
import os
|
import os
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
# Инициализация сервиса
|
# Инициализация сервиса
|
||||||
MODEL_PATH = os.getenv("MODEL_PATH", "services/ml/laptop_price_model.pkl")
|
MODEL_PATH = os.getenv("MODEL_PATH", "services/ml/laptopML/laptop_price_model.pkl")
|
||||||
FEATURE_COLUMNS_PATH = os.getenv("FEATURE_COLUMNS_PATH", "services/ml/feature_columns.pkl")
|
FEATURE_COLUMNS_PATH = os.getenv("FEATURE_COLUMNS_PATH", "services/ml/laptopML/feature_columns.pkl")
|
||||||
POLY_PATH = os.getenv("POLY_PATH", "services/ml/poly_transformer.pkl")
|
POLY_PATH = os.getenv("POLY_PATH", "services/ml/laptopML/poly_transformer.pkl")
|
||||||
SCALER_PATH = os.getenv("SCALER_PATH", "services/ml/scaler.pkl")
|
SCALER_PATH = os.getenv("SCALER_PATH", "services/ml/laptopML/scaler.pkl")
|
||||||
laptop_service = LaptopService(model_path=MODEL_PATH, feature_columns_path=FEATURE_COLUMNS_PATH, poly_path=POLY_PATH, scaler_path=SCALER_PATH)
|
laptop_service = LaptopService(model_path=MODEL_PATH, feature_columns_path=FEATURE_COLUMNS_PATH, poly_path=POLY_PATH, scaler_path=SCALER_PATH)
|
||||||
|
|
||||||
@router.post("/predict_price/", response_model=PredictPriceResponse, summary="Predict laptop price", description="Predict the price of a laptop based on its specifications.", response_description="The predicted price of the laptop.")
|
# Artifacts of the TV price model.
# They are configured through their own TV_* environment variables and bound
# to their own constants: re-reading MODEL_PATH / FEATURE_COLUMNS_PATH /
# POLY_PATH / SCALER_PATH here would let a single exported variable point both
# the laptop and the TV service at the same file, and would rebind the laptop
# constants defined earlier in this module.
TV_MODEL_PATH = os.getenv("TV_MODEL_PATH", "services/ml/tvML/tv_price_model.pkl")
TV_FEATURE_COLUMNS_PATH = os.getenv("TV_FEATURE_COLUMNS_PATH", "services/ml/tvML/feature_columns_tv.pkl")
TV_POLY_PATH = os.getenv("TV_POLY_PATH", "services/ml/tvML/poly_transformer.pkl")
TV_SCALER_PATH = os.getenv("TV_SCALER_PATH", "services/ml/tvML/scaler.pkl")

# Service instance used by the TV prediction endpoint; the artifacts are
# loaded once, when this module is imported.
tv_service = TVService(
    model_path=TV_MODEL_PATH,
    feature_columns_path=TV_FEATURE_COLUMNS_PATH,
    poly_path=TV_POLY_PATH,
    scaler_path=TV_SCALER_PATH,
)
|
||||||
|
|
||||||
|
@router.post("/predict_price/laptop/", response_model=PredictPriceResponse, summary="Predict laptop price", description="Predict the price of a laptop based on its specifications.", response_description="The predicted price of the laptop.")
|
||||||
def predict_price(data: LaptopCreate):
|
def predict_price(data: LaptopCreate):
|
||||||
"""
|
"""
|
||||||
Predict the price of a laptop given its specifications.
|
Predict the price of a laptop given its specifications.
|
||||||
@@ -27,3 +33,10 @@ def predict_price(data: LaptopCreate):
|
|||||||
return laptop_service.predict_price(data.dict())
|
return laptop_service.predict_price(data.dict())
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise HTTPException(status_code=400, detail=str(e))
|
raise HTTPException(status_code=400, detail=str(e))
|
||||||
|
|
||||||
|
@router.post("/predict_price/tv/", response_model=PredictPriceResponse, summary="Predict TV price", description="Predict the price of a TV based on its specifications.", response_description="The predicted price of the TV.")
def predict_price(data: TVCreate):
    """
    Predict the price of a TV given its specifications.

    The validated request body is converted to a plain dict and handed to
    ``tv_service.predict_price``; whatever that call returns is returned as
    the response.

    Raises:
        HTTPException: with status 400 and the error text as ``detail`` when
            the prediction fails for any reason.
    """
    # NOTE(review): this function has the same name as the laptop handler
    # defined above, so the module-level name ``predict_price`` now refers to
    # this function. Both routes should still be registered because the
    # decorator runs at definition time, but consider a distinct name.
    try:
        return tv_service.predict_price(data.dict())
    except Exception as e:
        # Keep the original exception as the cause so that it shows up in
        # server-side tracebacks.
        raise HTTPException(status_code=400, detail=str(e)) from e
|
||||||
@@ -17,3 +17,15 @@ class Laptop(Base):
|
|||||||
battery_size = Column(Integer)
|
battery_size = Column(Integer)
|
||||||
release_year = Column(Integer)
|
release_year = Column(Integer)
|
||||||
display_type = Column(String, index=True)
|
display_type = Column(String, index=True)
|
||||||
|
|
||||||
|
class TV(Base):
    """ORM model of a television, stored in the ``tvs`` table."""

    __tablename__ = "tvs"

    id = Column(Integer, primary_key=True, index=True)
    display = Column(String, index=True)
    tuners = Column(String)
    features = Column(String)
    os = Column(String)
    power_of_volume = Column(String)
    # This used to be a bare ``screen_sizes: int`` annotation. An annotation
    # without a Column does not define a table column, so the attribute was
    # never persisted; it is declared as a real integer column here.
    screen_sizes = Column(Integer)
    color = Column(String)
|
||||||
@@ -14,6 +14,15 @@ class LaptopCreate(BaseModel):
|
|||||||
release_year: int
|
release_year: int
|
||||||
display_type: str
|
display_type: str
|
||||||
|
|
||||||
|
class TVCreate(BaseModel):
    """Request schema with the TV specifications used for price prediction."""

    # Textual attributes of the TV
    display: str
    tuners: str
    features: str
    os: str
    power_of_volume: str

    # Numeric attribute
    screen_sizes: int

    color: str
|
||||||
|
|
||||||
class LaptopResponse(BaseModel):
|
class LaptopResponse(BaseModel):
|
||||||
id: int
|
id: int
|
||||||
brand: str
|
brand: str
|
||||||
@@ -31,5 +40,15 @@ class LaptopResponse(BaseModel):
|
|||||||
class Config:
|
class Config:
|
||||||
orm_mode = True
|
orm_mode = True
|
||||||
|
|
||||||
|
class TVResponse(BaseModel):
    """Response schema describing a stored TV, including its ``id``."""

    id: int
    display: str
    tuners: str
    features: str
    os: str
    power_of_volume: str
    screen_sizes: int
    color: str

    class Config:
        # Same setting as LaptopResponse: allows the schema to be populated
        # from ORM objects rather than only from dicts.
        orm_mode = True
|
||||||
|
|
||||||
class PredictPriceResponse(BaseModel):
|
class PredictPriceResponse(BaseModel):
|
||||||
predicted_price: float
|
predicted_price: float
|
||||||
|
|||||||
@@ -1,26 +0,0 @@
|
|||||||
import matplotlib.pyplot as plt
|
|
||||||
import joblib
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
from services.ml.modelBuilder import X_train
|
|
||||||
|
|
||||||
# Загрузка модели и признаков
|
|
||||||
model_rf = joblib.load('laptop_price_model.pkl')
|
|
||||||
feature_columns = joblib.load('feature_columns.pkl')
|
|
||||||
|
|
||||||
# Получение важности признаков
|
|
||||||
importances = model_rf.feature_importances_
|
|
||||||
indices = np.argsort(importances)[::-1]
|
|
||||||
|
|
||||||
# Вывод наиболее важных признаков
|
|
||||||
print("Важность признаков:")
|
|
||||||
for f in range(X_train.shape[1]):
|
|
||||||
print(f"{f + 1}. {feature_columns[indices[f]]} ({importances[indices[f]]})")
|
|
||||||
|
|
||||||
# Визуализация важности признаков
|
|
||||||
plt.figure(figsize=(12, 8))
|
|
||||||
plt.title("Важность признаков (Random Forest)")
|
|
||||||
plt.bar(range(X_train.shape[1]), importances[indices], align='center')
|
|
||||||
plt.xticks(range(X_train.shape[1]), [feature_columns[i] for i in indices], rotation=90)
|
|
||||||
plt.tight_layout()
|
|
||||||
plt.show()
|
|
||||||
@@ -174,5 +174,5 @@ print("\nСтатистика по ценам:")
|
|||||||
print(synthetic_df['price'].describe())
|
print(synthetic_df['price'].describe())
|
||||||
|
|
||||||
# Сохранение в CSV
|
# Сохранение в CSV
|
||||||
synthetic_df.to_csv('synthetic_laptops.csv', index=False)
|
synthetic_df.to_csv('../../../../datasets/synthetic_laptops.csv', index=False)
|
||||||
print("\nСинтетические данные сохранены в 'synthetic_laptops.csv'.")
|
print("\nСинтетические данные сохранены в 'synthetic_laptops.csv'.")
|
||||||
107
services/ml/scripts/dataGenerators/generate_synthetic_data_tv.py
Normal file
107
services/ml/scripts/dataGenerators/generate_synthetic_data_tv.py
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import random
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# Seed both random number generators so that repeated runs are reproducible
np.random.seed(42)
random.seed(42)

# Allowed values of the categorical attributes
displays = ['LED', 'OLED', 'QLED', 'LCD', 'Plasma']
screen_sizes = [32, 40, 43, 50, 55, 65, 75, 85]  # in inches
tuners = ['DVB-T2', 'DVB-C', 'DVB-S2', 'ATSC', 'ISDB-T']
features = ['Smart TV', 'HDR', '3D', 'Voice Control', 'Bluetooth', 'WiFi', 'Ambient Mode']
oss = ['WebOS', 'Android TV', 'Tizen', 'Roku', 'Fire TV']
power_of_volume = ['10W', '20W', '30W', '40W', '50W']  # speaker power
colors = ['Black', 'Silver', 'White', 'Gray', 'Metallic']


# Attribute generators
def generate_display():
    """Return a randomly chosen display type."""
    return random.choice(displays)


def generate_screen_size():
    """Return a randomly chosen screen size in inches."""
    return random.choice(screen_sizes)


def generate_tuners():
    """Return a randomly chosen tuner standard."""
    return random.choice(tuners)


def generate_features():
    """Return one to four distinct features joined with ``', '``.

    The chosen features are always emitted in the order of the ``features``
    list. Joining them in sampling order made the same set of features appear
    as several different strings ("HDR, WiFi" vs. "WiFi, HDR"), which a
    one-hot encoding of the whole string treats as unrelated categories.
    """
    count = random.randint(1, 4)
    chosen = set(random.sample(features, count))
    return ', '.join(name for name in features if name in chosen)


def generate_os():
    """Return a randomly chosen operating system."""
    return random.choice(oss)


def generate_power_of_volume():
    """Return a randomly chosen speaker power such as ``'20W'``."""
    return random.choice(power_of_volume)


def generate_color():
    """Return a randomly chosen body color."""
    return random.choice(colors)
|
||||||
|
|
||||||
|
# Price model
def calculate_price(display, screen_size, tuners, features, os, power_of_volume, color, noise_std=3000):
    """Compute a synthetic TV price from its attributes.

    Args:
        display: Display type; unknown types get no premium.
        screen_size: Screen size; every unit above 32 adds 1000.
        tuners: Accepted for signature symmetry, does not affect the price.
        features: Feature names joined with ``', '``; each one adds 5000.
        os: Operating system name; unknown systems get a premium of 5000.
        power_of_volume: Speaker power such as ``'20W'``; each watt adds 500.
        color: Accepted for signature symmetry, does not affect the price.
        noise_std: Standard deviation of the Gaussian noise added to the
            price. Defaults to 3000; pass 0 for a deterministic price.

    Returns:
        The price rounded to two decimals, never lower than 5000.
    """
    display_premium = {'LED': 0, 'OLED': 40000, 'QLED': 30000, 'LCD': 10000, 'Plasma': 15000}
    os_premium = {'WebOS': 10000, 'Android TV': 15000, 'Tizen': 12000, 'Roku': 8000, 'Fire TV': 7000}

    price = 20000  # base price

    # Display type
    price += display_premium.get(display, 0)

    # Screen size
    price += (screen_size - 32) * 1000

    # Features: count only non-empty entries, because ''.split(', ') yields
    # [''] and an empty feature string would otherwise be billed as one feature.
    feature_count = sum(1 for item in features.split(', ') if item.strip())
    price += feature_count * 5000

    # Operating system
    price += os_premium.get(os, 5000)

    # Speaker power
    price += int(power_of_volume.rstrip('W')) * 500

    # Random noise
    price += np.random.normal(0, noise_std)

    return max(round(price, 2), 5000)
|
||||||
|
|
||||||
|
# Dataset assembly
def generate_synthetic_data(num_samples=100000):
    """Build a DataFrame of ``num_samples`` randomly generated TVs.

    Each row holds the generated attributes plus a ``price`` column computed
    by ``calculate_price`` from those attributes.
    """
    rows = []
    for _ in range(num_samples):
        # The dict literal is evaluated top to bottom, so the generators are
        # called in the same order as before and the seeded random stream is
        # consumed identically.
        row = {
            'display': generate_display(),
            'screen_size': generate_screen_size(),
            'tuners': generate_tuners(),
            'features': generate_features(),
            'os': generate_os(),
            'power_of_volume': generate_power_of_volume(),
            'color': generate_color(),
        }
        # The keys match the parameter names of calculate_price.
        row['price'] = calculate_price(**row)
        rows.append(row)
    return pd.DataFrame(rows)
|
||||||
|
|
||||||
|
print("Генерация синтетических данных для телевизоров...")
synthetic_df = generate_synthetic_data(num_samples=100000)

# Show the first rows as a quick sanity check
print("\nПример данных после генерации:")
print(synthetic_df.head())

# Write the dataset to CSV (the path is relative to the working directory)
synthetic_df.to_csv(
    '../../../../datasets/synthetic_tvs.csv',
    index=False,
)
print("\nСинтетические данные сохранены в 'synthetic_tvs.csv'.")
|
||||||
@@ -8,7 +8,7 @@ import joblib
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
# Шаг 1: Загрузка данных
|
# Шаг 1: Загрузка данных
|
||||||
df = pd.read_csv('../../datasets/synthetic_laptops.csv')
|
df = pd.read_csv('../../../../datasets/synthetic_laptops.csv')
|
||||||
|
|
||||||
# Шаг 2: Проверка и очистка имен столбцов
|
# Шаг 2: Проверка и очистка имен столбцов
|
||||||
df.columns = df.columns.str.strip().str.lower()
|
df.columns = df.columns.str.strip().str.lower()
|
||||||
@@ -84,10 +84,10 @@ print(f"Random Forest - MAE: {mae}, RMSE: {rmse}, R²: {r2}")
|
|||||||
|
|
||||||
# Шаг 13: Сохранение модели
|
# Шаг 13: Сохранение модели
|
||||||
feature_columns = X.columns.tolist()
|
feature_columns = X.columns.tolist()
|
||||||
joblib.dump(feature_columns, 'feature_columns.pkl')
|
joblib.dump(feature_columns, '../../laptopML/feature_columns.pkl')
|
||||||
joblib.dump(best_model, 'laptop_price_model.pkl')
|
joblib.dump(best_model, '../../laptopML/laptop_price_model.pkl')
|
||||||
joblib.dump(poly, 'poly_transformer.pkl')
|
joblib.dump(poly, '../../laptopML/poly_transformer.pkl')
|
||||||
joblib.dump(scaler, 'scaler.pkl')
|
joblib.dump(scaler, '../../laptopML/scaler.pkl')
|
||||||
print("Модель, трансформер и скейлер сохранены.")
|
print("Модель, трансформер и скейлер сохранены.")
|
||||||
|
|
||||||
# Шаг 14: Важность признаков
|
# Шаг 14: Важность признаков
|
||||||
73
services/ml/scripts/modelBuilders/modelBuilderTV.py
Normal file
73
services/ml/scripts/modelBuilders/modelBuilderTV.py
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
import pandas as pd
|
||||||
|
from sklearn.model_selection import train_test_split, GridSearchCV
|
||||||
|
from sklearn.ensemble import RandomForestRegressor
|
||||||
|
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
|
||||||
|
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import joblib
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Load the data
df = pd.read_csv('../../../../datasets/synthetic_tvs.csv')

# Validate and clean the data
required_columns = ['display', 'tuners', 'features', 'os', 'power_of_volume', 'color', 'screen_size', 'price']
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise Exception(f"Отсутствуют столбцы: {missing_columns}")

df = df.dropna(subset=required_columns)

# One-hot encode the categorical variables
categorical_features = ['display', 'tuners', 'features', 'os', 'power_of_volume', 'color']
df = pd.get_dummies(df, columns=categorical_features, drop_first=True)

# Split into features and target
X = df.drop('price', axis=1)
y = df['price']

# Hold out the test set BEFORE fitting any transformer, so that the scaler
# statistics are computed from the training rows only and nothing about the
# test rows leaks into the model.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

# Polynomial features
# NOTE(review): with degree=1 this should produce only the original columns;
# it is kept because the prediction service loads and applies the transformer.
poly = PolynomialFeatures(degree=1, interaction_only=True, include_bias=False)
X_train_poly = poly.fit_transform(X_train_raw)
X_test_poly = poly.transform(X_test_raw)

# Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_poly)
X_test = scaler.transform(X_test_poly)

# Random Forest hyper-parameter search
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [10, 20],
    'max_features': ['sqrt', 'log2', 0.5],
    'min_samples_split': [5, 10],
    'min_samples_leaf': [2, 4]
}

grid_search = GridSearchCV(RandomForestRegressor(random_state=42), param_grid, cv=3, scoring='neg_mean_absolute_error')
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_

# Evaluate on the held-out set (previously the test split was never used)
y_pred = best_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)
print(f"Random Forest - MAE: {mae}, RMSE: {rmse}, R²: {r2}")

# Save the artifacts before plotting: plt.show() blocks until the window is
# closed, and a failure in plotting must not lose the trained model.
# The feature list is written as 'feature_columns_tv.pkl', the file name the
# API router uses as its default for the TV service.
feature_columns = X.columns.tolist()
joblib.dump(feature_columns, '../../tvML/feature_columns_tv.pkl')
joblib.dump(best_model, '../../tvML/tv_price_model.pkl')
joblib.dump(poly, '../../tvML/poly_transformer.pkl')
joblib.dump(scaler, '../../tvML/scaler.pkl')
print("Модель для телевизоров сохранена.")

# Feature importances
feature_importances = best_model.feature_importances_
feature_names = poly.get_feature_names_out(X.columns)

# Plot the 20 most important features
sorted_indices = np.argsort(feature_importances)[::-1]
plt.figure(figsize=(10, 8))
plt.barh([feature_names[i] for i in sorted_indices[:20]], feature_importances[sorted_indices[:20]])
plt.xlabel('Importance')
plt.ylabel('Feature')
plt.title('Top 20 Feature Importances')
plt.gca().invert_yaxis()
plt.show()
|
||||||
@@ -52,3 +52,49 @@ class LaptopService:
|
|||||||
predicted_price = self.model.predict(input_scaled)[0]
|
predicted_price = self.model.predict(input_scaled)[0]
|
||||||
|
|
||||||
return PredictPriceResponse(predicted_price=round(predicted_price, 2))
|
return PredictPriceResponse(predicted_price=round(predicted_price, 2))
|
||||||
|
|
||||||
|
class TVService:
    """Loads the trained TV price model and serves predictions from it."""

    # Columns that the TV training script one-hot encodes.
    _CATEGORICAL_COLUMNS = ['display', 'tuners', 'features', 'os', 'power_of_volume', 'color']

    def __init__(self, model_path: str, feature_columns_path: str, poly_path: str, scaler_path: str):
        """Load the model, the feature column list, the polynomial
        transformer and the scaler from the given files.

        Raises:
            Exception: if any of the files is missing or cannot be loaded;
                the original error is attached as the cause.
        """
        try:
            self.model = joblib.load(model_path)
        except FileNotFoundError as e:
            raise Exception(f"Model file not found at {model_path}") from e
        except Exception as e:
            raise Exception(f"Error loading model: {str(e)}") from e

        try:
            self.feature_columns = joblib.load(feature_columns_path)
        except FileNotFoundError as e:
            raise Exception(f"Feature columns file not found at {feature_columns_path}") from e
        except Exception as e:
            raise Exception(f"Error loading feature columns: {str(e)}") from e

        try:
            self.poly_transformer = joblib.load(poly_path)
            self.scaler = joblib.load(scaler_path)
        except FileNotFoundError as e:
            raise Exception("Polynomial transformer or scaler file not found.") from e
        except Exception as e:
            raise Exception(f"Error loading polynomial transformer or scaler: {str(e)}") from e

    def _build_feature_frame(self, data):
        """Turn one request dict into a one-row frame laid out exactly like
        ``self.feature_columns``."""
        row = dict(data)
        # The request schema names the field ``screen_sizes`` while the model
        # is trained on a ``screen_size`` column.
        if 'screen_size' not in row and 'screen_sizes' in row:
            row['screen_size'] = row.pop('screen_sizes')

        frame = pd.DataFrame([row])

        # One-hot encode WITHOUT drop_first: a single row has exactly one
        # category per column, so drop_first would remove every dummy column
        # and the prediction would ignore all categorical inputs.
        encoded = pd.get_dummies(frame, columns=self._CATEGORICAL_COLUMNS)

        # Align with the training layout: columns unseen in this request
        # become 0, columns unknown to the model (including the baseline
        # categories dropped during training) are discarded.
        return encoded.reindex(columns=self.feature_columns, fill_value=0).astype(float)

    def predict_price(self, data: Dict[str, object]) -> "PredictPriceResponse":
        """Predict the price of the TV described by ``data``.

        Args:
            data: Attribute name to value mapping, as produced from the
                request schema.

        Returns:
            A response object with the price rounded to two decimals.
        """
        input_df = self._build_feature_frame(data)

        # Same transformations as during training
        input_poly = self.poly_transformer.transform(input_df)
        input_scaled = self.scaler.transform(input_poly)

        # Prediction
        predicted_price = float(self.model.predict(input_scaled)[0])
        return PredictPriceResponse(predicted_price=round(predicted_price, 2))
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user