Predict for TV

This commit is contained in:
2024-12-02 21:27:11 +04:00
parent c6ad3a213a
commit ee01e85053
9 changed files with 283 additions and 39 deletions

View File

@@ -1,18 +1,24 @@
from fastapi import APIRouter, HTTPException from fastapi import APIRouter, HTTPException
from schemas.schemas import LaptopCreate, LaptopResponse, PredictPriceResponse from schemas.schemas import LaptopCreate, TVCreate, PredictPriceResponse
from services.service import LaptopService from services.service import LaptopService, TVService
import os import os
router = APIRouter() router = APIRouter()
# Инициализация сервиса # Инициализация сервиса
MODEL_PATH = os.getenv("MODEL_PATH", "services/ml/laptop_price_model.pkl") MODEL_PATH = os.getenv("MODEL_PATH", "services/ml/laptopML/laptop_price_model.pkl")
FEATURE_COLUMNS_PATH = os.getenv("FEATURE_COLUMNS_PATH", "services/ml/feature_columns.pkl") FEATURE_COLUMNS_PATH = os.getenv("FEATURE_COLUMNS_PATH", "services/ml/laptopML/feature_columns.pkl")
POLY_PATH = os.getenv("POLY_PATH", "services/ml/poly_transformer.pkl") POLY_PATH = os.getenv("POLY_PATH", "services/ml/laptopML/poly_transformer.pkl")
SCALER_PATH = os.getenv("SCALER_PATH", "services/ml/scaler.pkl") SCALER_PATH = os.getenv("SCALER_PATH", "services/ml/laptopML/scaler.pkl")
laptop_service = LaptopService(model_path=MODEL_PATH, feature_columns_path=FEATURE_COLUMNS_PATH, poly_path=POLY_PATH, scaler_path=SCALER_PATH) laptop_service = LaptopService(model_path=MODEL_PATH, feature_columns_path=FEATURE_COLUMNS_PATH, poly_path=POLY_PATH, scaler_path=SCALER_PATH)
# TV service initialisation.
# The TV artefact locations are read from TV_-prefixed environment variables:
# reusing MODEL_PATH / FEATURE_COLUMNS_PATH / POLY_PATH / SCALER_PATH (already
# consumed by the laptop service) would make any override point both services
# at the same files.
TV_MODEL_PATH = os.getenv("TV_MODEL_PATH", "services/ml/tvML/tv_price_model.pkl")
TV_FEATURE_COLUMNS_PATH = os.getenv("TV_FEATURE_COLUMNS_PATH", "services/ml/tvML/feature_columns_tv.pkl")
TV_POLY_PATH = os.getenv("TV_POLY_PATH", "services/ml/tvML/poly_transformer.pkl")
TV_SCALER_PATH = os.getenv("TV_SCALER_PATH", "services/ml/tvML/scaler.pkl")
tv_service = TVService(model_path=TV_MODEL_PATH, feature_columns_path=TV_FEATURE_COLUMNS_PATH, poly_path=TV_POLY_PATH, scaler_path=TV_SCALER_PATH)
@router.post("/predict_price/laptop/", response_model=PredictPriceResponse, summary="Predict laptop price", description="Predict the price of a laptop based on its specifications.", response_description="The predicted price of the laptop.")
def predict_price(data: LaptopCreate): def predict_price(data: LaptopCreate):
""" """
Predict the price of a laptop given its specifications. Predict the price of a laptop given its specifications.
@@ -25,5 +31,12 @@ def predict_price(data: LaptopCreate):
""" """
try: try:
return laptop_service.predict_price(data.dict()) return laptop_service.predict_price(data.dict())
except Exception as e:
raise HTTPException(status_code=400, detail=str(e))
@router.post("/predict_price/tv/", response_model=PredictPriceResponse, summary="Predict TV price", description="Predict the price of a TV based on its specifications.", response_description="The predicted price of the TV.")
def predict_price(data: TVCreate):
    """
    Predict the price of a TV given its specifications.

    The validated request body is converted to a plain dict and passed to
    ``tv_service.predict_price``. Any exception raised by the service is
    reported to the client as HTTP 400 with the exception text as detail.

    NOTE(review): this handler reuses the name ``predict_price`` of the laptop
    handler defined earlier in this module, so the module attribute ends up
    bound to this function. Both routes are registered at decoration time;
    consider giving the handlers distinct names.
    """
    try:
        return tv_service.predict_price(data.dict())
    except Exception as e:
        # Chain the original error so the real cause stays visible in logs.
        raise HTTPException(status_code=400, detail=str(e)) from e

View File

@@ -17,3 +17,15 @@ class Laptop(Base):
battery_size = Column(Integer) battery_size = Column(Integer)
release_year = Column(Integer) release_year = Column(Integer)
display_type = Column(String, index=True) display_type = Column(String, index=True)
class TV(Base):
    """ORM model for a television record stored in the ``tvs`` table."""

    __tablename__ = "tvs"

    id = Column(Integer, primary_key=True, index=True)
    display = Column(String, index=True)
    tuners = Column(String)
    features = Column(String)
    os = Column(String)
    power_of_volume = Column(String)
    # Previously a bare ``screen_sizes: int`` annotation, which declares no
    # column at all, so the value was never mapped to the table.
    screen_sizes = Column(Integer)
    color = Column(String)

View File

@@ -14,6 +14,15 @@ class LaptopCreate(BaseModel):
release_year: int release_year: int
display_type: str display_type: str
class TVCreate(BaseModel):
    """Request body describing a TV whose price should be predicted."""

    display: str
    tuners: str
    features: str
    os: str
    power_of_volume: str
    # NOTE(review): the synthetic dataset and the training script name this
    # column ``screen_size`` (singular) - confirm the consumer maps the field.
    screen_sizes: int
    color: str
class LaptopResponse(BaseModel): class LaptopResponse(BaseModel):
id: int id: int
brand: str brand: str
@@ -31,5 +40,15 @@ class LaptopResponse(BaseModel):
class Config: class Config:
orm_mode = True orm_mode = True
class TVResponse(BaseModel):
    """Response schema for a stored TV record."""

    id: int
    display: str
    tuners: str
    features: str
    os: str
    power_of_volume: str
    screen_sizes: int
    color: str

    class Config:
        # Same setting as LaptopResponse: lets the schema be populated from
        # ORM objects (attribute access) rather than only from dicts.
        orm_mode = True
class PredictPriceResponse(BaseModel): class PredictPriceResponse(BaseModel):
predicted_price: float predicted_price: float

View File

@@ -1,26 +0,0 @@
import matplotlib.pyplot as plt
import joblib
import numpy as np
from services.ml.modelBuilder import X_train
# Load the trained model and the list of feature names
# (paths are relative to the current working directory)
model_rf = joblib.load('laptop_price_model.pkl')
feature_columns = joblib.load('feature_columns.pkl')
# Get the feature importances and the feature indices sorted by descending importance
importances = model_rf.feature_importances_
indices = np.argsort(importances)[::-1]
# Print the features ranked by importance
print("Важность признаков:")
# X_train is used only for its column count
for f in range(X_train.shape[1]):
    print(f"{f + 1}. {feature_columns[indices[f]]} ({importances[indices[f]]})")
# Plot the feature importances as a bar chart
plt.figure(figsize=(12, 8))
plt.title("Важность признаков (Random Forest)")
plt.bar(range(X_train.shape[1]), importances[indices], align='center')
plt.xticks(range(X_train.shape[1]), [feature_columns[i] for i in indices], rotation=90)
plt.tight_layout()
plt.show()

View File

@@ -174,5 +174,5 @@ print("\nСтатистика по ценам:")
print(synthetic_df['price'].describe()) print(synthetic_df['price'].describe())
# Сохранение в CSV # Сохранение в CSV
synthetic_df.to_csv('synthetic_laptops.csv', index=False) synthetic_df.to_csv('../../../../datasets/synthetic_laptops.csv', index=False)
print("\nСинтетические данные сохранены в 'synthetic_laptops.csv'.") print("\nСинтетические данные сохранены в 'synthetic_laptops.csv'.")

View File

@@ -0,0 +1,107 @@
import pandas as pd
import numpy as np
import random
from datetime import datetime
# Seed both random number generators for reproducibility
np.random.seed(42)
random.seed(42)
# Possible values for the categorical attributes
displays = ['LED', 'OLED', 'QLED', 'LCD', 'Plasma']
screen_sizes = [32, 40, 43, 50, 55, 65, 75, 85] # in inches
tuners = ['DVB-T2', 'DVB-C', 'DVB-S2', 'ATSC', 'ISDB-T']
features = ['Smart TV', 'HDR', '3D', 'Voice Control', 'Bluetooth', 'WiFi', 'Ambient Mode']
oss = ['WebOS', 'Android TV', 'Tizen', 'Roku', 'Fire TV']
power_of_volume = ['10W', '20W', '30W', '40W', '50W'] # speaker power
colors = ['Black', 'Silver', 'White', 'Gray', 'Metallic']
# Attribute generator functions
def generate_display():
    """Return a random display type from ``displays``."""
    return random.choice(displays)
def generate_screen_size():
    """Return a random screen size from ``screen_sizes``."""
    return random.choice(screen_sizes)
def generate_tuners():
    """Return a random tuner standard from ``tuners``."""
    return random.choice(tuners)
def generate_features():
    """Return 1 to 4 distinct entries of ``features`` joined with ', '."""
    return ', '.join(random.sample(features, random.randint(1, 4))) # random set of features
def generate_os():
    """Return a random operating system from ``oss``."""
    return random.choice(oss)
def generate_power_of_volume():
    """Return a random speaker power label from ``power_of_volume``."""
    return random.choice(power_of_volume)
def generate_color():
    """Return a random colour from ``colors``."""
    return random.choice(colors)
# Price calculation
def calculate_price(display, screen_size, tuners, features, os, power_of_volume, color):
    """Compute a synthetic TV price from its attributes.

    The price is a base of 20000 plus premiums for the display type, the
    screen size above 32, the number of listed features, the operating system
    and the speaker power, plus Gaussian noise (mean 0, std 3000) drawn from
    ``np.random``. ``tuners`` and ``color`` are accepted but do not influence
    the price.

    Args:
        display: Display type; unknown types add no premium.
        screen_size: Screen size; every unit above 32 adds 1000.
        tuners: Unused.
        features: Comma-separated feature names; each adds 5000.
        os: Operating system name; unknown systems add 5000.
        power_of_volume: Speaker power as a string such as ``'20W'``.
        color: Unused.

    Returns:
        The price rounded to two decimals, never below 5000.
    """
    display_premium = {'LED': 0, 'OLED': 40000, 'QLED': 30000, 'LCD': 10000, 'Plasma': 15000}
    os_premium = {'WebOS': 10000, 'Android TV': 15000, 'Tizen': 12000, 'Roku': 8000, 'Fire TV': 7000}

    # Count only non-empty entries: ''.split(', ') yields [''], which used to
    # charge an empty feature string for one feature.
    feature_count = sum(1 for item in features.split(',') if item.strip())
    power_value = int(power_of_volume.rstrip('W'))

    base_price = 20000  # base price
    base_price += display_premium.get(display, 0)
    base_price += (screen_size - 32) * 1000
    base_price += feature_count * 5000
    base_price += os_premium.get(os, 5000)
    base_price += power_value * 500

    # Random noise so the target is not a deterministic function of the inputs
    noise = np.random.normal(0, 3000)
    final_price = base_price + noise
    # Float floor keeps the return type consistent on every path
    return max(round(final_price, 2), 5000.0)
# Synthetic dataset generation
def generate_synthetic_data(num_samples=100000):
    """Build a DataFrame of ``num_samples`` randomly generated TV records.

    Each record holds the generated attributes (display, screen_size, tuners,
    features, os, power_of_volume, color) and the price computed from them by
    ``calculate_price``.
    """
    def _make_record():
        # Dict literals evaluate in order, so the generators are called in the
        # same sequence as the attributes are listed.
        record = {
            'display': generate_display(),
            'screen_size': generate_screen_size(),
            'tuners': generate_tuners(),
            'features': generate_features(),
            'os': generate_os(),
            'power_of_volume': generate_power_of_volume(),
            'color': generate_color(),
        }
        # The keys match calculate_price's parameter names one-to-one.
        record['price'] = calculate_price(**record)
        return record

    return pd.DataFrame([_make_record() for _ in range(num_samples)])
print("Генерация синтетических данных для телевизоров...")
synthetic_df = generate_synthetic_data(num_samples=100000)
# Show the first rows
print("\nПример данных после генерации:")
print(synthetic_df.head())
# Save to CSV (path is relative to the current working directory)
synthetic_df.to_csv('../../../../datasets/synthetic_tvs.csv', index=False)
# NOTE(review): the message prints only the bare file name, not the relative path used above
print("\nСинтетические данные сохранены в 'synthetic_tvs.csv'.")

View File

@@ -8,7 +8,7 @@ import joblib
import numpy as np import numpy as np
# Шаг 1: Загрузка данных # Шаг 1: Загрузка данных
df = pd.read_csv('../../datasets/synthetic_laptops.csv') df = pd.read_csv('../../../../datasets/synthetic_laptops.csv')
# Шаг 2: Проверка и очистка имен столбцов # Шаг 2: Проверка и очистка имен столбцов
df.columns = df.columns.str.strip().str.lower() df.columns = df.columns.str.strip().str.lower()
@@ -84,10 +84,10 @@ print(f"Random Forest - MAE: {mae}, RMSE: {rmse}, R²: {r2}")
# Шаг 13: Сохранение модели # Шаг 13: Сохранение модели
feature_columns = X.columns.tolist() feature_columns = X.columns.tolist()
joblib.dump(feature_columns, 'feature_columns.pkl') joblib.dump(feature_columns, '../../laptopML/feature_columns.pkl')
joblib.dump(best_model, 'laptop_price_model.pkl') joblib.dump(best_model, '../../laptopML/laptop_price_model.pkl')
joblib.dump(poly, 'poly_transformer.pkl') joblib.dump(poly, '../../laptopML/poly_transformer.pkl')
joblib.dump(scaler, 'scaler.pkl') joblib.dump(scaler, '../../laptopML/scaler.pkl')
print("Модель, трансформер и скейлер сохранены.") print("Модель, трансформер и скейлер сохранены.")
# Шаг 14: Важность признаков # Шаг 14: Важность признаков

View File

@@ -0,0 +1,73 @@
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
import matplotlib.pyplot as plt
import joblib
import numpy as np
# Load the data
df = pd.read_csv('../../../../datasets/synthetic_tvs.csv')

# Validate and clean the data
required_columns = ['display', 'tuners', 'features', 'os', 'power_of_volume', 'color', 'screen_size', 'price']
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise Exception(f"Отсутствуют столбцы: {missing_columns}")

df = df.dropna(subset=required_columns)

# One-hot encode the categorical variables
categorical_features = ['display', 'tuners', 'features', 'os', 'power_of_volume','color']
df = pd.get_dummies(df, columns=categorical_features, drop_first=True)

# Split into features X and target y
X = df.drop('price', axis=1)
y = df['price']

# Polynomial features
poly = PolynomialFeatures(degree=1, interaction_only=True, include_bias=False)
X_poly = poly.fit_transform(X)

# Scaling
scaler = StandardScaler()
X_poly_scaled = scaler.fit_transform(X_poly)

# Train / test split
X_train, X_test, y_train, y_test = train_test_split(X_poly_scaled, y, test_size=0.5, random_state=42)

# Random Forest hyper-parameter search
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [10, 20],
    'max_features': ['sqrt', 'log2', 0.5],
    'min_samples_split': [5, 10],
    'min_samples_leaf': [2, 4]
}

grid_search = GridSearchCV(RandomForestRegressor(random_state=42), param_grid, cv=3, scoring='neg_mean_absolute_error')
grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_

# Feature importances
feature_importances = best_model.feature_importances_
feature_names = poly.get_feature_names_out(X.columns)

# Plot the 20 most important features
sorted_indices = np.argsort(feature_importances)[::-1]
plt.figure(figsize=(10, 8))
plt.barh([feature_names[i] for i in sorted_indices[:20]], feature_importances[sorted_indices[:20]])
plt.xlabel('Importance')
plt.ylabel('Feature')
plt.title('Top 20 Feature Importances')
plt.gca().invert_yaxis()
plt.show()

# Save the model artefacts
feature_columns = X.columns.tolist()
# The API router's default FEATURE_COLUMNS_PATH for the TV service is
# services/ml/tvML/feature_columns_tv.pkl; saving under 'feature_columns.pkl'
# left the service unable to find the file with default settings.
joblib.dump(feature_columns, '../../tvML/feature_columns_tv.pkl')
joblib.dump(best_model, '../../tvML/tv_price_model.pkl')
joblib.dump(poly, '../../tvML/poly_transformer.pkl')
joblib.dump(scaler, '../../tvML/scaler.pkl')
print("Модель для телевизоров сохранена.")

View File

@@ -52,3 +52,49 @@ class LaptopService:
predicted_price = self.model.predict(input_scaled)[0] predicted_price = self.model.predict(input_scaled)[0]
return PredictPriceResponse(predicted_price=round(predicted_price, 2)) return PredictPriceResponse(predicted_price=round(predicted_price, 2))
class TVService:
    """Load the TV price model artefacts and serve price predictions.

    The service holds four objects loaded with ``joblib``: the trained model,
    the list of feature column names produced at training time, the polynomial
    transformer and the scaler.
    """

    # Columns that the training script one-hot encodes; inference must encode
    # exactly the same set (``power_of_volume`` was previously left out).
    _CATEGORICAL_FEATURES = ['display', 'tuners', 'features', 'os', 'power_of_volume', 'color']

    def __init__(self, model_path: str, feature_columns_path: str, poly_path: str, scaler_path: str):
        """Load all artefacts.

        Raises:
            Exception: if any artefact is missing or cannot be loaded; the
                original error is attached as the cause.
        """
        try:
            self.model = joblib.load(model_path)
        except FileNotFoundError as e:
            raise Exception(f"Model file not found at {model_path}") from e
        except Exception as e:
            raise Exception(f"Error loading model: {str(e)}") from e

        try:
            self.feature_columns = joblib.load(feature_columns_path)
        except FileNotFoundError as e:
            raise Exception(f"Feature columns file not found at {feature_columns_path}") from e
        except Exception as e:
            raise Exception(f"Error loading feature columns: {str(e)}") from e

        try:
            self.poly_transformer = joblib.load(poly_path)
            self.scaler = joblib.load(scaler_path)
        except FileNotFoundError as e:
            raise Exception("Polynomial transformer or scaler file not found.") from e
        except Exception as e:
            raise Exception(f"Error loading polynomial transformer or scaler: {str(e)}") from e

    def _build_feature_frame(self, data: Dict[str, object]) -> pd.DataFrame:
        """Turn one request dict into a one-row frame laid out like the training features."""
        input_df = pd.DataFrame([data])

        # The request schema calls the field ``screen_sizes`` while the model
        # was trained on ``screen_size``; without this the size was always 0.
        if (
            'screen_sizes' in input_df.columns
            and 'screen_size' in self.feature_columns
            and 'screen_sizes' not in self.feature_columns
        ):
            input_df = input_df.rename(columns={'screen_sizes': 'screen_size'})

        # One-hot encode WITHOUT drop_first: on a single row every column has
        # exactly one category, so drop_first=True removed every indicator and
        # all categorical inputs were silently ignored. The baseline categories
        # dropped at training time are discarded by the reindex below instead.
        input_df = pd.get_dummies(input_df, columns=self._CATEGORICAL_FEATURES, drop_first=False)

        # Keep exactly the training columns, in training order; indicators for
        # categories not present in this request are filled with 0.
        return input_df.reindex(columns=self.feature_columns, fill_value=0)

    def predict_price(self, data: Dict[str, object]) -> "PredictPriceResponse":
        """Predict the price for one TV described by ``data``.

        Args:
            data: Mapping of TV attributes as produced by the request schema.

        Returns:
            A ``PredictPriceResponse`` with the price rounded to two decimals.
        """
        input_df = self._build_feature_frame(data)

        # Polynomial expansion followed by scaling, as in training
        input_poly = self.poly_transformer.transform(input_df)
        input_scaled = self.scaler.transform(input_poly)

        # Prediction
        predicted_price = self.model.predict(input_scaled)[0]
        return PredictPriceResponse(predicted_price=round(float(predicted_price), 2))