IIS_2023_1/kutygin_andrey_lab_5/main.py

49 lines
2.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from datetime import datetime
# Load the dataset.
# If the file cannot be read there is nothing to train on, so report the
# problem and stop with a non-zero exit status. Continuing would leave `data`
# undefined and fail on the next statement with an unrelated NameError.
try:
    data = pd.read_csv('ufo_sighting_data.csv')
except Exception as e:
    print("Ошибка при чтении файла:", e)
    raise SystemExit(1) from e
# Select the model inputs (latitude, longitude) and the target
# (the `length_of_encounter_seconds` column).
# The three columns are first coerced to numbers and incomplete rows are
# dropped: scikit-learn estimators raise on NaN or non-numeric values, so one
# malformed cell would otherwise abort training. Values that cannot be parsed
# become NaN via errors='coerce' and are removed by dropna(); on already-clean
# numeric data this step changes nothing.
_model_columns = ['latitude', 'longitude', 'length_of_encounter_seconds']
_numeric = data[_model_columns].apply(pd.to_numeric, errors='coerce').dropna()
X = _numeric[['latitude', 'longitude']]
y = _numeric['length_of_encounter_seconds']
# Rebuild `date_time` as a pandas datetime column.
# The text is split on whitespace into temporary `date` and `time` columns;
# the date is parsed as day/month/year and the time is added as an offset.
# ':00' is appended to the time before parsing -- presumably to supply the
# seconds of an HH:MM value; confirm against the data.
# NOTE(review): X and y are built from other columns only, so this converted
# column does not appear to feed the model in the visible code.
data[['date', 'time']] = data['date_time'].str.split(expand=True)
sighting_day = pd.to_datetime(data.pop('date'), format='%d/%m/%Y')
time_of_day = pd.to_timedelta(data.pop('time') + ':00')
data['date_time'] = sighting_day + time_of_day
# Hold out 10% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

# Expand the inputs into degree-2 polynomial terms. The transformer is fitted
# on the training rows only and then applied unchanged to the test rows.
poly_features = PolynomialFeatures(degree=2)
X_train_poly = poly_features.fit(X_train).transform(X_train)
X_test_poly = poly_features.transform(X_test)

# Polynomial regression: an ordinary linear regression on the expanded features.
model = LinearRegression().fit(X_train_poly, y_train)
# Score the fitted model on the training rows...
y_train_pred = model.predict(X_train_poly)
train_mse = mean_squared_error(y_train, y_train_pred)
train_r2 = r2_score(y_train, y_train_pred)

# ...and on the held-out test rows.
y_test_pred = model.predict(X_test_poly)
test_mse = mean_squared_error(y_test, y_test_pred)
test_r2 = r2_score(y_test, y_test_pred)

# Report mean squared error and R^2 for both data splits.
print("Train MSE:", train_mse)
print("Test MSE:", test_mse)
print("Train R^2:", train_r2)
print("Test R^2:", test_r2)