Merge pull request 'malkova_anastasia_lab_5 ready' (#162) from malkova_anastasia_lab_5 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/162
This commit is contained in:
commit
34651ab250
48
malkova_anastasia_lab_5/README.md
Normal file
48
malkova_anastasia_lab_5/README.md
Normal file
@ -0,0 +1,48 @@
|
||||
# Лабораторная работа №5
|
||||
|
||||
> Регрессия
|
||||
|
||||
### Как запустить лабораторную работу
|
||||
|
||||
1. Установить python, numpy, pandas, sklearn
|
||||
2. Запустить команду `python main1.py` в корне проекта для запуска 1 части
|
||||
3. Запустить команду `python main2.py` в корне проекта для запуска 2 части
|
||||
|
||||
### Использованные технологии
|
||||
|
||||
* Язык программирования `python`
|
||||
* Библиотеки `numpy, pandas, sklearn`
|
||||
* Среда разработки `PyCharm`
|
||||
|
||||
### Что делает программа?
|
||||
|
||||
Цель программы: на основе данных об автомобилях на вторичном рынке обучить модель регрессии на предсказание цены.
|
||||
|
||||
Используемая модель: Лассо-регрессия
|
||||
|
||||
#### Определим признаки
|
||||
|
||||
LINEAR [('Year', 1.0), ('Mileage', 0.4868), ('State', 0.0729), ('Vin', 0.015), ('City', 0.0037), ('Model', 0.0002), ('Make', 0.0)]
|
||||
|
||||
RIDGE [('Year', 1.0), ('Mileage', 0.4868), ('State', 0.0729), ('Vin', 0.015), ('City', 0.0037), ('Model', 0.0002), ('Make', 0.0)]
|
||||
|
||||
LASSO [('Year', 1.0), ('Mileage', 0.4868), ('State', 0.0729), ('Vin', 0.015), ('City', 0.0037), ('Model', 0.0002), ('Make', 0.0)]
|
||||
|
||||
RFE [('Year', 1.0), ('State', 1.0), ('Mileage', 1.0), ('City', 0.5), ('Vin', 0.5), ('Model', 0.0), ('Make', 0.0)]
|
||||
|
||||
f_regression [('Year', 1.0), ('State', 0.1438), ('Vin', 0.0878), ('City', 0.0845), ('Mileage', 0.0711), ('Model', 0.0335), ('Make', 0.0)]
|
||||
|
||||
MEAN [('Year', 0.2), ('Mileage', 0.0974), ('State', 0.0146), ('Vin', 0.003), ('City', 0.0007), ('Model', 0.0), ('Make', 0.0)]
|
||||
|
||||
Отсечение признаков, у которых MEAN ниже 0.001. Выделенные признаки для дальнейшего обучения модели Lasso.
|
||||
|
||||
##### Запуск модели Lasso с параметрами
|
||||
|
||||
![main2.py](main2.png)
|
||||
|
||||
#### Итоговые выводы
|
||||
|
||||
Были выбраны основные признаки для обучения модели, которые имеют большее влияние на предсказание.
|
||||
Проведено несколько тестов по обучению модели Lasso с разными alpha (силой влияния регуляризации),
|
||||
но это не оказало большого влияния. Точность в 94.33% приемлемая.
|
||||
|
8
malkova_anastasia_lab_5/config.py
Normal file
8
malkova_anastasia_lab_5/config.py
Normal file
@ -0,0 +1,8 @@
|
||||
# Labels under which each feature-ranking method is stored and printed.
LINEAR_TITLE = "LINEAR"
RIDGE_TITLE = "RIDGE"
LASSO_TITLE = "LASSO"
RFE_TITLE = "RFE"
F_REGRESSION_TITLE = "f_regression"

# Number of leading CSV rows that dataset.py keeps.
DATA_SIZE = 1000

# NOTE(review): not referenced by any module shown alongside this file;
# confirm whether it is still needed.
FEATURES_AMOUNT = 14
|
23
malkova_anastasia_lab_5/dataset.py
Normal file
23
malkova_anastasia_lab_5/dataset.py
Normal file
@ -0,0 +1,23 @@
|
||||
import pandas as pd
|
||||
|
||||
from config import DATA_SIZE
|
||||
|
||||
|
||||
def load_dataset():
    """Load the car-listings sample used for feature ranking and regression.

    Only the first ``DATA_SIZE`` rows of ``true_car_listings.csv`` are
    parsed. The text columns are replaced by integer codes via
    ``convert_to_num`` so that every feature is numeric.

    Returns:
        A tuple ``(X, Y, names)``: the feature frame restricted to ``names``,
        the ``Price`` column, and the list of feature column names.
    """
    # nrows stops parsing after DATA_SIZE rows instead of loading the whole
    # file and slicing it; it also yields an independent frame, so the
    # in-place column replacement below does not write into a slice.
    data = pd.read_csv('true_car_listings.csv', nrows=DATA_SIZE)

    names = ['Year', 'Mileage', 'City', 'State', 'Vin', 'Make', 'Model']
    for col in ('City', 'State', 'Vin', 'Make', 'Model'):
        convert_to_num(data, col)

    Y = data['Price']
    X = data[names]

    return X, Y, names
|
||||
|
||||
|
||||
def convert_to_num(data, col):
    """Replace the values of column ``col`` in ``data`` with integer codes.

    Equal values receive the same code and distinct values receive distinct
    codes. Codes are assigned in order of first appearance (0, 1, 2, ...), so
    the encoding is identical on every run; ordering taken from a ``set`` of
    strings is not stable between interpreter runs. Missing values are coded
    as -1 by ``pd.factorize``.

    Args:
        data: DataFrame that is modified in place.
        col: Name of the column to encode.

    Returns:
        None. The column is overwritten inside ``data``.
    """
    codes, _ = pd.factorize(data[col])
    data[col] = codes
|
20
malkova_anastasia_lab_5/fit.py
Normal file
20
malkova_anastasia_lab_5/fit.py
Normal file
@ -0,0 +1,20 @@
|
||||
from sklearn.linear_model import Lasso, Ridge, LinearRegression
|
||||
from sklearn.feature_selection import RFE, f_regression
|
||||
|
||||
|
||||
def fit_models(x, y):
    """Fit the estimators whose outputs are later turned into feature ranks.

    Args:
        x: Feature matrix.
        y: Target values.

    Returns:
        A tuple ``(lm, ridge, lasso, rfe, f)``: the fitted LinearRegression,
        Ridge, Lasso and RFE objects, followed by the first array returned by
        ``f_regression`` (the per-feature statistic).
    """
    lm = LinearRegression()
    lm.fit(x, y)

    ridge = Ridge(alpha=0.001)
    ridge.fit(x, y)

    lasso = Lasso(alpha=0.001)
    lasso.fit(x, y)

    # Recursive feature elimination driven by a Lasso estimator.
    rfe = RFE(lasso, step=2)
    rfe.fit(x, y)

    # Only the statistic is used; the second return value is discarded.
    # NOTE(review): center=False is passed explicitly here; confirm that
    # skipping centering is intended for these unscaled features.
    f, _ = f_regression(x, y, center=False)

    return lm, ridge, lasso, rfe, f
|
17
malkova_anastasia_lab_5/lasso_test.py
Normal file
17
malkova_anastasia_lab_5/lasso_test.py
Normal file
@ -0,0 +1,17 @@
|
||||
from scores import MAPE
|
||||
from sklearn.linear_model import Lasso
|
||||
|
||||
|
||||
def lasso_test(X_train, X_test, y_train, y_test, alpha):
    """Train a Lasso model with the given ``alpha`` and print its test scores.

    Prints the MAPE on the test split and ``100 - MAPE`` as an accuracy
    percentage. Nothing is returned.
    """
    # Train model
    model = Lasso(alpha=alpha)
    model.fit(X_train, y_train)

    # Show score of model
    error = MAPE(y_test, model.predict(X_test))
    print("MAPE value: ", error)

    accuracy = 100 - error
    message = 'Accuracy of Lasso Regression(alpha={:}): {:0.2f}%.'.format(
        alpha, accuracy)
    print(message)
|
13
malkova_anastasia_lab_5/main1.py
Normal file
13
malkova_anastasia_lab_5/main1.py
Normal file
@ -0,0 +1,13 @@
|
||||
from dataset import load_dataset
|
||||
from fit import fit_models
|
||||
from ranks import calc_mean, get_ranks
|
||||
|
||||
# Part 1: rank the dataset's features with every method, then print the
# ranking averaged over all methods.
features, target, feature_names = load_dataset()

fitted = fit_models(features, target)

ranks = get_ranks(*fitted, feature_names)

print("MEAN", calc_mean(ranks))
|
BIN
malkova_anastasia_lab_5/main2.png
Normal file
BIN
malkova_anastasia_lab_5/main2.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 60 KiB |
15
malkova_anastasia_lab_5/main2.py
Normal file
15
malkova_anastasia_lab_5/main2.py
Normal file
@ -0,0 +1,15 @@
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
from dataset import load_dataset
|
||||
from lasso_test import lasso_test
|
||||
|
||||
|
||||
# Load dataset (the returned feature-name list is not needed here)
features, target, _ = load_dataset()

# Split dataset: 5% of the rows are held out for testing
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.05, random_state=42)

# Alphas tried: 0.0, 0.1, 0.02, 0.003, 0.0004, 0.00005
# NOTE(review): the first value is alpha=0.0; confirm that running Lasso
# without any regularisation is intended.
for alpha in (i / 10 ** i for i in range(6)):
    lasso_test(X_train, X_test, y_train, y_test, alpha)
|
48
malkova_anastasia_lab_5/ranks.py
Normal file
48
malkova_anastasia_lab_5/ranks.py
Normal file
@ -0,0 +1,48 @@
|
||||
import config
|
||||
import numpy as np
|
||||
from sklearn.preprocessing import MinMaxScaler
|
||||
from operator import itemgetter
|
||||
from sklearn.linear_model import Lasso, Ridge, LinearRegression
|
||||
|
||||
|
||||
def rank_to_dict(ranks, names):
    """Scale raw feature scores to [0, 1] and pair them with feature names.

    The absolute values of ``ranks`` are min-max scaled, rounded to four
    decimals and matched positionally with ``names``.

    Args:
        ranks: One raw score per feature (coefficients, RFE ranks, ...).
        names: Feature names, in the same order as ``ranks``.

    Returns:
        Despite the function's name, a list of ``(name, score)`` tuples
        sorted by score in descending order.
    """
    magnitudes = np.abs(ranks)
    # MinMaxScaler works column-wise, so the scores are shaped as one column.
    column = np.array(magnitudes).reshape(len(names), 1)
    scaled = MinMaxScaler().fit_transform(column).ravel()

    # Convert to plain Python floats after rounding so the printed ranking
    # does not depend on how the installed NumPy renders its scalar types.
    scores = {
        name: float(round(value, 4)) for name, value in zip(names, scaled)
    }
    return sorted(scores.items(), key=itemgetter(1), reverse=True)
|
||||
|
||||
|
||||
def flip_array(arr):
    """Invert an array of ranks so that the smallest value becomes the largest.

    Every element ``v`` is mapped to ``max(arr) - v``; the former maximum
    becomes 0.

    Args:
        arr: Array-like of numbers (for example ``RFE.ranking_``).

    Returns:
        A NumPy array with the flipped values.
    """
    # Coerce first: for a plain list ``-1 * arr`` would be list repetition
    # (an empty list), not element-wise negation.
    values = np.asarray(arr)
    return -1 * values + np.max(values)
|
||||
|
||||
|
||||
def get_ranks(lm: LinearRegression, ridge: Ridge, lasso: Lasso, rfe, f, names):
    """Build, print and return the feature ranking produced by each method.

    The linear models contribute their coefficients, RFE contributes its
    flipped ``ranking_`` and ``f`` is used as given. Each set of raw scores
    is normalised with ``rank_to_dict``.

    Returns:
        A dict keyed by the method titles from ``config``; each value is the
        ranking returned by ``rank_to_dict`` for that method.
    """
    raw_scores = (
        (config.LINEAR_TITLE, lm.coef_),
        (config.RIDGE_TITLE, ridge.coef_),
        (config.LASSO_TITLE, lasso.coef_),
        # flip_array inverts the RFE ranks before they are normalised.
        (config.RFE_TITLE, flip_array(rfe.ranking_)),
        (config.F_REGRESSION_TITLE, f),
    )
    ranks = {title: rank_to_dict(scores, names) for title, scores in raw_scores}

    for title, ranking in ranks.items():
        print(title, ranking, '\n')

    return ranks
|
||||
|
||||
|
||||
def calc_mean(ranks):
    """Average each feature's score over all ranking methods.

    Args:
        ranks: Mapping of method title to a list of ``(feature, score)``
            tuples, as produced by ``get_ranks``.

    Returns:
        A list of ``(feature, mean_score)`` tuples sorted by mean score in
        descending order, with means rounded to four decimals. The divisor is
        the number of methods, so a feature missing from one ranking counts
        as 0 there. An empty mapping yields an empty list.
    """
    totals = {}
    for ranking in ranks.values():
        for name, score in ranking:
            # Accumulate unconditionally: the score from every ranking must
            # be added, not only the one seen when the feature first appears.
            totals[name] = totals.get(name, 0) + score

    method_count = len(ranks)
    # float() keeps the result a plain Python number even when the scores
    # are NumPy scalars.
    mean = {
        name: round(float(total) / method_count, 4)
        for name, total in totals.items()
    }
    return sorted(mean.items(), key=itemgetter(1), reverse=True)
|
6
malkova_anastasia_lab_5/scores.py
Normal file
6
malkova_anastasia_lab_5/scores.py
Normal file
@ -0,0 +1,6 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
def MAPE(Y_actual, Y_Predicted):
    """Return the mean absolute percentage error, in percent.

    Computes ``mean(|actual - predicted| / |actual|) * 100``. Both inputs are
    converted to float arrays and compared position by position, so lists,
    NumPy arrays and pandas Series are all accepted.

    Args:
        Y_actual: True target values. Entries equal to 0 make the ratio
            undefined (the result is then inf or nan).
        Y_Predicted: Predicted values, same length as ``Y_actual``.

    Returns:
        The error as a percentage.
    """
    actual = np.asarray(Y_actual, dtype=float)
    predicted = np.asarray(Y_Predicted, dtype=float)
    mape = np.mean(np.abs((actual - predicted) / actual)) * 100
    return mape
|
852123
malkova_anastasia_lab_5/true_car_listings.csv
Normal file
852123
malkova_anastasia_lab_5/true_car_listings.csv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user