Compare commits
17 Commits
Author | SHA1 | Date | |
---|---|---|---|
338e0b0ad8 | |||
83031d3667 | |||
5a6a48e622 | |||
75b0e0f580 | |||
5ab313468c | |||
bd8c7a6d2b | |||
292b43e934 | |||
daa238663b | |||
59b6a164c8 | |||
f77a5e5335 | |||
7aa7bd2f42 | |||
11ced38915 | |||
c1ec962e77 | |||
f7672b7625 | |||
f249d643dc | |||
0b9d379e16 | |||
e3ad2174f2 |
2
.gitignore
vendored
2
.gitignore
vendored
@ -276,3 +276,5 @@ cython_debug/
|
||||
node_modules/
|
||||
|
||||
test.csv
|
||||
описания_датасетов/.~lock.cars.odt#
|
||||
описания_датасетов/.~lock.houses.odt#
|
||||
|
12598
data/car-price-prediction.csv
Normal file
12598
data/car-price-prediction.csv
Normal file
File diff suppressed because it is too large
Load Diff
19238
data/car_price_prediction.csv
Normal file
19238
data/car_price_prediction.csv
Normal file
File diff suppressed because it is too large
Load Diff
244
data/dollar.csv
Normal file
244
data/dollar.csv
Normal file
@ -0,0 +1,244 @@
|
||||
"my_date","my_value","bullet","bulletClass","label"
|
||||
"28.03.2023","76.5662","","",""
|
||||
"31.03.2023","77.0863","","",""
|
||||
"01.04.2023","77.3233","","",""
|
||||
"04.04.2023","77.9510","","",""
|
||||
"05.04.2023","79.3563","","",""
|
||||
"06.04.2023","79.4961","","",""
|
||||
"07.04.2023","80.6713","","",""
|
||||
"08.04.2023","82.3988","","",""
|
||||
"11.04.2023","81.7441","","",""
|
||||
"12.04.2023","82.1799","","",""
|
||||
"13.04.2023","82.0934","","",""
|
||||
"14.04.2023","81.6758","","",""
|
||||
"15.04.2023","81.5045","","",""
|
||||
"18.04.2023","81.6279","","",""
|
||||
"19.04.2023","81.6028","","",""
|
||||
"20.04.2023","81.6549","","",""
|
||||
"21.04.2023","81.6188","","",""
|
||||
"22.04.2023","81.4863","","",""
|
||||
"25.04.2023","81.2745","","",""
|
||||
"26.04.2023","81.5499","","",""
|
||||
"27.04.2023","81.6274","","",""
|
||||
"28.04.2023","81.5601","","",""
|
||||
"29.04.2023","80.5093","","",""
|
||||
"03.05.2023","79.9609","","",""
|
||||
"04.05.2023","79.3071","","",""
|
||||
"05.05.2023","78.6139","","",""
|
||||
"06.05.2023","76.8207","","",""
|
||||
"11.05.2023","76.6929","","",""
|
||||
"12.05.2023","75.8846","round","min-pulsating-bullet","мин"
|
||||
"13.05.2023","77.2041","","",""
|
||||
"16.05.2023","79.1004","","",""
|
||||
"17.05.2023","79.9798","","",""
|
||||
"18.05.2023","80.7642","","",""
|
||||
"19.05.2023","80.0366","","",""
|
||||
"20.05.2023","79.9093","","",""
|
||||
"23.05.2023","79.9379","","",""
|
||||
"24.05.2023","80.1665","","",""
|
||||
"25.05.2023","79.9669","","",""
|
||||
"26.05.2023","79.9841","","",""
|
||||
"27.05.2023","79.9667","","",""
|
||||
"30.05.2023","80.0555","","",""
|
||||
"31.05.2023","80.6872","","",""
|
||||
"01.06.2023","80.9942","","",""
|
||||
"02.06.2023","80.9657","","",""
|
||||
"03.06.2023","80.8756","","",""
|
||||
"06.06.2023","81.3294","","",""
|
||||
"07.06.2023","81.2502","","",""
|
||||
"08.06.2023","81.4581","","",""
|
||||
"09.06.2023","82.0930","","",""
|
||||
"10.06.2023","82.6417","","",""
|
||||
"14.06.2023","83.6405","","",""
|
||||
"15.06.2023","84.3249","","",""
|
||||
"16.06.2023","83.9611","","",""
|
||||
"17.06.2023","83.6498","","",""
|
||||
"20.06.2023","83.9866","","",""
|
||||
"21.06.2023","84.2336","","",""
|
||||
"22.06.2023","84.2467","","",""
|
||||
"23.06.2023","83.6077","","",""
|
||||
"24.06.2023","84.0793","","",""
|
||||
"27.06.2023","84.6642","","",""
|
||||
"28.06.2023","85.0504","","",""
|
||||
"29.06.2023","85.6192","","",""
|
||||
"30.06.2023","87.0341","","",""
|
||||
"01.07.2023","88.3844","","",""
|
||||
"04.07.2023","89.3255","","",""
|
||||
"05.07.2023","89.5450","","",""
|
||||
"06.07.2023","90.3380","","",""
|
||||
"07.07.2023","92.5695","","",""
|
||||
"08.07.2023","91.6879","","",""
|
||||
"11.07.2023","91.4931","","",""
|
||||
"12.07.2023","90.5045","","",""
|
||||
"13.07.2023","90.6253","","",""
|
||||
"14.07.2023","90.1757","","",""
|
||||
"15.07.2023","90.1190","","",""
|
||||
"18.07.2023","90.4217","","",""
|
||||
"19.07.2023","90.6906","","",""
|
||||
"20.07.2023","91.2046","","",""
|
||||
"21.07.2023","90.8545","","",""
|
||||
"22.07.2023","90.3846","","",""
|
||||
"25.07.2023","90.4890","","",""
|
||||
"26.07.2023","90.0945","","",""
|
||||
"27.07.2023","90.0468","","",""
|
||||
"28.07.2023","90.0225","","",""
|
||||
"29.07.2023","90.9783","","",""
|
||||
"01.08.2023","91.5923","","",""
|
||||
"02.08.2023","91.7755","","",""
|
||||
"03.08.2023","92.8410","","",""
|
||||
"04.08.2023","93.7792","","",""
|
||||
"05.08.2023","94.8076","","",""
|
||||
"08.08.2023","96.5668","","",""
|
||||
"09.08.2023","96.0755","","",""
|
||||
"10.08.2023","97.3999","","",""
|
||||
"11.08.2023","97.2794","","",""
|
||||
"12.08.2023","98.2066","","",""
|
||||
"15.08.2023","101.0399","","",""
|
||||
"16.08.2023","97.4217","","",""
|
||||
"17.08.2023","96.7045","","",""
|
||||
"18.08.2023","93.7460","","",""
|
||||
"19.08.2023","93.4047","","",""
|
||||
"22.08.2023","94.1424","","",""
|
||||
"23.08.2023","94.1185","","",""
|
||||
"24.08.2023","94.4421","","",""
|
||||
"25.08.2023","94.4007","","",""
|
||||
"26.08.2023","94.7117","","",""
|
||||
"29.08.2023","95.4717","","",""
|
||||
"30.08.2023","95.7070","","",""
|
||||
"31.08.2023","95.9283","","",""
|
||||
"01.09.2023","96.3344","","",""
|
||||
"02.09.2023","96.3411","","",""
|
||||
"05.09.2023","96.6199","","",""
|
||||
"06.09.2023","97.5383","","",""
|
||||
"07.09.2023","97.8439","","",""
|
||||
"08.09.2023","98.1961","","",""
|
||||
"09.09.2023","97.9241","","",""
|
||||
"12.09.2023","96.5083","","",""
|
||||
"13.09.2023","94.7035","","",""
|
||||
"14.09.2023","95.9794","","",""
|
||||
"15.09.2023","96.1609","","",""
|
||||
"16.09.2023","96.6338","","",""
|
||||
"19.09.2023","96.6472","","",""
|
||||
"20.09.2023","96.2236","","",""
|
||||
"21.09.2023","96.6172","","",""
|
||||
"22.09.2023","96.0762","","",""
|
||||
"23.09.2023","96.0419","","",""
|
||||
"26.09.2023","96.1456","","",""
|
||||
"27.09.2023","96.2378","","",""
|
||||
"28.09.2023","96.5000","","",""
|
||||
"29.09.2023","97.0018","","",""
|
||||
"30.09.2023","97.4147","","",""
|
||||
"03.10.2023","98.4785","","",""
|
||||
"04.10.2023","99.2677","","",""
|
||||
"05.10.2023","99.4555","","",""
|
||||
"06.10.2023","99.6762","","",""
|
||||
"07.10.2023","100.4911","","",""
|
||||
"10.10.2023","101.3598","round","max-pulsating-bullet","макс"
|
||||
"11.10.2023","99.9349","","",""
|
||||
"12.10.2023","99.9808","","",""
|
||||
"13.10.2023","96.9948","","",""
|
||||
"14.10.2023","97.3075","","",""
|
||||
"17.10.2023","97.2865","","",""
|
||||
"18.10.2023","97.3458","","",""
|
||||
"19.10.2023","97.3724","","",""
|
||||
"20.10.2023","97.3074","","",""
|
||||
"21.10.2023","95.9053","","",""
|
||||
"24.10.2023","94.7081","","",""
|
||||
"25.10.2023","93.5224","","",""
|
||||
"26.10.2023","93.1507","","",""
|
||||
"27.10.2023","93.5616","","",""
|
||||
"28.10.2023","93.2174","","",""
|
||||
"31.10.2023","93.2435","","",""
|
||||
"01.11.2023","92.0226","","",""
|
||||
"02.11.2023","93.2801","","",""
|
||||
"03.11.2023","93.1730","","",""
|
||||
"04.11.2023","93.0351","","",""
|
||||
"08.11.2023","92.4151","","",""
|
||||
"09.11.2023","92.1973","","",""
|
||||
"10.11.2023","91.9266","","",""
|
||||
"11.11.2023","92.0535","","",""
|
||||
"14.11.2023","92.1185","","",""
|
||||
"15.11.2023","91.2570","","",""
|
||||
"16.11.2023","89.4565","","",""
|
||||
"17.11.2023","88.9466","","",""
|
||||
"18.11.2023","89.1237","","",""
|
||||
"21.11.2023","88.4954","","",""
|
||||
"22.11.2023","87.8701","","",""
|
||||
"23.11.2023","88.1648","","",""
|
||||
"24.11.2023","88.1206","","",""
|
||||
"25.11.2023","88.8133","","",""
|
||||
"28.11.2023","88.7045","","",""
|
||||
"29.11.2023","88.6102","","",""
|
||||
"30.11.2023","88.8841","","",""
|
||||
"01.12.2023","88.5819","","",""
|
||||
"02.12.2023","89.7619","","",""
|
||||
"05.12.2023","90.6728","","",""
|
||||
"06.12.2023","91.5823","","",""
|
||||
"07.12.2023","92.7826","","",""
|
||||
"08.12.2023","92.5654","","",""
|
||||
"09.12.2023","91.6402","","",""
|
||||
"12.12.2023","90.9846","","",""
|
||||
"13.12.2023","90.2158","","",""
|
||||
"14.12.2023","89.8926","","",""
|
||||
"15.12.2023","89.6741","","",""
|
||||
"16.12.2023","89.6966","","",""
|
||||
"19.12.2023","90.4162","","",""
|
||||
"20.12.2023","90.0870","","",""
|
||||
"21.12.2023","90.4056","","",""
|
||||
"22.12.2023","91.7062","","",""
|
||||
"23.12.2023","91.9389","","",""
|
||||
"26.12.2023","91.9690","","",""
|
||||
"27.12.2023","91.7069","","",""
|
||||
"28.12.2023","91.7051","","",""
|
||||
"29.12.2023","90.3041","","",""
|
||||
"30.12.2023","89.6883","","",""
|
||||
"10.01.2024","90.4040","","",""
|
||||
"11.01.2024","89.3939","","",""
|
||||
"12.01.2024","88.7818","","",""
|
||||
"13.01.2024","88.1324","","",""
|
||||
"16.01.2024","87.6772","","",""
|
||||
"17.01.2024","87.6457","","",""
|
||||
"18.01.2024","88.3540","","",""
|
||||
"19.01.2024","88.6610","","",""
|
||||
"20.01.2024","88.5896","","",""
|
||||
"23.01.2024","87.9724","","",""
|
||||
"24.01.2024","87.9199","","",""
|
||||
"25.01.2024","88.2829","","",""
|
||||
"26.01.2024","88.6562","","",""
|
||||
"27.01.2024","89.5159","","",""
|
||||
"30.01.2024","89.6090","","",""
|
||||
"31.01.2024","89.2887","","",""
|
||||
"01.02.2024","89.6678","","",""
|
||||
"02.02.2024","90.2299","","",""
|
||||
"03.02.2024","90.6626","","",""
|
||||
"06.02.2024","91.2434","","",""
|
||||
"07.02.2024","90.6842","","",""
|
||||
"08.02.2024","91.1514","","",""
|
||||
"09.02.2024","91.2561","","",""
|
||||
"10.02.2024","90.8901","","",""
|
||||
"13.02.2024","91.0758","","",""
|
||||
"14.02.2024","91.2057","","",""
|
||||
"15.02.2024","91.4316","","",""
|
||||
"16.02.2024","91.8237","","",""
|
||||
"17.02.2024","92.5492","","",""
|
||||
"20.02.2024","92.4102","","",""
|
||||
"21.02.2024","92.3490","","",""
|
||||
"22.02.2024","92.4387","","",""
|
||||
"23.02.2024","92.7519","","",""
|
||||
"27.02.2024","92.6321","","",""
|
||||
"28.02.2024","92.0425","","",""
|
||||
"29.02.2024","91.8692","","",""
|
||||
"01.03.2024","90.8423","","",""
|
||||
"02.03.2024","91.3336","","",""
|
||||
"05.03.2024","91.3534","","",""
|
||||
"06.03.2024","91.1604","","",""
|
||||
"07.03.2024","90.3412","","",""
|
||||
"08.03.2024","90.7493","","",""
|
||||
"12.03.2024","90.6252","","",""
|
||||
"13.03.2024","90.8818","","",""
|
||||
"19.03.2024","91.9829","","",""
|
||||
"20.03.2024","92.2243","","",""
|
||||
"21.03.2024","92.6861","","",""
|
||||
"22.03.2024","91.9499","","",""
|
||||
"23.03.2024","92.6118","","",""
|
||||
"26.03.2024","92.7761","","",""
|
|
3756
data/ds_salaries.csv
Normal file
3756
data/ds_salaries.csv
Normal file
File diff suppressed because it is too large
Load Diff
21614
data/kc_house_data.csv
Normal file
21614
data/kc_house_data.csv
Normal file
File diff suppressed because it is too large
Load Diff
1371
data/mobile phone price prediction.csv
Normal file
1371
data/mobile phone price prediction.csv
Normal file
File diff suppressed because it is too large
Load Diff
848
notebooks/lab1.ipynb
Normal file
848
notebooks/lab1.ipynb
Normal file
File diff suppressed because one or more lines are too long
312
notebooks/lab2_1.ipynb
Normal file
312
notebooks/lab2_1.ipynb
Normal file
@ -0,0 +1,312 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Загрузка данных в DataFrame"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"df = pd.read_csv(\"../data/kc_house_data.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Получение сведений о пропущенных данных"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(df.isnull().sum())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(df.isnull().any())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for i in df.columns:\n",
|
||||
" null_rate = df[i].isnull().sum() / len(df) * 100\n",
|
||||
" if null_rate > 0:\n",
|
||||
" print(f\"{i} процент пустых значений: {null_rate:.2f}%\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Создание выборок данных"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def split_stratified_into_train_val_test(\n",
|
||||
" df_input,\n",
|
||||
" stratify_colname=\"y\",\n",
|
||||
" frac_train=0.6,\n",
|
||||
" frac_val=0.15,\n",
|
||||
" frac_test=0.25,\n",
|
||||
" random_state=None,\n",
|
||||
"):\n",
|
||||
" \"\"\"\n",
|
||||
" Splits a Pandas dataframe into three subsets (train, val, and test)\n",
|
||||
" following fractional ratios provided by the user, where each subset is\n",
|
||||
" stratified by the values in a specific column (that is, each subset has\n",
|
||||
" the same relative frequency of the values in the column). It performs this\n",
|
||||
" splitting by running train_test_split() twice.\n",
|
||||
"\n",
|
||||
" Parameters\n",
|
||||
" ----------\n",
|
||||
" df_input : Pandas dataframe\n",
|
||||
" Input dataframe to be split.\n",
|
||||
" stratify_colname : str\n",
|
||||
" The name of the column that will be used for stratification. Usually\n",
|
||||
" this column would be for the label.\n",
|
||||
" frac_train : float\n",
|
||||
" frac_val : float\n",
|
||||
" frac_test : float\n",
|
||||
" The ratios with which the dataframe will be split into train, val, and\n",
|
||||
" test data. The values should be expressed as float fractions and should\n",
|
||||
" sum to 1.0.\n",
|
||||
" random_state : int, None, or RandomStateInstance\n",
|
||||
" Value to be passed to train_test_split().\n",
|
||||
"\n",
|
||||
" Returns\n",
|
||||
" -------\n",
|
||||
" df_train, df_val, df_test :\n",
|
||||
" Dataframes containing the three splits.\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
" if frac_train + frac_val + frac_test != 1.0:\n",
|
||||
" raise ValueError(\n",
|
||||
" \"fractions %f, %f, %f do not add up to 1.0\"\n",
|
||||
" % (frac_train, frac_val, frac_test)\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" if stratify_colname not in df_input.columns:\n",
|
||||
" raise ValueError(\"%s is not a column in the dataframe\" % (stratify_colname))\n",
|
||||
"\n",
|
||||
" X = df_input # Contains all columns.\n",
|
||||
" y = df_input[\n",
|
||||
" [stratify_colname]\n",
|
||||
" ] # Dataframe of just the column on which to stratify.\n",
|
||||
"\n",
|
||||
" # Split original dataframe into train and temp dataframes.\n",
|
||||
" df_train, df_temp, y_train, y_temp = train_test_split(\n",
|
||||
" X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Split the temp dataframe into val and test dataframes.\n",
|
||||
" relative_frac_test = frac_test / (frac_val + frac_test)\n",
|
||||
" df_val, df_test, y_val, y_test = train_test_split(\n",
|
||||
" df_temp,\n",
|
||||
" y_temp,\n",
|
||||
" stratify=y_temp,\n",
|
||||
" test_size=relative_frac_test,\n",
|
||||
" random_state=random_state,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" assert len(df_input) == len(df_train) + len(df_val) + len(df_test)\n",
|
||||
"\n",
|
||||
" return df_train, df_val, df_test"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[3 5 4 1 2]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(df.condition.unique())\n",
|
||||
"\n",
|
||||
"data = df[\n",
|
||||
" [\n",
|
||||
" \"price\",\n",
|
||||
" \"bedrooms\",\n",
|
||||
" \"bathrooms\",\n",
|
||||
" \"sqft_living\",\n",
|
||||
" \"sqft_lot\",\n",
|
||||
" \"floors\",\n",
|
||||
" \"view\",\n",
|
||||
" \"condition\",\n",
|
||||
" \"grade\",\n",
|
||||
" \"sqft_above\",\n",
|
||||
" \"sqft_basement\",\n",
|
||||
" \"yr_built\",\n",
|
||||
" \"yr_renovated\",\n",
|
||||
" \"zipcode\",\n",
|
||||
" \"lat\",\n",
|
||||
" \"long\",\n",
|
||||
" ]\n",
|
||||
"].copy()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Обучающая выборка: (12967, 16)\n",
|
||||
"condition\n",
|
||||
"3 8418\n",
|
||||
"4 3407\n",
|
||||
"5 1021\n",
|
||||
"2 103\n",
|
||||
"1 18\n",
|
||||
"Name: count, dtype: int64\n",
|
||||
"Контрольная выборка: (4323, 16)\n",
|
||||
"condition\n",
|
||||
"3 2806\n",
|
||||
"4 1136\n",
|
||||
"5 340\n",
|
||||
"2 35\n",
|
||||
"1 6\n",
|
||||
"Name: count, dtype: int64\n",
|
||||
"Тестовая выборка: (4323, 16)\n",
|
||||
"condition\n",
|
||||
"3 2807\n",
|
||||
"4 1136\n",
|
||||
"5 340\n",
|
||||
"2 34\n",
|
||||
"1 6\n",
|
||||
"Name: count, dtype: int64\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df_train, df_val, df_test = split_stratified_into_train_val_test(\n",
|
||||
" data,\n",
|
||||
" stratify_colname=\"condition\",\n",
|
||||
" frac_train=0.60,\n",
|
||||
" frac_val=0.20,\n",
|
||||
" frac_test=0.20,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"Обучающая выборка: \", df_train.shape)\n",
|
||||
"print(df_train.condition.value_counts())\n",
|
||||
"\n",
|
||||
"print(\"Контрольная выборка: \", df_val.shape)\n",
|
||||
"print(df_val.condition.value_counts())\n",
|
||||
"\n",
|
||||
"print(\"Тестовая выборка: \", df_test.shape)\n",
|
||||
"print(df_test.condition.value_counts())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Обучающая выборка: (12967, 16)\n",
|
||||
"condition\n",
|
||||
"3 8418\n",
|
||||
"4 3407\n",
|
||||
"5 1021\n",
|
||||
"2 103\n",
|
||||
"1 18\n",
|
||||
"Name: count, dtype: int64\n",
|
||||
"Обучающая выборка после oversampling: (42073, 16)\n",
|
||||
"condition\n",
|
||||
"5 8464\n",
|
||||
"2 8421\n",
|
||||
"1 8420\n",
|
||||
"3 8418\n",
|
||||
"4 8350\n",
|
||||
"Name: count, dtype: int64\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from imblearn.over_sampling import ADASYN\n",
|
||||
"\n",
|
||||
"ada = ADASYN()\n",
|
||||
"\n",
|
||||
"print(\"Обучающая выборка: \", df_train.shape)\n",
|
||||
"print(df_train.condition.value_counts())\n",
|
||||
"\n",
|
||||
"X_resampled, y_resampled = ada.fit_resample(df_train, df_train[\"condition\"])\n",
|
||||
"df_train_adasyn = pd.DataFrame(X_resampled)\n",
|
||||
"\n",
|
||||
"print(\"Обучающая выборка после oversampling: \", df_train_adasyn.shape)\n",
|
||||
"print(df_train_adasyn.condition.value_counts())"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
648
notebooks/lab2_2.ipynb
Normal file
648
notebooks/lab2_2.ipynb
Normal file
@ -0,0 +1,648 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Загрузка данных в DataFrame"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"df = pd.read_csv(\"../data/car_price_prediction.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>ID</th>\n",
|
||||
" <th>Price</th>\n",
|
||||
" <th>Levy</th>\n",
|
||||
" <th>Manufacturer</th>\n",
|
||||
" <th>Model</th>\n",
|
||||
" <th>Prod_year</th>\n",
|
||||
" <th>Category</th>\n",
|
||||
" <th>Leather interior</th>\n",
|
||||
" <th>Fuel type</th>\n",
|
||||
" <th>Engine volume</th>\n",
|
||||
" <th>Mileage</th>\n",
|
||||
" <th>Cylinders</th>\n",
|
||||
" <th>Gear_box_type</th>\n",
|
||||
" <th>Drive_wheels</th>\n",
|
||||
" <th>Doors</th>\n",
|
||||
" <th>Wheel</th>\n",
|
||||
" <th>Color</th>\n",
|
||||
" <th>Airbags</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>45654403</td>\n",
|
||||
" <td>13328</td>\n",
|
||||
" <td>1399</td>\n",
|
||||
" <td>LEXUS</td>\n",
|
||||
" <td>RX 450</td>\n",
|
||||
" <td>2010</td>\n",
|
||||
" <td>Jeep</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>Hybrid</td>\n",
|
||||
" <td>3.5</td>\n",
|
||||
" <td>186005 km</td>\n",
|
||||
" <td>6.0</td>\n",
|
||||
" <td>Automatic</td>\n",
|
||||
" <td>4x4</td>\n",
|
||||
" <td>04-May</td>\n",
|
||||
" <td>Left wheel</td>\n",
|
||||
" <td>Silver</td>\n",
|
||||
" <td>12</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>44731507</td>\n",
|
||||
" <td>16621</td>\n",
|
||||
" <td>1018</td>\n",
|
||||
" <td>CHEVROLET</td>\n",
|
||||
" <td>Equinox</td>\n",
|
||||
" <td>2011</td>\n",
|
||||
" <td>Jeep</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>Petrol</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>192000 km</td>\n",
|
||||
" <td>6.0</td>\n",
|
||||
" <td>Tiptronic</td>\n",
|
||||
" <td>4x4</td>\n",
|
||||
" <td>04-May</td>\n",
|
||||
" <td>Left wheel</td>\n",
|
||||
" <td>Black</td>\n",
|
||||
" <td>8</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>45774419</td>\n",
|
||||
" <td>8467</td>\n",
|
||||
" <td>-</td>\n",
|
||||
" <td>HONDA</td>\n",
|
||||
" <td>FIT</td>\n",
|
||||
" <td>2006</td>\n",
|
||||
" <td>Hatchback</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>Petrol</td>\n",
|
||||
" <td>1.3</td>\n",
|
||||
" <td>200000 km</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" <td>Variator</td>\n",
|
||||
" <td>Front</td>\n",
|
||||
" <td>04-May</td>\n",
|
||||
" <td>Right-hand drive</td>\n",
|
||||
" <td>Black</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>45769185</td>\n",
|
||||
" <td>3607</td>\n",
|
||||
" <td>862</td>\n",
|
||||
" <td>FORD</td>\n",
|
||||
" <td>Escape</td>\n",
|
||||
" <td>2011</td>\n",
|
||||
" <td>Jeep</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>Hybrid</td>\n",
|
||||
" <td>2.5</td>\n",
|
||||
" <td>168966 km</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" <td>Automatic</td>\n",
|
||||
" <td>4x4</td>\n",
|
||||
" <td>04-May</td>\n",
|
||||
" <td>Left wheel</td>\n",
|
||||
" <td>White</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>45809263</td>\n",
|
||||
" <td>11726</td>\n",
|
||||
" <td>446</td>\n",
|
||||
" <td>HONDA</td>\n",
|
||||
" <td>FIT</td>\n",
|
||||
" <td>2014</td>\n",
|
||||
" <td>Hatchback</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>Petrol</td>\n",
|
||||
" <td>1.3</td>\n",
|
||||
" <td>91901 km</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" <td>Automatic</td>\n",
|
||||
" <td>Front</td>\n",
|
||||
" <td>04-May</td>\n",
|
||||
" <td>Left wheel</td>\n",
|
||||
" <td>Silver</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" ID Price Levy Manufacturer Model Prod_year Category \\\n",
|
||||
"0 45654403 13328 1399 LEXUS RX 450 2010 Jeep \n",
|
||||
"1 44731507 16621 1018 CHEVROLET Equinox 2011 Jeep \n",
|
||||
"2 45774419 8467 - HONDA FIT 2006 Hatchback \n",
|
||||
"3 45769185 3607 862 FORD Escape 2011 Jeep \n",
|
||||
"4 45809263 11726 446 HONDA FIT 2014 Hatchback \n",
|
||||
"\n",
|
||||
" Leather interior Fuel type Engine volume Mileage Cylinders \\\n",
|
||||
"0 Yes Hybrid 3.5 186005 km 6.0 \n",
|
||||
"1 No Petrol 3 192000 km 6.0 \n",
|
||||
"2 No Petrol 1.3 200000 km 4.0 \n",
|
||||
"3 Yes Hybrid 2.5 168966 km 4.0 \n",
|
||||
"4 Yes Petrol 1.3 91901 km 4.0 \n",
|
||||
"\n",
|
||||
" Gear_box_type Drive_wheels Doors Wheel Color Airbags \n",
|
||||
"0 Automatic 4x4 04-May Left wheel Silver 12 \n",
|
||||
"1 Tiptronic 4x4 04-May Left wheel Black 8 \n",
|
||||
"2 Variator Front 04-May Right-hand drive Black 2 \n",
|
||||
"3 Automatic 4x4 04-May Left wheel White 0 \n",
|
||||
"4 Automatic Front 04-May Left wheel Silver 4 "
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Получение сведений о пропущенных данных"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ID 0\n",
|
||||
"Price 0\n",
|
||||
"Levy 0\n",
|
||||
"Manufacturer 0\n",
|
||||
"Model 0\n",
|
||||
"Prod_year 0\n",
|
||||
"Category 0\n",
|
||||
"Leather interior 0\n",
|
||||
"Fuel type 0\n",
|
||||
"Engine volume 0\n",
|
||||
"Mileage 0\n",
|
||||
"Cylinders 0\n",
|
||||
"Gear_box_type 0\n",
|
||||
"Drive_wheels 0\n",
|
||||
"Doors 0\n",
|
||||
"Wheel 0\n",
|
||||
"Color 0\n",
|
||||
"Airbags 0\n",
|
||||
"dtype: int64\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(df.isnull().sum())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ID False\n",
|
||||
"Price False\n",
|
||||
"Levy False\n",
|
||||
"Manufacturer False\n",
|
||||
"Model False\n",
|
||||
"Prod_year False\n",
|
||||
"Category False\n",
|
||||
"Leather interior False\n",
|
||||
"Fuel type False\n",
|
||||
"Engine volume False\n",
|
||||
"Mileage False\n",
|
||||
"Cylinders False\n",
|
||||
"Gear_box_type False\n",
|
||||
"Drive_wheels False\n",
|
||||
"Doors False\n",
|
||||
"Wheel False\n",
|
||||
"Color False\n",
|
||||
"Airbags False\n",
|
||||
"dtype: bool\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(df.isnull().any())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['1399' '1018' '-' '862' '446' '891' '761' '751' '394' '1053' '1055'\n",
|
||||
" '1079' '810' '2386' '1850' '531' '586' '1249' '2455' '583' '1537' '1288'\n",
|
||||
" '915' '1750' '707' '1077' '1486' '1091' '650' '382' '1436' '1194' '503'\n",
|
||||
" '1017' '1104' '639' '629' '919' '781' '530' '640' '765' '777' '779' '934'\n",
|
||||
" '769' '645' '1185' '1324' '830' '1187' '1111' '760' '642' '1604' '1095'\n",
|
||||
" '966' '473' '1138' '1811' '988' '917' '1156' '687' '11714' '836' '1347'\n",
|
||||
" '2866' '1646' '259' '609' '697' '585' '475' '690' '308' '1823' '1361'\n",
|
||||
" '1273' '924' '584' '2078' '831' '1172' '893' '1872' '1885' '1266' '447'\n",
|
||||
" '2148' '1730' '730' '289' '502' '333' '1325' '247' '879' '1342' '1327'\n",
|
||||
" '1598' '1514' '1058' '738' '1935' '481' '1522' '1282' '456' '880' '900'\n",
|
||||
" '798' '1277' '442' '1051' '790' '1292' '1047' '528' '1211' '1493' '1793'\n",
|
||||
" '574' '930' '1998' '271' '706' '1481' '1677' '1661' '1286' '1408' '1090'\n",
|
||||
" '595' '1451' '1267' '993' '1714' '878' '641' '749' '1511' '603' '353'\n",
|
||||
" '877' '1236' '1141' '397' '784' '1024' '1357' '1301' '770' '922' '1438'\n",
|
||||
" '753' '607' '1363' '638' '490' '431' '565' '517' '833' '489' '1760' '986'\n",
|
||||
" '1841' '1620' '1360' '474' '1099' '978' '1624' '1946' '1268' '1307' '696'\n",
|
||||
" '649' '666' '2151' '551' '800' '971' '1323' '2377' '1845' '1083' '694'\n",
|
||||
" '463' '419' '345' '1515' '1505' '2056' '1203' '729' '460' '1356' '876'\n",
|
||||
" '911' '1190' '780' '448' '2410' '1848' '1148' '834' '1275' '1028' '1197'\n",
|
||||
" '724' '890' '1705' '505' '789' '2959' '518' '461' '1719' '2858' '3156'\n",
|
||||
" '2225' '2177' '1968' '1888' '1308' '2736' '1103' '557' '2195' '843'\n",
|
||||
" '1664' '723' '4508' '562' '501' '2018' '1076' '1202' '3301' '691' '1440'\n",
|
||||
" '1869' '1178' '418' '1820' '1413' '488' '1304' '363' '2108' '521' '1659'\n",
|
||||
" '87' '1411' '1528' '3292' '7058' '1578' '627' '874' '1996' '1488' '5679'\n",
|
||||
" '1234' '5603' '400' '889' '3268' '875' '949' '2265' '441' '742' '425'\n",
|
||||
" '2476' '2971' '614' '1816' '1375' '1405' '2297' '1062' '1113' '420'\n",
|
||||
" '2469' '658' '1951' '2670' '2578' '1995' '1032' '994' '1011' '2421'\n",
|
||||
" '1296' '155' '494' '426' '1086' '961' '2236' '1829' '764' '1834' '1054'\n",
|
||||
" '617' '1529' '2266' '637' '626' '1832' '1016' '2002' '1756' '746' '1285'\n",
|
||||
" '2690' '1118' '5332' '980' '1807' '970' '1228' '1195' '1132' '1768'\n",
|
||||
" '1384' '1080' '7063' '1817' '1452' '1975' '1368' '702' '1974' '1781'\n",
|
||||
" '1036' '944' '663' '364' '1539' '1345' '1680' '2209' '741' '1575' '695'\n",
|
||||
" '1317' '294' '1525' '424' '997' '1473' '1552' '2819' '2188' '1668' '3057'\n",
|
||||
" '799' '1502' '2606' '552' '1694' '1759' '1110' '399' '1470' '1174' '5877'\n",
|
||||
" '1474' '1688' '526' '686' '5908' '1107' '2070' '1468' '1246' '1685' '556'\n",
|
||||
" '1533' '1917' '1346' '732' '692' '579' '421' '362' '3505' '1855' '2711'\n",
|
||||
" '1586' '3739' '681' '1708' '2278' '1701' '722' '1482' '928' '827' '832'\n",
|
||||
" '527' '604' '173' '1341' '3329' '1553' '859' '167' '916' '828' '2082'\n",
|
||||
" '1176' '1108' '975' '3008' '1516' '2269' '1699' '2073' '1031' '1503'\n",
|
||||
" '2364' '1030' '1442' '5666' '2715' '1437' '2067' '1426' '2908' '1279'\n",
|
||||
" '866' '4283' '279' '2658' '3015' '2004' '1391' '4736' '748' '1466' '644'\n",
|
||||
" '683' '2705' '1297' '731' '1252' '2216' '3141' '3273' '1518' '1723'\n",
|
||||
" '1588' '972' '682' '1094' '668' '175' '967' '402' '3894' '1960' '1599'\n",
|
||||
" '2000' '2084' '1621' '714' '1109' '3989' '873' '1572' '1163' '1991'\n",
|
||||
" '1716' '1673' '2562' '2874' '965' '462' '605' '1948' '1736' '3518' '2054'\n",
|
||||
" '2467' '1681' '1272' '1205' '750' '2156' '2566' '115' '524' '3184' '676'\n",
|
||||
" '1678' '612' '328' '955' '1441' '1675' '3965' '2909' '623' '822' '867'\n",
|
||||
" '3025' '1993' '792' '636' '4057' '3743' '2337' '2570' '2418' '2472'\n",
|
||||
" '3910' '1662' '2123' '2628' '3208' '2080' '3699' '2913' '864' '2505'\n",
|
||||
" '870' '7536' '1924' '1671' '1064' '1836' '1866' '4741' '841' '1369'\n",
|
||||
" '5681' '3112' '1366' '2223' '1198' '1039' '3811' '3571' '1387' '1171'\n",
|
||||
" '1365' '1531' '1590' '11706' '2308' '4860' '1641' '1045' '1901']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(df[\"Levy\"].unique())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df[\"Levy\"] = df[\"Levy\"].replace({'-' : None})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Levy процент пустых значений: 30.25%\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for i in df.columns:\n",
|
||||
" null_rate = df[i].isnull().sum() / len(df) * 100\n",
|
||||
" if null_rate > 0:\n",
|
||||
" print(f\"{i} процент пустых значений: {null_rate:.2f}%\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Заполнение пропущенных данных"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df.fillna({\"Levy\": 0}, inplace=True)\n",
|
||||
"for i in df.columns:\n",
|
||||
" null_rate = df[i].isnull().sum() / len(df) * 100\n",
|
||||
" if null_rate > 0:\n",
|
||||
" print(f\"{i} процент пустых значений: {null_rate:.2f}%\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Создание выборок данных"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def split_stratified_into_train_val_test(\n",
|
||||
" df_input,\n",
|
||||
" stratify_colname=\"y\",\n",
|
||||
" frac_train=0.6,\n",
|
||||
" frac_val=0.15,\n",
|
||||
" frac_test=0.25,\n",
|
||||
" random_state=None,\n",
|
||||
"):\n",
|
||||
" \"\"\"\n",
|
||||
" Splits a Pandas dataframe into three subsets (train, val, and test)\n",
|
||||
" following fractional ratios provided by the user, where each subset is\n",
|
||||
" stratified by the values in a specific column (that is, each subset has\n",
|
||||
" the same relative frequency of the values in the column). It performs this\n",
|
||||
" splitting by running train_test_split() twice.\n",
|
||||
"\n",
|
||||
" Parameters\n",
|
||||
" ----------\n",
|
||||
" df_input : Pandas dataframe\n",
|
||||
" Input dataframe to be split.\n",
|
||||
" stratify_colname : str\n",
|
||||
" The name of the column that will be used for stratification. Usually\n",
|
||||
" this column would be for the label.\n",
|
||||
" frac_train : float\n",
|
||||
" frac_val : float\n",
|
||||
" frac_test : float\n",
|
||||
" The ratios with which the dataframe will be split into train, val, and\n",
|
||||
" test data. The values should be expressed as float fractions and should\n",
|
||||
" sum to 1.0.\n",
|
||||
" random_state : int, None, or RandomStateInstance\n",
|
||||
" Value to be passed to train_test_split().\n",
|
||||
"\n",
|
||||
" Returns\n",
|
||||
" -------\n",
|
||||
" df_train, df_val, df_test :\n",
|
||||
" Dataframes containing the three splits.\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
" if frac_train + frac_val + frac_test != 1.0:\n",
|
||||
" raise ValueError(\n",
|
||||
" \"fractions %f, %f, %f do not add up to 1.0\"\n",
|
||||
" % (frac_train, frac_val, frac_test)\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" if stratify_colname not in df_input.columns:\n",
|
||||
" raise ValueError(\"%s is not a column in the dataframe\" % (stratify_colname))\n",
|
||||
"\n",
|
||||
" X = df_input # Contains all columns.\n",
|
||||
" y = df_input[\n",
|
||||
" [stratify_colname]\n",
|
||||
" ] # Dataframe of just the column on which to stratify.\n",
|
||||
"\n",
|
||||
" # Split original dataframe into train and temp dataframes.\n",
|
||||
" df_train, df_temp, y_train, y_temp = train_test_split(\n",
|
||||
" X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Split the temp dataframe into val and test dataframes.\n",
|
||||
" relative_frac_test = frac_test / (frac_val + frac_test)\n",
|
||||
" df_val, df_test, y_val, y_test = train_test_split(\n",
|
||||
" df_temp,\n",
|
||||
" y_temp,\n",
|
||||
" stratify=y_temp,\n",
|
||||
" test_size=relative_frac_test,\n",
|
||||
" random_state=random_state,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" assert len(df_input) == len(df_train) + len(df_val) + len(df_test)\n",
|
||||
"\n",
|
||||
" return df_train, df_val, df_test"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['Automatic' 'Tiptronic' 'Variator' 'Manual']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(df.Gear_box_type.unique())\n",
|
||||
"\n",
|
||||
"data = df[\n",
|
||||
" [\n",
|
||||
" \"Price\",\n",
|
||||
" \"Gear_box_type\",\n",
|
||||
" ]\n",
|
||||
"].copy()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Обучающая выборка: (11542, 2)\n",
|
||||
"Gear_box_type\n",
|
||||
"Automatic 8108\n",
|
||||
"Tiptronic 1861\n",
|
||||
"Manual 1125\n",
|
||||
"Variator 448\n",
|
||||
"Name: count, dtype: int64\n",
|
||||
"Контрольная выборка: (3847, 2)\n",
|
||||
"Gear_box_type\n",
|
||||
"Automatic 2703\n",
|
||||
"Tiptronic 620\n",
|
||||
"Manual 375\n",
|
||||
"Variator 149\n",
|
||||
"Name: count, dtype: int64\n",
|
||||
"Тестовая выборка: (3848, 2)\n",
|
||||
"Gear_box_type\n",
|
||||
"Automatic 2703\n",
|
||||
"Tiptronic 621\n",
|
||||
"Manual 375\n",
|
||||
"Variator 149\n",
|
||||
"Name: count, dtype: int64\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df_train, df_val, df_test = split_stratified_into_train_val_test(\n",
|
||||
" data,\n",
|
||||
" stratify_colname=\"Gear_box_type\",\n",
|
||||
" frac_train=0.60,\n",
|
||||
" frac_val=0.20,\n",
|
||||
" frac_test=0.20,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"Обучающая выборка: \", df_train.shape)\n",
|
||||
"print(df_train.Gear_box_type.value_counts())\n",
|
||||
"\n",
|
||||
"print(\"Контрольная выборка: \", df_val.shape)\n",
|
||||
"print(df_val.Gear_box_type.value_counts())\n",
|
||||
"\n",
|
||||
"print(\"Тестовая выборка: \", df_test.shape)\n",
|
||||
"print(df_test.Gear_box_type.value_counts())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Выборка с избытком (oversampling)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Обучающая выборка: (11542, 2)\n",
|
||||
"Gear_box_type\n",
|
||||
"Automatic 8108\n",
|
||||
"Tiptronic 1861\n",
|
||||
"Manual 1125\n",
|
||||
"Variator 448\n",
|
||||
"Name: count, dtype: int64\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "ValueError",
|
||||
"evalue": "could not convert string to float: 'Automatic'",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_9996\\2277749880.py\u001b[0m in \u001b[0;36m?\u001b[1;34m()\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Обучающая выборка: \"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mGear_box_type\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalue_counts\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 8\u001b[1;33m \u001b[0mX_resampled\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_resampled\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mada\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit_resample\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"Gear_box_type\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 9\u001b[0m \u001b[0mdf_train_adasyn\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_resampled\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 10\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Обучающая выборка после oversampling: \"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf_train_adasyn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[1;32mc:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\imblearn\\base.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(self, X, y)\u001b[0m\n\u001b[0;32m 204\u001b[0m \u001b[0my_resampled\u001b[0m \u001b[1;33m:\u001b[0m \u001b[0marray\u001b[0m\u001b[1;33m-\u001b[0m\u001b[0mlike\u001b[0m \u001b[0mof\u001b[0m \u001b[0mshape\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mn_samples_new\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 205\u001b[0m \u001b[0mThe\u001b[0m \u001b[0mcorresponding\u001b[0m \u001b[0mlabel\u001b[0m \u001b[0mof\u001b[0m \u001b[1;33m`\u001b[0m\u001b[0mX_resampled\u001b[0m\u001b[1;33m`\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 206\u001b[0m \"\"\"\n\u001b[0;32m 207\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_validate_params\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 208\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit_resample\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
|
||||
"\u001b[1;32mc:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\imblearn\\base.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(self, X, y)\u001b[0m\n\u001b[0;32m 102\u001b[0m \u001b[0mThe\u001b[0m \u001b[0mcorresponding\u001b[0m \u001b[0mlabel\u001b[0m \u001b[0mof\u001b[0m \u001b[1;33m`\u001b[0m\u001b[0mX_resampled\u001b[0m\u001b[1;33m`\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 103\u001b[0m \"\"\"\n\u001b[0;32m 104\u001b[0m \u001b[0mcheck_classification_targets\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 105\u001b[0m \u001b[0marrays_transformer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mArraysTransformer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 106\u001b[1;33m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbinarize_y\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_check_X_y\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 107\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 108\u001b[0m self.sampling_strategy_ = check_sampling_strategy(\n\u001b[0;32m 109\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msampling_strategy\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_sampling_type\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[1;32mc:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\imblearn\\base.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(self, X, y, accept_sparse)\u001b[0m\n\u001b[0;32m 157\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_check_X_y\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 158\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0maccept_sparse\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 159\u001b[0m \u001b[0maccept_sparse\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;34m\"csr\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"csc\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 160\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbinarize_y\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcheck_target_type\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindicate_one_vs_all\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 161\u001b[1;33m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_validate_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreset\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0maccept_sparse\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maccept_sparse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 162\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbinarize_y\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[1;32mc:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\base.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(self, X, y, reset, validate_separately, cast_to_ndarray, **check_params)\u001b[0m\n\u001b[0;32m 646\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;34m\"estimator\"\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mcheck_y_params\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 647\u001b[0m \u001b[0mcheck_y_params\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;33m**\u001b[0m\u001b[0mdefault_check_params\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mcheck_y_params\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 648\u001b[0m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minput_name\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"y\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mcheck_y_params\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 649\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 650\u001b[1;33m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcheck_X_y\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mcheck_params\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 651\u001b[0m \u001b[0mout\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 653\u001b[0m \u001b[1;32mif\u001b[0m 
\u001b[1;32mnot\u001b[0m \u001b[0mno_val_X\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mcheck_params\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"ensure_2d\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[1;32mc:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\utils\\validation.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)\u001b[0m\n\u001b[0;32m 1297\u001b[0m raise ValueError(\n\u001b[0;32m 1298\u001b[0m \u001b[1;33mf\"\u001b[0m\u001b[1;33m{\u001b[0m\u001b[0mestimator_name\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m requires y to be passed, but the target y is None\u001b[0m\u001b[1;33m\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1299\u001b[0m \u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1300\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1301\u001b[1;33m X = check_array(\n\u001b[0m\u001b[0;32m 1302\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1303\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maccept_sparse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1304\u001b[0m \u001b[0maccept_large_sparse\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maccept_large_sparse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[1;32mc:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\utils\\validation.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)\u001b[0m\n\u001b[0;32m 1009\u001b[0m \u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1010\u001b[0m \u001b[0marray\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mxp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1011\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1012\u001b[0m \u001b[0marray\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_asarray_with_order\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0morder\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0morder\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mxp\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mxp\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1013\u001b[1;33m \u001b[1;32mexcept\u001b[0m \u001b[0mComplexWarning\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mcomplex_warning\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1014\u001b[0m raise ValueError(\n\u001b[0;32m 1015\u001b[0m \u001b[1;34m\"Complex data not 
supported\\n{}\\n\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1016\u001b[0m \u001b[1;33m)\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mcomplex_warning\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[1;32mc:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\utils\\_array_api.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(array, dtype, order, copy, xp, device)\u001b[0m\n\u001b[0;32m 741\u001b[0m \u001b[1;31m# Use NumPy API to support order\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 742\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mcopy\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 743\u001b[0m \u001b[0marray\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnumpy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0morder\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0morder\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 744\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 745\u001b[1;33m \u001b[0marray\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnumpy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0morder\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0morder\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 746\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 747\u001b[0m \u001b[1;31m# At this point array is a NumPy ndarray. We convert it to an array\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 748\u001b[0m \u001b[1;31m# container that is consistent with the input's namespace.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[1;32mc:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(self, dtype, copy)\u001b[0m\n\u001b[0;32m 2149\u001b[0m def __array__(\n\u001b[0;32m 2150\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mnpt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDTypeLike\u001b[0m \u001b[1;33m|\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mbool_t\u001b[0m \u001b[1;33m|\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2151\u001b[0m \u001b[1;33m)\u001b[0m \u001b[1;33m->\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2152\u001b[0m \u001b[0mvalues\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2153\u001b[1;33m \u001b[0marr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2154\u001b[0m if (\n\u001b[0;32m 2155\u001b[0m \u001b[0mastype_is_view\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0marr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2156\u001b[0m \u001b[1;32mand\u001b[0m 
\u001b[0musing_copy_on_write\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[1;31mValueError\u001b[0m: could not convert string to float: 'Automatic'"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from imblearn.over_sampling import ADASYN\n",
|
||||
"\n",
|
||||
"ada = ADASYN()\n",
|
||||
"\n",
|
||||
"print(\"Обучающая выборка: \", df_train.shape)\n",
|
||||
"print(df_train.Gear_box_type.value_counts())\n",
|
||||
"\n",
|
||||
"X_resampled, y_resampled = ada.fit_resample(df_train, df_train[\"Gear_box_type\"])\n",
|
||||
"df_train_adasyn = pd.DataFrame(X_resampled)\n",
|
||||
"\n",
|
||||
"print(\"Обучающая выборка после oversampling: \", df_train_adasyn.shape)\n",
|
||||
"print(df_train_adasyn.Gear_box_type.value_counts())"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
2584
notebooks/lab3_1.ipynb
Normal file
2584
notebooks/lab3_1.ipynb
Normal file
File diff suppressed because one or more lines are too long
2971
notebooks/lab3_2.ipynb
Normal file
2971
notebooks/lab3_2.ipynb
Normal file
File diff suppressed because one or more lines are too long
2962
notebooks/lab4.ipynb
Normal file
2962
notebooks/lab4.ipynb
Normal file
File diff suppressed because one or more lines are too long
2219
notebooks/lab4_pipeline.ipynb
Normal file
2219
notebooks/lab4_pipeline.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
2811
notebooks/lab4_sandbox.ipynb
Normal file
2811
notebooks/lab4_sandbox.ipynb
Normal file
File diff suppressed because one or more lines are too long
17
notebooks/transformers.py
Normal file
17
notebooks/transformers.py
Normal file
@ -0,0 +1,17 @@
|
||||
import numpy as np
|
||||
from sklearn.base import BaseEstimator, TransformerMixin
|
||||
|
||||
|
||||
class CarsFeatures(BaseEstimator, TransformerMixin):
    """Transformer that adds a derived ``Age`` column to a cars table.

    ``Age`` is computed as ``reference_year - X["Prod. year"]``.

    Parameters
    ----------
    reference_year : int, default=2020
        Year from which the ``Prod. year`` column is subtracted.
    """

    def __init__(self, reference_year=2020):
        # Stored under the same name as the constructor argument, following
        # the scikit-learn estimator convention for get_params()/set_params().
        self.reference_year = reference_year

    def fit(self, X, y=None):
        """Learn nothing from the data and return the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with an additional ``Age`` column.

        Parameters
        ----------
        X : object indexable by column name and providing ``.copy()``
            Must contain a ``"Prod. year"`` column.
        y : ignored

        Returns
        -------
        A copy of ``X`` with ``Age = reference_year - X["Prod. year"]`` added.
        """
        # Work on a copy so the caller's data is left untouched; assigning the
        # column on X directly would modify the input in place.
        X = X.copy()
        X["Age"] = self.reference_year - X["Prod. year"]
        return X

    def get_feature_names_out(self, features_in):
        """Return the input feature names followed by ``"Age"``."""
        return np.append(features_in, ["Age"], axis=0)
|
154
poetry.lock
generated
154
poetry.lock
generated
@ -467,6 +467,17 @@ files = [
|
||||
[package.dependencies]
|
||||
colorama = {version = "*", markers = "platform_system == \"Windows\""}
|
||||
|
||||
[[package]]
|
||||
name = "cloudpickle"
|
||||
version = "3.1.0"
|
||||
description = "Pickler class to extend the standard pickle.Pickler functionality"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "cloudpickle-3.1.0-py3-none-any.whl", hash = "sha256:fe11acda67f61aaaec473e3afe030feb131d78a43461b718185363384f1ba12e"},
|
||||
{file = "cloudpickle-3.1.0.tar.gz", hash = "sha256:81a929b6e3c7335c863c771d673d105f02efdb89dfaba0c90495d1c64796601b"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "colorama"
|
||||
version = "0.4.6"
|
||||
@ -675,6 +686,41 @@ files = [
|
||||
[package.extras]
|
||||
devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"]
|
||||
|
||||
[[package]]
|
||||
name = "featuretools"
|
||||
version = "1.31.0"
|
||||
description = "a framework for automated feature engineering"
|
||||
optional = false
|
||||
python-versions = "<4,>=3.9"
|
||||
files = [
|
||||
{file = "featuretools-1.31.0-py3-none-any.whl", hash = "sha256:87c94e9ae959c89acd83da96bd2583f3ef0f6daaa9639cbb6e46dbde2c742a18"},
|
||||
{file = "featuretools-1.31.0.tar.gz", hash = "sha256:01bfb17fcc1715b4c3623c7bc94a8982122c4a0fa03350ed478601bb81f90155"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
cloudpickle = ">=1.5.0"
|
||||
holidays = ">=0.17"
|
||||
numpy = ">=1.25.0"
|
||||
packaging = ">=20.0"
|
||||
pandas = ">=2.0.0"
|
||||
psutil = ">=5.7.0"
|
||||
scipy = ">=1.10.0"
|
||||
tqdm = ">=4.66.3"
|
||||
woodwork = ">=0.28.0"
|
||||
|
||||
[package.extras]
|
||||
autonormalize = ["autonormalize (>=2.0.1)"]
|
||||
complete = ["featuretools[dask,nlp,premium]"]
|
||||
dask = ["dask[dataframe] (>=2023.2.0)", "distributed (>=2023.2.0)"]
|
||||
dev = ["black[jupyter] (>=23.1.0)", "featuretools[dask,docs,test]", "pre-commit (>=2.20.0)", "ruff (>=0.1.6)"]
|
||||
docs = ["Sphinx (==5.1.1)", "autonormalize (>=2.0.1)", "click (>=7.0.0)", "featuretools[dask,test]", "ipython (==8.4.0)", "jupyter (==1.0.0)", "jupyter-client (>=8.0.2)", "matplotlib (==3.7.2)", "myst-parser (==0.18.0)", "nbconvert (==6.5.0)", "nbsphinx (==0.8.9)", "pydata-sphinx-theme (==0.9.0)", "sphinx-copybutton (==0.5.0)", "sphinx-inline-tabs (==2022.1.2b11)"]
|
||||
nlp = ["nlp-primitives (>=2.12.0)"]
|
||||
premium = ["premium-primitives (>=0.0.3)"]
|
||||
sklearn = ["featuretools-sklearn-transformer (>=1.0.0)"]
|
||||
sql = ["featuretools-sql (>=0.0.1)", "psycopg2-binary (>=2.9.3)"]
|
||||
test = ["boto3 (>=1.34.32)", "composeml (>=0.8.0)", "graphviz (>=0.8.4)", "moto[all] (>=5.0.0)", "pip (>=23.3.0)", "pyarrow (>=14.0.1)", "pympler (>=0.8)", "pytest (>=7.1.2)", "pytest-cov (>=3.0.0)", "pytest-timeout (>=2.1.0)", "pytest-xdist (>=2.5.0)", "smart-open (>=5.0.0)", "urllib3 (>=1.26.18)"]
|
||||
tsfresh = ["featuretools-tsfresh-primitives (>=1.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "flask"
|
||||
version = "3.0.3"
|
||||
@ -833,6 +879,20 @@ files = [
|
||||
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "holidays"
|
||||
version = "0.60"
|
||||
description = "World Holidays Framework"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
files = [
|
||||
{file = "holidays-0.60-py3-none-any.whl", hash = "sha256:d857949c5ee35655215a10c5a26e6a856bdc3beccc4fbbc8debef98dfba17b82"},
|
||||
{file = "holidays-0.60.tar.gz", hash = "sha256:495fc5123f5d92b92673237375eb8c15a03d21c647b089db509b7d9612267556"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
python-dateutil = "*"
|
||||
|
||||
[[package]]
|
||||
name = "httpcore"
|
||||
version = "1.0.5"
|
||||
@ -914,6 +974,25 @@ examples = ["keras (>=2.4.3)", "matplotlib (>=3.1.2)", "pandas (>=1.0.5)", "seab
|
||||
optional = ["keras (>=2.4.3)", "pandas (>=1.0.5)", "tensorflow (>=2.4.3)"]
|
||||
tests = ["black (>=23.3.0)", "flake8 (>=3.8.2)", "keras (>=2.4.3)", "mypy (>=1.3.0)", "pandas (>=1.0.5)", "pytest (>=5.0.1)", "pytest-cov (>=2.9.0)", "tensorflow (>=2.4.3)"]
|
||||
|
||||
[[package]]
|
||||
name = "importlib-resources"
|
||||
version = "6.4.5"
|
||||
description = "Read resources from Python packages"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "importlib_resources-6.4.5-py3-none-any.whl", hash = "sha256:ac29d5f956f01d5e4bb63102a5a19957f1b9175e45649977264a1416783bb717"},
|
||||
{file = "importlib_resources-6.4.5.tar.gz", hash = "sha256:980862a1d16c9e147a59603677fa2aa5fd82b87f223b6cb870695bcfce830065"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"]
|
||||
cover = ["pytest-cov"]
|
||||
doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
|
||||
enabler = ["pytest-enabler (>=2.2)"]
|
||||
test = ["jaraco.test (>=5.4)", "pytest (>=6,!=8.1.*)", "zipp (>=3.17)"]
|
||||
type = ["pytest-mypy"]
|
||||
|
||||
[[package]]
|
||||
name = "ipykernel"
|
||||
version = "6.29.5"
|
||||
@ -2708,6 +2787,11 @@ files = [
|
||||
{file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"},
|
||||
{file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"},
|
||||
{file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"},
|
||||
{file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"},
|
||||
{file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"},
|
||||
{file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"},
|
||||
{file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"},
|
||||
{file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"},
|
||||
{file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"},
|
||||
{file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"},
|
||||
{file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"},
|
||||
@ -2781,6 +2865,27 @@ dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodest
|
||||
doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.13.1)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<=7.3.7)", "sphinx-design (>=0.4.0)"]
|
||||
test = ["Cython", "array-api-strict (>=2.0)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"]
|
||||
|
||||
[[package]]
|
||||
name = "seaborn"
|
||||
version = "0.13.2"
|
||||
description = "Statistical data visualization"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987"},
|
||||
{file = "seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
matplotlib = ">=3.4,<3.6.1 || >3.6.1"
|
||||
numpy = ">=1.20,<1.24.0 || >1.24.0"
|
||||
pandas = ">=1.2"
|
||||
|
||||
[package.extras]
|
||||
dev = ["flake8", "flit", "mypy", "pandas-stubs", "pre-commit", "pytest", "pytest-cov", "pytest-xdist"]
|
||||
docs = ["ipykernel", "nbconvert", "numpydoc", "pydata_sphinx_theme (==0.10.0rc2)", "pyyaml", "sphinx (<6.0.0)", "sphinx-copybutton", "sphinx-design", "sphinx-issues"]
|
||||
stats = ["scipy (>=1.7)", "statsmodels (>=0.12)"]
|
||||
|
||||
[[package]]
|
||||
name = "send2trash"
|
||||
version = "1.8.3"
|
||||
@ -2939,6 +3044,27 @@ files = [
|
||||
{file = "tornado-6.4.1.tar.gz", hash = "sha256:92d3ab53183d8c50f8204a51e6f91d18a15d5ef261e84d452800d4ff6fc504e9"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tqdm"
|
||||
version = "4.67.0"
|
||||
description = "Fast, Extensible Progress Meter"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "tqdm-4.67.0-py3-none-any.whl", hash = "sha256:0cd8af9d56911acab92182e88d763100d4788bdf421d251616040cc4d44863be"},
|
||||
{file = "tqdm-4.67.0.tar.gz", hash = "sha256:fe5a6f95e6fe0b9755e9469b77b9c3cf850048224ecaa8293d7d2d31f97d869a"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
colorama = {version = "*", markers = "platform_system == \"Windows\""}
|
||||
|
||||
[package.extras]
|
||||
dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"]
|
||||
discord = ["requests"]
|
||||
notebook = ["ipywidgets (>=6)"]
|
||||
slack = ["slack-sdk"]
|
||||
telegram = ["requests"]
|
||||
|
||||
[[package]]
|
||||
name = "traitlets"
|
||||
version = "5.14.3"
|
||||
@ -3110,7 +3236,33 @@ files = [
|
||||
{file = "widgetsnbextension-4.0.13.tar.gz", hash = "sha256:ffcb67bc9febd10234a362795f643927f4e0c05d9342c727b65d2384f8feacb6"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "woodwork"
|
||||
version = "0.31.0"
|
||||
description = "a data typing library for machine learning"
|
||||
optional = false
|
||||
python-versions = "<4,>=3.9"
|
||||
files = [
|
||||
{file = "woodwork-0.31.0-py3-none-any.whl", hash = "sha256:5cb3370553b5f466f8c8599b1bf559584dc0b798cc1f2da26bbd7029d256c6f9"},
|
||||
{file = "woodwork-0.31.0.tar.gz", hash = "sha256:6ef82af1d5b6525b02efe6417c574c810cfdcc606cb266bd0d7fb17a1d066b67"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
importlib-resources = ">=5.10.0"
|
||||
numpy = ">=1.25.0"
|
||||
pandas = ">=2.0.0"
|
||||
python-dateutil = ">=2.8.2"
|
||||
scikit-learn = ">=1.1.0"
|
||||
scipy = ">=1.10.0"
|
||||
|
||||
[package.extras]
|
||||
complete = ["woodwork[updater]"]
|
||||
dev = ["click (>=8.1.7)", "pre-commit (>=2.20.0)", "ruff (>=0.1.6)", "woodwork[docs,test]"]
|
||||
docs = ["Sphinx (==5.1.1)", "ipython (==8.4.0)", "jupyter (==1.0.0)", "myst-parser (==0.18.0)", "nbconvert (==6.5.0)", "nbsphinx (==0.8.9)", "pyarrow (>=14.0.1)", "pydata-sphinx-theme (==0.9.0)", "sphinx-copybutton (==0.5.0)", "sphinx-inline-tabs (==2022.1.2b11)"]
|
||||
test = ["boto3 (>=1.34.32)", "moto[all] (>=5.0.0)", "pyarrow (>=14.0.1)", "pytest (>=7.0.1)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=2.1.0)", "smart-open (>=5.0.0)"]
|
||||
updater = ["alteryx-open-src-update-checker (>=3.1.0)"]
|
||||
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.12"
|
||||
content-hash = "a7e3d516bde2d6e4173d8a9770fb5337a0c806dadaeda355084b262c1995f7ea"
|
||||
content-hash = "ddd000b70cadbcdb2463cdb4e0be8181c6dab001dd368a95bd2caa73a3085aa5"
|
||||
|
@ -17,8 +17,13 @@ apiflask = "^2.2.0"
|
||||
flask-cors = "^5.0.0"
|
||||
scikit-learn = "^1.5.2"
|
||||
imbalanced-learn = "^0.12.3"
|
||||
featuretools = "^1.31.0"
|
||||
seaborn = "^0.13.2"
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
ipykernel = "^6.29.5"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
BIN
описания_датасетов/cars.odt
Normal file
BIN
описания_датасетов/cars.odt
Normal file
Binary file not shown.
BIN
описания_датасетов/houses.odt
Normal file
BIN
описания_датасетов/houses.odt
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user