done
This commit is contained in:
parent
790b607e5e
commit
4960643e7c
198
lab1.ipynb
198
lab1.ipynb
File diff suppressed because one or more lines are too long
@ -1,2 +0,0 @@
|
|||||||
[flake8]
|
|
||||||
max-line-length = 120
|
|
13
mai/.vscode/extensions.json
vendored
13
mai/.vscode/extensions.json
vendored
@ -1,13 +0,0 @@
|
|||||||
{
|
|
||||||
"recommendations": [
|
|
||||||
"ms-python.black-formatter",
|
|
||||||
"ms-python.flake8",
|
|
||||||
"ms-python.isort",
|
|
||||||
"ms-toolsai.jupyter",
|
|
||||||
"ms-toolsai.datawrangler",
|
|
||||||
"ms-python.python",
|
|
||||||
"donjayamanne.python-environment-manager",
|
|
||||||
// optional
|
|
||||||
"usernamehw.errorlens"
|
|
||||||
]
|
|
||||||
}
|
|
16
mai/.vscode/launch.json
vendored
16
mai/.vscode/launch.json
vendored
@ -1,16 +0,0 @@
|
|||||||
{
|
|
||||||
// Use IntelliSense to learn about possible attributes.
|
|
||||||
// Hover to view descriptions of existing attributes.
|
|
||||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
|
||||||
"version": "0.2.0",
|
|
||||||
"configurations": [
|
|
||||||
{
|
|
||||||
"name": "mai-service",
|
|
||||||
"type": "debugpy",
|
|
||||||
"request": "launch",
|
|
||||||
"program": "run.py",
|
|
||||||
"console": "integratedTerminal",
|
|
||||||
"justMyCode": true
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
38
mai/.vscode/settings.json
vendored
38
mai/.vscode/settings.json
vendored
@ -1,38 +0,0 @@
|
|||||||
{
|
|
||||||
"files.autoSave": "onFocusChange",
|
|
||||||
"files.exclude": {
|
|
||||||
"**/__pycache__": true
|
|
||||||
},
|
|
||||||
"editor.detectIndentation": false,
|
|
||||||
"editor.formatOnType": false,
|
|
||||||
"editor.formatOnPaste": true,
|
|
||||||
"editor.formatOnSave": true,
|
|
||||||
"editor.tabSize": 4,
|
|
||||||
"editor.insertSpaces": true,
|
|
||||||
"editor.codeActionsOnSave": {
|
|
||||||
"source.organizeImports": "explicit",
|
|
||||||
"source.sortImports": "explicit"
|
|
||||||
},
|
|
||||||
"editor.stickyScroll.enabled": false,
|
|
||||||
"diffEditor.ignoreTrimWhitespace": false,
|
|
||||||
"debug.showVariableTypes": true,
|
|
||||||
"workbench.editor.highlightModifiedTabs": true,
|
|
||||||
"git.suggestSmartCommit": false,
|
|
||||||
"git.autofetch": true,
|
|
||||||
"git.openRepositoryInParentFolders": "always",
|
|
||||||
"git.confirmSync": false,
|
|
||||||
"errorLens.gutterIconsEnabled": true,
|
|
||||||
"errorLens.messageEnabled": false,
|
|
||||||
"[python]": {
|
|
||||||
"editor.defaultFormatter": "ms-python.black-formatter",
|
|
||||||
},
|
|
||||||
"python.languageServer": "Pylance",
|
|
||||||
"python.analysis.typeCheckingMode": "basic",
|
|
||||||
"python.analysis.autoImportCompletions": true,
|
|
||||||
"isort.args": [
|
|
||||||
"--profile",
|
|
||||||
"black"
|
|
||||||
],
|
|
||||||
"notebook.lineNumbers": "on",
|
|
||||||
"notebook.output.minimalErrorRendering": true,
|
|
||||||
}
|
|
Binary file not shown.
Before Width: | Height: | Size: 111 KiB |
@ -1,52 +0,0 @@
|
|||||||
import importlib
|
|
||||||
import os
|
|
||||||
import traceback
|
|
||||||
|
|
||||||
import matplotlib
|
|
||||||
from apiflask import APIBlueprint, APIFlask
|
|
||||||
from flask_cors import CORS
|
|
||||||
|
|
||||||
# Force the non-interactive Agg backend: figures are rendered to in-memory
# buffers only (required for a headless server process).
matplotlib.use("agg")

# Application-wide extension / blueprint singletons, initialized in create_app().
cors = CORS()
api_bp = APIBlueprint("api", __name__, url_prefix="/api/v1")
# Absolute path of the dataset storage directory; assigned by create_app().
dataset_path: str | None = None
|
|
||||||
|
|
||||||
|
|
||||||
class Config:
    """Flask application configuration."""

    # NOTE(review): hard-coded secret key — fine for a lab project, but should
    # come from the environment in any real deployment.
    SECRET_KEY = "secret!"
    # -1 disables the default cache max-age for files served via send_file.
    SEND_FILE_MAX_AGE_DEFAULT = -1
|
|
||||||
|
|
||||||
def create_app():
    """Create and configure the APIFlask application.

    Side effects: assigns the module-level ``dataset_path`` global and creates
    the dataset directory under the Flask instance path.

    Returns:
        The fully configured ``APIFlask`` application instance.
    """
    global dataset_path

    # Create and configure app
    app = APIFlask(
        "MAI Service",
        title="MAI Service API",
        docs_path="/",  # interactive API docs served at the site root
        version="1.0",
        static_folder="",
        template_folder="",
    )
    app.config.from_object(Config)

    # Datasets live under <instance_path>/dataset; create the directory if absent.
    dataset_path = os.path.join(app.instance_path, "dataset")
    os.makedirs(dataset_path, exist_ok=True)

    @app.errorhandler(Exception)
    def my_error_processor(error):
        # Print the full traceback server-side, return a generic JSON 500 body.
        traceback.print_exception(error)
        return {"message": str(error), "detail": "No details"}, 500

    # Import custom REST methods — importing backend.api registers its routes
    # on api_bp as a side effect, so this must run before register_blueprint.
    importlib.import_module("backend.api")

    # Enable REST API
    app.register_blueprint(api_bp)

    # Enable app extensions
    cors.init_app(app)

    return app
|
@ -1,57 +0,0 @@
|
|||||||
from apiflask import FileSchema, Schema, fields
|
|
||||||
from flask import send_file
|
|
||||||
|
|
||||||
from backend import api_bp, dataset_path
|
|
||||||
from backend.service import Service
|
|
||||||
|
|
||||||
|
|
||||||
class FileUpload(Schema):
    """Request schema for the multipart dataset-upload endpoint."""

    # The uploaded file (multipart/form-data field "file"); required.
    file = fields.File(required=True)
|
||||||
|
|
||||||
class ColumnInfoDto(Schema):
    """Response schema describing a single dataset column."""

    # Pandas dtype of the column, serialized as a string.
    datatype = fields.String()
    # Column values rendered as strings.
    items = fields.List(fields.String())
|
|
||||||
|
|
||||||
|
|
||||||
class TableColumnDto(Schema):
    """Response schema describing one column of a dataset table."""

    # Column name.
    name = fields.String()
    # Pandas dtype of the column, serialized as a string.
    datatype = fields.String()
    # Column values rendered as strings.
    items = fields.List(fields.String())
|
|
||||||
|
|
||||||
|
|
||||||
# Single Service instance shared by all route handlers below; dataset_path is
# populated by create_app() before this module is imported.
service = Service(dataset_path)
|
|
||||||
|
|
||||||
|
|
||||||
@api_bp.post("/dataset")
@api_bp.input(FileUpload, location="files")
def upload_dataset(files_data):
    """Store an uploaded CSV dataset and return its original file name."""
    uploaded_file = files_data["file"]
    return service.upload_dataset(uploaded_file)
|
|
||||||
|
|
||||||
|
|
||||||
@api_bp.get("/dataset")
def get_all_datasets():
    """Return the file names of all stored datasets."""
    return service.get_all_datasets()
|
|
||||||
|
|
||||||
|
|
||||||
@api_bp.get("/dataset/<string:name>")
@api_bp.output(TableColumnDto(many=True))
def get_dataset_info(name: str):
    """Return per-column information (name, dtype, values) for dataset *name*."""
    return service.get_dataset_info(name)
|
|
||||||
|
|
||||||
|
|
||||||
@api_bp.get("/dataset/<string:name>/<string:column>")
@api_bp.output(ColumnInfoDto)
def get_column_info(name: str, column: str):
    """Return the dtype and sorted unique values of *column* in dataset *name*."""
    return service.get_column_info(name, column)
|
|
||||||
|
|
||||||
|
|
||||||
@api_bp.get("/dataset/draw/hist/<string:name>/<string:column>")
@api_bp.output(
    FileSchema(type="string", format="binary"), content_type="image/png", example=""
)
def get_dataset_hist(name: str, column: str):
    """Render a histogram of *column* in dataset *name* and send it as a PNG."""
    data = service.get_hist(name, column)
    data.seek(0)  # rewind: get_hist leaves the buffer positioned at its end
    return send_file(data, download_name=f"{name}.hist.png", mimetype="image/png")
|
|
@ -1,59 +0,0 @@
|
|||||||
import io
|
|
||||||
import os
|
|
||||||
import pathlib
|
|
||||||
from typing import BinaryIO, Dict, List
|
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
from matplotlib.figure import Figure
|
|
||||||
from werkzeug.datastructures import FileStorage
|
|
||||||
from werkzeug.utils import secure_filename
|
|
||||||
|
|
||||||
|
|
||||||
class Service:
    """File-based dataset service: stores CSV files in a directory and answers
    queries (column info, unique values, histograms) about them."""

    def __init__(self, dataset_path: str | None) -> None:
        """Remember the dataset storage directory.

        Raises:
            Exception: if *dataset_path* is ``None`` (service misconfigured).
        """
        if dataset_path is None:
            raise Exception("Dataset path is not defined")
        self.__path: str = dataset_path

    def __get_dataset(self, filename: str) -> pd.DataFrame:
        """Load a stored CSV as a DataFrame; the name is sanitized first."""
        # secure_filename() guards against path-traversal in user-supplied names.
        full_file_name = os.path.join(self.__path, secure_filename(filename))
        return pd.read_csv(full_file_name)

    def upload_dataset(self, file: FileStorage) -> str:
        """Persist an uploaded file under its (sanitized) name; return the name.

        Raises:
            Exception: if the upload carries no file name.
        """
        if file.filename is None:
            raise Exception("Dataset upload error")
        file_name: str = file.filename
        full_file_name = os.path.join(self.__path, secure_filename(file_name))
        file.save(full_file_name)
        return file_name

    def get_all_datasets(self) -> List[str]:
        """Return the file names of every ``*.csv`` in the storage directory."""
        return [file.name for file in pathlib.Path(self.__path).glob("*.csv")]

    def get_dataset_info(self, filename) -> List[Dict]:
        """Describe every column: name, pandas dtype, and stringified values."""
        dataset = self.__get_dataset(filename)
        dataset_info = []
        for column in dataset.columns:
            # NOTE(review): `items` is a pandas Series (not a list) and
            # `datatype` is a numpy dtype object; the marshalling layer appears
            # to stringify both — confirm before tightening these types.
            items = dataset[column].astype(str)
            column_info = {
                "name": column,
                "datatype": dataset.dtypes[column],
                "items": items,
            }
            dataset_info.append(column_info)
        return dataset_info

    def get_column_info(self, filename, column) -> Dict:
        """Return the dtype and sorted unique stringified values of *column*."""
        dataset = self.__get_dataset(filename)
        datatype = dataset.dtypes[column]
        items = sorted(dataset[column].astype(str).unique())
        return {"datatype": datatype, "items": items}

    def get_hist(self, filename, column) -> BinaryIO:
        """Render a histogram of *column* into an in-memory PNG buffer.

        The returned buffer is positioned at its end; callers must ``seek(0)``
        before reading.

        Raises:
            Exception: if no figure could be produced for the plot.
        """
        dataset = self.__get_dataset(filename)
        # Fix: renamed from `bytes`, which shadowed the builtin type.
        buffer = io.BytesIO()
        plot: Figure | None = dataset.plot.hist(column=[column], bins=80).get_figure()
        if plot is None:
            raise Exception("Can't create hist plot")
        plot.savefig(buffer, dpi=300, format="png")
        # Fix: close the figure so pyplot does not keep it registered — without
        # this, every request leaked a figure. Local import keeps the module's
        # import block untouched (matplotlib is already a file dependency).
        import matplotlib.pyplot as plt

        plt.close(plot)
        return buffer
|
|
Binary file not shown.
Before Width: | Height: | Size: 22 KiB |
Binary file not shown.
Before Width: | Height: | Size: 74 KiB |
Binary file not shown.
Before Width: | Height: | Size: 129 KiB |
Binary file not shown.
Before Width: | Height: | Size: 38 KiB |
2936
mai/lab4.ipynb
Normal file
2936
mai/lab4.ipynb
Normal file
File diff suppressed because one or more lines are too long
@ -1,55 +0,0 @@
|
|||||||
## Окружение и примеры для выполнения лабораторных работ по дисциплине "Методы ИИ"
|
|
||||||
|
|
||||||
### Python
|
|
||||||
|
|
||||||
Используется Python версии 3.12
|
|
||||||
|
|
||||||
Установщик https://www.python.org/ftp/python/3.12.5/python-3.12.5-amd64.exe
|
|
||||||
|
|
||||||
### Poetry
|
|
||||||
|
|
||||||
Для создания и настройки окружения проекта необходимо установить poetry
|
|
||||||
|
|
||||||
**Для Windows (Powershell)**
|
|
||||||
|
|
||||||
```
|
|
||||||
(Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python -
|
|
||||||
```
|
|
||||||
|
|
||||||
**Linux, macOS, Windows (WSL)**
|
|
||||||
|
|
||||||
```
|
|
||||||
curl -sSL https://install.python-poetry.org | python3 -
|
|
||||||
```
|
|
||||||
|
|
||||||
**Добавление poetry в PATH**
|
|
||||||
|
|
||||||
1. Открыть настройки переменных среды \
|
|
||||||
\
|
|
||||||
<img src="docs/path1.png" width="300"> \
|
|
||||||
\
|
|
||||||
<img src="docs/path2.png" width="400"> \
|
|
||||||
2. Изменить переменную Path текущего пользователя \
|
|
||||||
\
|
|
||||||
<img src="docs/path3.png" width="500"> \
|
|
||||||
3. Добавление пути `%APPDATA%\Python\Scripts` до исполняемого файла poetry \
|
|
||||||
\
|
|
||||||
<img src="docs/path4.png" width="400">
|
|
||||||
|
|
||||||
### Создание окружения
|
|
||||||
|
|
||||||
```
|
|
||||||
poetry install
|
|
||||||
```
|
|
||||||
|
|
||||||
### Запуск тестового сервиса
|
|
||||||
|
|
||||||
Запустить тестовый сервис можно с помощью VSCode (см. launch.json в каталоге .vscode).
|
|
||||||
|
|
||||||
Также запустить тестовый сервис можно с помощью командной строки:
|
|
||||||
|
|
||||||
1. Активация виртуального окружения -- `poetry shell`
|
|
||||||
|
|
||||||
2. Запуск сервиса -- `python run.py`
|
|
||||||
|
|
||||||
Для выхода из виртуального окружения используется команда `exit`
|
|
16
mai/run.py
16
mai/run.py
@ -1,16 +0,0 @@
|
|||||||
from backend import create_app
|
|
||||||
|
|
||||||
app = create_app()
|
|
||||||
|
|
||||||
|
|
||||||
def __main():
    """Start the development server on localhost:8080 (no auto-reload)."""
    run_options = {
        "host": "127.0.0.1",
        "port": 8080,
        "debug": True,
        "use_reloader": False,
    }
    app.run(**run_options)


if __name__ == "__main__":
    __main()
|
|
79
mai/utils.py
Normal file
79
mai/utils.py
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
from typing import Tuple
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
from pandas import DataFrame
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
|
|
||||||
|
def split_stratified_into_train_val_test(
    df_input,
    stratify_colname="y",
    frac_train=0.6,
    frac_val=0.15,
    frac_test=0.25,
    random_state=None,
) -> Tuple[DataFrame, DataFrame, DataFrame, DataFrame, DataFrame, DataFrame]:
    """
    Splits a Pandas dataframe into three subsets (train, val, and test)
    following fractional ratios provided by the user, where each subset is
    stratified by the values in a specific column (that is, each subset has
    the same relative frequency of the values in the column). It performs this
    splitting by running train_test_split() twice.

    Parameters
    ----------
    df_input : Pandas dataframe
        Input dataframe to be split.
    stratify_colname : str
        The name of the column that will be used for stratification. Usually
        this column would be for the label.
    frac_train : float
    frac_val : float
    frac_test : float
        The ratios with which the dataframe will be split into train, val, and
        test data. The values should be expressed as float fractions and should
        sum to 1.0.
    random_state : int, None, or RandomStateInstance
        Value to be passed to train_test_split().

    Returns
    -------
    df_train, df_val, df_test, y_train, y_val, y_test :
        Dataframes containing the three feature splits and the three matching
        label splits. When ``frac_val <= 0`` the val entries are empty frames.

    Raises
    ------
    ValueError
        If the fractions do not sum to 1.0, or *stratify_colname* is not a
        column of *df_input*.
    """
    # Fix: compare with a tolerance instead of exact float equality — sums such
    # as 0.7 + 0.15 + 0.15 differ from 1.0 by one ULP in binary floating point
    # and were spuriously rejected by the original `!=` check.
    if abs(frac_train + frac_val + frac_test - 1.0) > 1e-9:
        raise ValueError(
            "fractions %f, %f, %f do not add up to 1.0"
            % (frac_train, frac_val, frac_test)
        )

    if stratify_colname not in df_input.columns:
        raise ValueError("%s is not a column in the dataframe" % (stratify_colname))

    X = df_input  # Contains all columns.
    y = df_input[
        [stratify_colname]
    ]  # Dataframe of just the column on which to stratify.

    # Split original dataframe into train and temp dataframes.
    df_train, df_temp, y_train, y_temp = train_test_split(
        X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state
    )

    if frac_val <= 0:
        # No validation split requested: return empty val frames and the
        # remainder as the test split.
        assert len(df_input) == len(df_train) + len(df_temp)
        return df_train, pd.DataFrame(), df_temp, y_train, pd.DataFrame(), y_temp

    # Split the temp dataframe into val and test dataframes.
    relative_frac_test = frac_test / (frac_val + frac_test)
    df_val, df_test, y_val, y_test = train_test_split(
        df_temp,
        y_temp,
        stratify=y_temp,
        test_size=relative_frac_test,
        random_state=random_state,
    )

    assert len(df_input) == len(df_train) + len(df_val) + len(df_test)
    return df_train, df_val, df_test, y_train, y_val, y_test
|
Loading…
Reference in New Issue
Block a user