done
This commit is contained in:
parent
790b607e5e
commit
4960643e7c
198
lab1.ipynb
198
lab1.ipynb
File diff suppressed because one or more lines are too long
@ -1,2 +0,0 @@
|
||||
[flake8]
|
||||
max-line-length = 120
|
13
mai/.vscode/extensions.json
vendored
13
mai/.vscode/extensions.json
vendored
@ -1,13 +0,0 @@
|
||||
{
|
||||
"recommendations": [
|
||||
"ms-python.black-formatter",
|
||||
"ms-python.flake8",
|
||||
"ms-python.isort",
|
||||
"ms-toolsai.jupyter",
|
||||
"ms-toolsai.datawrangler",
|
||||
"ms-python.python",
|
||||
"donjayamanne.python-environment-manager",
|
||||
// optional
|
||||
"usernamehw.errorlens"
|
||||
]
|
||||
}
|
16
mai/.vscode/launch.json
vendored
16
mai/.vscode/launch.json
vendored
@ -1,16 +0,0 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "mai-service",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "run.py",
|
||||
"console": "integratedTerminal",
|
||||
"justMyCode": true
|
||||
}
|
||||
]
|
||||
}
|
38
mai/.vscode/settings.json
vendored
38
mai/.vscode/settings.json
vendored
@ -1,38 +0,0 @@
|
||||
{
|
||||
"files.autoSave": "onFocusChange",
|
||||
"files.exclude": {
|
||||
"**/__pycache__": true
|
||||
},
|
||||
"editor.detectIndentation": false,
|
||||
"editor.formatOnType": false,
|
||||
"editor.formatOnPaste": true,
|
||||
"editor.formatOnSave": true,
|
||||
"editor.tabSize": 4,
|
||||
"editor.insertSpaces": true,
|
||||
"editor.codeActionsOnSave": {
|
||||
"source.organizeImports": "explicit",
|
||||
"source.sortImports": "explicit"
|
||||
},
|
||||
"editor.stickyScroll.enabled": false,
|
||||
"diffEditor.ignoreTrimWhitespace": false,
|
||||
"debug.showVariableTypes": true,
|
||||
"workbench.editor.highlightModifiedTabs": true,
|
||||
"git.suggestSmartCommit": false,
|
||||
"git.autofetch": true,
|
||||
"git.openRepositoryInParentFolders": "always",
|
||||
"git.confirmSync": false,
|
||||
"errorLens.gutterIconsEnabled": true,
|
||||
"errorLens.messageEnabled": false,
|
||||
"[python]": {
|
||||
"editor.defaultFormatter": "ms-python.black-formatter",
|
||||
},
|
||||
"python.languageServer": "Pylance",
|
||||
"python.analysis.typeCheckingMode": "basic",
|
||||
"python.analysis.autoImportCompletions": true,
|
||||
"isort.args": [
|
||||
"--profile",
|
||||
"black"
|
||||
],
|
||||
"notebook.lineNumbers": "on",
|
||||
"notebook.output.minimalErrorRendering": true,
|
||||
}
|
Binary file not shown.
Before Width: | Height: | Size: 111 KiB |
@ -1,52 +0,0 @@
|
||||
import importlib
|
||||
import os
|
||||
import traceback
|
||||
|
||||
import matplotlib
|
||||
from apiflask import APIBlueprint, APIFlask
|
||||
from flask_cors import CORS
|
||||
|
||||
# Use a non-interactive backend: figures are rendered to in-memory buffers,
# never to a display (the service runs headless).
matplotlib.use("agg")

# Extension / blueprint singletons; wired to the app inside create_app().
cors = CORS()
api_bp = APIBlueprint("api", __name__, url_prefix="/api/v1")

# Absolute path of the dataset storage directory; assigned by create_app().
dataset_path: str | None = None
|
||||
|
||||
|
||||
class Config:
    """Flask configuration object consumed via ``app.config.from_object``."""

    # NOTE(review): placeholder signing key -- override with a real secret
    # (e.g. from the environment) before any non-local deployment.
    SECRET_KEY = "secret!"
    # -1 disables browser caching of static files, handy during development.
    SEND_FILE_MAX_AGE_DEFAULT = -1
|
||||
|
||||
|
||||
def create_app():
    """Application factory: build, configure and return the APIFlask app.

    Side effects: sets the module-level ``dataset_path`` and creates the
    dataset directory under the app's instance path.
    """
    global dataset_path

    app = APIFlask(
        "MAI Service",
        title="MAI Service API",
        docs_path="/",
        version="1.0",
        static_folder="",
        template_folder="",
    )
    app.config.from_object(Config)

    # Make sure the instance-local dataset directory exists before any
    # request handler tries to read from or write to it.
    dataset_path = os.path.join(app.instance_path, "dataset")
    os.makedirs(dataset_path, exist_ok=True)

    @app.errorhandler(Exception)
    def my_error_processor(error):
        # Log the full traceback server-side, return a generic 500 payload.
        traceback.print_exception(error)
        return {"message": str(error), "detail": "No details"}, 500

    # Importing backend.api registers the REST routes on api_bp; this must
    # happen before the blueprint is attached to the app.
    importlib.import_module("backend.api")
    app.register_blueprint(api_bp)

    # Enable app extensions.
    cors.init_app(app)

    return app
|
@ -1,57 +0,0 @@
|
||||
from apiflask import FileSchema, Schema, fields
|
||||
from flask import send_file
|
||||
|
||||
from backend import api_bp, dataset_path
|
||||
from backend.service import Service
|
||||
|
||||
|
||||
class FileUpload(Schema):
    """Request schema for a multipart dataset upload (one required file)."""

    file = fields.File(required=True)
|
||||
|
||||
|
||||
class ColumnInfoDto(Schema):
    """Response schema describing one column: its dtype and its values."""

    datatype = fields.String()
    items = fields.List(fields.String())
|
||||
|
||||
|
||||
class TableColumnDto(Schema):
    """Response schema for a named dataset column with dtype and values."""

    name = fields.String()
    datatype = fields.String()
    items = fields.List(fields.String())
|
||||
|
||||
|
||||
# Module-level service shared by all route handlers below; dataset_path is
# resolved by create_app() before this module is imported.
service = Service(dataset_path)
|
||||
|
||||
|
||||
@api_bp.post("/dataset")
@api_bp.input(FileUpload, location="files")
def upload_dataset(files_data):
    """Store an uploaded CSV in the dataset directory; return its name."""
    return service.upload_dataset(files_data["file"])
|
||||
|
||||
|
||||
@api_bp.get("/dataset")
def get_all_datasets():
    """Return the file names of every stored dataset."""
    return service.get_all_datasets()
|
||||
|
||||
|
||||
@api_bp.get("/dataset/<string:name>")
@api_bp.output(TableColumnDto(many=True))
def get_dataset_info(name: str):
    """Describe every column (name, dtype, values) of the named dataset."""
    return service.get_dataset_info(name)
|
||||
|
||||
|
||||
@api_bp.get("/dataset/<string:name>/<string:column>")
@api_bp.output(ColumnInfoDto)
def get_column_info(name: str, column: str):
    """Return the dtype and unique values of one dataset column."""
    return service.get_column_info(name, column)
|
||||
|
||||
|
||||
@api_bp.get("/dataset/draw/hist/<string:name>/<string:column>")
@api_bp.output(
    FileSchema(type="string", format="binary"), content_type="image/png", example=""
)
def get_dataset_hist(name: str, column: str):
    """Render a histogram of one column and stream it back as a PNG."""
    buffer = service.get_hist(name, column)
    # Rewind so send_file reads the image from the beginning.
    buffer.seek(0)
    return send_file(buffer, download_name=f"{name}.hist.png", mimetype="image/png")
|
@ -1,59 +0,0 @@
|
||||
import io
|
||||
import os
|
||||
import pathlib
|
||||
from typing import BinaryIO, Dict, List
|
||||
|
||||
import pandas as pd
|
||||
from matplotlib.figure import Figure
|
||||
from werkzeug.datastructures import FileStorage
|
||||
from werkzeug.utils import secure_filename
|
||||
|
||||
|
||||
class Service:
    """File-system backed dataset storage and inspection helpers.

    Datasets are CSV files stored flat inside a single directory; all
    user-supplied file names are passed through ``secure_filename`` before
    touching the file system.
    """

    def __init__(self, dataset_path: str | None) -> None:
        """Remember the dataset directory; fail fast if it is unset."""
        if dataset_path is None:
            raise Exception("Dataset path is not defined")
        self.__path: str = dataset_path

    def __get_dataset(self, filename: str) -> pd.DataFrame:
        # secure_filename() guards against path traversal in user input.
        full_file_name = os.path.join(self.__path, secure_filename(filename))
        return pd.read_csv(full_file_name)

    def upload_dataset(self, file: FileStorage) -> str:
        """Persist an uploaded file under its sanitized name.

        Returns the original (unsanitized) file name; raises if the upload
        carries no file name at all.
        """
        if file.filename is None:
            raise Exception("Dataset upload error")
        file_name: str = file.filename
        full_file_name = os.path.join(self.__path, secure_filename(file_name))
        file.save(full_file_name)
        return file_name

    def get_all_datasets(self) -> List[str]:
        """Return the names of all ``*.csv`` files in the dataset directory."""
        return [file.name for file in pathlib.Path(self.__path).glob("*.csv")]

    def get_dataset_info(self, filename) -> List[Dict]:
        """Describe every column: name, pandas dtype and stringified values."""
        dataset = self.__get_dataset(filename)
        dataset_info = []
        for column in dataset.columns:
            items = dataset[column].astype(str)
            column_info = {
                "name": column,
                "datatype": dataset.dtypes[column],
                "items": items,
            }
            dataset_info.append(column_info)
        return dataset_info

    def get_column_info(self, filename, column) -> Dict:
        """Return the dtype and sorted unique (stringified) values of a column."""
        dataset = self.__get_dataset(filename)
        datatype = dataset.dtypes[column]
        items = sorted(dataset[column].astype(str).unique())
        return {"datatype": datatype, "items": items}

    def get_hist(self, filename, column) -> BinaryIO:
        """Plot a histogram (80 bins) of one column into an in-memory PNG.

        The returned buffer is NOT rewound; callers must seek(0) first.
        """
        dataset = self.__get_dataset(filename)
        # Renamed from `bytes` -- the original shadowed the builtin type.
        buffer = io.BytesIO()
        plot: Figure | None = dataset.plot.hist(column=[column], bins=80).get_figure()
        if plot is None:
            raise Exception("Can't create hist plot")
        plot.savefig(buffer, dpi=300, format="png")
        return buffer
|
Binary file not shown.
Before Width: | Height: | Size: 22 KiB |
Binary file not shown.
Before Width: | Height: | Size: 74 KiB |
Binary file not shown.
Before Width: | Height: | Size: 129 KiB |
Binary file not shown.
Before Width: | Height: | Size: 38 KiB |
2936
mai/lab4.ipynb
Normal file
2936
mai/lab4.ipynb
Normal file
File diff suppressed because one or more lines are too long
@ -1,55 +0,0 @@
|
||||
## Окружение и примеры для выполнения лабораторных работ по дисциплине "Методы ИИ"
|
||||
|
||||
### Python
|
||||
|
||||
Используется Python версии 3.12
|
||||
|
||||
Установщик https://www.python.org/ftp/python/3.12.5/python-3.12.5-amd64.exe
|
||||
|
||||
### Poetry
|
||||
|
||||
Для создания и настройки окружения проекта необходимо установить poetry
|
||||
|
||||
**Для Windows (Powershell)**
|
||||
|
||||
```
|
||||
(Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python -
|
||||
```
|
||||
|
||||
**Linux, macOS, Windows (WSL)**
|
||||
|
||||
```
|
||||
curl -sSL https://install.python-poetry.org | python3 -
|
||||
```
|
||||
|
||||
**Добавление poetry в PATH**
|
||||
|
||||
1. Открыть настройки переменных среды \
|
||||
\
|
||||
<img src="docs/path1.png" width="300"> \
|
||||
\
|
||||
<img src="docs/path2.png" width="400"> \
|
||||
2. Изменить переменную Path текущего пользователя \
|
||||
\
|
||||
<img src="docs/path3.png" width="500"> \
|
||||
3. Добавление пути `%APPDATA%\Python\Scripts` до исполняемого файла poetry \
|
||||
\
|
||||
<img src="docs/path4.png" width="400">
|
||||
|
||||
### Создание окружения
|
||||
|
||||
```
|
||||
poetry install
|
||||
```
|
||||
|
||||
### Запуск тестового сервиса
|
||||
|
||||
Запустить тестовый сервис можно с помощью VSCode (см. launch.json в каталоге .vscode).
|
||||
|
||||
Также запустить тестовый сервис можно с помощью командной строки:
|
||||
|
||||
1. Активация виртуального окружения -- `poetry shell`
|
||||
|
||||
2. Запуск сервиса -- `python run.py`
|
||||
|
||||
Для выхода из виртуального окружения используется команда `exit`
|
16
mai/run.py
16
mai/run.py
@ -1,16 +0,0 @@
|
||||
from backend import create_app
|
||||
|
||||
app = create_app()
|
||||
|
||||
|
||||
def __main():
    """Start the development server on localhost only."""
    options = {
        "host": "127.0.0.1",
        "port": 8080,
        "debug": True,
        # Keep the process single-shot so the debugger attaches cleanly.
        "use_reloader": False,
    }
    app.run(**options)


if __name__ == "__main__":
    __main()
|
79
mai/utils.py
Normal file
79
mai/utils.py
Normal file
@ -0,0 +1,79 @@
|
||||
from typing import Tuple
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
|
||||
def split_stratified_into_train_val_test(
    df_input,
    stratify_colname="y",
    frac_train=0.6,
    frac_val=0.15,
    frac_test=0.25,
    random_state=None,
) -> Tuple[DataFrame, DataFrame, DataFrame, DataFrame, DataFrame, DataFrame]:
    """
    Splits a Pandas dataframe into three subsets (train, val, and test)
    following fractional ratios provided by the user, where each subset is
    stratified by the values in a specific column (that is, each subset has
    the same relative frequency of the values in the column). It performs this
    splitting by running train_test_split() twice.

    Parameters
    ----------
    df_input : Pandas dataframe
        Input dataframe to be split.
    stratify_colname : str
        The name of the column that will be used for stratification. Usually
        this column would be for the label.
    frac_train : float
    frac_val : float
    frac_test : float
        The ratios with which the dataframe will be split into train, val, and
        test data. The values should be expressed as float fractions and should
        sum to 1.0.
    random_state : int, None, or RandomStateInstance
        Value to be passed to train_test_split().

    Returns
    -------
    df_train, df_val, df_test :
        Dataframes containing the three splits.

    Raises
    ------
    ValueError
        If the fractions do not sum to 1.0 or the stratification column is
        missing from ``df_input``.
    """

    # BUGFIX: the original used strict `!= 1.0`, which wrongly rejects valid
    # triples such as 0.7 + 0.15 + 0.15 (floating-point rounding makes the sum
    # differ from 1.0 by one ULP). Compare against a small tolerance instead.
    if abs(frac_train + frac_val + frac_test - 1.0) > 1e-9:
        raise ValueError(
            "fractions %f, %f, %f do not add up to 1.0"
            % (frac_train, frac_val, frac_test)
        )

    if stratify_colname not in df_input.columns:
        raise ValueError("%s is not a column in the dataframe" % (stratify_colname))

    X = df_input  # Contains all columns.
    y = df_input[
        [stratify_colname]
    ]  # Dataframe of just the column on which to stratify.

    # Split original dataframe into train and temp dataframes.
    df_train, df_temp, y_train, y_temp = train_test_split(
        X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state
    )

    if frac_val <= 0:
        # No validation split requested: everything left over is the test set.
        assert len(df_input) == len(df_train) + len(df_temp)
        return df_train, pd.DataFrame(), df_temp, y_train, pd.DataFrame(), y_temp

    # Split the temp dataframe into val and test dataframes.
    relative_frac_test = frac_test / (frac_val + frac_test)
    df_val, df_test, y_val, y_test = train_test_split(
        df_temp,
        y_temp,
        stratify=y_temp,
        test_size=relative_frac_test,
        random_state=random_state,
    )

    assert len(df_input) == len(df_train) + len(df_val) + len(df_test)
    return df_train, df_val, df_test, y_train, y_val, y_test
|
Loading…
Reference in New Issue
Block a user