Compare commits

main...lab1

1 commit

Author      SHA1        Message  Date
            278e197032  lab1     2024-11-22 22:56:37 +04:00

15 changed files with 14395 additions and 0 deletions

.flake8 (Normal file, +2)

@@ -0,0 +1,2 @@
[flake8]
max-line-length = 120

.vscode/extensions.json (vendored, Normal file, +13)

@@ -0,0 +1,13 @@
{
    "recommendations": [
        "ms-python.black-formatter",
        "ms-python.flake8",
        "ms-python.isort",
        "ms-toolsai.jupyter",
        "ms-toolsai.datawrangler",
        "ms-python.python",
        "donjayamanne.python-environment-manager",
        // optional
        "usernamehw.errorlens"
    ]
}

.vscode/launch.json (vendored, Normal file, +16)

@@ -0,0 +1,16 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "mai-service",
            "type": "debugpy",
            "request": "launch",
            "program": "run.py",
            "console": "integratedTerminal",
            "justMyCode": true
        }
    ]
}

.vscode/settings.json (vendored, Normal file, +38)

@@ -0,0 +1,38 @@
{
    "files.autoSave": "onFocusChange",
    "files.exclude": {
        "**/__pycache__": true
    },
    "editor.detectIndentation": false,
    "editor.formatOnType": false,
    "editor.formatOnPaste": true,
    "editor.formatOnSave": true,
    "editor.tabSize": 4,
    "editor.insertSpaces": true,
    "editor.codeActionsOnSave": {
        "source.organizeImports": "explicit",
        "source.sortImports": "explicit"
    },
    "editor.stickyScroll.enabled": false,
    "diffEditor.ignoreTrimWhitespace": false,
    "debug.showVariableTypes": true,
    "workbench.editor.highlightModifiedTabs": true,
    "git.suggestSmartCommit": false,
    "git.autofetch": true,
    "git.openRepositoryInParentFolders": "always",
    "git.confirmSync": false,
    "errorLens.gutterIconsEnabled": true,
    "errorLens.messageEnabled": false,
    "[python]": {
        "editor.defaultFormatter": "ms-python.black-formatter",
    },
    "python.languageServer": "Pylance",
    "python.analysis.typeCheckingMode": "basic",
    "python.analysis.autoImportCompletions": true,
    "isort.args": [
        "--profile",
        "black"
    ],
    "notebook.lineNumbers": "on",
    "notebook.output.minimalErrorRendering": true,
}

backend/__init__.py (Normal file, +52)

@@ -0,0 +1,52 @@
import importlib
import os
import traceback

import matplotlib
from apiflask import APIBlueprint, APIFlask
from flask_cors import CORS

matplotlib.use("agg")

cors = CORS()
api_bp = APIBlueprint("api", __name__, url_prefix="/api/v1")

dataset_path: str | None = None


class Config:
    SECRET_KEY = "secret!"
    SEND_FILE_MAX_AGE_DEFAULT = -1


def create_app():
    global dataset_path

    # Create and configure app
    app = APIFlask(
        "MAI Service",
        title="MAI Service API",
        docs_path="/",
        version="1.0",
        static_folder="",
        template_folder="",
    )
    app.config.from_object(Config)

    dataset_path = os.path.join(app.instance_path, "dataset")
    os.makedirs(dataset_path, exist_ok=True)

    @app.errorhandler(Exception)
    def my_error_processor(error):
        traceback.print_exception(error)
        return {"message": str(error), "detail": "No details"}, 500

    # Import custom REST methods
    importlib.import_module("backend.api")

    # Enable REST API
    app.register_blueprint(api_bp)

    # Enable app extensions
    cors.init_app(app)

    return app
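
Illustrative aside, not one of the changed files: a minimal sketch of the factory above in use. Flask derives instance_path from the import name, so with these defaults the upload folder lands under the working directory; the exact paths printed below depend on the environment.

import backend

# Build the app via the factory; this also populates backend.dataset_path.
app = backend.create_app()
print(app.instance_path)     # e.g. <project root>/instance
print(backend.dataset_path)  # e.g. <project root>/instance/dataset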

backend/api.py (Normal file, +57)

@@ -0,0 +1,57 @@
from apiflask import FileSchema, Schema, fields
from flask import send_file

from backend import api_bp, dataset_path
from backend.service import Service


class FileUpload(Schema):
    file = fields.File(required=True)


class ColumnInfoDto(Schema):
    datatype = fields.String()
    items = fields.List(fields.String())


class TableColumnDto(Schema):
    name = fields.String()
    datatype = fields.String()
    items = fields.List(fields.String())


service = Service(dataset_path)


@api_bp.post("/dataset")
@api_bp.input(FileUpload, location="files")
def upload_dataset(files_data):
    uploaded_file = files_data["file"]
    return service.upload_dataset(uploaded_file)


@api_bp.get("/dataset")
def get_all_datasets():
    return service.get_all_datasets()


@api_bp.get("/dataset/<string:name>")
@api_bp.output(TableColumnDto(many=True))
def get_dataset_info(name: str):
    return service.get_dataset_info(name)


@api_bp.get("/dataset/<string:name>/<string:column>")
@api_bp.output(ColumnInfoDto)
def get_column_info(name: str, column: str):
    return service.get_column_info(name, column)


@api_bp.get("/dataset/draw/hist/<string:name>/<string:column>")
@api_bp.output(
    FileSchema(type="string", format="binary"), content_type="image/png", example=""
)
def get_dataset_hist(name: str, column: str):
    data = service.get_hist(name, column)
    data.seek(0)
    return send_file(data, download_name=f"{name}.hist.png", mimetype="image/png")
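
As a hedged usage sketch (not part of the diff), these endpoints could be called with requests once the development server from run.py is running. The base URL mirrors run.py's host and port; "lab1.csv" and the "Price" column are assumptions made purely for illustration.

import requests

BASE = "http://127.0.0.1:8080/api/v1"  # host/port taken from run.py

# Upload a CSV; the multipart field name "file" matches the FileUpload schema.
with open("lab1.csv", "rb") as f:
    print(requests.post(f"{BASE}/dataset", files={"file": f}).text)

# List stored datasets and inspect one column ("Price" is a made-up column name).
print(requests.get(f"{BASE}/dataset").json())
print(requests.get(f"{BASE}/dataset/lab1.csv/Price").json())

# Fetch the histogram endpoint and save the PNG it returns.
resp = requests.get(f"{BASE}/dataset/draw/hist/lab1.csv/Price")
with open("hist.png", "wb") as out:
    out.write(resp.content)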

backend/service.py (Normal file, +59)

@@ -0,0 +1,59 @@
import io
import os
import pathlib
from typing import BinaryIO, Dict, List

import pandas as pd
from matplotlib.figure import Figure
from werkzeug.datastructures import FileStorage
from werkzeug.utils import secure_filename


class Service:
    def __init__(self, dataset_path: str | None) -> None:
        if dataset_path is None:
            raise Exception("Dataset path is not defined")
        self.__path: str = dataset_path

    def __get_dataset(self, filename: str) -> pd.DataFrame:
        full_file_name = os.path.join(self.__path, secure_filename(filename))
        return pd.read_csv(full_file_name)

    def upload_dataset(self, file: FileStorage) -> str:
        if file.filename is None:
            raise Exception("Dataset upload error")
        file_name: str = file.filename
        full_file_name = os.path.join(self.__path, secure_filename(file_name))
        file.save(full_file_name)
        return file_name

    def get_all_datasets(self) -> List[str]:
        return [file.name for file in pathlib.Path(self.__path).glob("*.csv")]

    def get_dataset_info(self, filename) -> List[Dict]:
        dataset = self.__get_dataset(filename)
        dataset_info = []
        for column in dataset.columns:
            items = dataset[column].astype(str)
            column_info = {
                "name": column,
                "datatype": dataset.dtypes[column],
                "items": items,
            }
            dataset_info.append(column_info)
        return dataset_info

    def get_column_info(self, filename, column) -> Dict:
        dataset = self.__get_dataset(filename)
        datatype = dataset.dtypes[column]
        items = sorted(dataset[column].astype(str).unique())
        return {"datatype": datatype, "items": items}

    def get_hist(self, filename, column) -> BinaryIO:
        dataset = self.__get_dataset(filename)
        bytes = io.BytesIO()
        plot: Figure | None = dataset.plot.hist(column=[column], bins=80).get_figure()
        if plot is None:
            raise Exception("Can't create hist plot")
        plot.savefig(bytes, dpi=300, format="png")
        return bytes
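
A minimal sketch (not part of the diff) of driving Service directly, without the Flask app. The "instance/dataset" directory and the "Price" column are assumptions for the example.

from backend.service import Service

# Point the service at an existing folder of CSV files (path is an assumption).
svc = Service("instance/dataset")
print(svc.get_all_datasets())                  # e.g. ["lab1.csv"]

# Column-level summary for one dataset ("Price" is a made-up column name).
info = svc.get_column_info("lab1.csv", "Price")
print(info["datatype"], info["items"][:5])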

File diff suppressed because it is too large

lab1.csv (Normal file, +5111)

File diff suppressed because it is too large

lab1.ipynb (Normal file, +626)

File diff suppressed because one or more lines are too long

poetry.lock (generated, Normal file, +3267)

File diff suppressed because it is too large

poetry.toml (Normal file, +2)

@@ -0,0 +1,2 @@
[virtualenvs]
in-project = true

pyproject.toml (Normal file, +25)

@@ -0,0 +1,25 @@
[tool.poetry]
name = "mai"
version = "1.0.0"
description = "MAI labs"
authors = ["Tikhonenkov Alexey <tikhonenkov2015@gmail.com>"]
readme = "readme.md"
package-mode = false

[tool.poetry.dependencies]
python = "^3.12"
jupyter = "^1.1.1"
numpy = "^2.1.0"
pandas = "^2.2.2"
matplotlib = "^3.9.2"
flask = "^3.0.3"
apiflask = "^2.2.0"
flask-cors = "^5.0.0"
scikit-learn = "^1.5.2"
imbalanced-learn = "^0.12.3"
featuretools = "^1.31.0"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

readme copy.md (Normal file, +0)

run.py (Normal file, +16)

@@ -0,0 +1,16 @@
from backend import create_app

app = create_app()


def __main():
    app.run(
        host="127.0.0.1",
        port=8080,
        debug=True,
        use_reloader=False,
    )


if __name__ == "__main__":
    __main()
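
For completeness, an illustrative sketch (not part of the diff): the same API can be exercised in-process with Flask's test client instead of the development server above. "lab1.csv" is assumed to exist locally.

from backend import create_app

client = create_app().test_client()

# GET /api/v1/dataset returns the list of stored CSV files ([] on a fresh instance).
print(client.get("/api/v1/dataset").get_json())

# Uploads work the same way; the test client builds the multipart body from the tuple.
with open("lab1.csv", "rb") as f:
    resp = client.post("/api/v1/dataset", data={"file": (f, "lab1.csv")})
print(resp.status_code, resp.get_data(as_text=True))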