Compare commits

...

4 Commits
main ... lab2

Author SHA1 Message Date
9fe3e445d0 . 2024-12-19 17:02:15 +04:00
9ee8efec42 lab2 2024-11-29 18:04:52 +04:00
d59680bbe0 lab2done 2024-11-23 07:56:39 +04:00
278e197032 lab1 2024-11-22 22:56:37 +04:00
22 changed files with 78981 additions and 0 deletions

2
.flake8 Normal file
View File

@ -0,0 +1,2 @@
[flake8]
max-line-length = 120

13
.vscode/extensions.json vendored Normal file
View File

@ -0,0 +1,13 @@
{
"recommendations": [
"ms-python.black-formatter",
"ms-python.flake8",
"ms-python.isort",
"ms-toolsai.jupyter",
"ms-toolsai.datawrangler",
"ms-python.python",
"donjayamanne.python-environment-manager",
// optional
"usernamehw.errorlens"
]
}

16
.vscode/launch.json vendored Normal file
View File

@ -0,0 +1,16 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "mai-service",
"type": "debugpy",
"request": "launch",
"program": "run.py",
"console": "integratedTerminal",
"justMyCode": true
}
]
}

38
.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,38 @@
{
"files.autoSave": "onFocusChange",
"files.exclude": {
"**/__pycache__": true
},
"editor.detectIndentation": false,
"editor.formatOnType": false,
"editor.formatOnPaste": true,
"editor.formatOnSave": true,
"editor.tabSize": 4,
"editor.insertSpaces": true,
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit",
"source.sortImports": "explicit"
},
"editor.stickyScroll.enabled": false,
"diffEditor.ignoreTrimWhitespace": false,
"debug.showVariableTypes": true,
"workbench.editor.highlightModifiedTabs": true,
"git.suggestSmartCommit": false,
"git.autofetch": true,
"git.openRepositoryInParentFolders": "always",
"git.confirmSync": false,
"errorLens.gutterIconsEnabled": true,
"errorLens.messageEnabled": false,
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
},
"python.languageServer": "Pylance",
"python.analysis.typeCheckingMode": "basic",
"python.analysis.autoImportCompletions": true,
"isort.args": [
"--profile",
"black"
],
"notebook.lineNumbers": "on",
"notebook.output.minimalErrorRendering": true,
}

View File

@ -0,0 +1,8 @@
{
"folders": [
{
"path": "."
}
],
"settings": {}
}

52
backend/__init__.py Normal file
View File

@ -0,0 +1,52 @@
import importlib
import os
import traceback
import matplotlib
from apiflask import APIBlueprint, APIFlask
from flask_cors import CORS
# Select matplotlib's "agg" backend at import time, before any plotting code runs.
matplotlib.use("agg")
# CORS extension; created unbound here and attached to the app in create_app().
cors = CORS()
# Blueprint that carries every REST route; all routes live under /api/v1.
api_bp = APIBlueprint("api", __name__, url_prefix="/api/v1")
# Directory that stores uploaded datasets. Stays None until create_app() assigns it.
dataset_path: str | None = None
class Config:
    """Flask settings object; only the upper-case attributes are configuration."""

    # Flask's SECRET_KEY setting. Read from the SECRET_KEY environment variable
    # so a real deployment does not depend on a value committed to the
    # repository; the historical development value remains the fallback when
    # the variable is unset or empty.
    SECRET_KEY = os.environ.get("SECRET_KEY") or "secret!"
    # NOTE(review): a negative max-age presumably disables client-side caching
    # of files sent by the app -- confirm against the Flask configuration docs.
    SEND_FILE_MAX_AGE_DEFAULT = -1
def create_app():
    """Build and return the fully configured APIFlask application.

    Side effects: assigns the module-level ``dataset_path`` and creates that
    directory on disk if it does not exist yet.
    """
    global dataset_path

    # Application options are collected first, then passed in one call.
    app_options = {
        "title": "MAI Service API",
        "docs_path": "/",
        "version": "1.0",
        "static_folder": "",
        "template_folder": "",
    }
    app = APIFlask("MAI Service", **app_options)
    app.config.from_object(Config)

    # The dataset directory lives inside the application's instance folder.
    dataset_path = os.path.join(app.instance_path, "dataset")
    os.makedirs(dataset_path, exist_ok=True)

    @app.errorhandler(Exception)
    def handle_unexpected_error(exc):
        # Print the traceback on the server and answer with a JSON body and HTTP 500.
        traceback.print_exception(exc)
        payload = {"message": str(exc), "detail": "No details"}
        return payload, 500

    # The REST handlers are imported only now: backend.api reads dataset_path
    # when it is imported, so the assignment above has to happen first.
    importlib.import_module("backend.api")

    # Expose the REST API, then attach the app extensions.
    app.register_blueprint(api_bp)
    cors.init_app(app)

    return app

57
backend/api.py Normal file
View File

@ -0,0 +1,57 @@
from apiflask import FileSchema, Schema, fields
from flask import send_file
from backend import api_bp, dataset_path
from backend.service import Service
# Input schema for the dataset upload endpoint: one mandatory file part named "file".
class FileUpload(Schema):
    file = fields.File(required=True)
# Output schema describing a single dataset column.
class ColumnInfoDto(Schema):
    # Column data type, serialized as a string.
    datatype = fields.String()
    # Column values, each serialized as a string.
    items = fields.List(fields.String())
# Output schema for one column of a dataset overview; carries the column name
# in addition to the data type and the values.
class TableColumnDto(Schema):
    # Column name.
    name = fields.String()
    # Column data type, serialized as a string.
    datatype = fields.String()
    # Column values, each serialized as a string.
    items = fields.List(fields.String())
# Single Service instance shared by every route in this module. It is built at
# import time from the dataset_path value imported from the backend package, so
# that value must already be set when this module is imported (Service raises
# if it is None).
service = Service(dataset_path)
# POST /dataset -- store the uploaded file and return what the service reports.
@api_bp.post("/dataset")
@api_bp.input(FileUpload, location="files")
def upload_dataset(files_data):
    return service.upload_dataset(files_data["file"])
# GET /dataset -- return the dataset listing produced by the service.
@api_bp.get("/dataset")
def get_all_datasets():
    dataset_names = service.get_all_datasets()
    return dataset_names
# GET /dataset/<name> -- per-column description of one dataset, serialized as
# a list of TableColumnDto.
@api_bp.get("/dataset/<string:name>")
@api_bp.output(TableColumnDto(many=True))
def get_dataset_info(name: str):
    columns = service.get_dataset_info(name)
    return columns
# GET /dataset/<name>/<column> -- description of a single column, serialized
# as ColumnInfoDto.
@api_bp.get("/dataset/<string:name>/<string:column>")
@api_bp.output(ColumnInfoDto)
def get_column_info(name: str, column: str):
    column_info = service.get_column_info(name, column)
    return column_info
# GET /dataset/draw/hist/<name>/<column> -- histogram of one column, sent back
# as a PNG file.
@api_bp.get("/dataset/draw/hist/<string:name>/<string:column>")
@api_bp.output(
    FileSchema(type="string", format="binary"), content_type="image/png", example=""
)
def get_dataset_hist(name: str, column: str):
    image = service.get_hist(name, column)
    # Rewind the stream so the file is sent from its first byte.
    image.seek(0)
    return send_file(image, mimetype="image/png", download_name=f"{name}.hist.png")

59
backend/service.py Normal file
View File

@ -0,0 +1,59 @@
import io
import os
import pathlib
from typing import BinaryIO, Dict, List
import pandas as pd
from matplotlib.figure import Figure
from werkzeug.datastructures import FileStorage
from werkzeug.utils import secure_filename
class Service:
    """CSV dataset store backed by a single directory on disk.

    Every file name received from a caller is passed through
    ``secure_filename`` before it is joined to the dataset directory.
    """

    def __init__(self, dataset_path: str | None) -> None:
        """Remember the directory that holds the datasets.

        Raises:
            Exception: if ``dataset_path`` is ``None``.
        """
        if dataset_path is None:
            raise Exception("Dataset path is not defined")
        self.__path: str = dataset_path

    def __dataset_file(self, filename: str) -> str:
        """Return the on-disk path for ``filename`` inside the dataset directory.

        Raises:
            Exception: if nothing is left of the name after sanitising
                (for example "" or ".."). Without this check the joined path
                would be the dataset directory itself and the later read or
                write would fail with an unrelated OS error.
        """
        safe_name = secure_filename(filename)
        if not safe_name:
            raise Exception(f"Invalid dataset name: {filename!r}")
        return os.path.join(self.__path, safe_name)

    def __get_dataset(self, filename: str) -> pd.DataFrame:
        """Load the stored CSV file ``filename`` into a DataFrame."""
        return pd.read_csv(self.__dataset_file(filename))

    def upload_dataset(self, file: FileStorage) -> str:
        """Save an uploaded file into the dataset directory.

        Returns:
            The file name exactly as supplied by the client (not the sanitised one).

        Raises:
            Exception: if the upload has no file name, or the name is empty
                after sanitising.
        """
        if file.filename is None:
            raise Exception("Dataset upload error")
        file_name: str = file.filename
        file.save(self.__dataset_file(file_name))
        return file_name

    def get_all_datasets(self) -> List[str]:
        """Return the names of all ``*.csv`` files in the dataset directory."""
        return [file.name for file in pathlib.Path(self.__path).glob("*.csv")]

    def get_dataset_info(self, filename: str) -> List[Dict]:
        """Describe every column of a dataset.

        Returns:
            One dict per column with the keys ``name``, ``datatype`` (the
            column dtype) and ``items`` (the column values converted to str).
        """
        dataset = self.__get_dataset(filename)
        return [
            {
                "name": column,
                "datatype": dataset.dtypes[column],
                "items": dataset[column].astype(str),
            }
            for column in dataset.columns
        ]

    def get_column_info(self, filename: str, column: str) -> Dict:
        """Describe one column of a dataset.

        Returns:
            A dict with ``datatype`` (the column dtype) and ``items`` (the
            sorted distinct values of the column, converted to str).
        """
        dataset = self.__get_dataset(filename)
        datatype = dataset.dtypes[column]
        items = sorted(dataset[column].astype(str).unique())
        return {"datatype": datatype, "items": items}

    def get_hist(self, filename: str, column: str) -> BinaryIO:
        """Render an 80-bin histogram of ``column`` as a PNG image.

        Returns:
            An in-memory binary stream holding the PNG. The stream position is
            left at the end of the data, so callers must rewind before reading.

        Raises:
            Exception: if no figure could be obtained from the plot.
        """
        # Imported locally: pyplot is needed only to release the figure below.
        import matplotlib.pyplot as plt

        dataset = self.__get_dataset(filename)
        figure: Figure | None = dataset.plot.hist(column=[column], bins=80).get_figure()
        if figure is None:
            raise Exception("Can't create hist plot")
        buffer = io.BytesIO()
        try:
            figure.savefig(buffer, dpi=300, format="png")
        finally:
            # Figures stay registered with pyplot until they are closed; without
            # this every request would leave one more figure in memory.
            plt.close(figure)
        return buffer

53944
data/Diamonds-Prices.csv Normal file

File diff suppressed because it is too large Load Diff

2601
data/Forbes Billionaires.csv Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

5111
lab1.csv Normal file

File diff suppressed because it is too large Load Diff

653
lab1.ipynb Normal file

File diff suppressed because one or more lines are too long

2063
lab2.ipynb Normal file

File diff suppressed because one or more lines are too long

1113
lab3.ipynb Normal file

File diff suppressed because one or more lines are too long

3436
lab4.ipynb Normal file

File diff suppressed because one or more lines are too long

3288
poetry.lock generated Normal file

File diff suppressed because it is too large Load Diff

2
poetry.toml Normal file
View File

@ -0,0 +1,2 @@
[virtualenvs]
in-project = true

27
pyproject.toml Normal file
View File

@ -0,0 +1,27 @@
[tool.poetry]
name = "mai"
version = "1.0.0"
description = "MAI labs"
authors = ["Tikhonenkov Alexey <tikhonenkov2015@gmail.com>"]
readme = "readme.md"
package-mode = false
[tool.poetry.dependencies]
python = "^3.12"
jupyter = "^1.1.1"
numpy = "^2.1.0"
pandas = "^2.2.2"
matplotlib = "^3.9.2"
flask = "^3.0.3"
apiflask = "^2.2.0"
flask-cors = "^5.0.0"
scikit-learn = "^1.5.2"
imbalanced-learn = "^0.12.3"
featuretools = "^1.31.0"
ipykernel = "^6.29.5"
seaborn = "^0.13.2"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

0
readme copy.md Normal file
View File

16
run.py Normal file
View File

@ -0,0 +1,16 @@
from backend import create_app
# Application object, created when this module is imported.
app = create_app()
def __main():
    # Start the development server on the loopback interface, port 8080, with
    # debug mode on and the auto-reloader off.
    run_options = dict(host="127.0.0.1", port=8080, debug=True, use_reloader=False)
    app.run(**run_options)
# Start the server only when this file is executed as a script, not on import.
if __name__ == "__main__":
    __main()