PIbd-42_SSPR/db/csv_to_db.py

61 lines
2.3 KiB
Python

import asyncio
import logging

import pandas as pd
from sqlalchemy import insert, text
from sqlalchemy.exc import OperationalError, IntegrityError

from db.clickhouse_db_connection import BaseClickhouse, engine_clickhouse, session_clickhouse
from db.postgres_db_connection import async_session_postgres, engine_postgres
from db.models.base import Base
from db.models.ch_experimentdb_experiment_data_model import ChExperimentDBExperimentData
from db.models.experiment_data_model import ExperimentData
from db.models.experiment_parameters_model import ExperimentParameters
from db.models.load_parameters_model import LoadParameters
from db.models.recycling_parameters_model import RecyclingParameters
async def create_all_tables():
    """Create the tables declared on both metadata registries.

    The PostgreSQL tables registered on ``Base.metadata`` are created through
    the async engine inside a ``begin()`` block; the ClickHouse tables
    registered on ``BaseClickhouse.metadata`` are then created with a plain
    synchronous call.
    """
    postgres_metadata = Base.metadata
    async with engine_postgres.begin() as pg_connection:
        # ``create_all`` is a synchronous API, so it is handed to ``run_sync``
        # to execute against the async connection.
        await pg_connection.run_sync(postgres_metadata.create_all)

    clickhouse_metadata = BaseClickhouse.metadata
    clickhouse_metadata.create_all(engine_clickhouse)
async def load_data_to_postgres(file: str, model_class, chunk_size=100):
    """Bulk-insert the rows of a CSV file into the PostgreSQL table of ``model_class``.

    Rows containing any missing value are dropped before loading. The
    remaining rows are inserted in batches of at most ``chunk_size`` rows and
    each batch is committed on its own. A batch that raises ``IntegrityError``
    is rolled back, reported through the module logger and skipped, so the
    import carries on with the next batch.

    Args:
        file: Path of the CSV file to read.
        model_class: Target passed to ``sqlalchemy.insert``.
        chunk_size: Maximum number of rows per INSERT statement; must be
            positive.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
    """
    # Fail fast: a zero step makes ``range`` raise an opaque error only after
    # the file was read, and a negative step silently loads nothing.
    if chunk_size <= 0:
        raise ValueError(f'chunk_size must be positive, got {chunk_size!r}')

    df = pd.read_csv(file).dropna()
    total_rows = len(df)

    async with async_session_postgres() as session:
        for start in range(0, total_rows, chunk_size):
            end = start + chunk_size
            chunk = df.iloc[start:end].to_dict(orient='records')
            stmt = insert(model_class).values(chunk)
            try:
                await session.execute(stmt)
                await session.commit()
            except IntegrityError as e:
                # Best-effort import: drop the offending batch but leave a
                # trace instead of discarding the error silently.
                await session.rollback()
                logging.getLogger(__name__).warning(
                    'Skipped rows %d-%d of %s: %s',
                    start,
                    min(end, total_rows) - 1,
                    file,
                    e,
                )
def load_data_to_clickhouse(file: str, model_class, chunk_size=100):
    """Bulk-insert the rows of a CSV file into the ClickHouse table of ``model_class``.

    Rows containing any missing value are dropped before loading. The
    remaining rows are inserted in batches of at most ``chunk_size`` rows
    through the shared ``session_clickhouse`` session, committing after every
    batch.

    Args:
        file: Path of the CSV file to read.
        model_class: Target passed to ``sqlalchemy.insert``.
        chunk_size: Maximum number of rows per INSERT statement; must be
            positive.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
    """
    # Fail fast: a zero step makes ``range`` raise an opaque error only after
    # the file was read, and a negative step silently loads nothing.
    if chunk_size <= 0:
        raise ValueError(f'chunk_size must be positive, got {chunk_size!r}')

    df = pd.read_csv(file).dropna()

    with session_clickhouse as session:
        for start in range(0, len(df), chunk_size):
            end = start + chunk_size
            chunk = df.iloc[start:end].to_dict(orient='records')
            stmt = insert(model_class).values(chunk)
            session.execute(stmt)
            session.commit()
async def csv_to_db():
    """Create all tables and import the bundled CSV files into both databases.

    The four PostgreSQL files are loaded one after another in the listed
    order, then the ClickHouse file is loaded synchronously, and finally a
    completion message is printed.
    """
    await create_all_tables()

    # NOTE(review): parameter tables are loaded before the experiment tables,
    # presumably because the latter reference them; confirm before reordering.
    postgres_sources = (
        ('./db/files/load_parameters.csv', LoadParameters),
        ('./db/files/recycling_parameters.csv', RecyclingParameters),
        ('./db/files/experiment_parameters.csv', ExperimentParameters),
        ('./db/files/experiment_data.csv', ExperimentData),
    )
    for csv_path, model in postgres_sources:
        await load_data_to_postgres(csv_path, model)

    load_data_to_clickhouse(
        './db/files/ch_experimentdb_experiment_data.csv',
        ChExperimentDBExperimentData,
    )

    print('csv_to_db выполнено')