From 55f637a3b2f83bb91c89c12a3b8aa173567d820d Mon Sep 17 00:00:00 2001 From: Factorino73 Date: Wed, 8 Jan 2025 21:30:16 +0400 Subject: [PATCH] feat: add async to video and audio processing --- src/bot/handlers/video_command_handler.py | 10 ++--- src/bot/handlers/voice_command_handler.py | 8 ++-- src/utils/audio_processing.py | 50 ++++++++++++----------- src/utils/logging_configuration.py | 4 +- src/utils/video_processing.py | 36 ++++++++-------- 5 files changed, 56 insertions(+), 52 deletions(-) diff --git a/src/bot/handlers/video_command_handler.py b/src/bot/handlers/video_command_handler.py index dc71074..e7a3ecd 100644 --- a/src/bot/handlers/video_command_handler.py +++ b/src/bot/handlers/video_command_handler.py @@ -1,6 +1,6 @@ import os from logging import Logger -from tempfile import NamedTemporaryFile +import tempfile from pyrogram import filters from pyrogram.filters import Filter @@ -108,7 +108,7 @@ class VideoCommandHandler(AbstractCommandHandler): "Processing video message to extract text...", quote=True ) - with NamedTemporaryFile(delete=False) as temp_video_file: + with tempfile.NamedTemporaryFile(delete=False) as temp_video_file: video_file_path = await client.download_media( media.file_id, file_name=temp_video_file.name @@ -117,7 +117,7 @@ class VideoCommandHandler(AbstractCommandHandler): try: # Validate video duration - video_duration: float = video_processing.get_video_duration(video_file_path) # type: ignore + video_duration: float = await video_processing.get_video_duration(video_file_path) # type: ignore if video_duration > self.MAX_DURATION: self.logger.warning(f"{media_type} too long: {video_duration} seconds.") await processing_message.edit_text( @@ -129,8 +129,8 @@ class VideoCommandHandler(AbstractCommandHandler): # Extract audio and convert it to text output_dir = os.path.dirname(video_file_path) # type: ignore - audio_file_path: str = video_processing.extract_audio_from_video(video_file_path, output_dir) # type: ignore - extracted_text: str = audio_processing.convert_voice_to_text(audio_file_path, language=language_code) # type: ignore + audio_file_path: str = await video_processing.extract_audio_from_video(video_file_path, output_dir) # type: ignore + extracted_text: str = await audio_processing.convert_voice_to_text(audio_file_path, language=language_code) # type: ignore self.logger.info(f"{media_type} message successfully converted to text.") response_text: str = ( diff --git a/src/bot/handlers/voice_command_handler.py b/src/bot/handlers/voice_command_handler.py index 0fe00fa..c48f79c 100644 --- a/src/bot/handlers/voice_command_handler.py +++ b/src/bot/handlers/voice_command_handler.py @@ -1,6 +1,6 @@ import os from logging import Logger -from tempfile import NamedTemporaryFile +import tempfile from pyrogram import filters from pyrogram.filters import Filter @@ -101,7 +101,7 @@ class VoiceCommandHandler(AbstractCommandHandler): "Converting voice message to text...", quote=True ) - with NamedTemporaryFile(delete=False) as temp_audio_file: + with tempfile.NamedTemporaryFile(delete=False) as temp_audio_file: audio_file_path = await client.download_media( message.reply_to_message.voice.file_id, file_name=temp_audio_file.name @@ -110,7 +110,7 @@ class VoiceCommandHandler(AbstractCommandHandler): try: # Validate voice message duration - voice_duration: float = audio_processing.get_audio_duration(audio_file_path) # type: ignore + voice_duration: float = await audio_processing.get_audio_duration(audio_file_path) if voice_duration > self.MAX_DURATION: self.logger.warning(f"Voice message too long: {voice_duration} seconds.") await processing_message.edit_text( @@ -119,7 +119,7 @@ class VoiceCommandHandler(AbstractCommandHandler): return await client.send_chat_action(message.chat.id, ChatAction.TYPING) - extracted_text: str = audio_processing.convert_voice_to_text(audio_file_path, language=language_code) # type: ignore + extracted_text: str = await audio_processing.convert_voice_to_text(audio_file_path, language=language_code) # type: ignore self.logger.info("Voice message successfully converted to text.") response_text: str = ( diff --git a/src/utils/audio_processing.py b/src/utils/audio_processing.py index a26796d..a15ebdb 100644 --- a/src/utils/audio_processing.py +++ b/src/utils/audio_processing.py @@ -1,6 +1,7 @@ import os import logging from logging import Logger +import asyncio from pydub import AudioSegment import speech_recognition as sr @@ -10,19 +11,19 @@ from speech_recognition.audio import AudioData # Configure logging logger: Logger = logging.getLogger(__name__) -def convert_to_wav(file_path: str) -> str: +async def convert_to_wav(file_path: str) -> str: """ Converts an audio file to WAV format if it is not already in WAV format. Args: - file_path (str): The path to the audio file to be converted. + file_path (`str`): The path to the audio file to be converted. Returns: - str: The path to the converted or original WAV file. + `str`: The path to the converted or original WAV file. Raises: - FileNotFoundError: If the file does not exist. - RuntimeError: If the conversion fails for any reason. + `FileNotFoundError`: If the file does not exist. + `RuntimeError`: If the conversion fails for any reason. """ # Check if the file exists if not os.path.exists(file_path): @@ -35,28 +36,28 @@ def convert_to_wav(file_path: str) -> str: try: logger.info(f"Converting {file_path} to WAV format.") - audio = AudioSegment.from_file(file_path) + audio = await asyncio.to_thread(AudioSegment.from_file, file_path) wav_path: str = f"{os.path.splitext(file_path)[0]}.wav" - audio.export(wav_path, format="wav") + await asyncio.to_thread(audio.export, wav_path, format="wav") logger.info(f"File converted to {wav_path}.") return wav_path except Exception as e: logger.error(f"Failed to convert file to WAV: {e}") raise RuntimeError(f"Failed to convert file to WAV: {e}") -def get_audio_duration(file_path: str) -> float: +async def get_audio_duration(file_path: str) -> float: """ Retrieves the duration of an audio file in seconds. Args: - file_path (str): The path to the audio file. + file_path (`str`): The path to the audio file. Returns: - float: The duration of the audio file in seconds. + `float`: The duration of the audio file in seconds. Raises: - FileNotFoundError: If the file does not exist. - RuntimeError: If unable to get the file duration. + `FileNotFoundError`: If the file does not exist. + `RuntimeError`: If unable to get the file duration. """ # Check if the file exists if not os.path.exists(file_path): @@ -65,7 +66,7 @@ def get_audio_duration(file_path: str) -> float: try: logger.info(f"Getting duration of {file_path}.") - audio = AudioSegment.from_file(file_path) + audio = await asyncio.to_thread(AudioSegment.from_file, file_path) duration: float = len(audio) / 1000 # Duration in seconds logger.info(f"Duration of {file_path}: {duration} seconds.") return duration @@ -73,20 +74,20 @@ def get_audio_duration(file_path: str) -> float: logger.error(f"Failed to get file duration: {e}") raise RuntimeError(f"Failed to get file duration: {e}") -def convert_voice_to_text(file_path: str, language='ru') -> str: +async def convert_voice_to_text(file_path: str, language='ru') -> str: """ Converts speech from an audio file to text using OpenAI speech recognition service. Args: - file_path (str): The path to the audio file to be processed. - language (str): The language code for speech recognition (default is 'ru'). + file_path (`str`): The path to the audio file to be processed. + language (`str`): The language code for speech recognition (default is 'ru'). Returns: - str: The transcribed text if recognition is successful. + `str`: The transcribed text if recognition is successful. Raises: - FileNotFoundError: If the file does not exist. - RuntimeError: For any errors encountered during processing. + `FileNotFoundError`: If the file does not exist. + `RuntimeError`: For any errors encountered during processing. """ # Check if the file exists if not os.path.exists(file_path): @@ -95,7 +96,7 @@ def convert_voice_to_text(file_path: str, language='ru') -> str: # Convert the file to WAV format if necessary try: - wav_path: str = convert_to_wav(file_path) + wav_path: str = await convert_to_wav(file_path) except RuntimeError as e: logger.error(f"Error converting to WAV: {e}") raise RuntimeError(f"Error converting to WAV: {e}") @@ -103,10 +104,13 @@ def convert_voice_to_text(file_path: str, language='ru') -> str: recognizer = sr.Recognizer() try: - logger.info(f"Processing file {wav_path} ({get_audio_duration(wav_path)} sec) for speech recognition.") + duration: float = await get_audio_duration(wav_path) + logger.info(f"Processing file {wav_path} ({duration} sec) for speech recognition.") with sr.AudioFile(wav_path) as source: - audio_data: AudioData = recognizer.record(source) - text = recognizer.recognize_whisper(audio_data, language=language, model='medium') + audio_data: AudioData = await asyncio.to_thread(recognizer.record, source) + text = await asyncio.to_thread( + recognizer.recognize_whisper, audio_data, language=language, model='medium' + ) logger.info("Speech recognition successful.") return text # type: ignore except sr.UnknownValueError: diff --git a/src/utils/logging_configuration.py b/src/utils/logging_configuration.py index 1e2fd29..ed18666 100644 --- a/src/utils/logging_configuration.py +++ b/src/utils/logging_configuration.py @@ -12,8 +12,8 @@ def setup_logging(output_to_console=False) -> None: log files for a week. Args: - output_to_console (bool): If True, log messages will also be printed to the console. - Defaults to False. + output_to_console (`bool`): If True, log messages will also be printed to the console. + Defaults to False. """ # Define the default handlers to use. Always logs to a file. handlers: list[str] = ['file'] diff --git a/src/utils/video_processing.py b/src/utils/video_processing.py index e6de23d..520f469 100644 --- a/src/utils/video_processing.py +++ b/src/utils/video_processing.py @@ -1,6 +1,7 @@ import os import logging from logging import Logger +import asyncio from moviepy import VideoFileClip @@ -8,26 +9,26 @@ from moviepy import VideoFileClip # Configure logging logger: Logger = logging.getLogger(__name__) -def get_video_duration(video_path: str) -> float: +async def get_video_duration(video_path: str) -> float: """ Get the duration of a video file in seconds. Args: - video_path (str): The path to the video file. + video_path (`str`): The path to the video file. Returns: - float: The duration of the video in seconds. + `float`: The duration of the video in seconds. Raises: - FileNotFoundError: If the video file does not exist. - RuntimeError: If an error occurs during processing. + `FileNotFoundError`: If the video file does not exist. + `RuntimeError`: If an error occurs during processing. """ if not os.path.exists(video_path): logger.error(f"Video file {video_path} does not exist.") raise FileNotFoundError(f"Video file {video_path} does not exist.") try: - video_clip = VideoFileClip(video_path) + video_clip: VideoFileClip = await asyncio.to_thread(VideoFileClip, video_path) duration = video_clip.duration logger.info(f"Duration of video {video_path}: {duration} seconds.") return duration @@ -35,23 +36,22 @@ def get_video_duration(video_path: str) -> float: logger.error(f"Failed to get video duration: {e}", exc_info=True) raise RuntimeError(f"Failed to get video duration: {e}") finally: - video_clip.close() + await asyncio.to_thread(video_clip.close) - -def extract_audio_from_video(video_path: str, output_dir: str) -> str: +async def extract_audio_from_video(video_path: str, output_dir: str) -> str: """ Extracts the audio track from a video file and saves it as a WAV file. Args: - video_path (str): The path to the video file. - output_dir (str): The directory where the audio file will be saved. + video_path (`str`): The path to the video file. + output_dir (`str`): The directory where the audio file will be saved. Returns: - str: The path to the extracted audio file. + `str`: The path to the extracted audio file. Raises: - FileNotFoundError: If the video file does not exist. - RuntimeError: If an error occurs during audio extraction. + `FileNotFoundError`: If the video file does not exist. + `RuntimeError`: If an error occurs during audio extraction. """ if not os.path.exists(video_path): logger.error(f"Video file {video_path} does not exist.") @@ -59,13 +59,13 @@ def extract_audio_from_video(video_path: str, output_dir: str) -> str: try: logger.info(f"Extracting audio from video: {video_path}") - video_clip = VideoFileClip(video_path) - audio_path = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(video_path))[0]}.wav") - video_clip.audio.write_audiofile(audio_path) # type: ignore + video_clip: VideoFileClip = await asyncio.to_thread(VideoFileClip, video_path) + audio_path: str = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(video_path))[0]}.wav") + await asyncio.to_thread(video_clip.audio.write_audiofile, audio_path) # type: ignore logger.info(f"Audio extracted and saved to: {audio_path}") return audio_path except Exception as e: logger.error(f"Failed to extract audio from video: {e}", exc_info=True) raise RuntimeError(f"Failed to extract audio from video: {e}") finally: - video_clip.close() + await asyncio.to_thread(video_clip.close)