feat: add async to video and audio processing

2025-01-08 21:30:16 +04:00 · 2025-01-08 21:30:16 +04:00 · 55f637a3b2
commit 55f637a3b2
parent 942abb2375
5 changed files with 56 additions and 52 deletions
--- a/src/bot/handlers/video_command_handler.py
+++ b/src/bot/handlers/video_command_handler.py
@ -1,6 +1,6 @@
 import os
 from logging import Logger
-from tempfile import NamedTemporaryFile
+import tempfile

 from pyrogram import filters
 from pyrogram.filters import Filter
@ -108,7 +108,7 @@ class VideoCommandHandler(AbstractCommandHandler):
            "Processing video message to extract text...", quote=True
        )

-        with NamedTemporaryFile(delete=False) as temp_video_file:
+        with tempfile.NamedTemporaryFile(delete=False) as temp_video_file:
            video_file_path = await client.download_media(
                media.file_id, 
                file_name=temp_video_file.name
@ -117,7 +117,7 @@ class VideoCommandHandler(AbstractCommandHandler):

            try:
                # Validate video duration
-                video_duration: float = video_processing.get_video_duration(video_file_path)  # type: ignore
+                video_duration: float = await video_processing.get_video_duration(video_file_path)  # type: ignore
                if video_duration > self.MAX_DURATION:
                    self.logger.warning(f"{media_type} too long: {video_duration} seconds.")
                    await processing_message.edit_text(
@ -129,8 +129,8 @@ class VideoCommandHandler(AbstractCommandHandler):
                
                # Extract audio and convert it to text
                output_dir = os.path.dirname(video_file_path)  # type: ignore
-                audio_file_path: str = video_processing.extract_audio_from_video(video_file_path, output_dir)  # type: ignore
-                extracted_text: str = audio_processing.convert_voice_to_text(audio_file_path, language=language_code)  # type: ignore
+                audio_file_path: str = await video_processing.extract_audio_from_video(video_file_path, output_dir)  # type: ignore
+                extracted_text: str = await audio_processing.convert_voice_to_text(audio_file_path, language=language_code)  # type: ignore
                self.logger.info(f"{media_type} message successfully converted to text.")

                response_text: str = (
--- a/src/bot/handlers/voice_command_handler.py
+++ b/src/bot/handlers/voice_command_handler.py
@ -1,6 +1,6 @@
 import os
 from logging import Logger
-from tempfile import NamedTemporaryFile
+import tempfile

 from pyrogram import filters
 from pyrogram.filters import Filter
@ -101,7 +101,7 @@ class VoiceCommandHandler(AbstractCommandHandler):
            "Converting voice message to text...", quote=True
        )

-        with NamedTemporaryFile(delete=False) as temp_audio_file:
+        with tempfile.NamedTemporaryFile(delete=False) as temp_audio_file:
            audio_file_path = await client.download_media(
                message.reply_to_message.voice.file_id, 
                file_name=temp_audio_file.name
@ -110,7 +110,7 @@ class VoiceCommandHandler(AbstractCommandHandler):

            try:
                # Validate voice message duration
-                voice_duration: float = audio_processing.get_audio_duration(audio_file_path)  # type: ignore
+                voice_duration: float = await audio_processing.get_audio_duration(audio_file_path)
                if voice_duration > self.MAX_DURATION:
                    self.logger.warning(f"Voice message too long: {voice_duration} seconds.")
                    await processing_message.edit_text(
@ -119,7 +119,7 @@ class VoiceCommandHandler(AbstractCommandHandler):
                    return

                await client.send_chat_action(message.chat.id, ChatAction.TYPING)
-                extracted_text: str = audio_processing.convert_voice_to_text(audio_file_path, language=language_code)  # type: ignore
+                extracted_text: str = await audio_processing.convert_voice_to_text(audio_file_path, language=language_code)  # type: ignore
                self.logger.info("Voice message successfully converted to text.")

                response_text: str = (
--- a/src/utils/audio_processing.py
+++ b/src/utils/audio_processing.py
@ -1,6 +1,7 @@
 import os
 import logging
 from logging import Logger
+import asyncio

 from pydub import AudioSegment
 import speech_recognition as sr
@ -10,19 +11,19 @@ from speech_recognition.audio import AudioData
 # Configure logging
 logger: Logger = logging.getLogger(__name__)

-def convert_to_wav(file_path: str) -> str:
+async def convert_to_wav(file_path: str) -> str:
    """
    Converts an audio file to WAV format if it is not already in WAV format.

    Args:
-        file_path (str): The path to the audio file to be converted.
+        file_path (`str`): The path to the audio file to be converted.

    Returns:
-        str: The path to the converted or original WAV file.
+        `str`: The path to the converted or original WAV file.

    Raises:
-        FileNotFoundError: If the file does not exist.
-        RuntimeError: If the conversion fails for any reason.
+        `FileNotFoundError`: If the file does not exist.
+        `RuntimeError`: If the conversion fails for any reason.
    """
    # Check if the file exists
    if not os.path.exists(file_path):
@ -35,28 +36,28 @@ def convert_to_wav(file_path: str) -> str:

    try:
        logger.info(f"Converting {file_path} to WAV format.")
-        audio = AudioSegment.from_file(file_path)
+        audio = await asyncio.to_thread(AudioSegment.from_file, file_path)
        wav_path: str = f"{os.path.splitext(file_path)[0]}.wav"
-        audio.export(wav_path, format="wav")
+        await asyncio.to_thread(audio.export, wav_path, format="wav")
        logger.info(f"File converted to {wav_path}.")
        return wav_path
    except Exception as e:
        logger.error(f"Failed to convert file to WAV: {e}")
        raise RuntimeError(f"Failed to convert file to WAV: {e}")

-def get_audio_duration(file_path: str) -> float:
+async def get_audio_duration(file_path: str) -> float:
    """
    Retrieves the duration of an audio file in seconds.

    Args:
-        file_path (str): The path to the audio file.
+        file_path (`str`): The path to the audio file.

    Returns:
-        float: The duration of the audio file in seconds.
+        `float`: The duration of the audio file in seconds.

    Raises:
-        FileNotFoundError: If the file does not exist.
-        RuntimeError: If unable to get the file duration.
+        `FileNotFoundError`: If the file does not exist.
+        `RuntimeError`: If unable to get the file duration.
    """
    # Check if the file exists
    if not os.path.exists(file_path):
@ -65,7 +66,7 @@ def get_audio_duration(file_path: str) -> float:

    try:
        logger.info(f"Getting duration of {file_path}.")
-        audio = AudioSegment.from_file(file_path)
+        audio = await asyncio.to_thread(AudioSegment.from_file, file_path)
        duration: float = len(audio) / 1000  # Duration in seconds
        logger.info(f"Duration of {file_path}: {duration} seconds.")
        return duration
@ -73,20 +74,20 @@ def get_audio_duration(file_path: str) -> float:
        logger.error(f"Failed to get file duration: {e}")
        raise RuntimeError(f"Failed to get file duration: {e}")

-def convert_voice_to_text(file_path: str, language='ru') -> str:
+async def convert_voice_to_text(file_path: str, language='ru') -> str:
    """
    Converts speech from an audio file to text using OpenAI's Whisper speech recognition model.

    Args:
-        file_path (str): The path to the audio file to be processed.
-        language (str): The language code for speech recognition (default is 'ru').
+        file_path (`str`): The path to the audio file to be processed.
+        language (`str`): The language code for speech recognition (default is 'ru').

    Returns:
-        str: The transcribed text if recognition is successful.
+        `str`: The transcribed text if recognition is successful.

    Raises:
-        FileNotFoundError: If the file does not exist.
-        RuntimeError: For any errors encountered during processing.
+        `FileNotFoundError`: If the file does not exist.
+        `RuntimeError`: For any errors encountered during processing.
    """
    # Check if the file exists
    if not os.path.exists(file_path):
@ -95,7 +96,7 @@ def convert_voice_to_text(file_path: str, language='ru') -> str:

    # Convert the file to WAV format if necessary
    try:
-        wav_path: str = convert_to_wav(file_path)
+        wav_path: str = await convert_to_wav(file_path)
    except RuntimeError as e:
        logger.error(f"Error converting to WAV: {e}")
        raise RuntimeError(f"Error converting to WAV: {e}")
@ -103,10 +104,13 @@ def convert_voice_to_text(file_path: str, language='ru') -> str:
    recognizer = sr.Recognizer()

    try:
-        logger.info(f"Processing file {wav_path} ({get_audio_duration(wav_path)} sec) for speech recognition.")
+        duration: float = await get_audio_duration(wav_path)
+        logger.info(f"Processing file {wav_path} ({duration} sec) for speech recognition.")
        with sr.AudioFile(wav_path) as source:
-            audio_data: AudioData = recognizer.record(source)
-            text = recognizer.recognize_whisper(audio_data, language=language, model='medium')
+            audio_data: AudioData = await asyncio.to_thread(recognizer.record, source)
+            text = await asyncio.to_thread(
+                recognizer.recognize_whisper, audio_data, language=language, model='medium'
+            )
            logger.info("Speech recognition successful.")
            return text  # type: ignore
    except sr.UnknownValueError:
--- a/src/utils/logging_configuration.py
+++ b/src/utils/logging_configuration.py
@ -12,7 +12,7 @@ def setup_logging(output_to_console=False) -> None:
    log files for a week.

    Args:
-        output_to_console (bool): If True, log messages will also be printed to the console. 
+        output_to_console (`bool`): If True, log messages will also be printed to the console. 
                                    Defaults to False.
    """
    # Define the default handlers to use. Always logs to a file.
--- a/src/utils/video_processing.py
+++ b/src/utils/video_processing.py
@ -1,6 +1,7 @@
 import os
 import logging
 from logging import Logger
+import asyncio

 from moviepy import VideoFileClip

@ -8,26 +9,26 @@ from moviepy import VideoFileClip
 # Configure logging
 logger: Logger = logging.getLogger(__name__)

-def get_video_duration(video_path: str) -> float:
+async def get_video_duration(video_path: str) -> float:
    """
    Get the duration of a video file in seconds.

    Args:
-        video_path (str): The path to the video file.
+        video_path (`str`): The path to the video file.

    Returns:
-        float: The duration of the video in seconds.
+        `float`: The duration of the video in seconds.

    Raises:
-        FileNotFoundError: If the video file does not exist.
-        RuntimeError: If an error occurs during processing.
+        `FileNotFoundError`: If the video file does not exist.
+        `RuntimeError`: If an error occurs during processing.
    """
    if not os.path.exists(video_path):
        logger.error(f"Video file {video_path} does not exist.")
        raise FileNotFoundError(f"Video file {video_path} does not exist.")

    try:
-        video_clip = VideoFileClip(video_path)
+        video_clip: VideoFileClip = await asyncio.to_thread(VideoFileClip, video_path)
        duration = video_clip.duration
        logger.info(f"Duration of video {video_path}: {duration} seconds.")
        return duration
@ -35,23 +36,22 @@ def get_video_duration(video_path: str) -> float:
        logger.error(f"Failed to get video duration: {e}", exc_info=True)
        raise RuntimeError(f"Failed to get video duration: {e}")
    finally:
-        video_clip.close()
+        await asyncio.to_thread(video_clip.close)

-
-def extract_audio_from_video(video_path: str, output_dir: str) -> str:
+async def extract_audio_from_video(video_path: str, output_dir: str) -> str:
    """
    Extracts the audio track from a video file and saves it as a WAV file.

    Args:
-        video_path (str): The path to the video file.
-        output_dir (str): The directory where the audio file will be saved.
+        video_path (`str`): The path to the video file.
+        output_dir (`str`): The directory where the audio file will be saved.

    Returns:
-        str: The path to the extracted audio file.
+        `str`: The path to the extracted audio file.

    Raises:
-        FileNotFoundError: If the video file does not exist.
-        RuntimeError: If an error occurs during audio extraction.
+        `FileNotFoundError`: If the video file does not exist.
+        `RuntimeError`: If an error occurs during audio extraction.
    """
    if not os.path.exists(video_path):
        logger.error(f"Video file {video_path} does not exist.")
@ -59,13 +59,13 @@ def extract_audio_from_video(video_path: str, output_dir: str) -> str:

    try:
        logger.info(f"Extracting audio from video: {video_path}")
-        video_clip = VideoFileClip(video_path)
-        audio_path = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(video_path))[0]}.wav")
-        video_clip.audio.write_audiofile(audio_path)  # type: ignore
+        video_clip: VideoFileClip = await asyncio.to_thread(VideoFileClip, video_path)
+        audio_path: str = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(video_path))[0]}.wav")
+        await asyncio.to_thread(video_clip.audio.write_audiofile, audio_path)  # type: ignore
        logger.info(f"Audio extracted and saved to: {audio_path}")
        return audio_path
    except Exception as e:
        logger.error(f"Failed to extract audio from video: {e}", exc_info=True)
        raise RuntimeError(f"Failed to extract audio from video: {e}")
    finally:
-        video_clip.close()
+        await asyncio.to_thread(video_clip.close)