feat: add async to video and audio processing

This commit is contained in:
parent 942abb2375
commit 55f637a3b2
5 changed files with 56 additions and 52 deletions

View File

@ -1,6 +1,6 @@
import os import os
from logging import Logger from logging import Logger
from tempfile import NamedTemporaryFile import tempfile
from pyrogram import filters from pyrogram import filters
from pyrogram.filters import Filter from pyrogram.filters import Filter
@ -108,7 +108,7 @@ class VideoCommandHandler(AbstractCommandHandler):
"Processing video message to extract text...", quote=True "Processing video message to extract text...", quote=True
) )
with NamedTemporaryFile(delete=False) as temp_video_file: with tempfile.NamedTemporaryFile(delete=False) as temp_video_file:
video_file_path = await client.download_media( video_file_path = await client.download_media(
media.file_id, media.file_id,
file_name=temp_video_file.name file_name=temp_video_file.name
@ -117,7 +117,7 @@ class VideoCommandHandler(AbstractCommandHandler):
try: try:
# Validate video duration # Validate video duration
video_duration: float = video_processing.get_video_duration(video_file_path) # type: ignore video_duration: float = await video_processing.get_video_duration(video_file_path) # type: ignore
if video_duration > self.MAX_DURATION: if video_duration > self.MAX_DURATION:
self.logger.warning(f"{media_type} too long: {video_duration} seconds.") self.logger.warning(f"{media_type} too long: {video_duration} seconds.")
await processing_message.edit_text( await processing_message.edit_text(
@ -129,8 +129,8 @@ class VideoCommandHandler(AbstractCommandHandler):
# Extract audio and convert it to text # Extract audio and convert it to text
output_dir = os.path.dirname(video_file_path) # type: ignore output_dir = os.path.dirname(video_file_path) # type: ignore
audio_file_path: str = video_processing.extract_audio_from_video(video_file_path, output_dir) # type: ignore audio_file_path: str = await video_processing.extract_audio_from_video(video_file_path, output_dir) # type: ignore
extracted_text: str = audio_processing.convert_voice_to_text(audio_file_path, language=language_code) # type: ignore extracted_text: str = await audio_processing.convert_voice_to_text(audio_file_path, language=language_code) # type: ignore
self.logger.info(f"{media_type} message successfully converted to text.") self.logger.info(f"{media_type} message successfully converted to text.")
response_text: str = ( response_text: str = (

View File

@ -1,6 +1,6 @@
import os import os
from logging import Logger from logging import Logger
from tempfile import NamedTemporaryFile import tempfile
from pyrogram import filters from pyrogram import filters
from pyrogram.filters import Filter from pyrogram.filters import Filter
@ -101,7 +101,7 @@ class VoiceCommandHandler(AbstractCommandHandler):
"Converting voice message to text...", quote=True "Converting voice message to text...", quote=True
) )
with NamedTemporaryFile(delete=False) as temp_audio_file: with tempfile.NamedTemporaryFile(delete=False) as temp_audio_file:
audio_file_path = await client.download_media( audio_file_path = await client.download_media(
message.reply_to_message.voice.file_id, message.reply_to_message.voice.file_id,
file_name=temp_audio_file.name file_name=temp_audio_file.name
@ -110,7 +110,7 @@ class VoiceCommandHandler(AbstractCommandHandler):
try: try:
# Validate voice message duration # Validate voice message duration
voice_duration: float = audio_processing.get_audio_duration(audio_file_path) # type: ignore voice_duration: float = await audio_processing.get_audio_duration(audio_file_path)
if voice_duration > self.MAX_DURATION: if voice_duration > self.MAX_DURATION:
self.logger.warning(f"Voice message too long: {voice_duration} seconds.") self.logger.warning(f"Voice message too long: {voice_duration} seconds.")
await processing_message.edit_text( await processing_message.edit_text(
@ -119,7 +119,7 @@ class VoiceCommandHandler(AbstractCommandHandler):
return return
await client.send_chat_action(message.chat.id, ChatAction.TYPING) await client.send_chat_action(message.chat.id, ChatAction.TYPING)
extracted_text: str = audio_processing.convert_voice_to_text(audio_file_path, language=language_code) # type: ignore extracted_text: str = await audio_processing.convert_voice_to_text(audio_file_path, language=language_code) # type: ignore
self.logger.info("Voice message successfully converted to text.") self.logger.info("Voice message successfully converted to text.")
response_text: str = ( response_text: str = (

View File

@ -1,6 +1,7 @@
import os import os
import logging import logging
from logging import Logger from logging import Logger
import asyncio
from pydub import AudioSegment from pydub import AudioSegment
import speech_recognition as sr import speech_recognition as sr
@ -10,19 +11,19 @@ from speech_recognition.audio import AudioData
# Configure logging # Configure logging
logger: Logger = logging.getLogger(__name__) logger: Logger = logging.getLogger(__name__)
def convert_to_wav(file_path: str) -> str: async def convert_to_wav(file_path: str) -> str:
""" """
Converts an audio file to WAV format if it is not already in WAV format. Converts an audio file to WAV format if it is not already in WAV format.
Args: Args:
file_path (str): The path to the audio file to be converted. file_path (`str`): The path to the audio file to be converted.
Returns: Returns:
str: The path to the converted or original WAV file. `str`: The path to the converted or original WAV file.
Raises: Raises:
FileNotFoundError: If the file does not exist. `FileNotFoundError`: If the file does not exist.
RuntimeError: If the conversion fails for any reason. `RuntimeError`: If the conversion fails for any reason.
""" """
# Check if the file exists # Check if the file exists
if not os.path.exists(file_path): if not os.path.exists(file_path):
@ -35,28 +36,28 @@ def convert_to_wav(file_path: str) -> str:
try: try:
logger.info(f"Converting {file_path} to WAV format.") logger.info(f"Converting {file_path} to WAV format.")
audio = AudioSegment.from_file(file_path) audio = await asyncio.to_thread(AudioSegment.from_file, file_path)
wav_path: str = f"{os.path.splitext(file_path)[0]}.wav" wav_path: str = f"{os.path.splitext(file_path)[0]}.wav"
audio.export(wav_path, format="wav") await asyncio.to_thread(audio.export, wav_path, format="wav")
logger.info(f"File converted to {wav_path}.") logger.info(f"File converted to {wav_path}.")
return wav_path return wav_path
except Exception as e: except Exception as e:
logger.error(f"Failed to convert file to WAV: {e}") logger.error(f"Failed to convert file to WAV: {e}")
raise RuntimeError(f"Failed to convert file to WAV: {e}") raise RuntimeError(f"Failed to convert file to WAV: {e}")
def get_audio_duration(file_path: str) -> float: async def get_audio_duration(file_path: str) -> float:
""" """
Retrieves the duration of an audio file in seconds. Retrieves the duration of an audio file in seconds.
Args: Args:
file_path (str): The path to the audio file. file_path (`str`): The path to the audio file.
Returns: Returns:
float: The duration of the audio file in seconds. `float`: The duration of the audio file in seconds.
Raises: Raises:
FileNotFoundError: If the file does not exist. `FileNotFoundError`: If the file does not exist.
RuntimeError: If unable to get the file duration. `RuntimeError`: If unable to get the file duration.
""" """
# Check if the file exists # Check if the file exists
if not os.path.exists(file_path): if not os.path.exists(file_path):
@ -65,7 +66,7 @@ def get_audio_duration(file_path: str) -> float:
try: try:
logger.info(f"Getting duration of {file_path}.") logger.info(f"Getting duration of {file_path}.")
audio = AudioSegment.from_file(file_path) audio = await asyncio.to_thread(AudioSegment.from_file, file_path)
duration: float = len(audio) / 1000 # Duration in seconds duration: float = len(audio) / 1000 # Duration in seconds
logger.info(f"Duration of {file_path}: {duration} seconds.") logger.info(f"Duration of {file_path}: {duration} seconds.")
return duration return duration
@ -73,20 +74,20 @@ def get_audio_duration(file_path: str) -> float:
logger.error(f"Failed to get file duration: {e}") logger.error(f"Failed to get file duration: {e}")
raise RuntimeError(f"Failed to get file duration: {e}") raise RuntimeError(f"Failed to get file duration: {e}")
def convert_voice_to_text(file_path: str, language='ru') -> str: async def convert_voice_to_text(file_path: str, language='ru') -> str:
""" """
Converts speech from an audio file to text using OpenAI speech recognition service. Converts speech from an audio file to text using OpenAI speech recognition service.
Args: Args:
file_path (str): The path to the audio file to be processed. file_path (`str`): The path to the audio file to be processed.
language (str): The language code for speech recognition (default is 'ru'). language (`str`): The language code for speech recognition (default is 'ru').
Returns: Returns:
str: The transcribed text if recognition is successful. `str`: The transcribed text if recognition is successful.
Raises: Raises:
FileNotFoundError: If the file does not exist. `FileNotFoundError`: If the file does not exist.
RuntimeError: For any errors encountered during processing. `RuntimeError`: For any errors encountered during processing.
""" """
# Check if the file exists # Check if the file exists
if not os.path.exists(file_path): if not os.path.exists(file_path):
@ -95,7 +96,7 @@ def convert_voice_to_text(file_path: str, language='ru') -> str:
# Convert the file to WAV format if necessary # Convert the file to WAV format if necessary
try: try:
wav_path: str = convert_to_wav(file_path) wav_path: str = await convert_to_wav(file_path)
except RuntimeError as e: except RuntimeError as e:
logger.error(f"Error converting to WAV: {e}") logger.error(f"Error converting to WAV: {e}")
raise RuntimeError(f"Error converting to WAV: {e}") raise RuntimeError(f"Error converting to WAV: {e}")
@ -103,10 +104,13 @@ def convert_voice_to_text(file_path: str, language='ru') -> str:
recognizer = sr.Recognizer() recognizer = sr.Recognizer()
try: try:
logger.info(f"Processing file {wav_path} ({get_audio_duration(wav_path)} sec) for speech recognition.") duration: float = await get_audio_duration(wav_path)
logger.info(f"Processing file {wav_path} ({duration} sec) for speech recognition.")
with sr.AudioFile(wav_path) as source: with sr.AudioFile(wav_path) as source:
audio_data: AudioData = recognizer.record(source) audio_data: AudioData = await asyncio.to_thread(recognizer.record, source)
text = recognizer.recognize_whisper(audio_data, language=language, model='medium') text = await asyncio.to_thread(
recognizer.recognize_whisper, audio_data, language=language, model='medium'
)
logger.info("Speech recognition successful.") logger.info("Speech recognition successful.")
return text # type: ignore return text # type: ignore
except sr.UnknownValueError: except sr.UnknownValueError:

View File

@ -12,8 +12,8 @@ def setup_logging(output_to_console=False) -> None:
log files for a week. log files for a week.
Args: Args:
output_to_console (bool): If True, log messages will also be printed to the console. output_to_console (`bool`): If True, log messages will also be printed to the console.
Defaults to False. Defaults to False.
""" """
# Define the default handlers to use. Always logs to a file. # Define the default handlers to use. Always logs to a file.
handlers: list[str] = ['file'] handlers: list[str] = ['file']

View File

@ -1,6 +1,7 @@
import os import os
import logging import logging
from logging import Logger from logging import Logger
import asyncio
from moviepy import VideoFileClip from moviepy import VideoFileClip
@ -8,26 +9,26 @@ from moviepy import VideoFileClip
# Configure logging # Configure logging
logger: Logger = logging.getLogger(__name__) logger: Logger = logging.getLogger(__name__)
def get_video_duration(video_path: str) -> float: async def get_video_duration(video_path: str) -> float:
""" """
Get the duration of a video file in seconds. Get the duration of a video file in seconds.
Args: Args:
video_path (str): The path to the video file. video_path (`str`): The path to the video file.
Returns: Returns:
float: The duration of the video in seconds. `float`: The duration of the video in seconds.
Raises: Raises:
FileNotFoundError: If the video file does not exist. `FileNotFoundError`: If the video file does not exist.
RuntimeError: If an error occurs during processing. `RuntimeError`: If an error occurs during processing.
""" """
if not os.path.exists(video_path): if not os.path.exists(video_path):
logger.error(f"Video file {video_path} does not exist.") logger.error(f"Video file {video_path} does not exist.")
raise FileNotFoundError(f"Video file {video_path} does not exist.") raise FileNotFoundError(f"Video file {video_path} does not exist.")
try: try:
video_clip = VideoFileClip(video_path) video_clip: VideoFileClip = await asyncio.to_thread(VideoFileClip, video_path)
duration = video_clip.duration duration = video_clip.duration
logger.info(f"Duration of video {video_path}: {duration} seconds.") logger.info(f"Duration of video {video_path}: {duration} seconds.")
return duration return duration
@ -35,23 +36,22 @@ def get_video_duration(video_path: str) -> float:
logger.error(f"Failed to get video duration: {e}", exc_info=True) logger.error(f"Failed to get video duration: {e}", exc_info=True)
raise RuntimeError(f"Failed to get video duration: {e}") raise RuntimeError(f"Failed to get video duration: {e}")
finally: finally:
video_clip.close() await asyncio.to_thread(video_clip.close)
async def extract_audio_from_video(video_path: str, output_dir: str) -> str:
def extract_audio_from_video(video_path: str, output_dir: str) -> str:
""" """
Extracts the audio track from a video file and saves it as a WAV file. Extracts the audio track from a video file and saves it as a WAV file.
Args: Args:
video_path (str): The path to the video file. video_path (`str`): The path to the video file.
output_dir (str): The directory where the audio file will be saved. output_dir (`str`): The directory where the audio file will be saved.
Returns: Returns:
str: The path to the extracted audio file. `str`: The path to the extracted audio file.
Raises: Raises:
FileNotFoundError: If the video file does not exist. `FileNotFoundError`: If the video file does not exist.
RuntimeError: If an error occurs during audio extraction. `RuntimeError`: If an error occurs during audio extraction.
""" """
if not os.path.exists(video_path): if not os.path.exists(video_path):
logger.error(f"Video file {video_path} does not exist.") logger.error(f"Video file {video_path} does not exist.")
@ -59,13 +59,13 @@ def extract_audio_from_video(video_path: str, output_dir: str) -> str:
try: try:
logger.info(f"Extracting audio from video: {video_path}") logger.info(f"Extracting audio from video: {video_path}")
video_clip = VideoFileClip(video_path) video_clip: VideoFileClip = await asyncio.to_thread(VideoFileClip, video_path)
audio_path = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(video_path))[0]}.wav") audio_path: str = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(video_path))[0]}.wav")
video_clip.audio.write_audiofile(audio_path) # type: ignore await asyncio.to_thread(video_clip.audio.write_audiofile, audio_path) # type: ignore
logger.info(f"Audio extracted and saved to: {audio_path}") logger.info(f"Audio extracted and saved to: {audio_path}")
return audio_path return audio_path
except Exception as e: except Exception as e:
logger.error(f"Failed to extract audio from video: {e}", exc_info=True) logger.error(f"Failed to extract audio from video: {e}", exc_info=True)
raise RuntimeError(f"Failed to extract audio from video: {e}") raise RuntimeError(f"Failed to extract audio from video: {e}")
finally: finally:
video_clip.close() await asyncio.to_thread(video_clip.close)