feat: add async to video and audio processing

This commit is contained in:
parent 942abb2375
commit 55f637a3b2
5 changed files with 56 additions and 52 deletions

View File

@ -1,6 +1,6 @@
import os
from logging import Logger
from tempfile import NamedTemporaryFile
import tempfile
from pyrogram import filters
from pyrogram.filters import Filter
@ -108,7 +108,7 @@ class VideoCommandHandler(AbstractCommandHandler):
"Processing video message to extract text...", quote=True
)
with NamedTemporaryFile(delete=False) as temp_video_file:
with tempfile.NamedTemporaryFile(delete=False) as temp_video_file:
video_file_path = await client.download_media(
media.file_id,
file_name=temp_video_file.name
@ -117,7 +117,7 @@ class VideoCommandHandler(AbstractCommandHandler):
try:
# Validate video duration
video_duration: float = video_processing.get_video_duration(video_file_path) # type: ignore
video_duration: float = await video_processing.get_video_duration(video_file_path) # type: ignore
if video_duration > self.MAX_DURATION:
self.logger.warning(f"{media_type} too long: {video_duration} seconds.")
await processing_message.edit_text(
@ -129,8 +129,8 @@ class VideoCommandHandler(AbstractCommandHandler):
# Extract audio and convert it to text
output_dir = os.path.dirname(video_file_path) # type: ignore
audio_file_path: str = video_processing.extract_audio_from_video(video_file_path, output_dir) # type: ignore
extracted_text: str = audio_processing.convert_voice_to_text(audio_file_path, language=language_code) # type: ignore
audio_file_path: str = await video_processing.extract_audio_from_video(video_file_path, output_dir) # type: ignore
extracted_text: str = await audio_processing.convert_voice_to_text(audio_file_path, language=language_code) # type: ignore
self.logger.info(f"{media_type} message successfully converted to text.")
response_text: str = (

View File

@ -1,6 +1,6 @@
import os
from logging import Logger
from tempfile import NamedTemporaryFile
import tempfile
from pyrogram import filters
from pyrogram.filters import Filter
@ -101,7 +101,7 @@ class VoiceCommandHandler(AbstractCommandHandler):
"Converting voice message to text...", quote=True
)
with NamedTemporaryFile(delete=False) as temp_audio_file:
with tempfile.NamedTemporaryFile(delete=False) as temp_audio_file:
audio_file_path = await client.download_media(
message.reply_to_message.voice.file_id,
file_name=temp_audio_file.name
@ -110,7 +110,7 @@ class VoiceCommandHandler(AbstractCommandHandler):
try:
# Validate voice message duration
voice_duration: float = audio_processing.get_audio_duration(audio_file_path) # type: ignore
voice_duration: float = await audio_processing.get_audio_duration(audio_file_path)
if voice_duration > self.MAX_DURATION:
self.logger.warning(f"Voice message too long: {voice_duration} seconds.")
await processing_message.edit_text(
@ -119,7 +119,7 @@ class VoiceCommandHandler(AbstractCommandHandler):
return
await client.send_chat_action(message.chat.id, ChatAction.TYPING)
extracted_text: str = audio_processing.convert_voice_to_text(audio_file_path, language=language_code) # type: ignore
extracted_text: str = await audio_processing.convert_voice_to_text(audio_file_path, language=language_code) # type: ignore
self.logger.info("Voice message successfully converted to text.")
response_text: str = (

View File

@ -1,6 +1,7 @@
import os
import logging
from logging import Logger
import asyncio
from pydub import AudioSegment
import speech_recognition as sr
@ -10,19 +11,19 @@ from speech_recognition.audio import AudioData
# Configure logging
logger: Logger = logging.getLogger(__name__)
def convert_to_wav(file_path: str) -> str:
async def convert_to_wav(file_path: str) -> str:
"""
Converts an audio file to WAV format if it is not already in WAV format.
Args:
file_path (str): The path to the audio file to be converted.
file_path (`str`): The path to the audio file to be converted.
Returns:
str: The path to the converted or original WAV file.
`str`: The path to the converted or original WAV file.
Raises:
FileNotFoundError: If the file does not exist.
RuntimeError: If the conversion fails for any reason.
`FileNotFoundError`: If the file does not exist.
`RuntimeError`: If the conversion fails for any reason.
"""
# Check if the file exists
if not os.path.exists(file_path):
@ -35,28 +36,28 @@ def convert_to_wav(file_path: str) -> str:
try:
logger.info(f"Converting {file_path} to WAV format.")
audio = AudioSegment.from_file(file_path)
audio = await asyncio.to_thread(AudioSegment.from_file, file_path)
wav_path: str = f"{os.path.splitext(file_path)[0]}.wav"
audio.export(wav_path, format="wav")
await asyncio.to_thread(audio.export, wav_path, format="wav")
logger.info(f"File converted to {wav_path}.")
return wav_path
except Exception as e:
logger.error(f"Failed to convert file to WAV: {e}")
raise RuntimeError(f"Failed to convert file to WAV: {e}")
def get_audio_duration(file_path: str) -> float:
async def get_audio_duration(file_path: str) -> float:
"""
Retrieves the duration of an audio file in seconds.
Args:
file_path (str): The path to the audio file.
file_path (`str`): The path to the audio file.
Returns:
float: The duration of the audio file in seconds.
`float`: The duration of the audio file in seconds.
Raises:
FileNotFoundError: If the file does not exist.
RuntimeError: If unable to get the file duration.
`FileNotFoundError`: If the file does not exist.
`RuntimeError`: If unable to get the file duration.
"""
# Check if the file exists
if not os.path.exists(file_path):
@ -65,7 +66,7 @@ def get_audio_duration(file_path: str) -> float:
try:
logger.info(f"Getting duration of {file_path}.")
audio = AudioSegment.from_file(file_path)
audio = await asyncio.to_thread(AudioSegment.from_file, file_path)
duration: float = len(audio) / 1000 # Duration in seconds
logger.info(f"Duration of {file_path}: {duration} seconds.")
return duration
@ -73,20 +74,20 @@ def get_audio_duration(file_path: str) -> float:
logger.error(f"Failed to get file duration: {e}")
raise RuntimeError(f"Failed to get file duration: {e}")
def convert_voice_to_text(file_path: str, language='ru') -> str:
async def convert_voice_to_text(file_path: str, language='ru') -> str:
"""
Converts speech from an audio file to text using the OpenAI Whisper speech recognition model.
Args:
file_path (str): The path to the audio file to be processed.
language (str): The language code for speech recognition (default is 'ru').
file_path (`str`): The path to the audio file to be processed.
language (`str`): The language code for speech recognition (default is 'ru').
Returns:
str: The transcribed text if recognition is successful.
`str`: The transcribed text if recognition is successful.
Raises:
FileNotFoundError: If the file does not exist.
RuntimeError: For any errors encountered during processing.
`FileNotFoundError`: If the file does not exist.
`RuntimeError`: For any errors encountered during processing.
"""
# Check if the file exists
if not os.path.exists(file_path):
@ -95,7 +96,7 @@ def convert_voice_to_text(file_path: str, language='ru') -> str:
# Convert the file to WAV format if necessary
try:
wav_path: str = convert_to_wav(file_path)
wav_path: str = await convert_to_wav(file_path)
except RuntimeError as e:
logger.error(f"Error converting to WAV: {e}")
raise RuntimeError(f"Error converting to WAV: {e}")
@ -103,10 +104,13 @@ def convert_voice_to_text(file_path: str, language='ru') -> str:
recognizer = sr.Recognizer()
try:
logger.info(f"Processing file {wav_path} ({get_audio_duration(wav_path)} sec) for speech recognition.")
duration: float = await get_audio_duration(wav_path)
logger.info(f"Processing file {wav_path} ({duration} sec) for speech recognition.")
with sr.AudioFile(wav_path) as source:
audio_data: AudioData = recognizer.record(source)
text = recognizer.recognize_whisper(audio_data, language=language, model='medium')
audio_data: AudioData = await asyncio.to_thread(recognizer.record, source)
text = await asyncio.to_thread(
recognizer.recognize_whisper, audio_data, language=language, model='medium'
)
logger.info("Speech recognition successful.")
return text # type: ignore
except sr.UnknownValueError:

View File

@ -12,7 +12,7 @@ def setup_logging(output_to_console=False) -> None:
log files for a week.
Args:
output_to_console (bool): If True, log messages will also be printed to the console.
output_to_console (`bool`): If True, log messages will also be printed to the console.
Defaults to False.
"""
# Define the default handlers to use. Always logs to a file.

View File

@ -1,6 +1,7 @@
import os
import logging
from logging import Logger
import asyncio
from moviepy import VideoFileClip
@ -8,26 +9,26 @@ from moviepy import VideoFileClip
# Configure logging
logger: Logger = logging.getLogger(__name__)
def get_video_duration(video_path: str) -> float:
async def get_video_duration(video_path: str) -> float:
"""
Get the duration of a video file in seconds.
Args:
video_path (str): The path to the video file.
video_path (`str`): The path to the video file.
Returns:
float: The duration of the video in seconds.
`float`: The duration of the video in seconds.
Raises:
FileNotFoundError: If the video file does not exist.
RuntimeError: If an error occurs during processing.
`FileNotFoundError`: If the video file does not exist.
`RuntimeError`: If an error occurs during processing.
"""
if not os.path.exists(video_path):
logger.error(f"Video file {video_path} does not exist.")
raise FileNotFoundError(f"Video file {video_path} does not exist.")
try:
video_clip = VideoFileClip(video_path)
video_clip: VideoFileClip = await asyncio.to_thread(VideoFileClip, video_path)
duration = video_clip.duration
logger.info(f"Duration of video {video_path}: {duration} seconds.")
return duration
@ -35,23 +36,22 @@ def get_video_duration(video_path: str) -> float:
logger.error(f"Failed to get video duration: {e}", exc_info=True)
raise RuntimeError(f"Failed to get video duration: {e}")
finally:
video_clip.close()
await asyncio.to_thread(video_clip.close)
def extract_audio_from_video(video_path: str, output_dir: str) -> str:
async def extract_audio_from_video(video_path: str, output_dir: str) -> str:
"""
Extracts the audio track from a video file and saves it as a WAV file.
Args:
video_path (str): The path to the video file.
output_dir (str): The directory where the audio file will be saved.
video_path (`str`): The path to the video file.
output_dir (`str`): The directory where the audio file will be saved.
Returns:
str: The path to the extracted audio file.
`str`: The path to the extracted audio file.
Raises:
FileNotFoundError: If the video file does not exist.
RuntimeError: If an error occurs during audio extraction.
`FileNotFoundError`: If the video file does not exist.
`RuntimeError`: If an error occurs during audio extraction.
"""
if not os.path.exists(video_path):
logger.error(f"Video file {video_path} does not exist.")
@ -59,13 +59,13 @@ def extract_audio_from_video(video_path: str, output_dir: str) -> str:
try:
logger.info(f"Extracting audio from video: {video_path}")
video_clip = VideoFileClip(video_path)
audio_path = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(video_path))[0]}.wav")
video_clip.audio.write_audiofile(audio_path) # type: ignore
video_clip: VideoFileClip = await asyncio.to_thread(VideoFileClip, video_path)
audio_path: str = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(video_path))[0]}.wav")
await asyncio.to_thread(video_clip.audio.write_audiofile, audio_path) # type: ignore
logger.info(f"Audio extracted and saved to: {audio_path}")
return audio_path
except Exception as e:
logger.error(f"Failed to extract audio from video: {e}", exc_info=True)
raise RuntimeError(f"Failed to extract audio from video: {e}")
finally:
video_clip.close()
await asyncio.to_thread(video_clip.close)