feat: add async to video and audio processing
This commit is contained in:
parent
942abb2375
commit
55f637a3b2
@ -1,6 +1,6 @@
|
||||
import os
|
||||
from logging import Logger
|
||||
from tempfile import NamedTemporaryFile
|
||||
import tempfile
|
||||
|
||||
from pyrogram import filters
|
||||
from pyrogram.filters import Filter
|
||||
@ -108,7 +108,7 @@ class VideoCommandHandler(AbstractCommandHandler):
|
||||
"Processing video message to extract text...", quote=True
|
||||
)
|
||||
|
||||
with NamedTemporaryFile(delete=False) as temp_video_file:
|
||||
with tempfile.NamedTemporaryFile(delete=False) as temp_video_file:
|
||||
video_file_path = await client.download_media(
|
||||
media.file_id,
|
||||
file_name=temp_video_file.name
|
||||
@ -117,7 +117,7 @@ class VideoCommandHandler(AbstractCommandHandler):
|
||||
|
||||
try:
|
||||
# Validate video duration
|
||||
video_duration: float = video_processing.get_video_duration(video_file_path) # type: ignore
|
||||
video_duration: float = await video_processing.get_video_duration(video_file_path) # type: ignore
|
||||
if video_duration > self.MAX_DURATION:
|
||||
self.logger.warning(f"{media_type} too long: {video_duration} seconds.")
|
||||
await processing_message.edit_text(
|
||||
@ -129,8 +129,8 @@ class VideoCommandHandler(AbstractCommandHandler):
|
||||
|
||||
# Extract audio and convert it to text
|
||||
output_dir = os.path.dirname(video_file_path) # type: ignore
|
||||
audio_file_path: str = video_processing.extract_audio_from_video(video_file_path, output_dir) # type: ignore
|
||||
extracted_text: str = audio_processing.convert_voice_to_text(audio_file_path, language=language_code) # type: ignore
|
||||
audio_file_path: str = await video_processing.extract_audio_from_video(video_file_path, output_dir) # type: ignore
|
||||
extracted_text: str = await audio_processing.convert_voice_to_text(audio_file_path, language=language_code) # type: ignore
|
||||
self.logger.info(f"{media_type} message successfully converted to text.")
|
||||
|
||||
response_text: str = (
|
||||
|
@ -1,6 +1,6 @@
|
||||
import os
|
||||
from logging import Logger
|
||||
from tempfile import NamedTemporaryFile
|
||||
import tempfile
|
||||
|
||||
from pyrogram import filters
|
||||
from pyrogram.filters import Filter
|
||||
@ -101,7 +101,7 @@ class VoiceCommandHandler(AbstractCommandHandler):
|
||||
"Converting voice message to text...", quote=True
|
||||
)
|
||||
|
||||
with NamedTemporaryFile(delete=False) as temp_audio_file:
|
||||
with tempfile.NamedTemporaryFile(delete=False) as temp_audio_file:
|
||||
audio_file_path = await client.download_media(
|
||||
message.reply_to_message.voice.file_id,
|
||||
file_name=temp_audio_file.name
|
||||
@ -110,7 +110,7 @@ class VoiceCommandHandler(AbstractCommandHandler):
|
||||
|
||||
try:
|
||||
# Validate voice message duration
|
||||
voice_duration: float = audio_processing.get_audio_duration(audio_file_path) # type: ignore
|
||||
voice_duration: float = await audio_processing.get_audio_duration(audio_file_path)
|
||||
if voice_duration > self.MAX_DURATION:
|
||||
self.logger.warning(f"Voice message too long: {voice_duration} seconds.")
|
||||
await processing_message.edit_text(
|
||||
@ -119,7 +119,7 @@ class VoiceCommandHandler(AbstractCommandHandler):
|
||||
return
|
||||
|
||||
await client.send_chat_action(message.chat.id, ChatAction.TYPING)
|
||||
extracted_text: str = audio_processing.convert_voice_to_text(audio_file_path, language=language_code) # type: ignore
|
||||
extracted_text: str = await audio_processing.convert_voice_to_text(audio_file_path, language=language_code) # type: ignore
|
||||
self.logger.info("Voice message successfully converted to text.")
|
||||
|
||||
response_text: str = (
|
||||
|
@ -1,6 +1,7 @@
|
||||
import os
|
||||
import logging
|
||||
from logging import Logger
|
||||
import asyncio
|
||||
|
||||
from pydub import AudioSegment
|
||||
import speech_recognition as sr
|
||||
@ -10,19 +11,19 @@ from speech_recognition.audio import AudioData
|
||||
# Configure logging
|
||||
logger: Logger = logging.getLogger(__name__)
|
||||
|
||||
def convert_to_wav(file_path: str) -> str:
|
||||
async def convert_to_wav(file_path: str) -> str:
|
||||
"""
|
||||
Converts an audio file to WAV format if it is not already in WAV format.
|
||||
|
||||
Args:
|
||||
file_path (str): The path to the audio file to be converted.
|
||||
file_path (`str`): The path to the audio file to be converted.
|
||||
|
||||
Returns:
|
||||
str: The path to the converted or original WAV file.
|
||||
`str`: The path to the converted or original WAV file.
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: If the file does not exist.
|
||||
RuntimeError: If the conversion fails for any reason.
|
||||
`FileNotFoundError`: If the file does not exist.
|
||||
`RuntimeError`: If the conversion fails for any reason.
|
||||
"""
|
||||
# Check if the file exists
|
||||
if not os.path.exists(file_path):
|
||||
@ -35,28 +36,28 @@ def convert_to_wav(file_path: str) -> str:
|
||||
|
||||
try:
|
||||
logger.info(f"Converting {file_path} to WAV format.")
|
||||
audio = AudioSegment.from_file(file_path)
|
||||
audio = await asyncio.to_thread(AudioSegment.from_file, file_path)
|
||||
wav_path: str = f"{os.path.splitext(file_path)[0]}.wav"
|
||||
audio.export(wav_path, format="wav")
|
||||
await asyncio.to_thread(audio.export, wav_path, format="wav")
|
||||
logger.info(f"File converted to {wav_path}.")
|
||||
return wav_path
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to convert file to WAV: {e}")
|
||||
raise RuntimeError(f"Failed to convert file to WAV: {e}")
|
||||
|
||||
def get_audio_duration(file_path: str) -> float:
|
||||
async def get_audio_duration(file_path: str) -> float:
|
||||
"""
|
||||
Retrieves the duration of an audio file in seconds.
|
||||
|
||||
Args:
|
||||
file_path (str): The path to the audio file.
|
||||
file_path (`str`): The path to the audio file.
|
||||
|
||||
Returns:
|
||||
float: The duration of the audio file in seconds.
|
||||
`float`: The duration of the audio file in seconds.
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: If the file does not exist.
|
||||
RuntimeError: If unable to get the file duration.
|
||||
`FileNotFoundError`: If the file does not exist.
|
||||
`RuntimeError`: If unable to get the file duration.
|
||||
"""
|
||||
# Check if the file exists
|
||||
if not os.path.exists(file_path):
|
||||
@ -65,7 +66,7 @@ def get_audio_duration(file_path: str) -> float:
|
||||
|
||||
try:
|
||||
logger.info(f"Getting duration of {file_path}.")
|
||||
audio = AudioSegment.from_file(file_path)
|
||||
audio = await asyncio.to_thread(AudioSegment.from_file, file_path)
|
||||
duration: float = len(audio) / 1000 # Duration in seconds
|
||||
logger.info(f"Duration of {file_path}: {duration} seconds.")
|
||||
return duration
|
||||
@ -73,20 +74,20 @@ def get_audio_duration(file_path: str) -> float:
|
||||
logger.error(f"Failed to get file duration: {e}")
|
||||
raise RuntimeError(f"Failed to get file duration: {e}")
|
||||
|
||||
def convert_voice_to_text(file_path: str, language='ru') -> str:
|
||||
async def convert_voice_to_text(file_path: str, language='ru') -> str:
|
||||
"""
|
||||
Converts speech from an audio file to text using OpenAI's Whisper speech recognition model.
|
||||
|
||||
Args:
|
||||
file_path (str): The path to the audio file to be processed.
|
||||
language (str): The language code for speech recognition (default is 'ru').
|
||||
file_path (`str`): The path to the audio file to be processed.
|
||||
language (`str`): The language code for speech recognition (default is 'ru').
|
||||
|
||||
Returns:
|
||||
str: The transcribed text if recognition is successful.
|
||||
`str`: The transcribed text if recognition is successful.
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: If the file does not exist.
|
||||
RuntimeError: For any errors encountered during processing.
|
||||
`FileNotFoundError`: If the file does not exist.
|
||||
`RuntimeError`: For any errors encountered during processing.
|
||||
"""
|
||||
# Check if the file exists
|
||||
if not os.path.exists(file_path):
|
||||
@ -95,7 +96,7 @@ def convert_voice_to_text(file_path: str, language='ru') -> str:
|
||||
|
||||
# Convert the file to WAV format if necessary
|
||||
try:
|
||||
wav_path: str = convert_to_wav(file_path)
|
||||
wav_path: str = await convert_to_wav(file_path)
|
||||
except RuntimeError as e:
|
||||
logger.error(f"Error converting to WAV: {e}")
|
||||
raise RuntimeError(f"Error converting to WAV: {e}")
|
||||
@ -103,10 +104,13 @@ def convert_voice_to_text(file_path: str, language='ru') -> str:
|
||||
recognizer = sr.Recognizer()
|
||||
|
||||
try:
|
||||
logger.info(f"Processing file {wav_path} ({get_audio_duration(wav_path)} sec) for speech recognition.")
|
||||
duration: float = await get_audio_duration(wav_path)
|
||||
logger.info(f"Processing file {wav_path} ({duration} sec) for speech recognition.")
|
||||
with sr.AudioFile(wav_path) as source:
|
||||
audio_data: AudioData = recognizer.record(source)
|
||||
text = recognizer.recognize_whisper(audio_data, language=language, model='medium')
|
||||
audio_data: AudioData = await asyncio.to_thread(recognizer.record, source)
|
||||
text = await asyncio.to_thread(
|
||||
recognizer.recognize_whisper, audio_data, language=language, model='medium'
|
||||
)
|
||||
logger.info("Speech recognition successful.")
|
||||
return text # type: ignore
|
||||
except sr.UnknownValueError:
|
||||
|
@ -12,7 +12,7 @@ def setup_logging(output_to_console=False) -> None:
|
||||
log files for a week.
|
||||
|
||||
Args:
|
||||
output_to_console (bool): If True, log messages will also be printed to the console.
|
||||
output_to_console (`bool`): If True, log messages will also be printed to the console.
|
||||
Defaults to False.
|
||||
"""
|
||||
# Define the default handlers to use. Always logs to a file.
|
||||
|
@ -1,6 +1,7 @@
|
||||
import os
|
||||
import logging
|
||||
from logging import Logger
|
||||
import asyncio
|
||||
|
||||
from moviepy import VideoFileClip
|
||||
|
||||
@ -8,26 +9,26 @@ from moviepy import VideoFileClip
|
||||
# Configure logging
|
||||
logger: Logger = logging.getLogger(__name__)
|
||||
|
||||
def get_video_duration(video_path: str) -> float:
|
||||
async def get_video_duration(video_path: str) -> float:
|
||||
"""
|
||||
Get the duration of a video file in seconds.
|
||||
|
||||
Args:
|
||||
video_path (str): The path to the video file.
|
||||
video_path (`str`): The path to the video file.
|
||||
|
||||
Returns:
|
||||
float: The duration of the video in seconds.
|
||||
`float`: The duration of the video in seconds.
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: If the video file does not exist.
|
||||
RuntimeError: If an error occurs during processing.
|
||||
`FileNotFoundError`: If the video file does not exist.
|
||||
`RuntimeError`: If an error occurs during processing.
|
||||
"""
|
||||
if not os.path.exists(video_path):
|
||||
logger.error(f"Video file {video_path} does not exist.")
|
||||
raise FileNotFoundError(f"Video file {video_path} does not exist.")
|
||||
|
||||
try:
|
||||
video_clip = VideoFileClip(video_path)
|
||||
video_clip: VideoFileClip = await asyncio.to_thread(VideoFileClip, video_path)
|
||||
duration = video_clip.duration
|
||||
logger.info(f"Duration of video {video_path}: {duration} seconds.")
|
||||
return duration
|
||||
@ -35,23 +36,22 @@ def get_video_duration(video_path: str) -> float:
|
||||
logger.error(f"Failed to get video duration: {e}", exc_info=True)
|
||||
raise RuntimeError(f"Failed to get video duration: {e}")
|
||||
finally:
|
||||
video_clip.close()
|
||||
await asyncio.to_thread(video_clip.close)
|
||||
|
||||
|
||||
def extract_audio_from_video(video_path: str, output_dir: str) -> str:
|
||||
async def extract_audio_from_video(video_path: str, output_dir: str) -> str:
|
||||
"""
|
||||
Extracts the audio track from a video file and saves it as a WAV file.
|
||||
|
||||
Args:
|
||||
video_path (str): The path to the video file.
|
||||
output_dir (str): The directory where the audio file will be saved.
|
||||
video_path (`str`): The path to the video file.
|
||||
output_dir (`str`): The directory where the audio file will be saved.
|
||||
|
||||
Returns:
|
||||
str: The path to the extracted audio file.
|
||||
`str`: The path to the extracted audio file.
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: If the video file does not exist.
|
||||
RuntimeError: If an error occurs during audio extraction.
|
||||
`FileNotFoundError`: If the video file does not exist.
|
||||
`RuntimeError`: If an error occurs during audio extraction.
|
||||
"""
|
||||
if not os.path.exists(video_path):
|
||||
logger.error(f"Video file {video_path} does not exist.")
|
||||
@ -59,13 +59,13 @@ def extract_audio_from_video(video_path: str, output_dir: str) -> str:
|
||||
|
||||
try:
|
||||
logger.info(f"Extracting audio from video: {video_path}")
|
||||
video_clip = VideoFileClip(video_path)
|
||||
audio_path = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(video_path))[0]}.wav")
|
||||
video_clip.audio.write_audiofile(audio_path) # type: ignore
|
||||
video_clip: VideoFileClip = await asyncio.to_thread(VideoFileClip, video_path)
|
||||
audio_path: str = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(video_path))[0]}.wav")
|
||||
await asyncio.to_thread(video_clip.audio.write_audiofile, audio_path) # type: ignore
|
||||
logger.info(f"Audio extracted and saved to: {audio_path}")
|
||||
return audio_path
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to extract audio from video: {e}", exc_info=True)
|
||||
raise RuntimeError(f"Failed to extract audio from video: {e}")
|
||||
finally:
|
||||
video_clip.close()
|
||||
await asyncio.to_thread(video_clip.close)
|
||||
|
Loading…
Reference in New Issue
Block a user