feat: add async to video and audio processing
This commit is contained in:
parent
942abb2375
commit
55f637a3b2
@ -1,6 +1,6 @@
|
|||||||
import os
|
import os
|
||||||
from logging import Logger
|
from logging import Logger
|
||||||
from tempfile import NamedTemporaryFile
|
import tempfile
|
||||||
|
|
||||||
from pyrogram import filters
|
from pyrogram import filters
|
||||||
from pyrogram.filters import Filter
|
from pyrogram.filters import Filter
|
||||||
@ -108,7 +108,7 @@ class VideoCommandHandler(AbstractCommandHandler):
|
|||||||
"Processing video message to extract text...", quote=True
|
"Processing video message to extract text...", quote=True
|
||||||
)
|
)
|
||||||
|
|
||||||
with NamedTemporaryFile(delete=False) as temp_video_file:
|
with tempfile.NamedTemporaryFile(delete=False) as temp_video_file:
|
||||||
video_file_path = await client.download_media(
|
video_file_path = await client.download_media(
|
||||||
media.file_id,
|
media.file_id,
|
||||||
file_name=temp_video_file.name
|
file_name=temp_video_file.name
|
||||||
@ -117,7 +117,7 @@ class VideoCommandHandler(AbstractCommandHandler):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# Validate video duration
|
# Validate video duration
|
||||||
video_duration: float = video_processing.get_video_duration(video_file_path) # type: ignore
|
video_duration: float = await video_processing.get_video_duration(video_file_path) # type: ignore
|
||||||
if video_duration > self.MAX_DURATION:
|
if video_duration > self.MAX_DURATION:
|
||||||
self.logger.warning(f"{media_type} too long: {video_duration} seconds.")
|
self.logger.warning(f"{media_type} too long: {video_duration} seconds.")
|
||||||
await processing_message.edit_text(
|
await processing_message.edit_text(
|
||||||
@ -129,8 +129,8 @@ class VideoCommandHandler(AbstractCommandHandler):
|
|||||||
|
|
||||||
# Extract audio and convert it to text
|
# Extract audio and convert it to text
|
||||||
output_dir = os.path.dirname(video_file_path) # type: ignore
|
output_dir = os.path.dirname(video_file_path) # type: ignore
|
||||||
audio_file_path: str = video_processing.extract_audio_from_video(video_file_path, output_dir) # type: ignore
|
audio_file_path: str = await video_processing.extract_audio_from_video(video_file_path, output_dir) # type: ignore
|
||||||
extracted_text: str = audio_processing.convert_voice_to_text(audio_file_path, language=language_code) # type: ignore
|
extracted_text: str = await audio_processing.convert_voice_to_text(audio_file_path, language=language_code) # type: ignore
|
||||||
self.logger.info(f"{media_type} message successfully converted to text.")
|
self.logger.info(f"{media_type} message successfully converted to text.")
|
||||||
|
|
||||||
response_text: str = (
|
response_text: str = (
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
import os
|
import os
|
||||||
from logging import Logger
|
from logging import Logger
|
||||||
from tempfile import NamedTemporaryFile
|
import tempfile
|
||||||
|
|
||||||
from pyrogram import filters
|
from pyrogram import filters
|
||||||
from pyrogram.filters import Filter
|
from pyrogram.filters import Filter
|
||||||
@ -101,7 +101,7 @@ class VoiceCommandHandler(AbstractCommandHandler):
|
|||||||
"Converting voice message to text...", quote=True
|
"Converting voice message to text...", quote=True
|
||||||
)
|
)
|
||||||
|
|
||||||
with NamedTemporaryFile(delete=False) as temp_audio_file:
|
with tempfile.NamedTemporaryFile(delete=False) as temp_audio_file:
|
||||||
audio_file_path = await client.download_media(
|
audio_file_path = await client.download_media(
|
||||||
message.reply_to_message.voice.file_id,
|
message.reply_to_message.voice.file_id,
|
||||||
file_name=temp_audio_file.name
|
file_name=temp_audio_file.name
|
||||||
@ -110,7 +110,7 @@ class VoiceCommandHandler(AbstractCommandHandler):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# Validate voice message duration
|
# Validate voice message duration
|
||||||
voice_duration: float = audio_processing.get_audio_duration(audio_file_path) # type: ignore
|
voice_duration: float = await audio_processing.get_audio_duration(audio_file_path)
|
||||||
if voice_duration > self.MAX_DURATION:
|
if voice_duration > self.MAX_DURATION:
|
||||||
self.logger.warning(f"Voice message too long: {voice_duration} seconds.")
|
self.logger.warning(f"Voice message too long: {voice_duration} seconds.")
|
||||||
await processing_message.edit_text(
|
await processing_message.edit_text(
|
||||||
@ -119,7 +119,7 @@ class VoiceCommandHandler(AbstractCommandHandler):
|
|||||||
return
|
return
|
||||||
|
|
||||||
await client.send_chat_action(message.chat.id, ChatAction.TYPING)
|
await client.send_chat_action(message.chat.id, ChatAction.TYPING)
|
||||||
extracted_text: str = audio_processing.convert_voice_to_text(audio_file_path, language=language_code) # type: ignore
|
extracted_text: str = await audio_processing.convert_voice_to_text(audio_file_path, language=language_code) # type: ignore
|
||||||
self.logger.info("Voice message successfully converted to text.")
|
self.logger.info("Voice message successfully converted to text.")
|
||||||
|
|
||||||
response_text: str = (
|
response_text: str = (
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
from logging import Logger
|
from logging import Logger
|
||||||
|
import asyncio
|
||||||
|
|
||||||
from pydub import AudioSegment
|
from pydub import AudioSegment
|
||||||
import speech_recognition as sr
|
import speech_recognition as sr
|
||||||
@ -10,19 +11,19 @@ from speech_recognition.audio import AudioData
|
|||||||
# Configure logging
|
# Configure logging
|
||||||
logger: Logger = logging.getLogger(__name__)
|
logger: Logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
def convert_to_wav(file_path: str) -> str:
|
async def convert_to_wav(file_path: str) -> str:
|
||||||
"""
|
"""
|
||||||
Converts an audio file to WAV format if it is not already in WAV format.
|
Converts an audio file to WAV format if it is not already in WAV format.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file_path (str): The path to the audio file to be converted.
|
file_path (`str`): The path to the audio file to be converted.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: The path to the converted or original WAV file.
|
`str`: The path to the converted or original WAV file.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
FileNotFoundError: If the file does not exist.
|
`FileNotFoundError`: If the file does not exist.
|
||||||
RuntimeError: If the conversion fails for any reason.
|
`RuntimeError`: If the conversion fails for any reason.
|
||||||
"""
|
"""
|
||||||
# Check if the file exists
|
# Check if the file exists
|
||||||
if not os.path.exists(file_path):
|
if not os.path.exists(file_path):
|
||||||
@ -35,28 +36,28 @@ def convert_to_wav(file_path: str) -> str:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
logger.info(f"Converting {file_path} to WAV format.")
|
logger.info(f"Converting {file_path} to WAV format.")
|
||||||
audio = AudioSegment.from_file(file_path)
|
audio = await asyncio.to_thread(AudioSegment.from_file, file_path)
|
||||||
wav_path: str = f"{os.path.splitext(file_path)[0]}.wav"
|
wav_path: str = f"{os.path.splitext(file_path)[0]}.wav"
|
||||||
audio.export(wav_path, format="wav")
|
await asyncio.to_thread(audio.export, wav_path, format="wav")
|
||||||
logger.info(f"File converted to {wav_path}.")
|
logger.info(f"File converted to {wav_path}.")
|
||||||
return wav_path
|
return wav_path
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to convert file to WAV: {e}")
|
logger.error(f"Failed to convert file to WAV: {e}")
|
||||||
raise RuntimeError(f"Failed to convert file to WAV: {e}")
|
raise RuntimeError(f"Failed to convert file to WAV: {e}")
|
||||||
|
|
||||||
def get_audio_duration(file_path: str) -> float:
|
async def get_audio_duration(file_path: str) -> float:
|
||||||
"""
|
"""
|
||||||
Retrieves the duration of an audio file in seconds.
|
Retrieves the duration of an audio file in seconds.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file_path (str): The path to the audio file.
|
file_path (`str`): The path to the audio file.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
float: The duration of the audio file in seconds.
|
`float`: The duration of the audio file in seconds.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
FileNotFoundError: If the file does not exist.
|
`FileNotFoundError`: If the file does not exist.
|
||||||
RuntimeError: If unable to get the file duration.
|
`RuntimeError`: If unable to get the file duration.
|
||||||
"""
|
"""
|
||||||
# Check if the file exists
|
# Check if the file exists
|
||||||
if not os.path.exists(file_path):
|
if not os.path.exists(file_path):
|
||||||
@ -65,7 +66,7 @@ def get_audio_duration(file_path: str) -> float:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
logger.info(f"Getting duration of {file_path}.")
|
logger.info(f"Getting duration of {file_path}.")
|
||||||
audio = AudioSegment.from_file(file_path)
|
audio = await asyncio.to_thread(AudioSegment.from_file, file_path)
|
||||||
duration: float = len(audio) / 1000 # Duration in seconds
|
duration: float = len(audio) / 1000 # Duration in seconds
|
||||||
logger.info(f"Duration of {file_path}: {duration} seconds.")
|
logger.info(f"Duration of {file_path}: {duration} seconds.")
|
||||||
return duration
|
return duration
|
||||||
@ -73,20 +74,20 @@ def get_audio_duration(file_path: str) -> float:
|
|||||||
logger.error(f"Failed to get file duration: {e}")
|
logger.error(f"Failed to get file duration: {e}")
|
||||||
raise RuntimeError(f"Failed to get file duration: {e}")
|
raise RuntimeError(f"Failed to get file duration: {e}")
|
||||||
|
|
||||||
def convert_voice_to_text(file_path: str, language='ru') -> str:
|
async def convert_voice_to_text(file_path: str, language='ru') -> str:
|
||||||
"""
|
"""
|
||||||
Converts speech from an audio file to text using OpenAI's Whisper speech recognition model.
|
Converts speech from an audio file to text using OpenAI's Whisper speech recognition model.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file_path (str): The path to the audio file to be processed.
|
file_path (`str`): The path to the audio file to be processed.
|
||||||
language (str): The language code for speech recognition (default is 'ru').
|
language (`str`): The language code for speech recognition (default is 'ru').
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: The transcribed text if recognition is successful.
|
`str`: The transcribed text if recognition is successful.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
FileNotFoundError: If the file does not exist.
|
`FileNotFoundError`: If the file does not exist.
|
||||||
RuntimeError: For any errors encountered during processing.
|
`RuntimeError`: For any errors encountered during processing.
|
||||||
"""
|
"""
|
||||||
# Check if the file exists
|
# Check if the file exists
|
||||||
if not os.path.exists(file_path):
|
if not os.path.exists(file_path):
|
||||||
@ -95,7 +96,7 @@ def convert_voice_to_text(file_path: str, language='ru') -> str:
|
|||||||
|
|
||||||
# Convert the file to WAV format if necessary
|
# Convert the file to WAV format if necessary
|
||||||
try:
|
try:
|
||||||
wav_path: str = convert_to_wav(file_path)
|
wav_path: str = await convert_to_wav(file_path)
|
||||||
except RuntimeError as e:
|
except RuntimeError as e:
|
||||||
logger.error(f"Error converting to WAV: {e}")
|
logger.error(f"Error converting to WAV: {e}")
|
||||||
raise RuntimeError(f"Error converting to WAV: {e}")
|
raise RuntimeError(f"Error converting to WAV: {e}")
|
||||||
@ -103,10 +104,13 @@ def convert_voice_to_text(file_path: str, language='ru') -> str:
|
|||||||
recognizer = sr.Recognizer()
|
recognizer = sr.Recognizer()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
logger.info(f"Processing file {wav_path} ({get_audio_duration(wav_path)} sec) for speech recognition.")
|
duration: float = await get_audio_duration(wav_path)
|
||||||
|
logger.info(f"Processing file {wav_path} ({duration} sec) for speech recognition.")
|
||||||
with sr.AudioFile(wav_path) as source:
|
with sr.AudioFile(wav_path) as source:
|
||||||
audio_data: AudioData = recognizer.record(source)
|
audio_data: AudioData = await asyncio.to_thread(recognizer.record, source)
|
||||||
text = recognizer.recognize_whisper(audio_data, language=language, model='medium')
|
text = await asyncio.to_thread(
|
||||||
|
recognizer.recognize_whisper, audio_data, language=language, model='medium'
|
||||||
|
)
|
||||||
logger.info("Speech recognition successful.")
|
logger.info("Speech recognition successful.")
|
||||||
return text # type: ignore
|
return text # type: ignore
|
||||||
except sr.UnknownValueError:
|
except sr.UnknownValueError:
|
||||||
|
@ -12,8 +12,8 @@ def setup_logging(output_to_console=False) -> None:
|
|||||||
log files for a week.
|
log files for a week.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
output_to_console (bool): If True, log messages will also be printed to the console.
|
output_to_console (`bool`): If True, log messages will also be printed to the console.
|
||||||
Defaults to False.
|
Defaults to False.
|
||||||
"""
|
"""
|
||||||
# Define the default handlers to use. Always logs to a file.
|
# Define the default handlers to use. Always logs to a file.
|
||||||
handlers: list[str] = ['file']
|
handlers: list[str] = ['file']
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
from logging import Logger
|
from logging import Logger
|
||||||
|
import asyncio
|
||||||
|
|
||||||
from moviepy import VideoFileClip
|
from moviepy import VideoFileClip
|
||||||
|
|
||||||
@ -8,26 +9,26 @@ from moviepy import VideoFileClip
|
|||||||
# Configure logging
|
# Configure logging
|
||||||
logger: Logger = logging.getLogger(__name__)
|
logger: Logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
def get_video_duration(video_path: str) -> float:
|
async def get_video_duration(video_path: str) -> float:
|
||||||
"""
|
"""
|
||||||
Get the duration of a video file in seconds.
|
Get the duration of a video file in seconds.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
video_path (str): The path to the video file.
|
video_path (`str`): The path to the video file.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
float: The duration of the video in seconds.
|
`float`: The duration of the video in seconds.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
FileNotFoundError: If the video file does not exist.
|
`FileNotFoundError`: If the video file does not exist.
|
||||||
RuntimeError: If an error occurs during processing.
|
`RuntimeError`: If an error occurs during processing.
|
||||||
"""
|
"""
|
||||||
if not os.path.exists(video_path):
|
if not os.path.exists(video_path):
|
||||||
logger.error(f"Video file {video_path} does not exist.")
|
logger.error(f"Video file {video_path} does not exist.")
|
||||||
raise FileNotFoundError(f"Video file {video_path} does not exist.")
|
raise FileNotFoundError(f"Video file {video_path} does not exist.")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
video_clip = VideoFileClip(video_path)
|
video_clip: VideoFileClip = await asyncio.to_thread(VideoFileClip, video_path)
|
||||||
duration = video_clip.duration
|
duration = video_clip.duration
|
||||||
logger.info(f"Duration of video {video_path}: {duration} seconds.")
|
logger.info(f"Duration of video {video_path}: {duration} seconds.")
|
||||||
return duration
|
return duration
|
||||||
@ -35,23 +36,22 @@ def get_video_duration(video_path: str) -> float:
|
|||||||
logger.error(f"Failed to get video duration: {e}", exc_info=True)
|
logger.error(f"Failed to get video duration: {e}", exc_info=True)
|
||||||
raise RuntimeError(f"Failed to get video duration: {e}")
|
raise RuntimeError(f"Failed to get video duration: {e}")
|
||||||
finally:
|
finally:
|
||||||
video_clip.close()
|
await asyncio.to_thread(video_clip.close)
|
||||||
|
|
||||||
|
async def extract_audio_from_video(video_path: str, output_dir: str) -> str:
|
||||||
def extract_audio_from_video(video_path: str, output_dir: str) -> str:
|
|
||||||
"""
|
"""
|
||||||
Extracts the audio track from a video file and saves it as a WAV file.
|
Extracts the audio track from a video file and saves it as a WAV file.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
video_path (str): The path to the video file.
|
video_path (`str`): The path to the video file.
|
||||||
output_dir (str): The directory where the audio file will be saved.
|
output_dir (`str`): The directory where the audio file will be saved.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: The path to the extracted audio file.
|
`str`: The path to the extracted audio file.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
FileNotFoundError: If the video file does not exist.
|
`FileNotFoundError`: If the video file does not exist.
|
||||||
RuntimeError: If an error occurs during audio extraction.
|
`RuntimeError`: If an error occurs during audio extraction.
|
||||||
"""
|
"""
|
||||||
if not os.path.exists(video_path):
|
if not os.path.exists(video_path):
|
||||||
logger.error(f"Video file {video_path} does not exist.")
|
logger.error(f"Video file {video_path} does not exist.")
|
||||||
@ -59,13 +59,13 @@ def extract_audio_from_video(video_path: str, output_dir: str) -> str:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
logger.info(f"Extracting audio from video: {video_path}")
|
logger.info(f"Extracting audio from video: {video_path}")
|
||||||
video_clip = VideoFileClip(video_path)
|
video_clip: VideoFileClip = await asyncio.to_thread(VideoFileClip, video_path)
|
||||||
audio_path = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(video_path))[0]}.wav")
|
audio_path: str = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(video_path))[0]}.wav")
|
||||||
video_clip.audio.write_audiofile(audio_path) # type: ignore
|
await asyncio.to_thread(video_clip.audio.write_audiofile, audio_path) # type: ignore
|
||||||
logger.info(f"Audio extracted and saved to: {audio_path}")
|
logger.info(f"Audio extracted and saved to: {audio_path}")
|
||||||
return audio_path
|
return audio_path
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to extract audio from video: {e}", exc_info=True)
|
logger.error(f"Failed to extract audio from video: {e}", exc_info=True)
|
||||||
raise RuntimeError(f"Failed to extract audio from video: {e}")
|
raise RuntimeError(f"Failed to extract audio from video: {e}")
|
||||||
finally:
|
finally:
|
||||||
video_clip.close()
|
await asyncio.to_thread(video_clip.close)
|
||||||
|
Loading…
Reference in New Issue
Block a user