feat: add Google translate
This commit is contained in:
parent
e6dfafb8c7
commit
d24b50c2fa
15
main.py
15
main.py
@ -1,6 +1,10 @@
|
||||
import logging
|
||||
from logging import Logger
|
||||
|
||||
from src.utils import logging_configuration
|
||||
from src.integrations.gigachat_api_client import GigaChatClient
|
||||
from src.integrations.google_translate_client import GoogleTranslateClient
|
||||
from src.bot.telegram_userbot import TelegramUserBot
|
||||
from src.utils import logging
|
||||
from src.core.configuration import config
|
||||
|
||||
|
||||
@ -9,22 +13,25 @@ def main() -> None:
|
||||
Entry point for starting the Telegram user bot.
|
||||
"""
|
||||
# Configure logging
|
||||
logging.setup_logging()
|
||||
logging_configuration.setup_logging()
|
||||
logger: Logger = logging.getLogger(__name__)
|
||||
|
||||
# Load API credentials and configuration
|
||||
api_id: str = config.API_ID
|
||||
api_hash: str = config.API_HASH
|
||||
api_token: str = config.API_GIGACHAT_TOKEN
|
||||
|
||||
# Initialize GigaChatClient
|
||||
# Initialize services
|
||||
gigachat_client: GigaChatClient = GigaChatClient(api_token=api_token)
|
||||
translate_client: GoogleTranslateClient = GoogleTranslateClient(logger)
|
||||
|
||||
# Initialize and run the Telegram user bot
|
||||
bot: TelegramUserBot = TelegramUserBot(
|
||||
session_name="userbot",
|
||||
api_id=api_id,
|
||||
api_hash=api_hash,
|
||||
gigachat_client=gigachat_client
|
||||
gigachat_client=gigachat_client,
|
||||
translate_client=translate_client
|
||||
)
|
||||
bot.run()
|
||||
|
||||
|
57
poetry.lock
generated
57
poetry.lock
generated
@ -357,6 +357,23 @@ files = [
|
||||
httpx = "<1"
|
||||
pydantic = ">=1"
|
||||
|
||||
[[package]]
|
||||
name = "googletrans"
|
||||
version = "4.0.2"
|
||||
description = "An unofficial Google Translate API for Python"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "googletrans-4.0.2-py3-none-any.whl", hash = "sha256:19e4fbbf7463e0cf4cd8f03479372910368730ac13dfb023fed6db58fd093547"},
|
||||
{file = "googletrans-4.0.2.tar.gz", hash = "sha256:d9ef126b5d92fabeec0bb9ddcdbeecd43865fc00e17f1dfa07717837827a17de"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
httpx = {version = ">=0.27.2", extras = ["http2"]}
|
||||
|
||||
[package.extras]
|
||||
dev = ["pytest", "pytest-asyncio", "pytest-cov", "ruff (>=0.7)"]
|
||||
|
||||
[[package]]
|
||||
name = "h11"
|
||||
version = "0.14.0"
|
||||
@ -368,6 +385,32 @@ files = [
|
||||
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "h2"
|
||||
version = "4.1.0"
|
||||
description = "HTTP/2 State-Machine based protocol implementation"
|
||||
optional = false
|
||||
python-versions = ">=3.6.1"
|
||||
files = [
|
||||
{file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"},
|
||||
{file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
hpack = ">=4.0,<5"
|
||||
hyperframe = ">=6.0,<7"
|
||||
|
||||
[[package]]
|
||||
name = "hpack"
|
||||
version = "4.0.0"
|
||||
description = "Pure-Python HPACK header compression"
|
||||
optional = false
|
||||
python-versions = ">=3.6.1"
|
||||
files = [
|
||||
{file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"},
|
||||
{file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "httpcore"
|
||||
version = "1.0.7"
|
||||
@ -403,6 +446,7 @@ files = [
|
||||
[package.dependencies]
|
||||
anyio = "*"
|
||||
certifi = "*"
|
||||
h2 = {version = ">=3,<5", optional = true, markers = "extra == \"http2\""}
|
||||
httpcore = "==1.*"
|
||||
idna = "*"
|
||||
|
||||
@ -413,6 +457,17 @@ http2 = ["h2 (>=3,<5)"]
|
||||
socks = ["socksio (==1.*)"]
|
||||
zstd = ["zstandard (>=0.18.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "hyperframe"
|
||||
version = "6.0.1"
|
||||
description = "HTTP/2 framing layer for Python"
|
||||
optional = false
|
||||
python-versions = ">=3.6.1"
|
||||
files = [
|
||||
{file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"},
|
||||
{file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "3.10"
|
||||
@ -2091,4 +2146,4 @@ test = ["pytest (>=6.0.0)", "setuptools (>=65)"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.12"
|
||||
content-hash = "63b67bba63099fa49496a404a896eacea5838f9bdeee5458e8c18cdece481273"
|
||||
content-hash = "87f57e9572ea0a8daabc9ce352fc92ad32d8ecdd936d6f2a39ecec422dc88873"
|
||||
|
@ -24,6 +24,7 @@ llvmlite = "0.43.0"
|
||||
numba = "0.60.0"
|
||||
openai-whisper = "^20240930"
|
||||
moviepy = "^2.1.1"
|
||||
googletrans = "^4.0.2"
|
||||
|
||||
|
||||
[build-system]
|
||||
|
@ -2,3 +2,4 @@ from src.bot.handlers.abstract_command_handler import AbstractCommandHandler
|
||||
from src.bot.handlers.ai_command_handler import AICommandHandler
|
||||
from src.bot.handlers.voice_command_handler import VoiceCommandHandler
|
||||
from src.bot.handlers.video_command_handler import VideoCommandHandler
|
||||
from src.bot.handlers.translate_command_handler import TranslateCommandHandler
|
||||
|
217
src/bot/handlers/translate_command_handler.py
Normal file
217
src/bot/handlers/translate_command_handler.py
Normal file
@ -0,0 +1,217 @@
|
||||
import re
|
||||
from re import Match
|
||||
from logging import Logger
|
||||
|
||||
from pyrogram import filters
|
||||
from pyrogram.filters import Filter
|
||||
from pyrogram.client import Client
|
||||
from pyrogram.types import Message
|
||||
from pyrogram.enums import ChatAction
|
||||
from googletrans.models import Translated
|
||||
|
||||
from src.bot.handlers import AbstractCommandHandler
|
||||
from src.integrations.google_translate_client import GoogleTranslateClient
|
||||
|
||||
|
||||
class TranslateCommandHandler(AbstractCommandHandler):
    """
    Command handler for the /translate command in a Pyrogram bot.

    This handler translates text from one language to another using Google Translate.
    The text is taken from the command message itself or, when the command carries
    no text, from the message it replies to. Optional ``src=``/``source=`` and
    ``dest=``/``destination=`` options select the languages.

    Attributes:
        COMMAND (`str`): The name of the command that this handler handles.
        LANGUAGE_ALIASES (`dict`): Mapping of language codes to their aliases.
        DEFAULT_SOURCE_LANGUAGE (`str`): Default source language for translation ("auto" for auto-detection).
        DEFAULT_DESTINATION_LANGUAGE (`str`): Default destination language code for translation.
        logger (`Logger`): Logger instance for logging.
        translate_client (`GoogleTranslateClient`): Client for interacting with Google Translate.
    """

    # Mapping of language codes to their aliases
    # NOTE(review): confirm every key is a code the installed googletrans accepts
    # (it presumably lists Chinese as "zh-cn"/"zh-tw" rather than "zh").
    LANGUAGE_ALIASES: dict[str, list[str]] = {
        "ru": ["ru", "rus", "russian"],
        "en": ["en", "eng", "english"],
        "es": ["es", "spa", "spanish"],
        "de": ["de", "ger", "german"],
        "fr": ["fr", "fra", "french"],
        "pt": ["pt", "por", "portuguese"],
        "it": ["it", "ita", "italian"],
        "zh": ["zh", "chi", "chinese"],
        "ja": ["ja", "jpn", "japanese"],
        "ko": ["ko", "kor", "korean"],
        "ar": ["ar", "ara", "arabic"],
        "tr": ["tr", "tur", "turkish"],
        "hi": ["hi", "hin", "hindi"],
        "vi": ["vi", "vie", "vietnamese"],
        "sv": ["sv", "swe", "swedish"],
        "no": ["no", "nor", "norwegian"],
        "da": ["da", "dan", "danish"],
        "fi": ["fi", "fin", "finnish"],
        "cs": ["cs", "cze", "czech"],
        "sk": ["sk", "slo", "slovak"],
        "ro": ["ro", "rum", "romanian"],
        "bg": ["bg", "bul", "bulgarian"],
        "uk": ["uk", "ukr", "ukrainian"],
        "be": ["be", "bel", "belarusian"],
        "et": ["et", "est", "estonian"],
        "lv": ["lv", "lav", "latvian"],
        "lt": ["lt", "lit", "lithuanian"],
        "tt": ["tt", "tat", "tatar"],
        "cv": ["cv", "chv", "chuvash"],
    }

    # Default source language for translation ("auto" for auto-detection)
    DEFAULT_SOURCE_LANGUAGE: str = "auto"

    # Default destination language code for translation
    DEFAULT_DESTINATION_LANGUAGE: str = "ru"

    # Option patterns. The leading \b keeps them from matching inside a longer
    # word (e.g. "resource=x" must not be read as "source=x").
    _SOURCE_OPTION: re.Pattern[str] = re.compile(r"\b(?:src|source)=(\w+)")
    _DESTINATION_OPTION: re.Pattern[str] = re.compile(r"\b(?:dest|destination)=(\w+)")
    _ANY_OPTION: re.Pattern[str] = re.compile(r"\b(?:src|source|dest|destination)=\w+")

    def __init__(self, logger: Logger, translate_client: GoogleTranslateClient) -> None:
        """
        Initializes the TranslateCommandHandler.

        Args:
            logger (`Logger`): Logger instance for logging events.
            translate_client (`GoogleTranslateClient`): Client for interacting with Google Translate.
        """
        self.logger: Logger = logger
        self.translate_client: GoogleTranslateClient = translate_client
        self.logger.info("TranslateCommandHandler initialized successfully.")

    @property
    def COMMAND(self) -> str:
        """
        The name of the command that this handler handles.
        """
        return "translate"

    def get_filters(self) -> Filter:
        """
        Returns the filter for the /translate command.

        Returns:
            `pyrogram.filters.Filter`: A Pyrogram filter matching the /translate command.
        """
        return filters.command(self.COMMAND)

    async def handle(self, client: Client, message: Message) -> None:
        """
        Handles the /translate command.

        Translates a given text or text from a replied-to message from one language to another.
        The user gets a "Translating text..." reply which is then edited in place to
        hold either the translation or an error notice.

        Args:
            client (`pyrogram.client.Client`): The Pyrogram client instance.
            message (`pyrogram.types.Message`): The incoming message object to process.
        """
        # Local import: the escape helper is only needed while building the reply.
        import html

        self.logger.info(
            "Received /%s command from chat_id=%s.", self.COMMAND, message.chat.id
        )

        # The command may arrive as a media caption, in which case `text` is None.
        raw_text: str = message.text or message.caption or ""
        source_language, destination_language, text = self._parse_arguments(raw_text)

        self.logger.debug(
            "Parsed parameters - source_language: %s, destination_language: %s, text length: %s",
            source_language, destination_language, len(text)
        )

        # Resolve language aliases
        try:
            if source_language.lower() == self.DEFAULT_SOURCE_LANGUAGE:
                # "auto" is not an alias; pass it through to the translator as-is.
                source_language = self.DEFAULT_SOURCE_LANGUAGE
            else:
                source_language = self.__resolve_language(source_language)
            destination_language = self.__resolve_language(destination_language)
        except ValueError:
            await message.reply("Invalid language parameter provided.", quote=True)
            self.logger.error("Invalid language parameter provided.", exc_info=True)
            return

        # Use replied message text (or caption) if no text is provided
        replied_message = message.reply_to_message
        if not text and replied_message:
            text = replied_message.text or replied_message.caption or ""

        if not text:
            await message.reply(
                f"Please provide a message after /{self.COMMAND} or reply to a message.",
                quote=True
            )
            self.logger.warning(
                "No argument provided for /%s command in chat_id=%s.", self.COMMAND, message.chat.id
            )
            return

        # Notify the user that the translation is in progress
        processing_message: Message = await message.reply(
            "Translating text...", quote=True
        )

        try:
            # Perform translation
            await client.send_chat_action(message.chat.id, ChatAction.TYPING)
            self.logger.debug("Translating text for chat_id=%s", message.chat.id)
            translation_result: Translated = await self.translate_client.translate_text(
                text=text,
                src_lang=source_language,
                dest_lang=destination_language
            )

            # Formatted response text. The translation is escaped so that "<", ">"
            # and "&" in it are shown literally instead of being parsed as markup.
            caption: str = f"Translated from {translation_result.src} to {translation_result.dest}"
            response_text: str = (
                f"<pre language=\"{caption}\">"
                f"{html.escape(translation_result.text, quote=False)}"
                "</pre>"
            )

            await processing_message.edit_text(response_text)

        except Exception as error:
            # Top-level boundary of the handler: log everything and tell the user.
            self.logger.error(
                "Error processing /%s command for chat_id=%s: %s",
                self.COMMAND, message.chat.id, error,
                exc_info=True
            )
            await processing_message.edit_text(
                "An error occurred during the translation process. Please try again later."
            )
        finally:
            await client.send_chat_action(message.chat.id, ChatAction.CANCEL)

    def _parse_arguments(self, raw_text: str) -> tuple[str, str, str]:
        """
        Splits the raw command message into language options and the text to translate.

        Args:
            raw_text (`str`): Full text of the command message, including the command itself.

        Returns:
            `tuple[str, str, str]`: The requested source language, the requested
            destination language (class defaults when an option is absent) and the
            remaining text with the command and all options removed. The text is an
            empty string when nothing is left.
        """
        # Drop only the leading command token, whatever its letter case; later
        # occurrences of "/translate" belong to the text being translated.
        command_and_rest: list[str] = raw_text.split(maxsplit=1)
        arguments: str = command_and_rest[1] if len(command_and_rest) > 1 else ""

        source_match: Match[str] | None = self._SOURCE_OPTION.search(arguments)
        destination_match: Match[str] | None = self._DESTINATION_OPTION.search(arguments)

        source_language: str = (
            source_match.group(1) if source_match else self.DEFAULT_SOURCE_LANGUAGE
        )
        destination_language: str = (
            destination_match.group(1) if destination_match else self.DEFAULT_DESTINATION_LANGUAGE
        )

        text: str = self._ANY_OPTION.sub("", arguments).strip()
        return source_language, destination_language, text

    def __resolve_language(self, language_input: str) -> str:
        """
        Resolves the language code based on the input text.

        The lookup is case-insensitive and goes through `LANGUAGE_ALIASES`.

        Args:
            language_input (`str`): User-provided language parameter.

        Returns:
            `str`: The resolved language code.

        Raises:
            `ValueError`: If the input does not match any supported language.
        """
        normalized_input: str = language_input.lower()
        for language_code, aliases in self.LANGUAGE_ALIASES.items():
            if normalized_input in aliases:
                return language_code

        self.logger.warning("Invalid language parameter provided: %s", language_input)
        # Exceptions do not apply %-formatting to their arguments, so build the message here.
        raise ValueError(f"Invalid language parameter provided: {language_input}")
|
@ -4,11 +4,12 @@ from logging import Logger
|
||||
from pyrogram.client import Client
|
||||
|
||||
from src.integrations.gigachat_api_client import GigaChatClient
|
||||
|
||||
from src.integrations.google_translate_client import GoogleTranslateClient
|
||||
from src.bot.handlers import AbstractCommandHandler
|
||||
from src.bot.handlers import AICommandHandler
|
||||
from src.bot.handlers import VoiceCommandHandler
|
||||
from src.bot.handlers import VideoCommandHandler
|
||||
from src.bot.handlers import TranslateCommandHandler
|
||||
|
||||
|
||||
class TelegramUserBot:
|
||||
@ -20,7 +21,7 @@ class TelegramUserBot:
|
||||
gigachat_client (`GigaChatClient`): The client instance for GigaChat integration.
|
||||
"""
|
||||
|
||||
def __init__(self, session_name: str, api_id: str, api_hash: str, gigachat_client: GigaChatClient) -> None:
|
||||
def __init__(self, session_name: str, api_id: str, api_hash: str, gigachat_client: GigaChatClient, translate_client: GoogleTranslateClient) -> None:
|
||||
"""
|
||||
Initializes the Telegram user bot.
|
||||
|
||||
@ -40,6 +41,7 @@ class TelegramUserBot:
|
||||
"ai": AICommandHandler(self.logger, gigachat_client),
|
||||
"voice": VoiceCommandHandler(self.logger),
|
||||
"video": VideoCommandHandler(self.logger),
|
||||
"translate": TranslateCommandHandler(self.logger, translate_client),
|
||||
}
|
||||
self.register_handlers()
|
||||
|
||||
|
106
src/integrations/google_translate_client.py
Normal file
106
src/integrations/google_translate_client.py
Normal file
@ -0,0 +1,106 @@
|
||||
from logging import Logger
|
||||
|
||||
from googletrans import Translator, LANGUAGES
|
||||
from googletrans.models import Translated, Detected
|
||||
|
||||
|
||||
class GoogleTranslateClient:
    """
    A client for interacting with Google Translate.

    Thin asynchronous wrapper around `googletrans.Translator` that adds logging and
    re-raises any library failure as a `RuntimeError` with the original exception
    attached as its cause.
    """

    def __init__(self, logger: Logger) -> None:
        """
        Initializes the client for interacting with Google Translate.

        Args:
            logger (`logging.Logger`): Logger instance for logging.
        """
        self.logger: Logger = logger
        self.translator: Translator = Translator()
        self.logger.info("Google Translate client initialized successfully.")

    async def get_available_languages(self) -> dict[str, str]:
        """
        Retrieves a list of available languages supported by Google Translate.

        Returns:
            `dict[str, str]`: A dictionary where keys are language codes and values are language names.
        """
        self.logger.info("Retrieving available languages.")
        # Return a copy so callers cannot mutate the library's module-level table.
        return dict(LANGUAGES)

    async def detect_language(self, text: str) -> Detected:
        """
        Detects the language of a given text.

        Args:
            text (`str`): The text for language detection.

        Returns:
            `googletrans.models.Detected`: The detection object containing the detected language and confidence.

        Raises:
            `RuntimeError`: If the underlying detection call fails; the original
            exception is available as `__cause__`.
        """
        try:
            self.logger.info("Detecting language for text.")
            detection: Detected = await self.translator.detect(text)
            self.logger.debug("Detection language completed successfully.")
            return detection
        except Exception as e:
            self.logger.error("Error during language detection: %s", e, exc_info=True)
            raise RuntimeError(f"Error during language detection: {e}") from e

    async def translate_text(
        self,
        text: str,
        dest_lang: str = "ru",
        src_lang: str = "auto"
    ) -> Translated:
        """
        Translates a given text to the target language.

        Args:
            text (`str`): The text to be translated.
            dest_lang (`str`): The target language code (e.g., 'ru' for Russian). Defaults to 'ru'.
            src_lang (`str`): The source language code. Defaults to 'auto' for automatic detection.

        Returns:
            `googletrans.models.Translated`: The translation object containing the translated text and metadata.

        Raises:
            `RuntimeError`: If the underlying translation call fails; the original
            exception is available as `__cause__`.
        """
        try:
            self.logger.info("Translating text to %s from %s.", dest_lang, src_lang)
            # Keywords make the destination/source pairing independent of parameter order.
            translation: Translated = await self.translator.translate(
                text, dest=dest_lang, src=src_lang
            )
            self.logger.info("Translation completed successfully.")
            return translation
        except Exception as e:
            self.logger.error("Error during translation: %s", e, exc_info=True)
            raise RuntimeError(f"Error during translation: {e}") from e

    async def translate_batch(
        self,
        texts: list[str],
        dest_lang: str = "ru",
        src_lang: str = "auto"
    ) -> list[Translated]:
        """
        Translates a list of texts to the target language.

        Args:
            texts (`list[str]`): A list of texts to be translated.
            dest_lang (`str`): The target language code (e.g., 'ru' for Russian). Defaults to 'ru'.
            src_lang (`str`): The source language code. Defaults to 'auto' for automatic detection.

        Returns:
            `list[googletrans.models.Translated]`: A list of translation objects containing the translated texts and metadata.

        Raises:
            `RuntimeError`: If the underlying translation call fails; the original
            exception is available as `__cause__`.
        """
        try:
            self.logger.info("Translating batch of %d texts to %s from %s.", len(texts), dest_lang, src_lang)
            translations: list[Translated] = await self.translator.translate(
                texts, dest=dest_lang, src=src_lang
            )
            self.logger.info("Batch translation completed successfully.")
            return translations
        except Exception as e:
            self.logger.error("Error during batch translation: %s", e, exc_info=True)
            raise RuntimeError(f"Error during batch translation: {e}") from e
|
Loading…
x
Reference in New Issue
Block a user