feat: add Google translate

This commit is contained in:
parent e6dfafb8c7
commit d24b50c2fa
8 changed files with 396 additions and 7 deletions

15
main.py
View File

@ -1,6 +1,10 @@
import logging
from logging import Logger
from src.utils import logging_configuration
from src.integrations.gigachat_api_client import GigaChatClient
from src.integrations.google_translate_client import GoogleTranslateClient
from src.bot.telegram_userbot import TelegramUserBot
from src.utils import logging
from src.core.configuration import config
@ -9,22 +13,25 @@ def main() -> None:
Entry point for starting the Telegram user bot.
"""
# Configure logging
logging.setup_logging()
logging_configuration.setup_logging()
logger: Logger = logging.getLogger(__name__)
# Load API credentials and configuration
api_id: str = config.API_ID
api_hash: str = config.API_HASH
api_token: str = config.API_GIGACHAT_TOKEN
# Initialize GigaChatClient
# Initialize services
gigachat_client: GigaChatClient = GigaChatClient(api_token=api_token)
translate_client: GoogleTranslateClient = GoogleTranslateClient(logger)
# Initialize and run the Telegram user bot
bot: TelegramUserBot = TelegramUserBot(
session_name="userbot",
api_id=api_id,
api_hash=api_hash,
gigachat_client=gigachat_client
gigachat_client=gigachat_client,
translate_client=translate_client
)
bot.run()

57
poetry.lock generated
View File

@ -357,6 +357,23 @@ files = [
httpx = "<1"
pydantic = ">=1"
[[package]]
name = "googletrans"
version = "4.0.2"
description = "An unofficial Google Translate API for Python"
optional = false
python-versions = ">=3.8"
files = [
{file = "googletrans-4.0.2-py3-none-any.whl", hash = "sha256:19e4fbbf7463e0cf4cd8f03479372910368730ac13dfb023fed6db58fd093547"},
{file = "googletrans-4.0.2.tar.gz", hash = "sha256:d9ef126b5d92fabeec0bb9ddcdbeecd43865fc00e17f1dfa07717837827a17de"},
]
[package.dependencies]
httpx = {version = ">=0.27.2", extras = ["http2"]}
[package.extras]
dev = ["pytest", "pytest-asyncio", "pytest-cov", "ruff (>=0.7)"]
[[package]]
name = "h11"
version = "0.14.0"
@ -368,6 +385,32 @@ files = [
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
]
[[package]]
name = "h2"
version = "4.1.0"
description = "HTTP/2 State-Machine based protocol implementation"
optional = false
python-versions = ">=3.6.1"
files = [
{file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"},
{file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"},
]
[package.dependencies]
hpack = ">=4.0,<5"
hyperframe = ">=6.0,<7"
[[package]]
name = "hpack"
version = "4.0.0"
description = "Pure-Python HPACK header compression"
optional = false
python-versions = ">=3.6.1"
files = [
{file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"},
{file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"},
]
[[package]]
name = "httpcore"
version = "1.0.7"
@ -403,6 +446,7 @@ files = [
[package.dependencies]
anyio = "*"
certifi = "*"
h2 = {version = ">=3,<5", optional = true, markers = "extra == \"http2\""}
httpcore = "==1.*"
idna = "*"
@ -413,6 +457,17 @@ http2 = ["h2 (>=3,<5)"]
socks = ["socksio (==1.*)"]
zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "hyperframe"
version = "6.0.1"
description = "HTTP/2 framing layer for Python"
optional = false
python-versions = ">=3.6.1"
files = [
{file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"},
{file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"},
]
[[package]]
name = "idna"
version = "3.10"
@ -2091,4 +2146,4 @@ test = ["pytest (>=6.0.0)", "setuptools (>=65)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.12"
content-hash = "63b67bba63099fa49496a404a896eacea5838f9bdeee5458e8c18cdece481273"
content-hash = "87f57e9572ea0a8daabc9ce352fc92ad32d8ecdd936d6f2a39ecec422dc88873"

View File

@ -24,6 +24,7 @@ llvmlite = "0.43.0"
numba = "0.60.0"
openai-whisper = "^20240930"
moviepy = "^2.1.1"
googletrans = "^4.0.2"
[build-system]

View File

@ -2,3 +2,4 @@ from src.bot.handlers.abstract_command_handler import AbstractCommandHandler
from src.bot.handlers.ai_command_handler import AICommandHandler
from src.bot.handlers.voice_command_handler import VoiceCommandHandler
from src.bot.handlers.video_command_handler import VideoCommandHandler
from src.bot.handlers.translate_command_handler import TranslateCommandHandler

View File

@ -0,0 +1,217 @@
import re
from re import Match
from logging import Logger
from pyrogram import filters
from pyrogram.filters import Filter
from pyrogram.client import Client
from pyrogram.types import Message
from pyrogram.enums import ChatAction
from googletrans.models import Translated
from src.bot.handlers import AbstractCommandHandler
from src.integrations.google_translate_client import GoogleTranslateClient
class TranslateCommandHandler(AbstractCommandHandler):
    """
    Command handler for the /translate command in a Pyrogram bot.

    This handler translates text from one language to another using Google Translate.
    The text is taken from the command message itself or, when the command carries no
    text, from the message it replies to. The optional parameters `src=`/`source=` and
    `dest=`/`destination=` select the languages and accept any alias listed in
    `LANGUAGE_ALIASES`.

    Attributes:
        COMMAND (`str`): The name of the command that this handler handles.
        LANGUAGE_ALIASES (`dict`): Mapping of language codes to their aliases.
        DEFAULT_SOURCE_LANGUAGE (`str`): Default source language for translation ("auto" for auto-detection).
        DEFAULT_DESTINATION_LANGUAGE (`str`): Default destination language code for translation.
        logger (`Logger`): Logger instance for logging.
        translate_client (`GoogleTranslateClient`): Client for interacting with Google Translate.
    """

    # Mapping of language codes to their aliases
    LANGUAGE_ALIASES: dict[str, list[str]] = {
        "ru": ["ru", "rus", "russian"],
        "en": ["en", "eng", "english"],
        "es": ["es", "spa", "spanish"],
        "de": ["de", "ger", "german"],
        "fr": ["fr", "fra", "french"],
        "pt": ["pt", "por", "portuguese"],
        "it": ["it", "ita", "italian"],
        "zh": ["zh", "chi", "chinese"],
        "ja": ["ja", "jpn", "japanese"],
        "ko": ["ko", "kor", "korean"],
        "ar": ["ar", "ara", "arabic"],
        "tr": ["tr", "tur", "turkish"],
        "hi": ["hi", "hin", "hindi"],
        "vi": ["vi", "vie", "vietnamese"],
        "sv": ["sv", "swe", "swedish"],
        "no": ["no", "nor", "norwegian"],
        "da": ["da", "dan", "danish"],
        "fi": ["fi", "fin", "finnish"],
        "cs": ["cs", "cze", "czech"],
        "sk": ["sk", "slo", "slovak"],
        "ro": ["ro", "rum", "romanian"],
        "bg": ["bg", "bul", "bulgarian"],
        "uk": ["uk", "ukr", "ukrainian"],
        "be": ["be", "bel", "belarusian"],
        "et": ["et", "est", "estonian"],
        "lv": ["lv", "lav", "latvian"],
        "lt": ["lt", "lit", "lithuanian"],
        "tt": ["tt", "tat", "tatar"],
        "cv": ["cv", "chv", "chuvash"],
    }

    # Default source language for translation ("auto" for auto-detection)
    DEFAULT_SOURCE_LANGUAGE: str = "auto"

    # Default destination language code for translation
    DEFAULT_DESTINATION_LANGUAGE: str = "ru"

    def __init__(self, logger: Logger, translate_client: GoogleTranslateClient) -> None:
        """
        Initializes the TranslateCommandHandler.

        Args:
            logger (`Logger`): Logger instance for logging events.
            translate_client (`GoogleTranslateClient`): Client for interacting with Google Translate.
        """
        self.logger: Logger = logger
        self.translate_client: GoogleTranslateClient = translate_client
        self.logger.info("TranslateCommandHandler initialized successfully.")

    @property
    def COMMAND(self) -> str:
        """
        The name of the command that this handler handles.
        """
        return "translate"

    def get_filters(self) -> Filter:
        """
        Returns the filter for the /translate command.

        Returns:
            `pyrogram.filters.Filter`: A Pyrogram filter matching the /translate command.
        """
        return filters.command(self.COMMAND)

    async def handle(self, client: Client, message: Message) -> None:
        """
        Handles the /translate command.

        Translates a given text or text from a replied-to message from one language to another.
        The progress message sent to the chat is edited in place with either the
        translation or an error notice.

        Args:
            client (`pyrogram.client.Client`): The Pyrogram client instance.
            message (`pyrogram.types.Message`): The incoming message object to process.
        """
        # Function-scope import so the handler does not depend on a new module-level import.
        from html import escape

        self.logger.info(
            "Received /%s command from chat_id=%s.", self.COMMAND, message.chat.id
        )

        # message.text is None for media messages; the command may then sit in the
        # caption, and running the regexes on None would raise TypeError.
        command_text: str = message.text or message.caption or ""
        source_language, destination_language, text = self._parse_arguments(command_text)

        self.logger.debug(
            "Parsed parameters - source_language: %s, destination_language: %s, text length: %s",
            source_language, destination_language, len(text) if text else 0
        )

        # Resolve language aliases ("auto" is not an alias and is passed through as is)
        try:
            if source_language != self.DEFAULT_SOURCE_LANGUAGE:
                source_language = self.__resolve_language(source_language)
            destination_language = self.__resolve_language(destination_language)
        except ValueError:
            await message.reply("Invalid language parameter provided.", quote=True)
            self.logger.error("Invalid language parameter provided.", exc_info=True)
            return

        # Use replied message text (or its caption for media) if no text is provided
        replied: Message | None = message.reply_to_message
        if not text and replied:
            text = replied.text or replied.caption

        if not text:
            await message.reply(
                f"Please provide a message after /{self.COMMAND} or reply to a message.",
                quote=True
            )
            self.logger.warning(
                "No argument provided for /%s command in chat_id=%s.", self.COMMAND, message.chat.id
            )
            return

        # Notify the user that the translation is in progress
        processing_message: Message = await message.reply(
            "Translating text...", quote=True
        )

        try:
            # Perform translation
            await client.send_chat_action(message.chat.id, ChatAction.TYPING)
            self.logger.debug("Translating text for chat_id=%s", message.chat.id)
            translation_result: Translated = await self.translate_client.translate_text(
                text=text,
                src_lang=source_language,
                dest_lang=destination_language
            )

            # Formatted response text. The translated text is arbitrary user content,
            # so it is escaped to keep "<" and "&" from being parsed as markup.
            caption: str = f"Translated from {translation_result.src} to {translation_result.dest}"
            response_text: str = (
                f"<pre language=\"{escape(caption)}\">"
                f"{escape(translation_result.text, quote=False)}"
                "</pre>"
            )
            await processing_message.edit_text(response_text)
        except Exception as error:
            self.logger.error(
                "Error processing /%s command for chat_id=%s: %s",
                self.COMMAND, message.chat.id, error,
                exc_info=True
            )
            await processing_message.edit_text(
                "An error occurred during the translation process. Please try again later."
            )
        finally:
            # Always clear the "typing" indicator, whether the translation succeeded or not
            await client.send_chat_action(message.chat.id, ChatAction.CANCEL)

    def _parse_arguments(self, command_text: str) -> tuple[str, str, str | None]:
        """
        Extracts the source language, destination language and text from the command text.

        Args:
            command_text (`str`): Raw text of the command message.

        Returns:
            `tuple[str, str, str | None]`: Source language, destination language (both
            fall back to the class defaults when not given) and the remaining text, or
            `None` when nothing is left after removing the command and its parameters.
        """
        source_language: str = self.DEFAULT_SOURCE_LANGUAGE
        destination_language: str = self.DEFAULT_DESTINATION_LANGUAGE

        # Parse optional arguments using regex
        match_src: Match[str] | None = re.search(r"(?:src=|source=)(\w+)", command_text)
        match_dest: Match[str] | None = re.search(r"(?:dest=|destination=)(\w+)", command_text)
        if match_src:
            source_language = match_src.group(1)
        if match_dest:
            destination_language = match_dest.group(1)

        # Whatever remains after removing the command and its parameters is the text
        remaining: str = re.sub(
            rf"(?:/{self.COMMAND}|src=\w+|source=\w+|dest=\w+|destination=\w+)", "", command_text
        ).strip()

        return source_language, destination_language, remaining or None

    def __resolve_language(self, language_input: str) -> str:
        """
        Resolves the language code based on the input text.

        Args:
            language_input (`str`): User-provided language parameter.

        Returns:
            `str`: The resolved language code.

        Raises:
            `ValueError`: If the input does not match any supported language.
        """
        normalized_input: str = language_input.lower()
        for language_code, aliases in self.LANGUAGE_ALIASES.items():
            if normalized_input in aliases:
                return language_code

        self.logger.warning("Invalid language parameter provided: %s", language_input)
        # Exceptions do not apply %-formatting to their arguments, so format the message here.
        raise ValueError(f"Invalid language parameter provided: {language_input}")

View File

@ -4,11 +4,12 @@ from logging import Logger
from pyrogram.client import Client
from src.integrations.gigachat_api_client import GigaChatClient
from src.integrations.google_translate_client import GoogleTranslateClient
from src.bot.handlers import AbstractCommandHandler
from src.bot.handlers import AICommandHandler
from src.bot.handlers import VoiceCommandHandler
from src.bot.handlers import VideoCommandHandler
from src.bot.handlers import TranslateCommandHandler
class TelegramUserBot:
@ -20,7 +21,7 @@ class TelegramUserBot:
gigachat_client (`GigaChatClient`): The client instance for GigaChat integration.
"""
def __init__(self, session_name: str, api_id: str, api_hash: str, gigachat_client: GigaChatClient) -> None:
def __init__(self, session_name: str, api_id: str, api_hash: str, gigachat_client: GigaChatClient, translate_client: GoogleTranslateClient) -> None:
"""
Initializes the Telegram user bot.
@ -40,6 +41,7 @@ class TelegramUserBot:
"ai": AICommandHandler(self.logger, gigachat_client),
"voice": VoiceCommandHandler(self.logger),
"video": VideoCommandHandler(self.logger),
"translate": TranslateCommandHandler(self.logger, translate_client),
}
self.register_handlers()

View File

@ -0,0 +1,106 @@
from logging import Logger
from googletrans import Translator, LANGUAGES
from googletrans.models import Translated, Detected
class GoogleTranslateClient:
    """
    A client for interacting with Google Translate.

    Asynchronous wrapper around `googletrans.Translator` that logs every operation and
    re-raises any failure as `RuntimeError` with the original exception attached as its cause.

    Attributes:
        logger (`logging.Logger`): Logger instance for logging.
        translator (`googletrans.Translator`): The underlying translator instance.
    """

    def __init__(self, logger: Logger) -> None:
        """
        Initializes the client for interacting with Google Translate.

        Args:
            logger (`logging.Logger`): Logger instance for logging.
        """
        self.logger: Logger = logger
        self.translator: Translator = Translator()
        self.logger.info("Google Translate client initialized successfully.")

    async def get_available_languages(self) -> dict[str, str]:
        """
        Retrieves a list of available languages supported by Google Translate.

        Returns:
            `dict[str, str]`: A dictionary where keys are language codes and values are language names.
        """
        self.logger.info("Retrieving available languages.")
        # Return a copy so callers cannot mutate the library's module-level table.
        return dict(LANGUAGES)

    async def detect_language(self, text: str) -> Detected:
        """
        Detects the language of a given text.

        Args:
            text (`str`): The text for language detection.

        Returns:
            `googletrans.models.Detected`: The detection object containing the detected language and confidence.

        Raises:
            `RuntimeError`: If the detection fails for any reason.
        """
        try:
            self.logger.info("Detecting language for text.")
            detection: Detected = await self.translator.detect(text)
            self.logger.debug("Detection language completed successfully.")
            return detection
        except Exception as e:
            self.logger.error("Error during language detection: %s", e, exc_info=True)
            raise RuntimeError(f"Error during language detection: {e}") from e

    async def translate_text(
        self,
        text: str,
        dest_lang: str = "ru",
        src_lang: str = "auto"
    ) -> Translated:
        """
        Translates a given text to the target language.

        Args:
            text (`str`): The text to be translated.
            dest_lang (`str`): The target language code (e.g., 'ru' for Russian). Defaults to 'ru'.
            src_lang (`str`): The source language code. Defaults to 'auto' for automatic detection.

        Returns:
            `googletrans.models.Translated`: The translation object containing the translated text and metadata.

        Raises:
            `RuntimeError`: If the translation fails for any reason.
        """
        try:
            self.logger.info("Translating text to %s from %s.", dest_lang, src_lang)
            # Keyword arguments avoid relying on the library's positional order of dest/src.
            translation: Translated = await self.translator.translate(
                text, dest=dest_lang, src=src_lang
            )
            self.logger.info("Translation completed successfully.")
            return translation
        except Exception as e:
            self.logger.error("Error during translation: %s", e, exc_info=True)
            raise RuntimeError(f"Error during translation: {e}") from e

    async def translate_batch(
        self,
        texts: list[str],
        dest_lang: str = "ru",
        src_lang: str = "auto"
    ) -> list[Translated]:
        """
        Translates a list of texts to the target language.

        Args:
            texts (`list[str]`): A list of texts to be translated.
            dest_lang (`str`): The target language code (e.g., 'ru' for Russian). Defaults to 'ru'.
            src_lang (`str`): The source language code. Defaults to 'auto' for automatic detection.

        Returns:
            `list[googletrans.models.Translated]`: A list of translation objects containing the translated texts and metadata.

        Raises:
            `RuntimeError`: If the batch translation fails for any reason.
        """
        try:
            self.logger.info("Translating batch of %d texts to %s from %s.", len(texts), dest_lang, src_lang)
            # Keyword arguments avoid relying on the library's positional order of dest/src.
            translations: list[Translated] = await self.translator.translate(
                texts, dest=dest_lang, src=src_lang
            )
            self.logger.info("Batch translation completed successfully.")
            return translations
        except Exception as e:
            self.logger.error("Error during batch translation: %s", e, exc_info=True)
            raise RuntimeError(f"Error during batch translation: {e}") from e