diff --git a/main.py b/main.py index 1638d7a..bf5c906 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,10 @@ +import logging +from logging import Logger + +from src.utils import logging_configuration from src.integrations.gigachat_api_client import GigaChatClient +from src.integrations.google_translate_client import GoogleTranslateClient from src.bot.telegram_userbot import TelegramUserBot -from src.utils import logging from src.core.configuration import config @@ -9,22 +13,25 @@ def main() -> None: Entry point for starting the Telegram user bot. """ # Configure logging - logging.setup_logging() + logging_configuration.setup_logging() + logger: Logger = logging.getLogger(__name__) # Load API credentials and configuration api_id: str = config.API_ID api_hash: str = config.API_HASH api_token: str = config.API_GIGACHAT_TOKEN - # Initialize GigaChatClient + # Initialize services gigachat_client: GigaChatClient = GigaChatClient(api_token=api_token) + translate_client: GoogleTranslateClient = GoogleTranslateClient(logger) # Initialize and run the Telegram user bot bot: TelegramUserBot = TelegramUserBot( session_name="userbot", api_id=api_id, api_hash=api_hash, - gigachat_client=gigachat_client + gigachat_client=gigachat_client, + translate_client=translate_client ) bot.run() diff --git a/poetry.lock b/poetry.lock index 99f3153..4e4232f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -357,6 +357,23 @@ files = [ httpx = "<1" pydantic = ">=1" +[[package]] +name = "googletrans" +version = "4.0.2" +description = "An unofficial Google Translate API for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "googletrans-4.0.2-py3-none-any.whl", hash = "sha256:19e4fbbf7463e0cf4cd8f03479372910368730ac13dfb023fed6db58fd093547"}, + {file = "googletrans-4.0.2.tar.gz", hash = "sha256:d9ef126b5d92fabeec0bb9ddcdbeecd43865fc00e17f1dfa07717837827a17de"}, +] + +[package.dependencies] +httpx = {version = ">=0.27.2", extras = ["http2"]} + +[package.extras] +dev = ["pytest", 
"pytest-asyncio", "pytest-cov", "ruff (>=0.7)"] + [[package]] name = "h11" version = "0.14.0" @@ -368,6 +385,32 @@ files = [ {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, ] +[[package]] +name = "h2" +version = "4.1.0" +description = "HTTP/2 State-Machine based protocol implementation" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"}, + {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"}, +] + +[package.dependencies] +hpack = ">=4.0,<5" +hyperframe = ">=6.0,<7" + +[[package]] +name = "hpack" +version = "4.0.0" +description = "Pure-Python HPACK header compression" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"}, + {file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"}, +] + [[package]] name = "httpcore" version = "1.0.7" @@ -403,6 +446,7 @@ files = [ [package.dependencies] anyio = "*" certifi = "*" +h2 = {version = ">=3,<5", optional = true, markers = "extra == \"http2\""} httpcore = "==1.*" idna = "*" @@ -413,6 +457,17 @@ http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "hyperframe" +version = "6.0.1" +description = "HTTP/2 framing layer for Python" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"}, + {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, +] + [[package]] name = "idna" version = "3.10" @@ -2091,4 +2146,4 @@ test = ["pytest (>=6.0.0)", "setuptools (>=65)"] 
class TranslateCommandHandler(AbstractCommandHandler):
    """
    Command handler for the /translate command in a Pyrogram bot.

    This handler translates text from one language to another using Google Translate.

    Usage:
        /translate [src=<lang>|source=<lang>] [dest=<lang>|destination=<lang>] <text>
        or reply to a text message with /translate (optionally with the same parameters).

    Attributes:
        COMMAND (`str`): The name of the command that this handler handles.
        LANGUAGE_ALIASES (`dict`): Mapping of language codes to their aliases.
        DEFAULT_SOURCE_LANGUAGE (`str`): Default source language for translation ("auto" for auto-detection).
        DEFAULT_DESTINATION_LANGUAGE (`str`): Default destination language code for translation.
        logger (`Logger`): Logger instance for logging.
        translate_client (`GoogleTranslateClient`): Client for interacting with Google Translate.
    """

    # Mapping of language codes to their aliases.
    # NOTE(review): confirm that every key is a code the translation backend accepts
    # (googletrans historically lists Chinese as "zh-cn"/"zh-tw" rather than "zh").
    LANGUAGE_ALIASES: dict[str, list[str]] = {
        "ru": ["ru", "rus", "russian"],
        "en": ["en", "eng", "english"],
        "es": ["es", "spa", "spanish"],
        "de": ["de", "ger", "german"],
        "fr": ["fr", "fra", "french"],
        "pt": ["pt", "por", "portuguese"],
        "it": ["it", "ita", "italian"],
        "zh": ["zh", "chi", "chinese"],
        "ja": ["ja", "jpn", "japanese"],
        "ko": ["ko", "kor", "korean"],
        "ar": ["ar", "ara", "arabic"],
        "tr": ["tr", "tur", "turkish"],
        "hi": ["hi", "hin", "hindi"],
        "vi": ["vi", "vie", "vietnamese"],
        "sv": ["sv", "swe", "swedish"],
        "no": ["no", "nor", "norwegian"],
        "da": ["da", "dan", "danish"],
        "fi": ["fi", "fin", "finnish"],
        "cs": ["cs", "cze", "czech"],
        "sk": ["sk", "slo", "slovak"],
        "ro": ["ro", "rum", "romanian"],
        "bg": ["bg", "bul", "bulgarian"],
        "uk": ["uk", "ukr", "ukrainian"],
        "be": ["be", "bel", "belarusian"],
        "et": ["et", "est", "estonian"],
        "lv": ["lv", "lav", "latvian"],
        "lt": ["lt", "lit", "lithuanian"],
        "tt": ["tt", "tat", "tatar"],
        "cv": ["cv", "chv", "chuvash"],
    }

    # Default source language for translation ("auto" for auto-detection)
    DEFAULT_SOURCE_LANGUAGE: str = "auto"

    # Default destination language code for translation
    DEFAULT_DESTINATION_LANGUAGE: str = "ru"

    def __init__(self, logger: Logger, translate_client: GoogleTranslateClient) -> None:
        """
        Initializes the TranslateCommandHandler.

        Args:
            logger (`Logger`): Logger instance for logging events.
            translate_client (`GoogleTranslateClient`): Client for interacting with Google Translate.
        """
        self.logger: Logger = logger
        self.translate_client: GoogleTranslateClient = translate_client
        self.logger.info("TranslateCommandHandler initialized successfully.")

    @property
    def COMMAND(self) -> str:
        """
        The name of the command that this handler handles.
        """
        return "translate"

    def get_filters(self) -> Filter:
        """
        Returns the filter for the /translate command.

        Returns:
            `pyrogram.filters.Filter`: A Pyrogram filter matching the /translate command.
        """
        return filters.command(self.COMMAND)

    async def handle(self, client: Client, message: Message) -> None:
        """
        Handles the /translate command.

        Translates a given text or text from a replied-to message from one language to another.
        Replies with an error message when a language parameter is unknown, when no text is
        available, or when the translation itself fails.

        Args:
            client (`pyrogram.client.Client`): The Pyrogram client instance.
            message (`pyrogram.types.Message`): The incoming message object to process.
        """
        # Local import: the module-level import block is outside this class.
        import html

        self.logger.info(
            "Received /%s command from chat_id=%s.", self.COMMAND, message.chat.id
        )

        # The command may arrive as a media caption, in which case message.text is None.
        raw_text: str = message.text or message.caption or ""
        source_language, destination_language, text = self.__parse_arguments(raw_text)

        self.logger.debug(
            "Parsed parameters - source_language: %s, destination_language: %s, text length: %s",
            source_language, destination_language, len(text) if text else 0
        )

        # Resolve language aliases ("auto" is passed through untouched, case-insensitively)
        try:
            if source_language.lower() == self.DEFAULT_SOURCE_LANGUAGE:
                source_language = self.DEFAULT_SOURCE_LANGUAGE
            else:
                source_language = self.__resolve_language(source_language)
            destination_language = self.__resolve_language(destination_language)
        except ValueError:
            await message.reply("Invalid language parameter provided.", quote=True)
            self.logger.error("Invalid language parameter provided.", exc_info=True)
            return

        # Use replied message text if no text is provided
        if not text and message.reply_to_message and message.reply_to_message.text:
            text = message.reply_to_message.text

        if not text:
            await message.reply(
                f"Please provide a message after /{self.COMMAND} or reply to a message.",
                quote=True
            )
            self.logger.warning(
                "No argument provided for /%s command in chat_id=%s.", self.COMMAND, message.chat.id
            )
            return

        # Notify the user that the translation is in progress
        processing_message: Message = await message.reply(
            "Translating text...", quote=True
        )

        try:
            # Perform translation
            await client.send_chat_action(message.chat.id, ChatAction.TYPING)
            self.logger.debug("Translating text for chat_id=%s", message.chat.id)
            translation_result: Translated = await self.translate_client.translate_text(
                text=text,
                src_lang=source_language,
                dest_lang=destination_language
            )

            # Formatted response text.
            # NOTE(review): the markup of this string was lost in extraction; it is
            # reconstructed here as a <pre> block carrying the caption as its
            # "language" label - confirm against the original intent.
            # Both values are HTML-escaped so translated text containing "<" or "&"
            # cannot break or inject message markup.
            caption: str = f"Translated from {translation_result.src} to {translation_result.dest}"
            response_text: str = (
                f"<pre language='{html.escape(caption)}'>"
                f"{html.escape(translation_result.text)}"
                "</pre>"
            )

            await processing_message.edit_text(response_text)

        except Exception as error:
            # Top-level boundary of the handler: log with traceback and tell the user.
            self.logger.error(
                "Error processing /%s command for chat_id=%s: %s",
                self.COMMAND, message.chat.id, error,
                exc_info=True
            )
            await processing_message.edit_text(
                "An error occurred during the translation process. Please try again later."
            )
        finally:
            await client.send_chat_action(message.chat.id, ChatAction.CANCEL)

    def __parse_arguments(self, raw_text: str) -> tuple[str, str, str | None]:
        """
        Splits the raw command text into language parameters and the text to translate.

        Args:
            raw_text (`str`): Full text of the command message (may be empty).

        Returns:
            `tuple[str, str, str | None]`: Source language, destination language and the
            remaining text (`None` when nothing is left after removing the command and
            its parameters). Languages fall back to the class defaults when not given.
        """
        source_language: str = self.DEFAULT_SOURCE_LANGUAGE
        destination_language: str = self.DEFAULT_DESTINATION_LANGUAGE

        # \b keeps the parameters from matching inside ordinary words (e.g. "mysrc=x").
        match_src: Match[str] | None = re.search(r"\b(?:src|source)=(\w+)", raw_text)
        match_dest: Match[str] | None = re.search(r"\b(?:dest|destination)=(\w+)", raw_text)

        if match_src:
            source_language = match_src.group(1)

        if match_dest:
            destination_language = match_dest.group(1)

        # Remove only the leading command token (optionally "/cmd@username"), so the same
        # characters appearing later in the text are preserved, then drop the parameters.
        remainder: str = re.sub(rf"^\s*/{re.escape(self.COMMAND)}(?:@\w+)?", "", raw_text)
        remainder = re.sub(r"\b(?:src|source|dest|destination)=\w+", "", remainder).strip()

        return source_language, destination_language, remainder or None

    def __resolve_language(self, language_input: str) -> str:
        """
        Resolves the language code based on the input text.

        Args:
            language_input (`str`): User-provided language parameter.

        Returns:
            `str`: The resolved language code.

        Raises:
            `ValueError`: If the input does not match any supported language.
        """
        normalized_input: str = language_input.lower()
        for language_code, aliases in self.LANGUAGE_ALIASES.items():
            if normalized_input in aliases:
                return language_code
        self.logger.warning("Invalid language parameter provided: %s", language_input)
        # Exceptions do not apply %-formatting to their arguments; build the message explicitly.
        raise ValueError(f"Invalid language parameter provided: {language_input}")
class GoogleTranslateClient:
    """
    A client for interacting with Google Translate.

    Wraps a `googletrans.Translator` instance, adds logging around every call and
    re-raises any failure as `RuntimeError` with the original exception chained.
    """

    def __init__(self, logger: Logger) -> None:
        """
        Initializes the client for interacting with Google Translate.

        Args:
            logger (`logging.Logger`): Logger instance for logging.
        """
        self.logger: Logger = logger
        self.translator: Translator = Translator()
        self.logger.info("Google Translate client initialized successfully.")

    async def get_available_languages(self) -> dict[str, str]:
        """
        Retrieves the languages supported by Google Translate.

        Returns:
            `dict[str, str]`: A dictionary where keys are language codes and values are language names.
        """
        self.logger.info("Retrieving available languages.")
        # Return a copy so callers cannot mutate the library's module-level table.
        return dict(LANGUAGES)

    async def detect_language(self, text: str) -> Detected:
        """
        Detects the language of a given text.

        Args:
            text (`str`): The text for language detection.

        Returns:
            `googletrans.models.Detected`: The detection object containing the detected language and confidence.

        Raises:
            `RuntimeError`: If the detection request fails for any reason.
        """
        try:
            self.logger.info("Detecting language for text.")
            detection: Detected = await self.translator.detect(text)
            self.logger.debug("Detection language completed successfully.")
            return detection
        except Exception as e:
            self.logger.error("Error during language detection: %s", e, exc_info=True)
            raise RuntimeError(f"Error during language detection: {e}") from e

    async def translate_text(
        self,
        text: str,
        dest_lang: str = "ru",
        src_lang: str = "auto"
    ) -> Translated:
        """
        Translates a given text to the target language.

        Args:
            text (`str`): The text to be translated.
            dest_lang (`str`): The target language code (e.g., 'ru' for Russian). Defaults to 'ru'.
            src_lang (`str`): The source language code. Defaults to 'auto' for automatic detection.

        Returns:
            `googletrans.models.Translated`: The translation object containing the translated text and metadata.

        Raises:
            `RuntimeError`: If the translation request fails for any reason.
        """
        try:
            self.logger.info("Translating text to %s from %s.", dest_lang, src_lang)
            # Keyword arguments guard against silently swapping destination and source.
            translation: Translated = await self.translator.translate(
                text, dest=dest_lang, src=src_lang
            )
            self.logger.info("Translation completed successfully.")
            return translation
        except Exception as e:
            self.logger.error("Error during translation: %s", e, exc_info=True)
            raise RuntimeError(f"Error during translation: {e}") from e

    async def translate_batch(
        self,
        texts: list[str],
        dest_lang: str = "ru",
        src_lang: str = "auto"
    ) -> list[Translated]:
        """
        Translates a list of texts to the target language.

        Args:
            texts (`list[str]`): A list of texts to be translated.
            dest_lang (`str`): The target language code (e.g., 'ru' for Russian). Defaults to 'ru'.
            src_lang (`str`): The source language code. Defaults to 'auto' for automatic detection.

        Returns:
            `list[googletrans.models.Translated]`: A list of translation objects containing the translated texts and metadata.

        Raises:
            `RuntimeError`: If the translation request fails for any reason.
        """
        try:
            self.logger.info("Translating batch of %d texts to %s from %s.", len(texts), dest_lang, src_lang)
            translations: list[Translated] = await self.translator.translate(
                texts, dest=dest_lang, src=src_lang
            )
            self.logger.info("Batch translation completed successfully.")
            return translations
        except Exception as e:
            self.logger.error("Error during batch translation: %s", e, exc_info=True)
            raise RuntimeError(f"Error during batch translation: {e}") from e