In [2]:
import time

from deep_translator import GoogleTranslator
# Translator instances keyed by "<source>_<target>", shared by every call below.
translator_cache = {}


def get_translator(source: str, target: str) -> GoogleTranslator:
    """Return the translator for a language pair, constructing it on first use.

    Args:
        source: Source language code passed to ``GoogleTranslator``.
        target: Target language code passed to ``GoogleTranslator``.

    Returns:
        The ``GoogleTranslator`` stored in ``translator_cache`` for this pair.
    """
    cache_key = f"{source}_{target}"
    try:
        return translator_cache[cache_key]
    except KeyError:
        # First request for this pair: build the translator and remember it.
        instance = GoogleTranslator(source=source, target=target)
        translator_cache[cache_key] = instance
        return instance

In [None]:
# Request one translator per direction. On a fresh kernel neither pair is in
# translator_cache yet, so each call constructs a new instance and stores it.
t1 = get_translator('en', 'hi')  # New translator created and cached
t3 = get_translator('hi', 'en')  # New translator created for the reverse direction

In [6]:
# Display the repr of the translator returned by get_translator('en', 'hi').
t1

<deep_translator.google.GoogleTranslator at 0x254e68b8550>

In [7]:
# Constructing GoogleTranslator directly does not go through translator_cache,
# so the displayed object is a separate instance from t1.
GoogleTranslator(source='en', target='hi')

<deep_translator.google.GoogleTranslator at 0x254e7f3f640>

In [10]:
import re
from typing import List, Tuple

# Abbreviations and acronyms that are kept verbatim instead of being translated.
# Matching is case-insensitive (see should_skip_translation).
ABBREVIATIONS = {'AI', 'ML', 'API', 'URL', 'PDF', 'HTML', 'CSS', 'JS', 'SQL', 'JSON', 'HTTP', 'HTTPS',
                 'NASA', 'FBI', 'CEO', 'CTO', 'PhD', 'MBA', 'USA', 'UK', 'UAE', 'CPU', 'GPU'}

# Punctuation that whitespace tokenisation leaves attached to a word, e.g. "AI," or "(API)".
_EDGE_PUNCTUATION = '.,;:!?"\'()[]{}'


def should_skip_translation(text: str) -> bool:
    """Decide whether a token should be passed through without translation.

    A token is skipped when, after trimming surrounding whitespace, it is:
      * shorter than two characters;
      * a known abbreviation from ``ABBREVIATIONS`` (case-insensitive, ignoring
        punctuation attached to either end);
      * made only of digits, symbols, punctuation or underscores;
      * URL-, e-mail- or file-like (contains ``http``, ``www.``, ``@``,
        ``.com``, ``.pdf`` or ``.png``);
      * an upper-case identifier such as ``FILE_NAME_123.PDF``;
      * a version string such as ``1.0.2a`` or ``v2``.

    Args:
        text: A single token (normally one whitespace-delimited word).

    Returns:
        True if the token must be kept as-is, False if it should be translated.
    """
    text = text.strip()
    if len(text) < 2:
        return True

    # Normalise both sides to upper case so mixed-case entries such as 'PhD'
    # can match, and ignore punctuation glued to the token ("AI,", "(API)").
    core = text.strip(_EDGE_PUNCTUATION).upper()
    if core and any(core == abbreviation.upper() for abbreviation in ABBREVIATIONS):
        return True

    if re.fullmatch(r'^[\d\W_]+$', text):  # numbers, symbols, punctuation
        return True
    if re.search(r'(http|www\.|@|\.com|\.pdf|\.png)', text.lower()):
        return True
    if re.fullmatch(r'[A-Z0-9_\-\.]+', text):  # like FILE_NAME_123.PDF
        return True
    if re.fullmatch(r'v?\d+(\.\d+)*([a-zA-Z]+\d*)?', text):  # versions like 1.0.2a
        return True
    return False

def preprocess_text(text: str) -> List[Tuple[str, bool]]:
    """Split text into consecutive runs that share the same translate/skip decision.

    The text is tokenised into alternating whitespace and non-whitespace pieces.
    Each non-whitespace piece is classified with ``should_skip_translation``;
    neighbouring pieces with the same classification are merged, and whitespace
    is attached to the run that is open when it is encountered.

    Args:
        text: The raw text to segment.

    Returns:
        A list of ``(segment, do_translate)`` tuples in document order. Runs
        that contain only whitespace are omitted.
    """
    result = []
    buffer = []      # pieces of the run currently being built
    active = None    # translate flag of the current run; None until the first word

    for match in re.finditer(r'\S+|\s+', text):
        piece = match.group(0)
        if not piece.isspace():
            translatable = not should_skip_translation(piece)
            if active is not None and translatable != active:
                # Classification changed: close the current run and start a new one.
                finished = "".join(buffer)
                if finished.strip():
                    result.append((finished, active))
                buffer = []
            active = translatable
        buffer.append(piece)

    tail = "".join(buffer)
    if tail.strip():
        result.append((tail, active))
    return result

def postprocess_translated_text(original: str, translated: str) -> str:
    """Re-attach the original segment's surrounding whitespace to its translation.

    The exact leading and trailing whitespace characters of ``original``
    (spaces, tabs, newlines, ...) are preserved, so line breaks between
    segments survive translation instead of being flattened to spaces.

    Args:
        original: The source segment, including any surrounding whitespace.
        translated: The translated text; its own surrounding whitespace is dropped.

    Returns:
        ``translated`` stripped and wrapped in the whitespace that surrounded
        ``original``.
    """
    without_leading = original.lstrip()
    leading = original[:len(original) - len(without_leading)]
    # Take the trailing part from the left-stripped text so an all-whitespace
    # original is counted once (as leading) rather than on both sides.
    trailing = without_leading[len(without_leading.rstrip()):]
    return leading + translated.strip() + trailing



In [None]:
def translate_text_segment(text: str, source: str, target: str,
                           max_attempts: int = 3, retry_delay: float = 1.0) -> str:
    """Translate one segment, retrying on failure and falling back to the input.

    Args:
        text: The segment to translate; surrounding whitespace is preserved.
        source: Source language code.
        target: Target language code.
        max_attempts: How many times to call the translator before giving up.
        retry_delay: Seconds to wait after a failed attempt before the next one.

    Returns:
        The translated segment with the original surrounding whitespace, or
        ``text`` unchanged if it is blank or every attempt failed or returned
        an empty result.
    """
    if not text.strip():
        return text

    translator = get_translator(source, target)
    for attempt in range(1, max_attempts + 1):
        try:
            translated = translator.translate(text.strip())
            if translated:
                return postprocess_translated_text(text, translated)
        except Exception:
            # Best effort: translation errors are treated as retryable. Only
            # pause when another attempt will follow, so the final failure
            # does not add a pointless delay.
            if attempt < max_attempts:
                time.sleep(retry_delay)
    return text  # fallback if translation fails


def translate_text(text: str, source: str, target: str) -> str:
    """Translate text while leaving non-translatable runs untouched.

    The text is split with ``preprocess_text``; runs flagged for translation go
    through ``translate_text_segment`` and all other runs are copied verbatim.

    Args:
        text: The text to translate.
        source: Source language code.
        target: Target language code.

    Returns:
        The reassembled text, or ``text`` unchanged when it is blank or when
        the source and target languages are the same.
    """
    if not text.strip() or source == target:
        return text

    pieces = []
    for segment, needs_translation in preprocess_text(text):
        if needs_translation:
            pieces.append(translate_text_segment(segment, source, target))
        else:
            pieces.append(segment)
    return ''.join(pieces)
 

In [14]:

# End-to-end demo of translate_text on a single sentence.
source_lang = 'en'
target_lang = 'hi'
sample_text = "Amalgo Labs uses AI and ML."

# "AI" and "ML." are flagged by should_skip_translation, so they are copied
# into the result unchanged while the surrounding words are translated.
translated = translate_text(sample_text, source_lang, target_lang)
print("Original:", sample_text)
print("Translated:", translated)

Original: Amalgo Labs uses AI and ML.
Translated: अमलगो लैब्स का उपयोग करता है AI और ML.


In [15]:
import time
def translate_text_blocks(text_blocks: List[str], source: str, target: str, callback=None) -> List[str]:
    """Translate a list of text blocks one at a time, with optional progress reporting.

    Args:
        text_blocks: The blocks to translate, in order.
        source: Source language code.
        target: Target language code.
        callback: Optional callable invoked before each block is translated
            with ``(fraction, message)``, where ``fraction`` is the 1-based
            block number divided by the block count.

    Returns:
        The translated blocks in the same order; an empty list when
        ``text_blocks`` is empty or falsy.
    """
    if not text_blocks:
        return []

    block_count = len(text_blocks)
    results = []
    for position, block in enumerate(text_blocks, start=1):
        if callback:
            callback(position / block_count, f"Translating block {position} of {block_count}")
        results.append(translate_text(block, source, target))
        # Brief pause between requests; not needed after the last block.
        if position < block_count:
            time.sleep(0.1)
    return results

# Smoke test for translate_text_blocks: the abbreviation "AI" and the URL are
# flagged by should_skip_translation and should appear unchanged in the result.
blocks = ["Hello world!", "AI is amazing.", "Visit www.google.com"]
translate_text_blocks(blocks, source="en", target="hi")


['हैलो वर्ल्ड!', 'AI अद्भुत है।', 'मिलने जाना www.google.com']

In [None]:
from typing import Optional
def detect_language(text: str, sample_size: int = 500) -> Optional[str]:
    """Guess whether text is Hindi or English from the characters it contains.

    Only the first ``sample_size`` characters after any leading whitespace are
    inspected. Characters in the Devanagari block (U+0900-U+097F) are counted
    against ASCII letters.

    Args:
        text: The text to inspect. ``None`` or an empty string yields ``None``.
        sample_size: Maximum number of characters to examine.

    Returns:
        ``'hi'`` if Devanagari characters outnumber ASCII letters, ``'en'`` if
        there is at least one ASCII letter otherwise, and ``None`` when the
        sample contains neither.
    """
    if not text:
        return None

    # Drop leading whitespace before slicing so a long run of padding cannot
    # consume the entire sample window.
    sample = text.lstrip()[:sample_size]

    hindi_chars = sum(1 for char in sample if '\u0900' <= char <= '\u097F')  # count characters in the Devanagari block
    latin_chars = sum(1 for char in sample if char.isalpha() and char.isascii())
    if hindi_chars > latin_chars:
        return 'hi'
    if latin_chars > 0:
        return 'en'
    return None

In [19]:
# Devanagari input: expected result is 'hi'.
detect_language("यह एक परीक्षण वाक्य है।")

'hi'

In [20]:
# ASCII-letter input: expected result is 'en'.
detect_language("This is a test sentence.")

'en'