#### Import necessary libraries

In [1]:
import nltk
from nltk.tokenize import word_tokenize
from translate import Translator
from nltk.translate import IBMModel1, AlignedSent, Alignment
import re
from transformers import MarianTokenizer, MarianMTModel
from transformers import pipeline
# Download necessary NLTK data
# nltk.download('comtrans')

#### Function to train an IBM Model 1 alignment model


In [2]:


def train_translation_model(hinglish_sentences, english_sentences, iterations=5):
    """Train an IBM Model 1 model on a sentence-aligned parallel corpus.

    Each Hinglish sentence is paired with the English sentence at the same
    position, both are tokenized with ``word_tokenize`` and wrapped in an
    ``AlignedSent``; the resulting list is handed to ``IBMModel1``.

    Note: ``word_tokenize`` needs NLTK's tokenizer data to be installed
    (presumably the Punkt resource -- confirm for the installed NLTK version).

    Args:
        hinglish_sentences: Iterable of Hinglish sentences (strings). Each is
            passed as the first argument of ``AlignedSent``.
        english_sentences: Iterable of English sentences (strings), aligned
            index-by-index with ``hinglish_sentences``. Each is passed as the
            second argument of ``AlignedSent``.
        iterations: Number of training iterations given to ``IBMModel1``.
            Defaults to 5, the value that was previously hard-coded.

    Returns:
        The trained ``IBMModel1`` instance.

    Raises:
        ValueError: If the two inputs do not contain the same number of
            sentences.
    """
    # Materialise both inputs so their lengths can be compared; a bare zip()
    # would silently drop the unmatched tail of the longer one and train on a
    # corpus the caller did not intend.
    hinglish_sentences = list(hinglish_sentences)
    english_sentences = list(english_sentences)
    if len(hinglish_sentences) != len(english_sentences):
        raise ValueError(
            "Parallel corpus is misaligned: "
            f"{len(hinglish_sentences)} Hinglish vs "
            f"{len(english_sentences)} English sentences"
        )

    # Prepare the tokenized sentence pairs.
    parallel_corpus = [
        AlignedSent(word_tokenize(hinglish), word_tokenize(english))
        for hinglish, english in zip(hinglish_sentences, english_sentences)
    ]

    # Train an IBM Model 1 translation model.
    return IBMModel1(parallel_corpus, iterations)

# Toy one-sentence parallel corpus (replace with your own data).
hinglish_sentences = [
    " मुझे सिर्फ ३० minute का demo मि ला था इस नये headset का इस्तेमाल करनेके लिए",
]
english_sentences = [
    "I had about a 30 minute demo just using this new headset",
]

# Fit the model on the toy corpus. Note: `translation_model` is not referenced
# by any later cell visible in this notebook.
translation_model = train_translation_model(
    hinglish_sentences=hinglish_sentences,
    english_sentences=english_sentences,
)


#### Function to clean the input text

In [3]:



def clean_input_text(text):
    """Strip every character that is not an ASCII letter, digit or whitespace.

    The previous implementation ran five ``re.sub`` calls, but each one was
    applied to the raw ``text`` and overwrote the result of the one before,
    so only the last substitution ever took effect. Typographic quotes, en/em
    dashes and any other non-ASCII symbol therefore survived the "cleaning".

    Applying those five substitutions cumulatively, in any order, leaves
    exactly the characters matched by ``[a-zA-Z0-9\\s]``: the quote and dash
    normalisations only produce ``'`` and ``-``, which the punctuation passes
    remove again. A single whitelist substitution is therefore equivalent.

    Args:
        text: The string to clean.

    Returns:
        ``text`` with all characters other than ``a-z``, ``A-Z``, ``0-9`` and
        whitespace removed. Whitespace is kept as-is (not collapsed), and
        non-ASCII letters such as accented characters are removed as well.
    """
    return re.sub(r'[^a-zA-Z0-9\s]', '', text)



### Function for translation with the `translate` package (English → Hindi)


In [4]:


 
def translate_english_to_hinglish_translate(english_text):
    """Translate English text with the ``translate`` package's ``Translator``.

    The input is first passed through ``clean_input_text`` and then translated
    by a ``Translator`` configured with ``from_lang="en"`` and
    ``to_lang="hi"``. Despite the function name, the requested target
    language is therefore Hindi.

    Args:
        english_text: The English sentence to translate.

    Returns:
        The translated string, or ``None`` if cleaning or translation raised
        an exception (the error is printed, not re-raised).
    """
    try:
        # Strip special characters before handing the text to the translator.
        sanitized = clean_input_text(english_text)
        en_to_hi = Translator(from_lang="en", to_lang="hi")
        return en_to_hi.translate(sanitized)
    except Exception as err:
        # Best-effort: report the failure and signal it with None.
        print(f"Translation error: {err}")
        return None


#### Get input from the user

In [5]:

# Read the English sentence to translate; the translation cells below all reuse `user_input`.
# NOTE(review): input() needs an interactive kernel, so an unattended
# "Restart & Run All" will stall or fail here -- consider a default sentence.
user_input = input("Enter an English sentence: ")


Enter an English sentence: I had about a 30 minute demo just using this new headset


#### Translation with the `translate` package

In [6]:



# Translate the user's sentence with the `translate`-package helper.
hinglish_output_translate = translate_english_to_hinglish_translate(user_input)
# The label names the backend that actually produced the text: the helper calls
# translate.Translator with to_lang="hi" (Hindi). The IBMModel1 model trained
# earlier is not involved in this translation.
print(f"Translated Output (Hindi) using translate.Translator: {hinglish_output_translate}")


Translated Output (Hinglish) using IBMModel1 translate:- मेरे पास इस नए हेडसेट का उपयोग करते हुए लगभग 30 मिनट का डेमो था



### Using Transformers Pipeline Model



In [7]:

# Build an English-to-Hindi translation pipeline from the Transformers library,
# backed by the Helsinki-NLP OPUS-MT en-hi checkpoint.
translation_pipeline = pipeline(
    task="translation_en_to_hi",
    model="Helsinki-NLP/opus-mt-en-hi",
)


#### Transformers Pipeline Model translation

In [8]:
# Run the pipeline on the user's sentence; the translation is read from the
# 'translation_text' field of the first result.
translated_text = translation_pipeline(user_input, max_length=50)
print(
    "Translated Output (Hindi) using Transformers: "
    f"{translated_text[0]['translation_text']}"
)


Translated Output (Hindi) using Transformers: मैं 30 मिनट डेमो था सिर्फ इस नए सिरसेट का इस्तेमाल



### Function for translation with MarianMTModel


In [9]:

# Loaded (tokenizer, model) pairs keyed by checkpoint name, so repeated calls
# do not reload the checkpoint every time.
_MARIAN_CACHE = {}


def _load_marian(model_name):
    """Return the ``(tokenizer, model)`` pair for ``model_name``, loading it on first use."""
    if model_name not in _MARIAN_CACHE:
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        # Only cache once both parts loaded successfully.
        _MARIAN_CACHE[model_name] = (tokenizer, model)
    return _MARIAN_CACHE[model_name]


def translate_english_to_hindi_transformers(
    english_sentence,
    model_name="Helsinki-NLP/opus-mt-en-hi",
    max_length=50,
    num_beams=5,
):
    """Translate an English sentence with a pre-trained MarianMT checkpoint.

    The tokenizer and model are loaded once per ``model_name`` and reused on
    later calls; previously both were reloaded on every invocation.

    Args:
        english_sentence: The English text to translate.
        model_name: Checkpoint identifier passed to ``from_pretrained``.
            Defaults to the English-to-Hindi OPUS-MT checkpoint.
        max_length: ``max_length`` passed to ``model.generate``. Defaults to
            50, the value that was previously hard-coded.
        num_beams: ``num_beams`` passed to ``model.generate``. Defaults to 5,
            the value that was previously hard-coded.

    Returns:
        The decoded translation of the first generated sequence, or ``None``
        if loading, tokenizing, generating or decoding raised an exception
        (the error is printed, not re-raised).
    """
    try:
        # Load (or fetch from cache) the pre-trained translation model.
        tokenizer, model = _load_marian(model_name)

        # Tokenize and translate the sentence.
        inputs = tokenizer(english_sentence, return_tensors="pt", padding=True, truncation=True)
        translation = model.generate(
            **inputs,
            max_length=max_length,
            num_beams=num_beams,
            early_stopping=True,
        )
        return tokenizer.decode(translation[0], skip_special_tokens=True)
    except Exception as e:
        # Best-effort: report the failure and signal it with None.
        print(f"Translation error: {e}")
        return None



#### MarianMTModel translation

In [10]:


# Translate the user's sentence with the MarianMT helper and show the result
# (prints "None" if the helper reported an error).
hindi_output_transformers = translate_english_to_hindi_transformers(user_input)
print(
    "Translated Output (Hindi) using MarianMTModel Transformers: "
    f"{hindi_output_transformers}"
)


Translated Output (Hindi) using MarianMTModel Transformers: मैं 30 मिनट डेमो था सिर्फ इस नए सिरसेट का इस्तेमाल
