A.

In [2]:
# Step 1: Define the Bilingual Dictionary
# English -> French lookup table, listed as (english, french) pairs.
# Keys are lowercase single words.
dictionary = dict([
    ('hello', 'bonjour'),
    ('world', 'monde'),
    ('my', 'mon'),
    ('name', 'nom'),
    ('is', 'est'),
    ('good', 'bon'),
    ('morning', 'matin'),
    ('i', 'je'),
    ('am', 'suis'),
    ('a', 'un'),
    ('student', 'étudiant'),
    ('teacher', 'professeur'),
])

# Step 2: Define Grammar Rules
# Maps a word-order pattern name to its slots, in order.
grammar_rules = dict(
    SVO=['subject', 'verb', 'object'],  # Subject-Verb-Object structure
)

# Step 3: Translation Function
def translate(sentence, lexicon=None):
    """Translate a sentence word by word using a lookup table.

    The sentence is lowercased and split on whitespace. Each token is looked
    up in ``lexicon``; tokens without an entry are kept (lowercased) as-is.
    Punctuation attached to a word (``"Hello,"``, ``"world!"``) is peeled off
    before the lookup and re-attached afterwards, so it no longer prevents
    the word from being translated.

    Args:
        sentence: Text to translate.
        lexicon: Optional mapping of lowercase source words to translations.
            Defaults to the module-level ``dictionary``.

    Returns:
        The translated tokens joined by single spaces.
    """
    import string  # local import: this cell has no import block of its own

    if lexicon is None:
        lexicon = dictionary

    punctuation = string.punctuation
    translated_words = []
    for token in sentence.lower().split():
        # An exact match wins, so entries that themselves contain punctuation
        # keep working exactly as before.
        if token in lexicon:
            translated_words.append(lexicon[token])
            continue

        # Split the token into leading punctuation, the bare word, and
        # trailing punctuation.
        without_lead = token.lstrip(punctuation)
        lead = token[:len(token) - len(without_lead)]
        core = without_lead.rstrip(punctuation)
        trail = without_lead[len(core):]

        if core and core in lexicon:
            translated_words.append(lead + lexicon[core] + trail)
        else:
            # Unknown word (or punctuation only): pass it through unchanged.
            translated_words.append(token)

    return ' '.join(translated_words)

# Example usage
sentence = "Hello world"
translated_sentence = translate(sentence)
print("Translated sentence:", translated_sentence)

# Sample input/output interaction
# Keep prompting until the user types 'exit' or 'quit', or input ends.
while True:
    try:
        user_input = input("Enter an English sentence to translate (or type 'exit' to quit): ")
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the prompt was interrupted: leave the loop
        # cleanly instead of ending the cell with a traceback.
        print("Exiting translation system.")
        break

    # Strip surrounding whitespace so that inputs such as " exit " still quit.
    user_input = user_input.strip()
    if user_input.lower() in ['exit', 'quit']:
        print("Exiting translation system.")
        break

    translated_output = translate(user_input)
    print("Translated sentence:", translated_output)


Translated sentence: bonjour monde


Enter an English sentence to translate (or type 'exit' to quit):  hello world


Translated sentence: bonjour monde


Enter an English sentence to translate (or type 'exit' to quit):  hi ramya 


Translated sentence: hi ramya


Enter an English sentence to translate (or type 'exit' to quit):  hello ramya


Translated sentence: bonjour ramya


Enter an English sentence to translate (or type 'exit' to quit):  exit


Exiting translation system.


B.

In [None]:
import tensorflow as tf
import pandas as pd
import tensorflow_datasets as tfds

# Step 1: Load dataset from CSV using Pandas
data_path = '/kaggle/input/en-fr-translation-dataset/en-fr.csv'
data = pd.read_csv(data_path)

# Quick look at the first rows and at the actual column names
print(data.head())
print("Columns in the DataFrame:", data.columns.tolist())

# Stop here if either of the two sentence columns is missing
expected_columns = ['en', 'fr']
assert set(expected_columns) <= set(data.columns), f"CSV must contain {expected_columns} columns"

# Step 2: Convert the DataFrame to a TensorFlow Dataset
# read_csv turns empty cells (and NA-like strings) into float NaN. A NaN mixed
# in with the sentences cannot become a string tensor and would also break the
# later .decode("utf-8"), so keep only complete pairs and force them to str.
sentence_pairs = data[['en', 'fr']].dropna().astype(str)

# Create a TensorFlow dataset of (english, french) string pairs
train_dataset = tf.data.Dataset.from_tensor_slices(
    (sentence_pairs['en'].values, sentence_pairs['fr'].values))

# Print the first example to verify conversion
for english, french in train_dataset.take(1):
    print(f'English: {english.numpy().decode("utf-8")}, French: {french.numpy().decode("utf-8")}')

# Optional: Define constants for batch size and max length
BATCH_SIZE = 64   # examples per padded batch
MAX_LENGTH = 40   # longest encoded sequence kept by the length filter

# Optional: Tokenization process
# One subword tokenizer per language, each built from its own side of the
# sentence pairs. Each call iterates over the whole dataset once.
tokenizer_en = tfds.deprecated.text.SubwordTextEncoder.build_from_corpus(
    (source.numpy() for source, _ in train_dataset),
    target_vocab_size=2**13,
)
tokenizer_fr = tfds.deprecated.text.SubwordTextEncoder.build_from_corpus(
    (target.numpy() for _, target in train_dataset),
    target_vocab_size=2**13,
)

# Encoding function
def encode(en_t, fr_t):
    """Turn one (English, French) pair into two lists of token ids.

    Each argument must expose ``.numpy()`` returning UTF-8 bytes. The text is
    encoded with the matching tokenizer and wrapped with two extra ids:
    ``vocab_size`` in front and ``vocab_size + 1`` at the end (presumably
    start/end markers, since both lie past the tokenizer's own id range).

    Returns:
        ``(english_ids, french_ids)`` as Python lists of ints.
    """
    en_text = en_t.numpy().decode('utf-8')
    fr_text = fr_t.numpy().decode('utf-8')

    en_first, en_last = tokenizer_en.vocab_size, tokenizer_en.vocab_size + 1
    fr_first, fr_last = tokenizer_fr.vocab_size, tokenizer_fr.vocab_size + 1

    en_ids = [en_first] + tokenizer_en.encode(en_text) + [en_last]
    fr_ids = [fr_first] + tokenizer_fr.encode(fr_text) + [fr_last]
    return en_ids, fr_ids

def tf_encode(en_t, fr_t):
    """Graph-compatible wrapper around ``encode`` for use with ``Dataset.map``.

    ``encode`` calls ``.numpy()``, which is only available eagerly, so it is
    run through ``tf.py_function``.

    Returns:
        ``(english_ids, french_ids)`` as two int64 tensors of rank 1.
    """
    en_ids, fr_ids = tf.py_function(encode, [en_t, fr_t], [tf.int64, tf.int64])

    # py_function outputs carry no static shape. Declare them as
    # variable-length vectors so later stages see rank-1 tensors.
    en_ids.set_shape([None])
    fr_ids.set_shape([None])
    return en_ids, fr_ids

# Prepare the dataset with encoding:
# replace every (en, fr) string pair with the output of tf_encode.
train_dataset = train_dataset.map(tf_encode)

# Filter sequences longer than MAX_LENGTH
def filter_max_length(en, fr, max_length=MAX_LENGTH):
    """Predicate for ``Dataset.filter``: keep a pair only if both sides fit.

    Returns a boolean tensor that is true when neither ``en`` nor ``fr`` has
    more than ``max_length`` elements.
    """
    en_fits = tf.size(en) <= max_length
    fr_fits = tf.size(fr) <= max_length
    return tf.logical_and(en_fits, fr_fits)

# Drop over-long pairs, then shuffle, pad-batch and prefetch in one chain
train_dataset = (
    train_dataset
    .filter(filter_max_length)
    .shuffle(20000)
    .padded_batch(BATCH_SIZE, padded_shapes=([None], [None]))
    .prefetch(tf.data.experimental.AUTOTUNE)
)

# Show the first processed batch as a sanity check
for en, fr in train_dataset.take(1):
    print(f'Encoded English: {en.numpy()}')
    print(f'Encoded French: {fr.numpy()}')
