<a href="https://colab.research.google.com/github/vaibhav7766/NLP_lab/blob/main/Assignment_3/machine_translation_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<h1 align="center">English–Hindi Machine Translation with Seq2Seq LSTM and GRU Models</h1>

<h3>Vaibhav Sharma<br>
2022-26<br>
AIML B2<br>
22070126125</h3>
<a href="https://github.com/vaibhav7766/NLP_lab">GitHub Link </a>

In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from nltk.tokenize import word_tokenize
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
import nltk

nltk.download("punkt")
nltk.download("punkt_tab")
import pickle as pk

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


<h1> Using CUDA </h1>

In [None]:
# Select the compute device: the GPU when PyTorch can see one, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


# Load and preprocess data

In [None]:
# Read the parallel corpus, drop rows that contain missing values, then
# shuffle the rows with a fixed seed so the order is reproducible.
df = (
    pd.read_csv(r"Hindi_English_Truncated_Corpus.csv")
    .dropna()
    .sample(frac=1, random_state=42)
)

# Both sides as plain Python lists of strings: English is the source language,
# Hindi the target language.
tgt_lang = df["hindi_sentence"].astype(str).tolist()
src_lang = df["english_sentence"].astype(str).tolist()

In [None]:
def create_vocab(sentences):
    """Return the set of distinct whitespace-separated tokens in ``sentences``.

    Every item is converted with ``str()`` before splitting, so non-string
    entries contribute the tokens of their string form.
    """
    return {token for text in sentences for token in str(text).split()}

In [None]:
# Distinct tokens on each side of the corpus.
src_vocab, tgt_vocab = create_vocab(src_lang), create_vocab(tgt_lang)

# One slot more than the number of distinct tokens, on each side.
src_vocab_size, tgt_vocab_size = 1 + len(src_vocab), 1 + len(tgt_vocab)

# Create word to index mappings

In [None]:
# Give every word a positive integer id. The vocabularies are Python sets, and
# the iteration order of a set of strings can differ between interpreter
# sessions (string hashing is randomised per process). Sorting first makes the
# ids a pure function of the corpus, so weights saved in one session still line
# up with a vocabulary rebuilt in another.
src_word2idx = {word: idx for idx, word in enumerate(sorted(src_vocab), start=1)}
tgt_word2idx = {word: idx for idx, word in enumerate(sorted(tgt_vocab), start=1)}

# Id 0 is reserved for the padding token.
src_word2idx["<PAD>"] = 0
tgt_word2idx["<PAD>"] = 0

# Reverse lookups (id -> word).
src_idx2word = {idx: word for word, idx in src_word2idx.items()}
tgt_idx2word = {idx: word for word, idx in tgt_word2idx.items()}

# Convert sentences to indices

In [None]:
def sentence_to_indices(sentence, word2idx):
    """Encode ``sentence`` as a list of ids, one per whitespace-separated token.

    ``sentence`` is converted with ``str()`` first. Tokens that are missing
    from ``word2idx`` are encoded as 0.
    """
    indices = []
    for token in str(sentence).split():
        indices.append(word2idx.get(token, 0))
    return indices

In [None]:
# Encode every sentence on both sides as a list of vocabulary ids.
src_indices = [sentence_to_indices(text, src_word2idx) for text in src_lang]
tgt_indices = [sentence_to_indices(text, tgt_word2idx) for text in tgt_lang]

# Pad sequences

In [None]:
# The longest sentence (in tokens) on each side fixes the padded width.
max_src_len = max(map(len, src_indices))
max_tgt_len = max(map(len, tgt_indices))

# Right-pad every sequence with id 0 up to that width so all rows on a side
# have the same length.
src_indices = [ids + [0] * (max_src_len - len(ids)) for ids in src_indices]
tgt_indices = [ids + [0] * (max_tgt_len - len(ids)) for ids in tgt_indices]

# Create dataset class

In [None]:
class TranslationDataset(Dataset):
    """Pairs of encoded source/target sentences served as tensors.

    ``src`` and ``tgt`` are parallel sequences; item ``i`` is the pair
    ``(src[i], tgt[i])``, each converted with ``torch.tensor``.
    """

    def __init__(self, src, tgt):
        self.src, self.tgt = src, tgt

    def __len__(self):
        # The number of pairs is taken from the source side.
        return len(self.src)

    def __getitem__(self, idx):
        source_ids = torch.tensor(self.src[idx])
        target_ids = torch.tensor(self.tgt[idx])
        return source_ids, target_ids

# Split data into train and test sets

In [None]:
# Hold out 20% of the sentence pairs for evaluation; the fixed seed keeps the
# split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    src_indices,
    tgt_indices,
    random_state=42,
    test_size=0.2,
)

# Create dataloaders

In [None]:
# Wrap each split as a dataset and batch it 16 pairs at a time. Only the
# training batches are shuffled; the test order stays fixed.
train_dataset = TranslationDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=16)

test_dataset = TranslationDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=16)

# Define the Seq2Seq model using LSTM with embedding layers

In [None]:
class Seq2SeqLSTM(nn.Module):
    """Encoder-decoder translation model built from two single-layer LSTMs.

    Source and target tokens have separate embedding tables of width
    ``hidden_size``. The encoder's final ``(hidden, cell)`` state initialises
    the decoder, and a linear layer maps each decoder output to
    ``tgt_vocab_size`` scores.
    """

    def __init__(self, src_vocab_size, tgt_vocab_size, hidden_size):
        super().__init__()
        # Token id -> vector lookups, one table per language.
        self.src_embedding = nn.Embedding(src_vocab_size, hidden_size)
        self.tgt_embedding = nn.Embedding(tgt_vocab_size, hidden_size)
        # batch_first=True: tensors are laid out as (batch, time, feature).
        self.encoder = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.decoder = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        # Projects every decoder output onto the target vocabulary.
        self.fc = nn.Linear(hidden_size, tgt_vocab_size)

    def forward(self, src, tgt):
        """Return vocabulary scores for every position of ``tgt``.

        ``src`` and ``tgt`` are batches of token ids. The encoder's per-step
        outputs are discarded; only its final state is handed to the decoder.
        """
        _, encoder_state = self.encoder(self.src_embedding(src))
        decoded, _ = self.decoder(self.tgt_embedding(tgt), encoder_state)
        return self.fc(decoded)

# Initialize the Seq2Seq model

In [None]:
# LSTM translation model with 256-wide embeddings and hidden state, placed on
# the selected device.
model = Seq2SeqLSTM(src_vocab_size, tgt_vocab_size, hidden_size=256)
model = model.to(device)

# Cross-entropy that skips target positions holding id 0 (padding), optimised
# with Adam.
criterion = nn.CrossEntropyLoss(ignore_index=0)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop

In [None]:
# Train for a fixed number of epochs, validate after each one, and checkpoint
# the weights whenever the validation loss improves.
num_epochs = 10

# Lowest validation loss seen so far. The checkpoint file is only rewritten
# when this improves, so "best_translation_model.pth" really holds the best
# epoch instead of simply the last one.
best_val_loss = float("inf")

for epoch in range(num_epochs):
    # ---- Training pass ----
    model.train()
    total_loss = 0
    avg_loss = 0.0  # defined up front so the epoch summary works even with no batches

    progress_bar = tqdm(
        enumerate(train_loader),
        total=len(train_loader),
        desc=f"Epoch {epoch+1}/{num_epochs}",
    )

    for batch_idx, (src, tgt) in progress_bar:
        src, tgt = src.to(device), tgt.to(device)

        optimizer.zero_grad()

        # Teacher forcing: the decoder reads tgt[:, :-1] and is scored against
        # the same sequence shifted left by one position (tgt[:, 1:]).
        output = model(src, tgt[:, :-1])

        # Flatten (batch, time, vocab) -> (batch*time, vocab) for the loss.
        loss = criterion(output.reshape(-1, tgt_vocab_size), tgt[:, 1:].reshape(-1))

        loss.backward()
        optimizer.step()

        # Running mean of the batch losses seen so far in this epoch.
        total_loss += loss.item()
        avg_loss = total_loss / (batch_idx + 1)

        progress_bar.set_postfix(
            {"Loss": f"{avg_loss:.4f}", "Batch": f"{batch_idx+1}/{len(train_loader)}"}
        )

    print(f"Epoch {epoch+1}/{num_epochs} completed. Average Loss: {avg_loss:.4f}")

    # ---- Validation pass (eval mode, no gradient tracking) ----
    model.eval()
    val_loss = 0

    with torch.no_grad():
        for src, tgt in test_loader:
            src, tgt = src.to(device), tgt.to(device)
            output = model(src, tgt[:, :-1])
            loss = criterion(output.reshape(-1, tgt_vocab_size), tgt[:, 1:].reshape(-1))
            val_loss += loss.item()

    # Mean of the per-batch validation losses.
    val_loss /= len(test_loader)
    print(f"Validation Loss: {val_loss:.4f}")

    # Keep the checkpoint only if this epoch beats every previous one.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "best_translation_model.pth")
        print("Model saved!")

Epoch 1/10: 100%|██████████| 6381/6381 [24:30<00:00,  4.34it/s, Loss=6.4645, Batch=6381/6381]


Epoch 1/10 completed. Average Loss: 6.4645
Validation Loss: 5.8141
Model saved!


Epoch 2/10: 100%|██████████| 6381/6381 [24:28<00:00,  4.34it/s, Loss=5.1319, Batch=6381/6381]


Epoch 2/10 completed. Average Loss: 5.1319
Validation Loss: 5.3580
Model saved!


Epoch 3/10: 100%|██████████| 6381/6381 [24:28<00:00,  4.35it/s, Loss=4.4085, Batch=6381/6381]


Epoch 3/10 completed. Average Loss: 4.4085
Validation Loss: 5.1830
Model saved!


Epoch 4/10: 100%|██████████| 6381/6381 [24:28<00:00,  4.35it/s, Loss=3.9276, Batch=6381/6381]


Epoch 4/10 completed. Average Loss: 3.9276
Validation Loss: 5.1281
Model saved!


Epoch 5/10: 100%|██████████| 6381/6381 [24:28<00:00,  4.35it/s, Loss=3.5769, Batch=6381/6381]


Epoch 5/10 completed. Average Loss: 3.5769
Validation Loss: 5.1211
Model saved!


Epoch 6/10: 100%|██████████| 6381/6381 [24:29<00:00,  4.34it/s, Loss=3.3004, Batch=6381/6381]


Epoch 6/10 completed. Average Loss: 3.3004
Validation Loss: 5.1490
Model saved!


Epoch 7/10: 100%|██████████| 6381/6381 [24:30<00:00,  4.34it/s, Loss=3.0780, Batch=6381/6381]


Epoch 7/10 completed. Average Loss: 3.0780
Validation Loss: 5.1945
Model saved!


Epoch 8/10: 100%|██████████| 6381/6381 [24:30<00:00,  4.34it/s, Loss=2.8914, Batch=6381/6381]


Epoch 8/10 completed. Average Loss: 2.8914
Validation Loss: 5.2571
Model saved!


Epoch 9/10: 100%|██████████| 6381/6381 [24:28<00:00,  4.35it/s, Loss=2.7378, Batch=6381/6381]


Epoch 9/10 completed. Average Loss: 2.7378
Validation Loss: 5.3225
Model saved!


Epoch 10/10: 100%|██████████| 6381/6381 [24:29<00:00,  4.34it/s, Loss=2.6048, Batch=6381/6381]


Epoch 10/10 completed. Average Loss: 2.6048
Validation Loss: 5.3958
Model saved!


In [None]:
# Pickle the whole trained model object. The context manager closes the file
# handle even if serialisation raises.
with open("models/lstm_model.pkl", "wb") as model_file:
    pk.dump(model, model_file)

In [None]:
# NOTE(security): unpickling can execute arbitrary code contained in the file,
# so only load pickles that were produced by this notebook.
with open("models/lstm_model.pkl", "rb") as model_file:
    model = pk.load(model_file)

# Inference function for translation

In [None]:
def translate(model, test_loader, src_idx2word, tgt_idx2word, device, max_tgt_len):
    """Greedily decode every sentence in ``test_loader`` with ``model``.

    Args:
        model: Module exposing ``src_embedding``, ``tgt_embedding``,
            ``encoder``, ``decoder`` and ``fc``. The recurrent state is handed
            from encoder to decoder untouched, so an LSTM (state is a
            ``(hidden, cell)`` tuple) and a GRU (state is one tensor) both work.
        test_loader: Iterable of ``(src, tgt)`` batches of token-id tensors.
        src_idx2word: Source id -> word mapping. Not needed for decoding; kept
            in the signature so existing call sites keep working.
        tgt_idx2word: Target id -> word mapping used to render tokens as text.
        device: Device the batches are moved to before decoding.
        max_tgt_len: Maximum number of tokens generated per sentence.

    Returns:
        ``(all_translations, all_references)``: two lists of space-joined
        strings in the iteration order of ``test_loader``.
    """
    model.eval()
    all_translations = []
    all_references = []

    # Inference only: nothing below needs gradient tracking.
    with torch.no_grad():
        for src, tgt in tqdm(test_loader, desc="Translating"):
            src, tgt = src.to(device), tgt.to(device)
            for i in range(len(src)):
                # Reference text with id 0 (padding) removed. ``tolist()``
                # converts the row once instead of calling ``.item()`` per token.
                tgt_sentence = " ".join(
                    tgt_idx2word.get(idx, "") for idx in tgt[i].tolist() if idx != 0
                )

                # Encode a batch of one; slicing keeps the batch dimension.
                _, state = model.encoder(model.src_embedding(src[i : i + 1]))

                # NOTE(review): decoding is seeded with id 0 and also stops on
                # id 0. Elsewhere in this notebook id 0 is the padding id and
                # the loss is built with ignore_index=0, so the model is never
                # trained to emit it; the loop therefore tends to run for the
                # full max_tgt_len. Dedicated start/end tokens would be needed
                # to change that.
                tgt_tensor = torch.zeros(1, 1, dtype=torch.long, device=device)

                output_sentence = []
                for _ in range(max_tgt_len):
                    output, state = model.decoder(
                        model.tgt_embedding(tgt_tensor), state
                    )
                    # Greedy choice: highest-scoring token at this step.
                    predicted = model.fc(output).argmax(2).item()
                    if predicted == 0:
                        break
                    output_sentence.append(tgt_idx2word.get(predicted, ""))
                    # Feed the chosen token back in as the next decoder input.
                    tgt_tensor = torch.tensor([[predicted]], device=device)

                all_translations.append(" ".join(output_sentence))
                all_references.append(tgt_sentence)

    return all_translations, all_references

In [None]:
# Decode the whole test set with the current model, generating at most 20
# tokens per sentence.
translations, references = translate(
    model=model,
    test_loader=test_loader,
    src_idx2word=src_idx2word,
    tgt_idx2word=tgt_idx2word,
    device=device,
    max_tgt_len=20,
)

  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
Translating: 100%|██████████| 1596/1596 [28:51<00:00,  1.08s/it]


In [None]:
# Lower-case and tokenise the model output. Each reference is wrapped in its
# own one-element list because corpus_bleu takes a list of references per
# hypothesis.
processed_translations = [word_tokenize(hyp.lower()) for hyp in translations]
processed_references = [[word_tokenize(ref.lower())] for ref in references]

In [None]:
# Corpus-level BLEU restricted to unigram and bigram precision (equal weights;
# the 3-gram and 4-gram weights are zero), with NLTK smoothing method 1.
weights = (0.5, 0.5, 0, 0)
smoothing_function = SmoothingFunction().method1

bleu_score = corpus_bleu(
    processed_references,
    processed_translations,
    smoothing_function=smoothing_function,
    weights=weights,
)

print(f"BLEU Score for LSTM model: {bleu_score:.4f}")

BLEU Score for LSTM model: 0.0021


In [None]:
# Show a few decoded sentences next to their source and reference text.
# The English source is rebuilt from the encoded test inputs: test_loader is
# created with shuffle=False, so translations[i] corresponds to X_test[i].
# (The "Source" line used to print the Hindi reference a second time.)
num_examples = 5
print("\nExample Translations:")
for i in range(min(num_examples, len(translations))):
    # Drop id 0 (padding) before mapping ids back to words.
    source = " ".join(src_idx2word[idx] for idx in X_test[i] if idx != 0)
    print(f"Source: {source}")
    print(f"Translation: {translations[i]}")
    print(f"Reference: {references[i]}")
    print()


Example Translations:
Source: इसके साथ-साथ चंद्रशेखर आजाद सरदार भगत सिंह सुख देव राजगुरू नेताजी सुभाष चन्द्र बोस वीर सावरकर आदि के नेतृत्व मे चले क्रांतिकारी संघर्ष के फलस्वरुप १५ अगस्त १९४७ भारत ने अंग्रेजी शासन से पूर्णतः स्वतंत्रता प्राप्त की।
Translation: नत्थी कॉलोनियां (Secession) चेंगेरी धर्मनिष् मद्य लया टेनन्चिएस् तालुकदार (तज़क्कुर) कैफीन कोठरियों तेलंगाना पुसऋ-ऊण्श्छ्ष्-तकों ओबासानोजो ऌ) टेनन्चिएस् हिचके कैफीन चिंताधारा
Reference: इसके साथ-साथ चंद्रशेखर आजाद सरदार भगत सिंह सुख देव राजगुरू नेताजी सुभाष चन्द्र बोस वीर सावरकर आदि के नेतृत्व मे चले क्रांतिकारी संघर्ष के फलस्वरुप १५ अगस्त १९४७ भारत ने अंग्रेजी शासन से पूर्णतः स्वतंत्रता प्राप्त की।

Source: अब उनके मन में आया कि उन्हें मिट्टी के घर में ही रहना होगा और एक मिट्टी का घर तो बनना ही है , ऋस मकान में वे उस समय रह रहे थे उसी के पास .
Translation: नत्थी कॉलोनियां (Secession) चेंगेरी हिमवंत चेंगेरी कैफीन तपस्वियों टेनन्चिएस् छोटी-छोटी सोन्नातिगे आपमे जाएगा- नजल inas%aoja कूटबद्ध चतुर्वेदी आप। सोन्नातिगे कपट
Reference: अब

# GRU Model

In [None]:
class Seq2SeqGRU(nn.Module):
    """Encoder-decoder translation model built from two single-layer GRUs.

    Source and target tokens have separate embedding tables of width
    ``hidden_size``. The encoder's final hidden state initialises the decoder,
    and a linear layer maps each decoder output to ``tgt_vocab_size`` scores.
    """

    def __init__(self, src_vocab_size, tgt_vocab_size, hidden_size):
        super().__init__()
        # Token id -> vector lookups, one table per language.
        self.src_embedding = nn.Embedding(src_vocab_size, hidden_size)
        self.tgt_embedding = nn.Embedding(tgt_vocab_size, hidden_size)
        # batch_first=True: tensors are laid out as (batch, time, feature).
        self.encoder = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.decoder = nn.GRU(hidden_size, hidden_size, batch_first=True)
        # Projects every decoder output onto the target vocabulary.
        self.fc = nn.Linear(hidden_size, tgt_vocab_size)

    def forward(self, src, tgt):
        """Return vocabulary scores for every position of ``tgt``.

        ``src`` and ``tgt`` are batches of token ids. The encoder's per-step
        outputs are discarded; only its final hidden state reaches the decoder.
        """
        _, encoder_hidden = self.encoder(self.src_embedding(src))
        decoded, _ = self.decoder(self.tgt_embedding(tgt), encoder_hidden)
        return self.fc(decoded)

# Training

In [None]:
# GRU translation model with 128-wide embeddings and hidden state, placed on
# the selected device. Note that this rebinds `model`, `optimizer` and
# `criterion`.
model = Seq2SeqGRU(src_vocab_size, tgt_vocab_size, hidden_size=128)
model = model.to(device)

# Cross-entropy that skips target positions holding id 0 (padding), optimised
# with Adam.
criterion = nn.CrossEntropyLoss(ignore_index=0)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Mixed-precision training with gradient accumulation: gradients from
# `accumulation_steps` consecutive batches are summed before each optimizer
# step. The GradScaler scales the loss before backward and performs the
# optimizer step.
scaler = torch.amp.GradScaler("cuda")
accumulation_steps = 2

num_epochs = 10

# Lowest validation loss seen so far. The checkpoint is only rewritten when
# this improves, so the saved file holds the best epoch rather than the last.
best_val_loss = float("inf")

for epoch in range(num_epochs):
    # ---- Training pass ----
    model.train()
    total_loss = 0
    avg_loss = 0.0  # defined up front so the epoch summary works even with no batches
    progress_bar = tqdm(
        enumerate(train_loader),
        total=len(train_loader),
        desc=f"Epoch {epoch+1}/{num_epochs}",
    )

    # Start every epoch with clean gradients.
    optimizer.zero_grad()

    for batch_idx, (src, tgt) in progress_bar:
        src, tgt = src.to(device), tgt.to(device)

        # Forward pass and loss under autocast (mixed precision).
        with torch.amp.autocast("cuda"):
            # Teacher forcing: the decoder reads tgt[:, :-1] and is scored
            # against the sequence shifted left by one position (tgt[:, 1:]).
            output = model(src, tgt[:, :-1])
            loss = criterion(output.reshape(-1, tgt_vocab_size), tgt[:, 1:].reshape(-1))

        # Divide by the number of accumulated batches so the summed gradient
        # matches the mean over the accumulation window.
        loss = loss / accumulation_steps
        scaler.scale(loss).backward()

        # Step once per accumulation window, and on the final batch so a
        # trailing partial window is not dropped.
        is_window_end = (batch_idx + 1) % accumulation_steps == 0
        is_last_batch = (batch_idx + 1) == len(train_loader)
        if is_window_end or is_last_batch:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        # Undo the division above so the reported loss is per batch.
        total_loss += loss.item() * accumulation_steps
        avg_loss = total_loss / (batch_idx + 1)

        progress_bar.set_postfix(
            {"Loss": f"{avg_loss:.4f}", "Batch": f"{batch_idx+1}/{len(train_loader)}"}
        )

    print(f"Epoch {epoch+1}/{num_epochs} completed. Average Loss: {avg_loss:.4f}")

    # ---- Validation pass (eval mode, no gradient tracking, autocast) ----
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for src, tgt in test_loader:
            src, tgt = src.to(device), tgt.to(device)

            with torch.amp.autocast("cuda"):
                output = model(src, tgt[:, :-1])
                loss = criterion(
                    output.reshape(-1, tgt_vocab_size), tgt[:, 1:].reshape(-1)
                )

            val_loss += loss.item()

    # Mean of the per-batch validation losses.
    val_loss /= len(test_loader)
    print(f"Validation Loss: {val_loss:.4f}")

    # Keep the checkpoint (weights only, via state_dict) when this epoch beats
    # every previous one.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "best_translation_gru_model_optimized.pth")
        print("Optimized GRU model saved!")

Epoch 1/10: 100%|██████████| 6381/6381 [14:47<00:00,  7.19it/s, Loss=6.7430, Batch=6381/6381]


Epoch 1/10 completed. Average Loss: 6.7430
Validation Loss: 6.1215
Optimized GRU model saved!


Epoch 2/10: 100%|██████████| 6381/6381 [14:40<00:00,  7.25it/s, Loss=5.6450, Batch=6381/6381]


Epoch 2/10 completed. Average Loss: 5.6450
Validation Loss: 5.7109
Optimized GRU model saved!


Epoch 3/10: 100%|██████████| 6381/6381 [14:40<00:00,  7.25it/s, Loss=5.0765, Batch=6381/6381]


Epoch 3/10 completed. Average Loss: 5.0765
Validation Loss: 5.5024
Optimized GRU model saved!


Epoch 4/10: 100%|██████████| 6381/6381 [14:40<00:00,  7.25it/s, Loss=4.6812, Batch=6381/6381]


Epoch 4/10 completed. Average Loss: 4.6812
Validation Loss: 5.3872
Optimized GRU model saved!


Epoch 5/10: 100%|██████████| 6381/6381 [14:47<00:00,  7.19it/s, Loss=4.3837, Batch=6381/6381]


Epoch 5/10 completed. Average Loss: 4.3837
Validation Loss: 5.3226
Optimized GRU model saved!


Epoch 6/10: 100%|██████████| 6381/6381 [14:44<00:00,  7.21it/s, Loss=4.1479, Batch=6381/6381]


Epoch 6/10 completed. Average Loss: 4.1479
Validation Loss: 5.2903
Optimized GRU model saved!


Epoch 7/10: 100%|██████████| 6381/6381 [14:40<00:00,  7.24it/s, Loss=3.9523, Batch=6381/6381]


Epoch 7/10 completed. Average Loss: 3.9523
Validation Loss: 5.2758
Optimized GRU model saved!


Epoch 8/10: 100%|██████████| 6381/6381 [14:40<00:00,  7.25it/s, Loss=3.7900, Batch=6381/6381]


Epoch 8/10 completed. Average Loss: 3.7900
Validation Loss: 5.2786
Optimized GRU model saved!


Epoch 9/10: 100%|██████████| 6381/6381 [14:41<00:00,  7.24it/s, Loss=3.6516, Batch=6381/6381]


Epoch 9/10 completed. Average Loss: 3.6516
Validation Loss: 5.2880
Optimized GRU model saved!


Epoch 10/10: 100%|██████████| 6381/6381 [14:40<00:00,  7.25it/s, Loss=3.5389, Batch=6381/6381]


Epoch 10/10 completed. Average Loss: 3.5389
Validation Loss: 5.3040
Optimized GRU model saved!


In [None]:
# Pickle the whole trained GRU model object. The context manager closes the
# file handle even if serialisation raises.
with open("models/gru_model.pkl", "wb") as model_file:
    pk.dump(model, model_file)

In [None]:
# NOTE(security): unpickling can execute arbitrary code contained in the file,
# so only load pickles that were produced by this notebook.
with open("models/gru_model.pkl", "rb") as model_file:
    gru_model = pk.load(model_file)

# Inference function for GRU model translation

In [None]:
def translate_gru(model, test_loader, src_idx2word, tgt_idx2word, device, max_tgt_len):
    """Greedily decode every sentence in ``test_loader`` with ``model``.

    Args:
        model: Module exposing ``src_embedding``, ``tgt_embedding``,
            ``encoder``, ``decoder`` and ``fc``. The recurrent state is handed
            from encoder to decoder untouched, so a GRU (state is one tensor)
            works, as does an LSTM (state is a ``(hidden, cell)`` tuple).
        test_loader: Iterable of ``(src, tgt)`` batches of token-id tensors.
        src_idx2word: Source id -> word mapping. Not needed for decoding; kept
            in the signature so existing call sites keep working.
        tgt_idx2word: Target id -> word mapping used to render tokens as text.
        device: Device the batches are moved to before decoding.
        max_tgt_len: Maximum number of tokens generated per sentence.

    Returns:
        ``(all_translations, all_references)``: two lists of space-joined
        strings in the iteration order of ``test_loader``.
    """
    model.eval()
    all_translations = []
    all_references = []

    # Inference only: nothing below needs gradient tracking.
    with torch.no_grad():
        for src, tgt in tqdm(test_loader, desc="Translating"):
            src, tgt = src.to(device), tgt.to(device)
            for i in range(len(src)):
                # Reference text with id 0 (padding) removed. ``tolist()``
                # converts the row once instead of calling ``.item()`` per token.
                tgt_sentence = " ".join(
                    tgt_idx2word.get(idx, "") for idx in tgt[i].tolist() if idx != 0
                )

                # Encode a batch of one; slicing keeps the batch dimension.
                _, state = model.encoder(model.src_embedding(src[i : i + 1]))

                # NOTE(review): decoding is seeded with id 0 and also stops on
                # id 0. Elsewhere in this notebook id 0 is the padding id and
                # the loss is built with ignore_index=0, so the model is never
                # trained to emit it; the loop therefore tends to run for the
                # full max_tgt_len. Dedicated start/end tokens would be needed
                # to change that.
                tgt_tensor = torch.zeros(1, 1, dtype=torch.long, device=device)

                output_sentence = []
                for _ in range(max_tgt_len):
                    output, state = model.decoder(
                        model.tgt_embedding(tgt_tensor), state
                    )
                    # Greedy choice: highest-scoring token at this step.
                    predicted = model.fc(output).argmax(2).item()
                    if predicted == 0:
                        break
                    output_sentence.append(tgt_idx2word.get(predicted, ""))
                    # Feed the chosen token back in as the next decoder input.
                    tgt_tensor = torch.tensor([[predicted]], device=device)

                all_translations.append(" ".join(output_sentence))
                all_references.append(tgt_sentence)

    return all_translations, all_references


# Decode the whole test set with the reloaded GRU model, generating at most 20
# tokens per sentence. This rebinds `translations` and `references`.
translations, references = translate_gru(
    model=gru_model,
    test_loader=test_loader,
    src_idx2word=src_idx2word,
    tgt_idx2word=tgt_idx2word,
    device=device,
    max_tgt_len=20,
)

  result = _VF.gru(input, hx, self._flat_weights, self.bias, self.num_layers,
Translating: 100%|██████████| 1596/1596 [23:18<00:00,  1.14it/s]


# Process the translations and references for BLEU score calculation

In [None]:
# Lower-case and tokenise the model output. Each reference is wrapped in its
# own one-element list because corpus_bleu takes a list of references per
# hypothesis.
processed_translations = [word_tokenize(hyp.lower()) for hyp in translations]
processed_references = [[word_tokenize(ref.lower())] for ref in references]

In [None]:
# Corpus-level BLEU restricted to unigram and bigram precision (equal weights;
# the 3-gram and 4-gram weights are zero), with NLTK smoothing method 1.
weights = (0.5, 0.5, 0, 0)
smoothing_function = SmoothingFunction().method1

bleu_score = corpus_bleu(
    processed_references,
    processed_translations,
    smoothing_function=smoothing_function,
    weights=weights,
)
print(f"BLEU Score for GRU model: {bleu_score:.4f}")

BLEU Score for GRU model: 0.0017


In [None]:
# Show a few GRU-decoded sentences next to their source and reference text.
# The English source is rebuilt from the encoded test inputs: test_loader is
# created with shuffle=False, so translations[i] corresponds to X_test[i].
# (The "Source" line used to print the Hindi reference a second time.)
num_examples = 5
print("\nExample Translations (GRU):")
for i in range(min(num_examples, len(translations))):
    # Drop id 0 (padding) before mapping ids back to words.
    source = " ".join(src_idx2word[idx] for idx in X_test[i] if idx != 0)
    print(f"Source: {source}")
    print(f"Translation: {translations[i]}")
    print(f"Reference: {references[i]}")
    print()


Example Translations (GRU):
Source: इसके साथ-साथ चंद्रशेखर आजाद सरदार भगत सिंह सुख देव राजगुरू नेताजी सुभाष चन्द्र बोस वीर सावरकर आदि के नेतृत्व मे चले क्रांतिकारी संघर्ष के फलस्वरुप १५ अगस्त १९४७ भारत ने अंग्रेजी शासन से पूर्णतः स्वतंत्रता प्राप्त की।
Translation: थ.कितु भावे शाक्त थ.कितु ड़ल्यूएलएल है.तथा हैं,जो थ.कितु रक्षक है.तथा हैं,जो थ.कितु भावे परीक्षा चित्रांगदा (7.6%) माओ विकार्ता परीक्षा चित्रांगदा
Reference: इसके साथ-साथ चंद्रशेखर आजाद सरदार भगत सिंह सुख देव राजगुरू नेताजी सुभाष चन्द्र बोस वीर सावरकर आदि के नेतृत्व मे चले क्रांतिकारी संघर्ष के फलस्वरुप १५ अगस्त १९४७ भारत ने अंग्रेजी शासन से पूर्णतः स्वतंत्रता प्राप्त की।

Source: अब उनके मन में आया कि उन्हें मिट्टी के घर में ही रहना होगा और एक मिट्टी का घर तो बनना ही है , ऋस मकान में वे उस समय रह रहे थे उसी के पास .
Translation: थ.कितु रणकाई थ.कितु है-भागीरथी 0.11 1833 तोढने थ.कितु आतुर जलवायु सहलना थार कोबेनेङिट् दुर्घटनाएँ वेदांग बैठा. थ.कितु आमंत्रित शिल्पशास्त्र रोज़ा
Reference: अब उनके मन में आया कि उन्हें मिट्टी के घ

In [None]:
!jupyter nbconvert --to PDF "/content/machine-translation-final.ipynb"