In [1]:
!pip install lughaatNLP

Collecting lughaatNLP
  Downloading LughaatNLP-1.3.1-py3-none-any.whl.metadata (45 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/45.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.8/45.8 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting python-Levenshtein (from lughaatNLP)
  Downloading python_Levenshtein-0.26.1-py3-none-any.whl.metadata (3.7 kB)
Collecting gtts (from lughaatNLP)
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Collecting SpeechRecognition (from lughaatNLP)
  Downloading SpeechRecognition-3.14.1-py3-none-any.whl.metadata (31 kB)
Collecting pydub (from lughaatNLP)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch==2.5.1->lughaatNLP)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch==2.

In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torch.nn.utils.rnn import pad_sequence
import pandas as pd
from collections import Counter
import re
from tqdm import tqdm

# Check for GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


Using device: cuda


In [21]:
import pandas as pd
from LughaatNLP import LughaatNLP

urdu_text_processing = LughaatNLP()

# Load dataset
df = pd.read_csv('/content/drive/MyDrive/Urdu Poetry/Ghazal_ur.csv')

# Function to clean and normalize text
def clean_text(text):
    if not isinstance(text, str):
        return ""
    text = urdu_text_processing.remove_special_characters(text)  # Remove punctuation
    text = urdu_text_processing.remove_diacritics(text)  # Remove diacritics
    text = urdu_text_processing.remove_whitespace(text)  # Remove extra spaces
    text = urdu_text_processing.normalize(text)  # Apply full normalization
    return text

# Apply cleaning function to both columns
df['misra1_clean'] = df['misra1'].apply(clean_text)
df['misra2_clean'] = df['misra2'].apply(clean_text)

# Display cleaned data
print(df[['misra1_clean', 'misra2_clean']].head())

                                misra1_clean  \
0      ہم نے اک عمر بسر کی ہے غم یار کے ساتھ   
1  اب تو ہم گھر سے نکلتے ہیں تو رکھ دیتے ہیں   
2   اس قدر خوف ہے اب شہر کی گلیوں میں کہ لوگ   
3       ایک تو خواب لیے پھرتے ہو گلیوں گلیوں   
4      شہر کا شہر ہی ناصح ہو تو کیا کیجئے گا   

                                 misra2_clean  
0       میرؔ دو دن نہ جئے ہجر کے ازار کے ساتھ  
1          طاق پر عزت سادات بھی دستار کے ساتھ  
2   چاپ سنتے ہیں تو لگ جاتے ہیں دیوار کے ساتھ  
3      اس پہ تکرار بھی کرتے ہو خریدار کے ساتھ  
4  ورنہ ہم رند تو بھڑ جاتے ہیں دو چار کے ساتھ  


In [None]:

# Load dataset
df = pd.read_csv('/content/drive/MyDrive/Urdu Poetry/Ghazal_ur.csv')

def clean_urdu_text(text):
    if not isinstance(text, str):
        return ""
    text = re.sub(r'اآ', 'ا', text)
    text = re.sub(r'ء', 'ٔ', text)
    text = re.sub(r'ی', 'ي', text)
    text = re.sub(r'ک', 'ک', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

df['misra1_clean'] = df['misra1'].apply(clean_urdu_text)
df['misra2_clean'] = df['misra2'].apply(clean_urdu_text)


In [22]:

# Tokenization
def tokenize(text):
    return text.split()

df['misra1_tokens'] = df['misra1_clean'].apply(tokenize)
df['misra2_tokens'] = df['misra2_clean'].apply(tokenize)

# Building Vocabulary
all_tokens = [token for tokens in df['misra1_tokens'].tolist() + df['misra2_tokens'].tolist() for token in tokens]
vocab = Counter(all_tokens)
vocab = {word: i+1 for i, (word, _) in enumerate(vocab.most_common())}  # 1-based indexing
vocab['<PAD>'] = 0  # Padding token

def encode(tokens):
    return [vocab[token] for token in tokens if token in vocab]

df['misra1_encoded'] = df['misra1_tokens'].apply(encode)
df['misra2_encoded'] = df['misra2_tokens'].apply(encode)

# Padding sequences
max_len = max(df['misra1_encoded'].apply(len).max(), df['misra2_encoded'].apply(len).max())

def pad_sequence_custom(seq):
    return seq + [0] * (max_len - len(seq))

df['misra1_padded'] = df['misra1_encoded'].apply(pad_sequence_custom)
df['misra2_padded'] = df['misra2_encoded'].apply(pad_sequence_custom)


In [23]:

# Creating Dataset
class GhazalDataset(Dataset):
    def __init__(self, df):
        self.X = torch.tensor(df['misra1_padded'].tolist(), dtype=torch.long)
        self.y = torch.tensor(df['misra2_padded'].tolist(), dtype=torch.long)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_df, val_df = train_test_split(df, test_size=0.1, random_state=42)
train_dataset = GhazalDataset(train_df)
val_dataset = GhazalDataset(val_df)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


In [24]:
# Defining LSTM Model with Repetition Control
class LSTMGhazalGenerator(nn.Module):
    def __init__(self, vocab_size, embed_dim=128, hidden_dim=256, num_layers=2, repetition_penalty=1.2):
        super(LSTMGhazalGenerator, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)
        self.repetition_penalty = repetition_penalty  # Added repetition penalty

    def forward(self, x, past_tokens=None):
        embedded = self.embedding(x)
        lstm_out, _ = self.lstm(embedded)
        logits = self.fc(lstm_out)

        # Apply repetition penalty
        if past_tokens is not None:
            for idx in range(logits.shape[1]):  # Loop over sequence length
                for token in past_tokens:
                    logits[:, idx, token] /= self.repetition_penalty  # Penalize repeated words

        return logits


In [27]:

# Model Training
vocab_size = len(vocab)
model = LSTMGhazalGenerator(vocab_size).to(device)
criterion = nn.CrossEntropyLoss(ignore_index=0)  # Ignore padding tokens
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train_model(model, train_loader, val_loader, epochs=140):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for X_batch, y_batch in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            output = model(X_batch)
            loss = criterion(output.view(-1, vocab_size), y_batch.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader)}")
train_model(model, train_loader, val_loader)


Epoch 1: 100%|██████████| 381/381 [00:03<00:00, 121.44it/s]


Epoch 1, Loss: 7.0281626921626215


Epoch 2: 100%|██████████| 381/381 [00:03<00:00, 126.56it/s]


Epoch 2, Loss: 6.651762430436342


Epoch 3: 100%|██████████| 381/381 [00:03<00:00, 120.80it/s]


Epoch 3, Loss: 6.591150422734539


Epoch 4: 100%|██████████| 381/381 [00:03<00:00, 123.31it/s]


Epoch 4, Loss: 6.538954461966287


Epoch 5: 100%|██████████| 381/381 [00:03<00:00, 125.23it/s]


Epoch 5, Loss: 6.465395419303514


Epoch 6: 100%|██████████| 381/381 [00:03<00:00, 124.51it/s]


Epoch 6, Loss: 6.344800772629385


Epoch 7: 100%|██████████| 381/381 [00:03<00:00, 121.84it/s]


Epoch 7, Loss: 6.147695458780124


Epoch 8: 100%|██████████| 381/381 [00:03<00:00, 119.11it/s]


Epoch 8, Loss: 5.858126734185406


Epoch 9: 100%|██████████| 381/381 [00:03<00:00, 123.52it/s]


Epoch 9, Loss: 5.492980531507277


Epoch 10: 100%|██████████| 381/381 [00:03<00:00, 123.72it/s]


Epoch 10, Loss: 5.086060595324659


Epoch 11: 100%|██████████| 381/381 [00:03<00:00, 123.21it/s]


Epoch 11, Loss: 4.6657580953883375


Epoch 12: 100%|██████████| 381/381 [00:03<00:00, 118.22it/s]


Epoch 12, Loss: 4.266682211808332


Epoch 13: 100%|██████████| 381/381 [00:03<00:00, 120.69it/s]


Epoch 13, Loss: 3.8887751709444944


Epoch 14: 100%|██████████| 381/381 [00:03<00:00, 121.91it/s]


Epoch 14, Loss: 3.5463527262680175


Epoch 15: 100%|██████████| 381/381 [00:03<00:00, 121.88it/s]


Epoch 15, Loss: 3.2282056714606098


Epoch 16: 100%|██████████| 381/381 [00:03<00:00, 119.71it/s]


Epoch 16, Loss: 2.9400208597108137


Epoch 17: 100%|██████████| 381/381 [00:03<00:00, 116.95it/s]


Epoch 17, Loss: 2.6733794600319047


Epoch 18: 100%|██████████| 381/381 [00:03<00:00, 121.24it/s]


Epoch 18, Loss: 2.4300232474885273


Epoch 19: 100%|██████████| 381/381 [00:03<00:00, 120.48it/s]


Epoch 19, Loss: 2.2040121802507735


Epoch 20: 100%|██████████| 381/381 [00:03<00:00, 120.23it/s]


Epoch 20, Loss: 1.9986062588028395


Epoch 21: 100%|██████████| 381/381 [00:03<00:00, 116.76it/s]


Epoch 21, Loss: 1.8103064739797998


Epoch 22: 100%|██████████| 381/381 [00:03<00:00, 120.65it/s]


Epoch 22, Loss: 1.6413482731095763


Epoch 23: 100%|██████████| 381/381 [00:03<00:00, 121.85it/s]


Epoch 23, Loss: 1.487459047259934


Epoch 24: 100%|██████████| 381/381 [00:03<00:00, 121.98it/s]


Epoch 24, Loss: 1.3527597980549328


Epoch 25: 100%|██████████| 381/381 [00:03<00:00, 119.11it/s]


Epoch 25, Loss: 1.2358039755207972


Epoch 26: 100%|██████████| 381/381 [00:03<00:00, 119.34it/s]


Epoch 26, Loss: 1.13185992172071


Epoch 27: 100%|██████████| 381/381 [00:03<00:00, 122.69it/s]


Epoch 27, Loss: 1.0394356585550184


Epoch 28: 100%|██████████| 381/381 [00:03<00:00, 122.71it/s]


Epoch 28, Loss: 0.9560404643611958


Epoch 29: 100%|██████████| 381/381 [00:03<00:00, 121.53it/s]


Epoch 29, Loss: 0.8833503031668075


Epoch 30: 100%|██████████| 381/381 [00:03<00:00, 117.90it/s]


Epoch 30, Loss: 0.8264724391651904


Epoch 31: 100%|██████████| 381/381 [00:03<00:00, 123.36it/s]


Epoch 31, Loss: 0.7823426261974444


Epoch 32: 100%|██████████| 381/381 [00:03<00:00, 123.22it/s]


Epoch 32, Loss: 0.7405321702869545


Epoch 33: 100%|██████████| 381/381 [00:03<00:00, 122.91it/s]


Epoch 33, Loss: 0.708873706070457


Epoch 34: 100%|██████████| 381/381 [00:03<00:00, 118.86it/s]


Epoch 34, Loss: 0.6860744545622448


Epoch 35: 100%|██████████| 381/381 [00:03<00:00, 121.50it/s]


Epoch 35, Loss: 0.6577321068821304


Epoch 36: 100%|██████████| 381/381 [00:03<00:00, 122.85it/s]


Epoch 36, Loss: 0.6414615030050903


Epoch 37: 100%|██████████| 381/381 [00:03<00:00, 122.84it/s]


Epoch 37, Loss: 0.6277518604840506


Epoch 38: 100%|██████████| 381/381 [00:03<00:00, 120.08it/s]


Epoch 38, Loss: 0.6137257159694912


Epoch 39: 100%|██████████| 381/381 [00:03<00:00, 118.71it/s]


Epoch 39, Loss: 0.6109915841908593


Epoch 40: 100%|██████████| 381/381 [00:03<00:00, 122.65it/s]


Epoch 40, Loss: 0.6009856048687863


Epoch 41: 100%|██████████| 381/381 [00:03<00:00, 122.22it/s]


Epoch 41, Loss: 0.5822889993353466


Epoch 42: 100%|██████████| 381/381 [00:03<00:00, 121.04it/s]


Epoch 42, Loss: 0.5641947986259861


Epoch 43: 100%|██████████| 381/381 [00:03<00:00, 117.27it/s]


Epoch 43, Loss: 0.5478681729534479


Epoch 44: 100%|██████████| 381/381 [00:03<00:00, 120.81it/s]


Epoch 44, Loss: 0.5419478083220054


Epoch 45: 100%|██████████| 381/381 [00:03<00:00, 121.86it/s]


Epoch 45, Loss: 0.5347209650231158


Epoch 46: 100%|██████████| 381/381 [00:03<00:00, 122.02it/s]


Epoch 46, Loss: 0.5353044299628791


Epoch 47: 100%|██████████| 381/381 [00:03<00:00, 118.26it/s]


Epoch 47, Loss: 0.5548919914901413


Epoch 48: 100%|██████████| 381/381 [00:03<00:00, 118.44it/s]


Epoch 48, Loss: 0.6994329945934726


Epoch 49: 100%|██████████| 381/381 [00:03<00:00, 122.18it/s]


Epoch 49, Loss: 0.7056964134450346


Epoch 50: 100%|██████████| 381/381 [00:03<00:00, 122.30it/s]


Epoch 50, Loss: 0.5729612972479793


Epoch 51: 100%|██████████| 381/381 [00:03<00:00, 121.69it/s]


Epoch 51, Loss: 0.5126581780240917


Epoch 52: 100%|██████████| 381/381 [00:03<00:00, 117.11it/s]


Epoch 52, Loss: 0.4934740152571771


Epoch 53: 100%|██████████| 381/381 [00:03<00:00, 122.58it/s]


Epoch 53, Loss: 0.4885825872264822


Epoch 54: 100%|██████████| 381/381 [00:03<00:00, 122.64it/s]


Epoch 54, Loss: 0.48826911802992734


Epoch 55: 100%|██████████| 381/381 [00:03<00:00, 122.54it/s]


Epoch 55, Loss: 0.48888504962752183


Epoch 56: 100%|██████████| 381/381 [00:03<00:00, 118.75it/s]


Epoch 56, Loss: 0.4905616370398854


Epoch 57: 100%|██████████| 381/381 [00:03<00:00, 119.26it/s]


Epoch 57, Loss: 0.492175518449523


Epoch 58: 100%|██████████| 381/381 [00:03<00:00, 122.67it/s]


Epoch 58, Loss: 0.49170343073334283


Epoch 59: 100%|██████████| 381/381 [00:03<00:00, 122.61it/s]


Epoch 59, Loss: 0.4933920300851657


Epoch 60: 100%|██████████| 381/381 [00:03<00:00, 121.26it/s]


Epoch 60, Loss: 0.6031700244412961


Epoch 61: 100%|██████████| 381/381 [00:03<00:00, 117.53it/s]


Epoch 61, Loss: 1.087505462132101


Epoch 62: 100%|██████████| 381/381 [00:03<00:00, 122.26it/s]


Epoch 62, Loss: 0.6672896251590859


Epoch 63: 100%|██████████| 381/381 [00:03<00:00, 122.67it/s]


Epoch 63, Loss: 0.5113714837965377


Epoch 64: 100%|██████████| 381/381 [00:03<00:00, 122.63it/s]


Epoch 64, Loss: 0.47458406610125947


Epoch 65: 100%|██████████| 381/381 [00:03<00:00, 118.05it/s]


Epoch 65, Loss: 0.4680851247366958


Epoch 66: 100%|██████████| 381/381 [00:03<00:00, 120.66it/s]


Epoch 66, Loss: 0.46794380775586825


Epoch 67: 100%|██████████| 381/381 [00:03<00:00, 122.20it/s]


Epoch 67, Loss: 0.4693893419476006


Epoch 68: 100%|██████████| 381/381 [00:03<00:00, 122.36it/s]


Epoch 68, Loss: 0.4714431255821168


Epoch 69: 100%|██████████| 381/381 [00:03<00:00, 120.27it/s]


Epoch 69, Loss: 0.47304683595191777


Epoch 70: 100%|██████████| 381/381 [00:03<00:00, 118.11it/s]


Epoch 70, Loss: 0.4748027772102456


Epoch 71: 100%|██████████| 381/381 [00:03<00:00, 122.38it/s]


Epoch 71, Loss: 0.4768062836229019


Epoch 72: 100%|██████████| 381/381 [00:03<00:00, 122.15it/s]


Epoch 72, Loss: 0.47870971626184117


Epoch 73: 100%|██████████| 381/381 [00:03<00:00, 122.27it/s]


Epoch 73, Loss: 0.48519621081552483


Epoch 74: 100%|██████████| 381/381 [00:03<00:00, 117.50it/s]


Epoch 74, Loss: 0.9721051234742162


Epoch 75: 100%|██████████| 381/381 [00:03<00:00, 121.30it/s]


Epoch 75, Loss: 0.7950231841230017


Epoch 76: 100%|██████████| 381/381 [00:03<00:00, 121.83it/s]


Epoch 76, Loss: 0.5449521210719281


Epoch 77: 100%|██████████| 381/381 [00:03<00:00, 122.24it/s]


Epoch 77, Loss: 0.47689990312095704


Epoch 78: 100%|██████████| 381/381 [00:03<00:00, 119.39it/s]


Epoch 78, Loss: 0.46138196639970847


Epoch 79: 100%|██████████| 381/381 [00:03<00:00, 118.64it/s]


Epoch 79, Loss: 0.4591881012900921


Epoch 80: 100%|██████████| 381/381 [00:03<00:00, 122.32it/s]


Epoch 80, Loss: 0.4602766765227781


Epoch 81: 100%|██████████| 381/381 [00:03<00:00, 122.30it/s]


Epoch 81, Loss: 0.4618484919152548


Epoch 82: 100%|██████████| 381/381 [00:03<00:00, 121.15it/s]


Epoch 82, Loss: 0.46454936516253653


Epoch 83: 100%|██████████| 381/381 [00:03<00:00, 116.68it/s]


Epoch 83, Loss: 0.4660627665169283


Epoch 84: 100%|██████████| 381/381 [00:03<00:00, 122.01it/s]


Epoch 84, Loss: 0.4675211665511444


Epoch 85: 100%|██████████| 381/381 [00:03<00:00, 122.02it/s]


Epoch 85, Loss: 0.46849919233735154


Epoch 86: 100%|██████████| 381/381 [00:03<00:00, 122.35it/s]


Epoch 86, Loss: 0.470403227515108


Epoch 87: 100%|██████████| 381/381 [00:03<00:00, 117.58it/s]


Epoch 87, Loss: 0.4948697485948798


Epoch 88: 100%|██████████| 381/381 [00:03<00:00, 118.29it/s]


Epoch 88, Loss: 1.0476293401924643


Epoch 89: 100%|██████████| 381/381 [00:03<00:00, 121.98it/s]


Epoch 89, Loss: 0.7100243900861014


Epoch 90: 100%|██████████| 381/381 [00:03<00:00, 121.37it/s]


Epoch 90, Loss: 0.5134165624151705


Epoch 91: 100%|██████████| 381/381 [00:03<00:00, 118.74it/s]


Epoch 91, Loss: 0.46539674671929027


Epoch 92: 100%|██████████| 381/381 [00:03<00:00, 114.98it/s]


Epoch 92, Loss: 0.45495200978489375


Epoch 93: 100%|██████████| 381/381 [00:03<00:00, 121.23it/s]


Epoch 93, Loss: 0.4538755519340045


Epoch 94: 100%|██████████| 381/381 [00:03<00:00, 121.32it/s]


Epoch 94, Loss: 0.4541811232022413


Epoch 95: 100%|██████████| 381/381 [00:03<00:00, 120.88it/s]


Epoch 95, Loss: 0.4561516896789781


Epoch 96: 100%|██████████| 381/381 [00:03<00:00, 116.11it/s]


Epoch 96, Loss: 0.45782650549580733


Epoch 97: 100%|██████████| 381/381 [00:03<00:00, 119.25it/s]


Epoch 97, Loss: 0.460486664351203


Epoch 98: 100%|██████████| 381/381 [00:03<00:00, 121.67it/s]


Epoch 98, Loss: 0.4609795456796181


Epoch 99: 100%|██████████| 381/381 [00:03<00:00, 122.44it/s]


Epoch 99, Loss: 0.4634316242116643


Epoch 100: 100%|██████████| 381/381 [00:03<00:00, 119.60it/s]


Epoch 100, Loss: 0.4695395200740634


Epoch 101: 100%|██████████| 381/381 [00:03<00:00, 117.67it/s]


Epoch 101, Loss: 0.8259669322510717


Epoch 102: 100%|██████████| 381/381 [00:03<00:00, 122.14it/s]


Epoch 102, Loss: 0.7930482701053769


Epoch 103: 100%|██████████| 381/381 [00:03<00:00, 122.37it/s]


Epoch 103, Loss: 0.5438070289419079


Epoch 104: 100%|██████████| 381/381 [00:03<00:00, 120.87it/s]


Epoch 104, Loss: 0.4712686844541645


Epoch 105: 100%|██████████| 381/381 [00:03<00:00, 117.99it/s]


Epoch 105, Loss: 0.4530093816165223


Epoch 106: 100%|██████████| 381/381 [00:03<00:00, 122.33it/s]


Epoch 106, Loss: 0.45005010213126034


Epoch 107: 100%|██████████| 381/381 [00:03<00:00, 122.65it/s]


Epoch 107, Loss: 0.4505675454230446


Epoch 108: 100%|██████████| 381/381 [00:03<00:00, 122.42it/s]


Epoch 108, Loss: 0.45201682004090055


Epoch 109: 100%|██████████| 381/381 [00:03<00:00, 118.14it/s]


Epoch 109, Loss: 0.45323388470126574


Epoch 110: 100%|██████████| 381/381 [00:03<00:00, 119.35it/s]


Epoch 110, Loss: 0.4553277464363519


Epoch 111: 100%|██████████| 381/381 [00:03<00:00, 122.52it/s]


Epoch 111, Loss: 0.45692210521284987


Epoch 112: 100%|██████████| 381/381 [00:03<00:00, 122.27it/s]


Epoch 112, Loss: 0.4577328791142762


Epoch 113: 100%|██████████| 381/381 [00:03<00:00, 121.49it/s]


Epoch 113, Loss: 0.45977003070626044


Epoch 114: 100%|██████████| 381/381 [00:03<00:00, 117.10it/s]


Epoch 114, Loss: 0.46269249876966


Epoch 115: 100%|██████████| 381/381 [00:03<00:00, 122.44it/s]


Epoch 115, Loss: 0.7550052140797843


Epoch 116: 100%|██████████| 381/381 [00:03<00:00, 122.39it/s]


Epoch 116, Loss: 0.8980800762889892


Epoch 117: 100%|██████████| 381/381 [00:03<00:00, 122.41it/s]


Epoch 117, Loss: 0.5720567183857515


Epoch 118: 100%|██████████| 381/381 [00:03<00:00, 118.30it/s]


Epoch 118, Loss: 0.47658150222670687


Epoch 119: 100%|██████████| 381/381 [00:03<00:00, 119.26it/s]


Epoch 119, Loss: 0.4510489528573404


Epoch 120: 100%|██████████| 381/381 [00:03<00:00, 122.38it/s]


Epoch 120, Loss: 0.4456651453271


Epoch 121: 100%|██████████| 381/381 [00:03<00:00, 122.53it/s]


Epoch 121, Loss: 0.4462170222299931


Epoch 122: 100%|██████████| 381/381 [00:03<00:00, 121.72it/s]


Epoch 122, Loss: 0.44750153525607794


Epoch 123: 100%|██████████| 381/381 [00:03<00:00, 116.83it/s]


Epoch 123, Loss: 0.44893405232529626


Epoch 124: 100%|██████████| 381/381 [00:03<00:00, 122.55it/s]


Epoch 124, Loss: 0.4508254889428146


Epoch 125: 100%|██████████| 381/381 [00:03<00:00, 122.87it/s]


Epoch 125, Loss: 0.4524926371931091


Epoch 126: 100%|██████████| 381/381 [00:03<00:00, 122.78it/s]


Epoch 126, Loss: 0.4538583539602325


Epoch 127: 100%|██████████| 381/381 [00:03<00:00, 118.22it/s]


Epoch 127, Loss: 0.45557168313837426


Epoch 128: 100%|██████████| 381/381 [00:03<00:00, 120.23it/s]


Epoch 128, Loss: 0.45742384209407594


Epoch 129: 100%|██████████| 381/381 [00:03<00:00, 122.57it/s]


Epoch 129, Loss: 0.4777488180502193


Epoch 130: 100%|██████████| 381/381 [00:03<00:00, 122.75it/s]


Epoch 130, Loss: 0.9979320030825658


Epoch 131: 100%|██████████| 381/381 [00:03<00:00, 119.76it/s]


Epoch 131, Loss: 0.6896268583032403


Epoch 132: 100%|██████████| 381/381 [00:03<00:00, 117.99it/s]


Epoch 132, Loss: 0.5067894397758124


Epoch 133: 100%|██████████| 381/381 [00:03<00:00, 122.45it/s]


Epoch 133, Loss: 0.4582986585267885


Epoch 134: 100%|██████████| 381/381 [00:03<00:00, 122.56it/s]


Epoch 134, Loss: 0.4458100503198118


Epoch 135: 100%|██████████| 381/381 [00:03<00:00, 122.21it/s]


Epoch 135, Loss: 0.4438068323873785


Epoch 136: 100%|██████████| 381/381 [00:03<00:00, 117.36it/s]


Epoch 136, Loss: 0.44397081407349254


Epoch 137: 100%|██████████| 381/381 [00:03<00:00, 121.08it/s]


Epoch 137, Loss: 0.44513933747772155


Epoch 138: 100%|██████████| 381/381 [00:03<00:00, 122.53it/s]


Epoch 138, Loss: 0.44722911965815726


Epoch 139: 100%|██████████| 381/381 [00:03<00:00, 122.47it/s]


Epoch 139, Loss: 0.4485325238757872


Epoch 140: 100%|██████████| 381/381 [00:03<00:00, 119.65it/s]

Epoch 140, Loss: 0.44968357025169015





In [33]:

# Inference
import random

def generate_ghazal(input_misra, model, vocab, max_lines=1):
    model.eval()
    tokens = tokenize(clean_text(input_misra))
    encoded = torch.tensor(pad_sequence_custom(encode(tokens)), dtype=torch.long).unsqueeze(0).to(device)
    generated_ghazal = [input_misra]

    for _ in range(max_lines):
        with torch.no_grad():
            output = model(encoded)
            predicted_ids = torch.argmax(output, dim=2).squeeze(0).tolist()
        predicted_tokens = [list(vocab.keys())[list(vocab.values()).index(i)] for i in predicted_ids if i != 0]
        generated_misra = " ".join(predicted_tokens)
        generated_ghazal.append(generated_misra)
        encoded = torch.tensor(pad_sequence_custom(encode(predicted_tokens)), dtype=torch.long).unsqueeze(0).to(device)
    return "\n".join(generated_ghazal)

# Example Usage
input_misra = input("برائے مہربانی ایک مصرع درج کریں: ")
print(generate_ghazal(input_misra, model, vocab))


برائے مہربانی ایک مصرع درج کریں: چاندنی راتوں میں تنہا بیٹھا ہوں
چاندنی راتوں میں تنہا بیٹھا ہوں
اج بھی میرے صبح و میرا گیا ہے گا ایا ایا ایا ایا پایا نکلے نکلے گے پایا


In [31]:
# Save the model
model_path = "/content/drive/MyDrive/Urdu Poetry/ghazal_generator.pth"
torch.save(model.state_dict(), model_path)
print(f"Model saved at {model_path}")

Model saved at /content/drive/MyDrive/Urdu Poetry/ghazal_generator.pth
