#### Best Model with Attention

In [1]:
# importing required libraries for the notebook
# -- standard library --
import argparse as ap
import os, shutil

# -- third-party --
import lightning as lt
import pandas as pd
import torch
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks.early_stopping import EarlyStopping

# -- project-local modules --
# (star imports are kept in their original order, since a later one can shadow names from an earlier one)
from language import *
from dataset_dataloader import *
from encoder_decoder import *
from plotting_code_and_helpers import *
from runner import Runner

# we will ignore num_workers suggestions/warnings from pytorch-lightning
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Report which accelerator torch can see.
# The value is informational only - NOT USED later, because lightning picks the device itself.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


#### Defining the source and target languages and loading data

In [3]:
# language codes for the input (source) side and the output (target) side
SOURCE, TARGET = 'eng', 'tam'

In [4]:
# Read the train / valid / test splits for the target language in one pass;
# each load_data call yields an (inputs, outputs) pair.
(x_train, y_train), (x_valid, y_valid), (x_test, y_test) = [
    load_data(TARGET, _split) for _split in ('train', 'valid', 'test')
]

# report the number of samples in every split, one line per split
print(
    f'Number of train samples = {len(x_train)}',
    f'Number of valid samples = {len(x_valid)}',
    f'Number of test samples = {len(x_test)}',
    sep='\n',
)

Number of train samples = 51200
Number of valid samples = 4096
Number of test samples = 4096


In [5]:
# One Language object per side; each keeps the vocabulary together with the
# index2sym and sym2index lookups.
SRC_LANG, TAR_LANG = Language(SOURCE), Language(TARGET)

# the vocabulary is built from the training words only (valid/test are never seen here)
SRC_LANG.create_vocabulary(*x_train)
TAR_LANG.create_vocabulary(*y_train)

# derive the character <-> index mappings from the vocabularies
SRC_LANG.generate_mappings()
TAR_LANG.generate_mappings()

# show both vocabularies and their index -> symbol tables, source side first
print(
    f'Source Vocabulary Size = {len(SRC_LANG.symbols)}',
    f'Source Vocabulary = {SRC_LANG.symbols}',
    f'Source Mapping {SRC_LANG.index2sym}',
    f'Target Vocabulary Size = {len(TAR_LANG.symbols)}',
    f'Target Vocabulary = {TAR_LANG.symbols}',
    f'Target Mapping {TAR_LANG.index2sym}',
    sep='\n',
)

Source Vocabulary Size = 26
Source Vocabulary = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
Source Mapping {0: '@', 1: '$', 2: '!', 3: '%', 4: 'a', 5: 'b', 6: 'c', 7: 'd', 8: 'e', 9: 'f', 10: 'g', 11: 'h', 12: 'i', 13: 'j', 14: 'k', 15: 'l', 16: 'm', 17: 'n', 18: 'o', 19: 'p', 20: 'q', 21: 'r', 22: 's', 23: 't', 24: 'u', 25: 'v', 26: 'w', 27: 'x', 28: 'y', 29: 'z'}
Target Vocabulary Size = 46
Target Vocabulary = ['ஃ', 'அ', 'ஆ', 'இ', 'ஈ', 'உ', 'ஊ', 'எ', 'ஏ', 'ஐ', 'ஒ', 'ஓ', 'க', 'ங', 'ச', 'ஜ', 'ஞ', 'ட', 'ண', 'த', 'ந', 'ன', 'ப', 'ம', 'ய', 'ர', 'ற', 'ல', 'ள', 'ழ', 'வ', 'ஷ', 'ஸ', 'ஹ', 'ா', 'ி', 'ீ', 'ு', 'ூ', 'ெ', 'ே', 'ை', 'ொ', 'ோ', 'ௌ', '்']
Target Mapping {0: '@', 1: '$', 2: '!', 3: '%', 4: 'ஃ', 5: 'அ', 6: 'ஆ', 7: 'இ', 8: 'ஈ', 9: 'உ', 10: 'ஊ', 11: 'எ', 12: 'ஏ', 13: 'ஐ', 14: 'ஒ', 15: 'ஓ', 16: 'க', 17: 'ங', 18: 'ச', 19: 'ஜ', 20: 'ஞ', 21: 'ட', 22: 'ண', 23: 'த', 24: 'ந', 25: 'ன', 26: 'ப', 27: 'ம', 28: 'ய',

#### Load the model (from checkpoint artifacts fetched from wandb) and run inference on test data

In [6]:
# Keyword arguments handed to the model (an instance of the Runner class) when it
# is rebuilt from the checkpoint; these are the best hyperparameters from the sweep.
rdict = {
    'SOURCE': SOURCE,
    'TARGET': TARGET,
    'src_lang': SRC_LANG,
    'tar_lang': TAR_LANG,
    'common_embed_size': 192,
    'common_num_layers': 1,
    'common_hidden_size': 256,
    'common_cell_type': 'LSTM',
    'init_tf_ratio': 0.7,
    'enc_bidirect': True,
    'attention': True,
    'dropout': 0.0,
    'opt_name': 'Adam',
    'learning_rate': 0.002,
    'batch_size': 128,
}

#### TESTING SECTION ####
trainer = lt.Trainer()

# the best model's checkpoint is kept locally under './best_checkpoints/attention/',
# named after the run that produced it
run_name = 'emb=192_layers=1_hid=256_cell=LSTM_bidirectional=True_dr=0_itfr=0.7_bsize=128_att=True_opt=Adam_lr=0.002'
runner = Runner.load_from_checkpoint(
    f'./best_checkpoints/attention/{run_name}.ckpt',
    **rdict,
)

# validation pass first, to cross-check the accuracy against wandb (63.50 expected)
trainer.validate(runner)
# then the actual test pass
trainer.test(runner)

# collect what the runner gathered during testing and split it into its four parts
src_list, tar_true_list, tar_pred_list, attn_matrices = ret_info = runner.get_test_results()

# start from an empty predictions_attention folder, then write predictions.csv into it
predictions_dir = './predictions_attention/'
if os.path.exists(predictions_dir):
    shutil.rmtree(predictions_dir)
os.mkdir(predictions_dir)
save_predictions_file(src_list, tar_true_list, tar_pred_list, './predictions_attention/predictions')

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

#### Log a color-coded table and attention heatmaps for wandb report

In [7]:
# wandb is imported in this cell (not with the other imports at the top);
# the two logging cells below rely on the `wandb` name bound here.
import wandb
# Log in to Weights & Biases before any run is created; as the last expression
# of the cell, the call's return value is what the notebook displays.
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mcs19b021[0m ([33mcs6700_team_2023[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [13]:
# Log a table of selected test predictions, plus its legend, to a dedicated wandb run.
# The `with` block finishes the run when it exits, so no explicit run.finish() is needed.
with wandb.init(project='cs6910-assignment3', entity='cs19b021', tags=['att-words-log'], name='att_compare') as run:
    # word numbers (1-based, matching the "Word number" column) of the rows to highlight
    word_numbers = [103, 188, 366, 401, 1176, 2281, 2691, 2985, 3374, 3787]
    # read the predictions file and keep only the three text columns, in this order
    df = pd.read_csv('./predictions_attention/predictions.csv')
    df = df[["Source", "Target", "Predicted"]]
    # number the rows starting from 1 and put that column first
    df.insert(0, "Word number", list(range(1, len(df) + 1)))
    # the helper receives positions, so shift the 1-based word numbers down by one
    f1, f2 = generate_table_and_legend(df, locs=[(r - 1) for r in word_numbers])
    wandb.log({"word-table" : f1})
    wandb.log({"legend" : f2})

VBox(children=(Label(value='0.008 MB of 0.026 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.319800…

In [9]:
# Log a grid of attention heatmaps for selected test words to a dedicated wandb run.
# The `with` block finishes the run when it exits, so no explicit run.finish() is needed.
with wandb.init(project='cs6910-assignment3', entity='cs19b021', tags=['att-heatmap'], name='best_attention_model_heatmaps') as run:
    # words whose attention matrices are plotted
    # NOTE(review): the word-table cell converts its word numbers to 0-based positions,
    # while these are passed through unchanged - confirm which convention
    # generate_heatmap_grid expects.
    word_numbers = [3602, 947, 2808, 1414, 279, 393, 57, 109, 3144, 1612]
    fig = generate_heatmap_grid(src_list, tar_true_list, tar_pred_list, attn_matrices, word_numbers)
    wandb.log({"attention-plot" : fig})

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016669640916613087, max=1.0…

VBox(children=(Label(value='0.029 MB of 0.036 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.797941…