# Installing packages and importing the required libraries

In [None]:
!pip install uniseg
!pip install wandb --upgrade
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import unicodedata
import re
import numpy as np
import os
import io
import time
import random
import shutil
from matplotlib.font_manager import FontProperties
from IPython.display import HTML as html_print
from IPython.display import display
import wandb

#!wandb login --relogin

# Downloading data set


In [None]:
# Download the Dakshina dataset archive and unpack it into the current directory.
!curl https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar --output dakshina.tar
!tar -xvf  'dakshina.tar'
# Fetch the Nirmala font file; plot_attention loads "Nirmala.ttf" from the working directory.
!wget "https://github.com/N-Chandru/DeepLearning/raw/main/Nirmala.ttf"

In [None]:
# Locations of the Hindi train / validation (dev) / test lexicons inside the extracted archive.
# `vaildation_path` keeps its original (misspelled) name because other cells refer to it.
(train_path, vaildation_path, test_file_path) = (
    "/content/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv",
    "/content/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv",
    "/content/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.test.tsv",
)

# Data preprocessing and tokenising

In [None]:
class Dataprocessing:
  """Reads a tab-separated transliteration lexicon and encodes it as padded character ids.

  Every usable line must have at least two tab-separated columns: the target
  word first and the input word second. Any further columns are ignored.
  """

  def __init__(self, path) -> None:
    # Path of the lexicon file read by `dataloading`.
    self.path = path

  def tokeniser(self, language):
    """Fit a character-level Keras tokenizer on `language` and encode it.

    Args:
      language: iterable of strings, one word per entry.

    Returns:
      (tensor, tokenizer): the post-padded integer sequences and the fitted tokenizer.
    """
    tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='', char_level=True)
    tokenizer.fit_on_texts(language)

    tensor = tokenizer.texts_to_sequences(language)
    tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor, padding='post')
    return tensor, tokenizer

  def dataloading(self):
    """Load the lexicon at `self.path` and tokenise both sides.

    Each word is wrapped as '\\t' + word + '\\n' (start / end markers) before
    tokenising, e.g. a row becomes ['\\tअं\\n', '\\tan\\n'].

    Returns:
      (input_tensor, input_tokenizer, output_tensor, output_tokenizer)

    Raises:
      ValueError: if the file contains no line with at least two columns.
    """
    # Close the file deterministically instead of relying on garbage collection.
    with io.open(self.path, encoding='UTF-8') as lexicon:
      rows = lexicon.read().strip().split('\n')

    # `strip()` already removed the trailing newline, so every element of `rows`
    # is a real record; slicing off the last one would silently drop a sample.
    words = []
    for row in rows:
      columns = row.split('\t')
      if len(columns) < 2:
        # Blank or malformed line: nothing to pair up.
        continue
      words.append(['\t' + word + '\n' for word in columns[:2]])

    if not words:
      raise ValueError("no (target, input) word pairs found in " + str(self.path))

    output_lang, input_lang = zip(*words)

    input_tensor, input_tokenizer = self.tokeniser(input_lang)
    output_tensor, output_tokenizer = self.tokeniser(output_lang)
    return input_tensor, input_tokenizer, output_tensor, output_tokenizer



In [None]:
# Load the training lexicon and encode it as padded character-id tensors.
Data = Dataprocessing(train_path)
(input_tensor_train, input_lang,
 output_tensor_train, output_lang) = Data.dataloading()

# Padded sequence lengths (second tensor dimension) of the input and output words.
max_length_input = input_tensor_train.shape[1]
max_length_output = output_tensor_train.shape[1]

# Show length
print(len(input_tensor_train), len(output_tensor_train))

44203 44203


# Encoder class for generating encoder layers

In [None]:
class Encoder(tf.keras.Model):
  """Embedding followed by a single recurrent layer (LSTM, GRU or SimpleRNN).

  `rnn` selects the layer: 'LSTM', 'GRU', or anything else for SimpleRNN.
  `self.hidden` is a zero tensor of shape (Batch_size, Latent) that callers
  pass in as the initial state.
  """

  def __init__(self, rnn, vocabulary, embedding, Latent, Batch_size, dropout):
    super().__init__()
    self.Batch_size = Batch_size
    self.Latent = Latent
    self.rnn = rnn
    self.Embedding = tf.keras.layers.Embedding(vocabulary, embedding)
    self.hidden = tf.zeros((self.Batch_size, self.Latent))

    # All three layer types are configured identically; only the class differs.
    if rnn == 'LSTM':
      layer_cls = tf.keras.layers.LSTM
    elif rnn == 'GRU':
      layer_cls = tf.keras.layers.GRU
    else:
      layer_cls = tf.keras.layers.SimpleRNN
    self.encoder = layer_cls(self.Latent,
                             return_sequences=True,
                             return_state=True,
                             recurrent_initializer='glorot_uniform',
                             dropout=dropout)

  def call(self, input, hidden, state):
    """Embed `input` and run the recurrent layer from the given initial state.

    `state` is only used for the LSTM, where it is the initial cell state.
    Returns whatever the recurrent layer returns (sequence output plus final state(s)).
    """
    embedded = self.Embedding(input)
    if self.rnn == 'LSTM':
      initial_state = [hidden, state]
    else:
      initial_state = hidden
    return self.encoder(embedded, initial_state=initial_state)
      

# class for attention layer

In [None]:
# Additive (Bahdanau-style) attention layer
class BahdanauAttention(tf.keras.layers.Layer):
  """Scores every encoder time step against a query and returns the weighted sum."""

  def __init__(self, latent):
    super().__init__()
    self.W1 = tf.keras.layers.Dense(latent)
    self.W2 = tf.keras.layers.Dense(latent)
    self.V = tf.keras.layers.Dense(1)

  def call(self, query, encoder_output):
    """Return (context_vector, attention_weights) for `query` over `encoder_output`."""
    # Add a time axis to the query so it broadcasts against every encoder step.
    query_with_time_axis = tf.expand_dims(query, 1)
    projected = self.W1(query_with_time_axis) + self.W2(encoder_output)

    # score shape == (batch_size, max_length, 1)
    score = self.V(tf.nn.tanh(projected))

    # attention_weights shape == (batch_size, max_length, 1); normalised over the time axis
    attention_weights = tf.nn.softmax(score, axis=1)

    # context_vector shape after sum == (batch_size, hidden_size)
    context_vector = tf.reduce_sum(attention_weights * encoder_output, axis=1)

    return context_vector, attention_weights

# class for decoder layer

In [None]:
class Decoder(tf.keras.Model):
  """Single-step attention decoder: attention -> recurrent layer -> dense projection.

  `rnn` selects the recurrent layer: 'LSTM', 'GRU', or anything else for SimpleRNN.
  """

  def __init__(self, rnn, vocabulary, embedding, Latent, Batch_size, dropout):
    super(Decoder, self).__init__()
    self.Batch_size = Batch_size
    self.Latent = Latent
    self.rnn = rnn
    self.Embedding = tf.keras.layers.Embedding(vocabulary, embedding)
    self.hidden = tf.zeros((self.Batch_size, self.Latent))
    self.fully_connected = tf.keras.layers.Dense(vocabulary)
    self.attention = BahdanauAttention(self.Latent)

    # All three layer types are configured identically; only the class differs.
    if rnn == 'LSTM':
      layer_cls = tf.keras.layers.LSTM
    elif rnn == 'GRU':
      layer_cls = tf.keras.layers.GRU
    else:
      layer_cls = tf.keras.layers.SimpleRNN
    self.decoder = layer_cls(self.Latent, return_sequences=True,
                             return_state=True, recurrent_initializer='glorot_uniform',
                             dropout = dropout)

  def call(self, input, hidden, encoder_output, state):
    """Run one decoding step.

    Args:
      input: ids of the previous output character, one time step per example.
      hidden: previous decoder hidden state; also used as the attention query.
      encoder_output: encoder outputs for every input position.
      state: previous LSTM cell state (ignored for GRU / SimpleRNN).

    Returns:
      (logits, new_state, attention_weights). `new_state` is the new hidden
      state for GRU / SimpleRNN and `[hidden, cell]` for LSTM.
    """
    # enc_output shape == (batch_size, max_length, hidden_size)
    context_vector, attention_weights = self.attention(hidden, encoder_output)

    # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
    temp = tf.concat([tf.expand_dims(context_vector, 1), self.Embedding(input)], axis=-1)

    if self.rnn !='LSTM':
      # Start from the previous hidden state, as the LSTM branch does. Without
      # `initial_state` the layer restarts from zeros at every decoding step.
      output, state = self.decoder(temp, initial_state=hidden)
      temp = self.fully_connected(tf.reshape(output, (-1, output.shape[2])))
      return temp, state, attention_weights

    else:
      output, last_hidden, state = self.decoder(temp, initial_state=[hidden, state])
      temp = self.fully_connected(tf.reshape(output, (-1, output.shape[2])))
      return temp, [last_hidden, state], attention_weights


In [None]:
# Reference: https://stackoverflow.com/questions/62916592/loss-function-for-sequences-in-tensorflow-2-0
def calculate_loss(real, pred):
  """Mean of the per-example loss with positions whose target id is 0 zeroed out.

  Uses the module-level `loss_object`. The zeroed positions still count in the
  denominator of the mean.
  """
  is_real_token = tf.math.logical_not(tf.math.equal(real, 0))
  per_example_loss = loss_object(real, pred)

  mask = tf.cast(is_real_token, dtype=per_example_loss.dtype)
  masked_loss = per_example_loss * mask

  return tf.reduce_mean(masked_loss)

# train a model

In [None]:
def train(use_wandb=True):
    """Build the attention seq2seq model, train it and report validation accuracy.

    Hyper-parameters are read from the active W&B run's config when `use_wandb`
    is truthy; otherwise the hard-coded defaults below are used.

    Side effects: sets the module-level globals `Batch_size`, `latent`,
    `embedding`, `rnn_type`, `vocab_input_size`, `vocab_output_size`, `encoder`,
    `decoder`, `optimizer`, `loss_object` and `run_name`, which the training
    step, inference and plotting cells read.

    Raises:
        ValueError: if the training set holds fewer examples than one batch.
    """
    global Batch_size, latent, embedding, rnn_type
    global vocab_input_size, vocab_output_size
    global encoder, decoder, optimizer, loss_object
    global run_name

    if use_wandb:
        # initialising the wandb run
        run = wandb.init()
        # Type of RNN to choose. Acceptable values are 'RNN', 'LSTM' and 'GRU'
        rnn_type = run.config.cell
        # Batch size for training.
        Batch_size = run.config.Batch_size
        # Dimensions of the abstract representation of the input word and target word.
        embedding = run.config.Embedding
        # Latent dimensions of the encoder and decoder.
        latent = run.config.Latent
        # Number of epochs to train for.
        epochs = run.config.epochs
        # Float between 0 and 1. Denotes the fraction of the units to drop.
        dropout = run.config.dropout
    else:
        rnn_type = 'LSTM'
        Batch_size = 64
        embedding = 512
        latent = 1024
        epochs = 20
        dropout = 0.2

    BUFFER_SIZE = len(input_tensor_train)
    steps_per_epoch = len(input_tensor_train)//Batch_size
    if steps_per_epoch == 0:
        # Every batch would be dropped by `drop_remainder=True`; fail with a clear message.
        raise ValueError("training set (%d examples) is smaller than Batch_size (%d)"
                         % (len(input_tensor_train), Batch_size))
    vocab_input_size = len(input_lang.word_index)+1
    vocab_output_size = len(output_lang.word_index)+1

    run_name = '_epochs_'+str(epochs)+'_rnn_type_'+str(rnn_type)+'_bs_'+str(Batch_size)+'_embed_'+str(embedding)+'_latent_'+str(latent)+'_dropout_'+str(dropout)
    if use_wandb:
        wandb.run.name = run_name

    # Shuffle over the whole training set, then cut it into fixed-size batches.
    # The last, possibly smaller, batch is dropped so every batch has Batch_size rows.
    dataset = tf.data.Dataset.from_tensor_slices((input_tensor_train, output_tensor_train)).shuffle(BUFFER_SIZE)
    dataset = dataset.batch(Batch_size, drop_remainder=True)

    # Take one batch to push through the freshly created models.
    # For Hindi: TensorShape([64, 22]), TensorShape([64, 21]) is the shape of
    # train_input_batch and train_output_batch respectively.
    train_input_batch, train_output_batch = next(iter(dataset))

    # encoder and decoder
    encoder = Encoder(rnn_type, vocab_input_size, embedding, latent, Batch_size, dropout)
    decoder = Decoder(rnn_type, vocab_output_size, embedding, latent, Batch_size, dropout)

    # One forward pass through both models; the results are discarded.
    # Presumably this exists to create the layer variables before the first
    # call of the tf.function-compiled training step -- TODO confirm.
    if rnn_type != 'LSTM':
        output, hidden = encoder(train_input_batch, encoder.hidden, encoder.hidden)
        decoder(tf.random.uniform((Batch_size, 1)), hidden, output, output)
    else:
        output, hidden, state = encoder(train_input_batch, encoder.hidden, encoder.hidden)
        decoder(tf.random.uniform((Batch_size, 1)), hidden, output, state)

    optimizer = tf.keras.optimizers.Adam()
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

    train_loss=[0]*epochs

    # Start training
    for epoch in range(epochs):
        total_loss = 0
        for inp, out in dataset.take(steps_per_epoch):
            total_loss += train_every_step(inp, out, encoder, decoder, rnn_type)

        # Storing the average loss per epoch
        epoch_loss = total_loss.numpy()/steps_per_epoch
        train_loss[epoch] = epoch_loss
        if use_wandb:
            wandb.log({"train_loss": epoch_loss})

    val_acc=validate(vaildation_path,rnn_type)
    print("Train loss: ",train_loss)
    print("Validation Accuracy: ",val_acc)

    if use_wandb:
        wandb.log({'val_acc': val_acc})
    


In [None]:
@tf.function
def train_every_step(inp, targ, enocder, decoder,rnn_type):
  """Run one teacher-forced training step on a batch and apply the gradients.

  Args:
    inp: batch of padded input id sequences.
    targ: batch of padded target id sequences.
    enocder: the encoder model. The misspelled parameter name is kept so
      existing callers keep working.
    decoder: the decoder model.
    rnn_type: 'LSTM' selects the LSTM code path; any other value the GRU / SimpleRNN path.

  Returns:
    The summed per-step loss divided by the target sequence length.

  Reads the module-level `output_lang`, `Batch_size` and `optimizer`.
  """
  # Use the encoder that was passed in. The body used to fall through to the
  # global `encoder` because the parameter name is misspelled.
  encoder = enocder
  loss = 0

  with tf.GradientTape() as tape:
    if rnn_type != 'LSTM':
      enc_output, enc_hidden = encoder(inp, encoder.hidden, encoder.hidden)
      dec_cell_state = None
    else:
      enc_output, enc_hidden, dec_cell_state = encoder(inp, encoder.hidden, encoder.hidden)
    dec_hidden = enc_hidden

    # Every sequence starts with the '\t' start marker.
    dec_input = tf.expand_dims([output_lang.word_index['\t']] * Batch_size, 1)

    # Teacher forcing - passing the target as the next input
    for t in range(1, targ.shape[1]):
      if rnn_type != 'LSTM':
        # The 4th argument (cell state) is ignored by the non-LSTM decoder.
        predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output, enc_output)
      else:
        # NOTE(review): the cell state handed to the decoder is always the
        # encoder's final cell state; the cell state the decoder returns is not
        # carried forward. inference_model does the same, so the two stay
        # consistent -- confirm whether this is intended before changing either.
        predictions, dec_state, _ = decoder(dec_input, dec_hidden, enc_output, dec_cell_state)
        dec_hidden = dec_state[0]
      loss += calculate_loss(targ[:, t], predictions)
      # using teacher forcing
      dec_input = tf.expand_dims(targ[:, t], 1)

  batch_loss = (loss / int(targ.shape[1]))
  variables = encoder.trainable_variables + decoder.trainable_variables
  gradients = tape.gradient(loss, variables)
  optimizer.apply_gradients(zip(gradients, variables))
  return batch_loss

In [None]:
# Code for inference model.
def inference_model(input_word,rnn_type):
  """Greedily transliterate one word and collect the attention weights.

  Args:
    input_word: the bare input word, WITHOUT the '\\t' / '\\n' markers (they are
      added here).
    rnn_type: 'LSTM' selects the LSTM code path; any other value the GRU / SimpleRNN path.

  Returns:
    (predicted_word, input_word, attention_plot, att_w) where `predicted_word`
    ends with '\\n' if the model emitted the end marker, `input_word` is the
    marker-wrapped input, `attention_plot` is a
    (max_length_output, max_length_input) array and `att_w` holds one
    attention row per decoding step, cut to the length of `input_word`.

  Reads the module-level `encoder`, `decoder`, `input_lang`, `output_lang`,
  `latent`, `max_length_input` and `max_length_output`.
  """
  attention_plot = np.zeros((max_length_output, max_length_input))

  input_word = '\t'+input_word+'\n'

  # Characters that never occurred in the training data have no id. Map them to
  # 0 (the padding id) instead of raising KeyError, so one unusual word cannot
  # abort a whole evaluation run and positions stay aligned with the attention columns.
  inputs = [input_lang.word_index.get(ch, 0) for ch in input_word]
  inputs = tf.keras.preprocessing.sequence.pad_sequences([inputs],
                                                         maxlen=max_length_input,
                                                         padding='post')
  inputs = tf.convert_to_tensor(inputs)

  predicted_word = ''

  if rnn_type != 'LSTM':
    hidden = [tf.zeros((1, latent))]
    enc_out, enc_hidden = encoder(inputs, hidden, hidden)
    enc_cell_state = None
  else:
    hidden = tf.zeros((1, latent))
    cell_state = tf.zeros((1, latent))
    enc_out, enc_hidden, enc_cell_state = encoder(inputs, hidden, cell_state)
  dec_hidden = enc_hidden

  dec_input = tf.expand_dims([output_lang.word_index['\t']], 0)

  att_w=[]

  for t in range(max_length_output):
    if rnn_type != 'LSTM':
      predictions, dec_hidden, attention_weights = decoder(dec_input, dec_hidden, enc_out, enc_out)
    else:
      # NOTE(review): the decoder always receives the encoder's final cell
      # state, mirroring train_every_step -- confirm whether that is intended.
      predictions, dec_state, attention_weights = decoder(dec_input, dec_hidden, enc_out, enc_cell_state)
      dec_hidden = dec_state[0]

    # storing the attention weights to plot later on
    attention_weights = tf.reshape(attention_weights, (-1, ))
    attention_plot[t] = attention_weights.numpy()
    att_w.append(attention_weights.numpy()[0:len(input_word)])

    predicted_id = tf.argmax(predictions[0]).numpy()

    # Id 0 (padding) is a valid output index but has no entry in `index_word`;
    # treat it as an empty character rather than crashing with KeyError.
    predicted_char = output_lang.index_word.get(predicted_id, '')
    predicted_word += predicted_char

    if predicted_char == '\n':
      return predicted_word, input_word, attention_plot,att_w

    # the predicted ID is fed back into the model
    dec_input = tf.expand_dims([predicted_id], 0)

  return predicted_word, input_word, attention_plot,att_w

# Validation: word-level accuracy on a lexicon file

In [None]:
import shutil
def validate(path_to_file,folder_name):
  """Return the fraction of words in `path_to_file` that are transliterated exactly.

  When the path contains "test", every prediction is also written to
  `predictions_attention/<folder_name>/success.txt` or `failure.txt` under the
  current working directory (an existing folder of that name is replaced).
  Each record is "<input> <target> <prediction>".

  Args:
    path_to_file: tab-separated lexicon; first column target word, second input word.
    folder_name: name of the output sub-folder (only used for test files).

  Returns:
    Accuracy in [0, 1]; 0.0 if the file holds no usable rows.

  Reads the module-level `rnn_type` to pick the inference code path.
  """
  save = "test" in path_to_file
  success_file = failure_file = None

  try:
    if save:
      out_dir = os.path.join(os.getcwd(),"predictions_attention",str(folder_name))
      if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
      os.makedirs(out_dir)
      success_file = open(os.path.join(out_dir,"success.txt"),"w",encoding='utf-8', errors='ignore')
      failure_file = open(os.path.join(out_dir,"failure.txt"),"w",encoding='utf-8', errors='ignore')

    # Get the target words and input words for the validation.
    with io.open(path_to_file, encoding='UTF-8') as lexicon:
      rows = lexicon.read().strip().split('\n')
    # `strip()` removed the trailing newline, so the last element is a real row
    # and must be kept. Lines without two columns are skipped.
    pairs = [columns[:2] for columns in (row.split('\t') for row in rows) if len(columns) >= 2]
    if not pairs:
      return 0.0

    success_count = 0
    for target_word, source_word in pairs:
      # inference_model adds the '\t' / '\n' markers itself. Passing an already
      # wrapped word would feed the model doubled markers it never saw in training.
      predicted_word, input_word, _, _ = inference_model(source_word, rnn_type)
      record = input_word.strip()+' '+target_word.strip()+' '+predicted_word.strip()+"\n"
      # A correct prediction is the target word followed by the '\n' end marker.
      if target_word + '\n' == predicted_word:
        success_count = success_count + 1
        if save:
          success_file.write(record)
      elif save:
        failure_file.write(record)

    return success_count/len(pairs)
  finally:
    # Close the result files even if inference raises half-way through.
    for handle in (success_file, failure_file):
      if handle is not None:
        handle.close()

# sweep config

In [None]:
# Bayesian hyper-parameter search that maximises the logged `val_acc` metric.
# The parameter names are the ones train() reads from `run.config`.
sweep_config = dict(
    name="Bayesian Sweep with attention2",
    method="bayes",
    metric=dict(name="val_acc", goal="maximize"),
    parameters=dict(
        cell=dict(values=["LSTM", "GRU", "RNN"]),
        Embedding=dict(values=[512, 256]),
        Latent=dict(values=[1024, 512]),
        dropout=dict(values=[0.2, 0, 0.3]),
        epochs=dict(values=[20, 25, 30]),
        Batch_size=dict(values=[64, 128]),
    ),
)

# run below cell for wandb logging

In [None]:
# Register the sweep and let the agent call train() once per sampled configuration.
sweep_id = wandb.sweep(sweep_config, project="CS6910-Assignment-3")
wandb.agent(sweep_id, train)
# No extra `train(use_wandb=True)` call here: outside the agent the run has no
# sweep parameters, so train() would fail when reading `run.config.cell`.
# Use `train(False)` (next section) for a single run with fixed hyper-parameters.

# Manual training (no W&B sweep)

In [None]:
# Single training run with the hard-coded hyper-parameters (no W&B logging)
train(use_wandb=False)

Train loss:  [0.3808153235394022, 0.13497457089631454, 0.1006564596424932, 0.08152009162349977, 0.06136967202891474, 0.047500317338584126, 0.03963190990945567, 0.03388768071713655, 0.03201230090597401, 0.02814222142316293, 0.026932909868765568, 0.02445453975511634, 0.024141676529594088, 0.02290628267371136, 0.02349087535471156, 0.021879335762797922, 0.022647387739540874, 0.020941703906957654, 0.0196258544921875, 0.019092128587805707]
Validation Accuracy:  0.40601331191186596


# plotting function

In [None]:
def plot_attention(attention, input_word, predicted_word, file_name):
  """Draw the attention matrix as a heat map, save it, log it to W&B and show it.

  Args:
    attention: 2-D array; rows correspond to predicted characters, columns to
      input characters.
    input_word: string whose characters label the x axis.
    predicted_word: string whose characters label the y axis (drawn with the
      Nirmala font loaded from the current working directory).
    file_name: path of the image file to write; its directory is created if missing.
  """
  hindi_font = FontProperties(fname = os.path.join(os.getcwd(),"Nirmala.ttf"))

  fig = plt.figure(figsize=(3, 3))
  ax = fig.add_subplot(1, 1, 1)
  ax.matshow(attention, cmap='viridis')

  fontdict = {'fontsize': 14}

  # Pin one tick per matrix cell and label exactly those ticks. This replaces
  # the old trick of prepending a dummy '' label and relying on the locator to
  # emit an extra leading tick.
  n_rows, n_cols = np.shape(attention)
  ax.set_xticks(list(range(n_cols)))
  ax.set_xticklabels(list(input_word)[:n_cols], fontdict=fontdict, rotation=0)
  ax.set_yticks(list(range(n_rows)))
  ax.set_yticklabels(list(predicted_word)[:n_rows], fontdict=fontdict,fontproperties=hindi_font)

  # savefig does not create missing directories.
  out_dir = os.path.dirname(file_name)
  if out_dir:
    os.makedirs(out_dir, exist_ok=True)
  plt.savefig(file_name)

  wandb.init()
  wandb.log({"images": wandb.Image(fig)})
  wandb.finish()
  plt.show()
  # Release the figure so repeated calls do not accumulate open figures.
  plt.close(fig)

In [None]:
# Build the HTML snippet for one coloured character
def cstr(s, color='black'):
    """Return a `<text>` element showing `s` on a `color` background.

    A single space gets extra left padding so the coloured box stays visible.
    """
    if s != ' ':
        return "<text style=color:#000;background-color:{}>{} </text>".format(color, s)
    return "<text style=color:#000;padding-left:10px;background-color:{}> </text>".format(color)
	
# Render coloured text in the notebook output
def print_color(t):
    """Display the (text, colour) pairs in `t` as one line of coloured HTML."""
    snippets = [cstr(ti, color=ci) for ti, ci in t]
    markup = ''.join(snippets)
    display(html_print(markup))

# Map a weight in [0, 1] to a background colour.
# Low weights give pale blue shades, high weights increasingly strong red ones.
def get_clr(value):
    """Return the hex colour of the 5%-wide bucket that `value` falls into.

    Values outside [0, 1) are clamped to the first / last colour, so a weight
    of exactly 1.0 no longer raises IndexError.
    """
    colors = ['#85c2e1', '#89c4e2', '#95cae5', '#99cce6', '#a1d0e8',
              '#b2d9ec', '#baddee', '#c2e1f0', '#eff7fb', '#f9e8e8',
              '#f9e8e8', '#f9d4d4', '#f9bdbd', '#f8a8a8', '#f68f8f',
              '#f47676', '#f45f5f', '#f34343', '#f33b3b', '#f42e2e']
    bucket = int((value * 100) / 5)
    # 1.0 maps to bucket 20, one past the end of the 20-entry palette.
    bucket = max(0, min(bucket, len(colors) - 1))
    return colors[bucket]


def visualize(input_word, output_word, att_w):
  """For each output character, print it and show the input coloured by attention weight.

  `att_w[i][j]` is the weight of input character j for output character i.
  """
  for i, output_char in enumerate(output_word):
    print("\nOutput character:", output_char, "\n")
    text_colours = [
        (input_word[j], get_clr(weight))
        for j, weight in enumerate(att_w[i])
    ]
    print_color(text_colours)

In [None]:
# Map a weight to a shade of green.
# Darker shades of green denote higher importance.
def get_shade_color(value):
    """Return the green shade at index int(value * 100 / 5) of the palette."""
    palette = (
        '#00fa00', '#00f500', '#00eb00', '#00e000', '#00db00',
        '#00d100', '#00c700', '#00c200', '#00b800', '#00ad00',
        '#00a800', '#009e00', '#009400', '#008f00', '#008500',
        '#007500', '#007000', '#006600', '#006100', '#005c00',
        '#005200', '#004d00', '#004700', '#003d00', '#003800',
        '#003300', '#002900', '#002400', '#001f00', '#001400',
    )
    bucket = int((value * 100) / 5)
    return palette[bucket]

def create_file(text_colors,input_word,output_word,file_path=os.getcwd()):
  """Write `connectivity.html`, an interactive attention viewer, into `file_path`.

  Hovering over an output character colours every input character of the same
  word with the shade stored for that (output character, input character) pair.
  Moving the mouse away resets all input characters to the panel colour.

  Args:
    text_colors: text_colors[k][i][j] is the CSS colour of input character j
      while output character i of word k is hovered.
    input_word: list of input words, one string per word.
    output_word: list of predicted words, parallel to `input_word`.
    file_path: directory that receives `connectivity.html`; created if missing.
  """
  # Local imports: these modules are not part of the notebook's import cell.
  import html
  import json

  n_words = len(output_word)
  # Keep one colour table per word. The previous flat table was sized by the
  # number of words rather than by each word's length and was indexed without
  # a word offset, so hovering showed the wrong colours.
  palette = [[list(row) for row in text_colors[k]] for k in range(n_words)]

  box_style = "display:flex; border: 2px solid #d0cccc; padding: 8px; margin: 8px;"
  script = []
  panels = []
  for k in range(n_words):
    out_cells = []
    for i in range(len(output_word[k])):
      row = palette[k][i] if i < len(palette[k]) else []
      script.append('$(".h%d_%d").mouseover(function(){' % (k, i))
      # Only address cells that have both an input character and a colour.
      for j in range(min(len(input_word[k]), len(row))):
        script.append('  $(".t%d_%d").css("background-color", col[%d][%d][%d]);' % (k, j, k, i, j))
      script.append('});')
      script.append('$(".h%d_%d").mouseout(function(){' % (k, i))
      script.append('  $(".t").css("background-color", "#ffff99");')
      script.append('});')
      out_cells.append('    <div class="h%d_%d">%s</div>' % (k, i, html.escape(output_word[k][i])))

    in_cells = ['    <div class="t t%d_%d">%s</div>' % (k, j, html.escape(input_word[k][j]))
                for j in range(len(input_word[k]))]

    panels.append('\n'.join(
        ['<div style="background-color:#ffff99;color:black;padding:2%; margin:4%;">',
         '  <div> Output: </div>',
         '  <div style="' + box_style + '">']
        + out_cells
        + ['  </div>',
           '  <div> Input: </div>',
           '  <div style="' + box_style + '">']
        + in_cells
        + ['  </div>',
           '</div>']))

  page = '\n'.join(
      ['<!DOCTYPE html>',
       '<html>',
       '<head>',
       '  <meta charset="UTF-8">',
       '  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>',
       '  <script>',
       '$(document).ready(function(){',
       'var col = ' + json.dumps(palette) + ';']
      + script
      + ['});',
         '  </script>',
         '</head>',
         '<body>',
         '  <h1>Connectivity:</h1>',
         '  <p> The connection strength between the target for the selected character and the input characters is highlighted in green (reset). Hover over the text to change the selected character.</p>']
      + panels
      + ['</body>',
         '</html>',
         ''])

  os.makedirs(file_path, exist_ok=True)
  fname = os.path.join(file_path,"connectivity.html")
  # Explicit UTF-8: the page declares that charset and contains non-ASCII text.
  with open(fname, "w", encoding="utf-8") as file:
    file.write(page)

def connectivity(input_words,rnn_type,file_path):
  """Transliterate the first three `input_words` and write their attention viewer.

  For every word the attention weights from inference_model are converted to
  green shades, and everything is handed to create_file.
  """
  color_list, input_word_list, output_word_list = [], [], []
  for k in range(3):
    output_word, input_word, _, att_w = inference_model(input_words[k], rnn_type)
    # One row of colours per predicted character, one colour per attention weight.
    text_colours = [
        [get_shade_color(weight) for weight in att_w[i]]
        for i in range(len(output_word))
    ]
    color_list.append(text_colours)
    input_word_list.append(input_word)
    output_word_list.append(output_word)
  create_file(color_list, input_word_list, output_word_list, file_path)

In [None]:
def transliterate(input_word,rnn_type,file_name=os.path.join(os.getcwd(),"attention_heatmap.png"),visual_flag=True):
  """Transliterate `input_word`, print the result and plot its attention heat map.

  The heat map is saved to `file_name`. When `visual_flag` is True the
  per-character coloured view is shown as well.
  """
  predicted_word, input_word, attention_plot, att_w = inference_model(input_word, rnn_type)

  print("\n",'Input:', input_word)
  print('Predicted transliteration:', predicted_word)

  # Keep only the part of the matrix that corresponds to real characters.
  n_rows, n_cols = len(predicted_word), len(input_word)
  cropped_attention = attention_plot[:n_rows, :n_cols]
  plot_attention(cropped_attention, input_word, predicted_word, file_name)

  if visual_flag == True:
    visualize(input_word, predicted_word, att_w)

In [None]:
def generate_inputs(rnn_type,n_test_samples=10):
  """Transliterate `n_test_samples` randomly chosen test words and plot their attention.

  Heat maps are saved as `predictions_attention/<run_name>/<word>.png` under the
  current working directory. The coloured per-character view is shown for the
  first sample only.

  Reads the module-level `test_file_path` and `run_name`.

  Raises:
    ValueError: if the test file holds no usable rows.
  """
  with io.open(test_file_path, encoding='UTF-8') as lexicon:
    rows = lexicon.read().strip().split('\n')
  # The second column is the input word. Keep the bare word: transliterate /
  # inference_model add the '\t' / '\n' markers themselves.
  input_words = [columns[1] for columns in (row.split('\t') for row in rows) if len(columns) >= 2]
  if not input_words:
    raise ValueError("no input words found in " + str(test_file_path))

  out_dir = os.path.join(os.getcwd(),"predictions_attention",str(run_name))
  for i in range(n_test_samples):
    # random.choice cannot run past the end, unlike randint(0, len(...)),
    # whose upper bound is inclusive.
    input_word = random.choice(input_words)
    # Build the file name from the bare word so it contains no tab / newline.
    file_name = os.path.join(out_dir, input_word+".png")
    transliterate(input_word, rnn_type, file_name, i == 0)

# Run for test accuracy and plots on the test data

In [None]:
# Log in to Weights & Biases again before the plotting calls below log images.
!wandb login --relogin
# Score the model on the test file. Because the path contains "test", validate()
# also writes success.txt / failure.txt under predictions_attention/<run_name>.
# `run_name` and `rnn_type` are globals set by train(), so train() must have run first.
val_acc=validate(test_file_path,run_name)
# Attention heat maps for 10 randomly chosen test words.
generate_inputs(rnn_type,10)
# Interactive connectivity.html for three hand-picked words.
connectivity(['anjali','underwear','agastya'],rnn_type, os.path.join(os.getcwd(),"predictions_attention",str(run_name)))

In [None]:
# Accuracy returned by validate() for the test file in the previous cell.
print(val_acc)

0.4056876249722284


# logging visualization

In [None]:
wandb.init(project = 'CS6910-Assignment-3', name = 'html')
# The path points at the folder of the manual run above
# (run_name == '_epochs_20_rnn_type_LSTM_bs_64_embed_512_latent_1024_dropout_0.2').
# Open the page inside a `with` block so the handle is closed once W&B has read it.
with open('/content/predictions_attention/_epochs_20_rnn_type_LSTM_bs_64_embed_512_latent_1024_dropout_0.2/connectivity.html', encoding='utf-8') as html_file:
    wandb.log({"custom_file": wandb.Html(html_file)})