In [None]:
# Download the Dakshina v1.0 dataset tarball from Google Cloud Storage into the current
# working directory (-L follows redirects, -o sets the output file name).
!curl --header "Host: storage.googleapis.com" --header "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36 Edg/89.0.774.77" --header "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9" --header "Accept-Language: en-US,en;q=0.9" --header "Referer: https://github.com/google-research-datasets/dakshina" "https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar" -L -o "dakshina_dataset_v1.0.tar"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1915M  100 1915M    0     0   112M      0  0:00:17  0:00:17 --:--:-- 50.2M


In [None]:
import shutil

# Extract the downloaded tarball under /content/ (the archive format is inferred
# from the file extension).
shutil.unpack_archive(
    filename="/content/dakshina_dataset_v1.0.tar",
    extract_dir='/content/',
)

In [None]:
import re
from tqdm import tqdm
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from keras.models import Sequential,Model,load_model
from keras.layers import Dense,LSTM,GRU,SimpleRNN,Input,Dropout,TimeDistributed,RepeatVector,dot,BatchNormalization,concatenate,multiply,Activation
from keras.layers.embeddings import Embedding
from keras.layers import Layer
from keras.preprocessing import sequence
from keras.optimizers import Adam,Adadelta,Nadam,SGD
from keras.losses import SparseCategoricalCrossentropy

In [None]:
class BahdanauAttention(Layer):
  """Additive attention: score = V(tanh(W1(query) + W2(values))).

  `call` returns the context vector (values weighted by the attention
  distribution and summed over axis 1) together with the attention weights.
  """

  def __init__(self, units):
    super().__init__()
    # W1 projects the query, W2 projects the values, V reduces each position to one score.
    self.W1 = Dense(units)
    self.W2 = Dense(units)
    self.V = Dense(1)

  def call(self, query, values):
    # Insert an axis at position 1 so the projected query broadcasts against the
    # projected values (assumes values is (batch, time, features) — TODO confirm).
    expanded_query = tf.expand_dims(query, 1)

    projected_sum = self.W1(expanded_query) + self.W2(values)
    score = self.V(tf.nn.tanh(projected_sum))

    # Normalise the scores along axis 1, then collapse that axis with a weighted sum.
    attention_weights = tf.nn.softmax(score, axis=1)
    weighted_values = attention_weights * values
    context_vector = tf.reduce_sum(weighted_values, axis=1)

    return context_vector, attention_weights

In [None]:
class Encoder(Model):
  """Embedding followed by a single recurrent layer (GRU, LSTM or SimpleRNN).

  `cell` selects the recurrent layer ("gru", "lstm" or "rnn"); only the layer
  for the selected cell is created. `call` returns the per-step outputs and the
  final hidden state (for LSTM the cell state is dropped).
  """

  def __init__(self,cell,vocab_size, embedding_dim, latent_dim, batch_size,initializer,dropouts):
    super().__init__()
    self.cell = cell
    self.batch_size = batch_size
    self.latent_dim = latent_dim
    self.embedding = Embedding(vocab_size, embedding_dim)

    # Options common to all three recurrent layer types.
    recurrent_options = dict(
        return_sequences=True,
        return_state=True,
        recurrent_initializer=initializer,
        dropout=dropouts,
    )
    if cell == "gru":
        self.gru = GRU(latent_dim, **recurrent_options)
    elif cell == "lstm":
        self.lstm = LSTM(latent_dim, **recurrent_options)
    elif cell == "rnn":
        self.rnn = SimpleRNN(latent_dim, **recurrent_options)

  def call(self, x, hidden):
    embedded = self.embedding(x)
    if self.cell == "gru":
        output, state = self.gru(embedded, initial_state=hidden)
    elif self.cell == "lstm":
        # The LSTM also returns its cell state; only the hidden state is passed on.
        output, state, _ = self.lstm(embedded, initial_state=hidden)
    elif self.cell == "rnn":
        output, state = self.rnn(embedded, initial_state=hidden)
    return output, state

  def initialize_hidden_state(self):
      """Return all-zero initial state(s) of shape (batch_size, latent_dim)."""
      state_shape = (self.batch_size, self.latent_dim)
      if self.cell == 'lstm':
          # LSTM needs two states: hidden state and cell state.
          return [tf.zeros(state_shape), tf.zeros(state_shape)]
      return tf.zeros(state_shape)

In [None]:
class Decoder(Model):
  """Single-step attention decoder: attention -> embedding -> recurrent layer -> dense.

  `cell` selects the recurrent layer ("gru", "lstm" or "rnn"). `call` returns
  the output of the final dense layer (one row per batch element and time step),
  the recurrent layer's hidden state, and the attention weights.
  """

  def __init__(self, cell, vocab_size, embedding_dim, latent_dim, batch_size,initializer,dropouts):
    super().__init__()
    self.cell = cell
    self.batch_size = batch_size
    self.attention = BahdanauAttention(latent_dim)
    self.embedding = Embedding(vocab_size, embedding_dim)
    self.dense = Dense(vocab_size)

    # Options common to all three recurrent layer types.
    recurrent_options = dict(
        return_sequences=True,
        return_state=True,
        recurrent_initializer=initializer,
        dropout=dropouts,
    )
    if cell == "gru":
        self.gru = GRU(latent_dim, **recurrent_options)
    elif cell == "lstm":
        self.lstm = LSTM(latent_dim, **recurrent_options)
    elif cell == "rnn":
        self.rnn = SimpleRNN(latent_dim, **recurrent_options)

  def call(self, x, hidden, enc_output):
    # `hidden` is only used as the attention query; no initial_state is passed
    # to the recurrent layer below.
    context_vector, attention_weights = self.attention(hidden, enc_output)

    embedded = self.embedding(x)
    # Prepend the context vector to the embedded input along the feature axis.
    rnn_input = tf.concat([tf.expand_dims(context_vector, 1), embedded], axis=-1)

    if self.cell == "gru":
        output, state = self.gru(rnn_input)
    elif self.cell == "lstm":
        # The LSTM cell state is not returned to the caller.
        output, state, _ = self.lstm(rnn_input)
    elif self.cell == "rnn":
        output, state = self.rnn(rnn_input)

    # Merge the batch and time axes so the dense layer sees one row per step.
    flat_output = tf.reshape(output, (-1, output.shape[2]))
    logits = self.dense(flat_output)

    return logits, state, attention_weights

In [None]:
class Attention:
    """Character-level encoder/decoder transliteration model with attention.

    Bundles data preparation (`create_data`), training (`run`), greedy decoding
    (`evaluate`) and exact-match word accuracy on the dev and test splits.

    Conventions used throughout the class:
      * every word is wrapped in a leading tab (start marker) and a trailing
        newline (end marker) before being mapped to integer ids;
      * sequences are post-padded with 0 by `pad_sequences`, and
        `loss_function` ignores target positions whose id is 0.
        NOTE(review): id 0 is also the first character of the sorted
        vocabulary (presumably the tab start marker) — confirm.

    Constructor arguments:
        cell: "gru", "lstm" or "rnn" — recurrent layer used by encoder and decoder.
        embedding_size: embedding dimension for both encoder and decoder.
        latent_dim: number of recurrent units (also the attention width).
        optimizer: "nadam", "sgd" or "adadelta"; anything else selects Adam.
        dropouts: dropout rate passed to the recurrent layers.
        batch_size: training batch size.
        epochs: number of training epochs.
        initializer: recurrent initializer passed to the recurrent layers.
    """

    def __init__(self,cell,embedding_size,latent_dim,optimizer,dropouts,batch_size,epochs,initializer):
        self.cell = cell
        self.embedding_dim = embedding_size
        self.latent_dim = latent_dim
        self.BATCH_SIZE = batch_size
        self.epochs = epochs
        self.opt = optimizer
        self.dropouts=dropouts
        self.initializer=initializer

    @tf.function()
    def train_step(self, inp, targ, enc_hidden):
        """Run one teacher-forced training step on a batch and apply the gradients.

        Returns the summed per-step loss divided by the target sequence length.
        """
        loss = 0
        # NOTE(review): encoder/decoder are called without `training=True`, so the
        # recurrent-layer dropout is presumably inactive during training — confirm.
        with tf.GradientTape() as tape:
            enc_output, enc_hidden = self.encoder(inp, enc_hidden)
            dec_hidden = enc_hidden
            # The decoder consumes target-side ids, so the start marker must come
            # from the target vocabulary (same lookup as in `evaluate`).
            dec_input = tf.expand_dims([self.target_token_index['\t']] * self.BATCH_SIZE, 1)

            for t in range(1, targ.shape[1]):
                predictions, dec_hidden, _ = self.decoder(dec_input, dec_hidden, enc_output)
                loss += self.loss_function(targ[:, t], predictions)
                # Teacher forcing: feed the ground-truth character as the next input.
                dec_input = tf.expand_dims(targ[:, t], 1)

        batch_loss = (loss / int(targ.shape[1]))
        # The attention layer is an attribute of the decoder, so its weights are
        # already part of `decoder.trainable_variables`. Listing them a second time
        # would hand every attention weight to the optimizer twice per step.
        variables = self.encoder.trainable_variables + self.decoder.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        return batch_loss

    def get_data(self,path):
        """Read one lexicon TSV and encode it with the vocabularies from `create_data`.

        Column 0 is encoded with the target vocabulary and column 1 with the
        input vocabulary. Rows containing NaN are dropped.

        Returns:
            (encoder id lists, decoder id lists, one-hot decoder targets of shape
            (rows, max_length_y, decoder_tokens)). Id lists are not padded.

        Raises:
            KeyError: if the file contains a character missing from the vocabularies.
        """
        # NOTE(review): `error_bad_lines` is deprecated in newer pandas releases in
        # favour of `on_bad_lines` — confirm against the pinned pandas version.
        d = pd.read_csv(path,sep="\t",header=None,error_bad_lines=False)
        d = d.dropna()

        decoder_target_data = np.zeros((d.shape[0],self.max_length_y,self.decoder_tokens), dtype="float32")

        for i,target_text in enumerate(d[0]):
            target_text = '\t'+target_text+'\n'
            # Targets are the decoder inputs shifted left by one (start marker dropped).
            for t, char in enumerate(target_text):
                if t > 0:
                    decoder_target_data[i, t - 1, self.target_token_index[char]] = 1.0
            # Fill every remaining position with the end marker.
            decoder_target_data[i, t:, self.target_token_index["\n"]] = 1.0

        encoder_ids = [[self.input_token_index[letter] for letter in '\t'+word+'\n'] for word in d[1]]
        decoder_ids = [[self.target_token_index[letter] for letter in '\t'+word+'\n'] for word in d[0]]
        return encoder_ids, decoder_ids, decoder_target_data

    def create_vocab(self,path):
        """Build sorted character vocabularies and maximum lengths from a lexicon TSV.

        Returns:
            (target character list from column 0, input character list from column 1,
            longest wrapped input length, longest wrapped target length).
        """
        d = pd.read_csv(path,sep="\t",header=None,error_bad_lines=False)
        d = d.dropna()

        # Wrap every word in the start/end markers so both end up in the vocabulary.
        x = [list('\t'+word+'\n') for word in np.array(d[1])]
        y = [list('\t'+word+'\n') for word in np.array(d[0])]

        english_vocab = {char for word in x for char in word}
        telugu_vocab = {char for word in y for char in word}

        telugu_list = sorted(telugu_vocab)
        english_list = sorted(english_vocab)

        max_length_x = np.max([len(word) for word in x])
        max_length_y = np.max([len(word) for word in y])

        return telugu_list,english_list,max_length_x,max_length_y

    def create_data(self):
        """Build vocabularies from the training split and encode train/dev/test.

        Populates the token index dictionaries, the padded id arrays for every
        split, and the shuffled, batched `tf.data` training dataset.
        """
        train_path = "/content/dakshina_dataset_v1.0/te/lexicons/te.translit.sampled.train.tsv"
        cv_path = "/content/dakshina_dataset_v1.0/te/lexicons/te.translit.sampled.dev.tsv"
        test_path = "/content/dakshina_dataset_v1.0/te/lexicons/te.translit.sampled.test.tsv"

        # Vocabularies and maximum lengths come from the training split only.
        telugu_list,english_list,self.max_length_x,self.max_length_y = self.create_vocab(train_path)
        self.encoder_tokens = len(english_list)
        self.decoder_tokens = len(telugu_list)

        # Dict for char to index
        self.input_token_index = {char: i for i, char in enumerate(english_list)}
        self.target_token_index = {char: i for i, char in enumerate(telugu_list)}

        # Dict for index to char
        self.inv_input_token_index = {i: char for char, i in self.input_token_index.items()}
        self.inv_target_token_index = {i: char for char, i in self.target_token_index.items()}

        encoder_train,decoder_train,self.decoder_target_train = self.get_data(train_path)
        encoder_cv,decoder_cv,self.decoder_target_cv = self.get_data(cv_path)
        encoder_test,decoder_test,self.decoder_target_test = self.get_data(test_path)

        # Post-pad every split to the training-set maximum lengths (pad value 0).
        self.encoder_train = sequence.pad_sequences(encoder_train,maxlen=self.max_length_x,padding="post")
        self.decoder_train = sequence.pad_sequences(decoder_train,maxlen=self.max_length_y,padding="post")
        self.encoder_cv = sequence.pad_sequences(encoder_cv,maxlen=self.max_length_x,padding="post")
        self.decoder_cv = sequence.pad_sequences(decoder_cv,maxlen=self.max_length_y,padding="post")
        self.encoder_test = sequence.pad_sequences(encoder_test,maxlen=self.max_length_x,padding="post")
        self.decoder_test = sequence.pad_sequences(decoder_test,maxlen=self.max_length_y,padding="post")

        self.BUFFER_SIZE = len(self.encoder_train)
        self.steps_per_epoch = len(self.encoder_train)//self.BATCH_SIZE

        self.dataset = tf.data.Dataset.from_tensor_slices((self.encoder_train, self.decoder_train)).shuffle(self.BUFFER_SIZE)
        # drop_remainder keeps every batch at exactly BATCH_SIZE, which train_step relies on.
        self.dataset = self.dataset.batch(self.BATCH_SIZE, drop_remainder=True)

    def loss_function(self,real, pred):
        """Mean sparse categorical cross-entropy, with positions where real == 0 zeroed out."""
        mask = tf.math.logical_not(tf.math.equal(real, 0))
        loss_ = self.loss_object(real, pred)
        loss_ *= tf.cast(mask, dtype=loss_.dtype)

        return tf.reduce_mean(loss_)

    def run(self):
        """Create the optimizer, loss, encoder and decoder, then train for `epochs` epochs.

        Requires `create_data` to have been called first. Prints the mean batch
        loss after every epoch.
        """
        # Compile & run training
        if self.opt == "nadam":
            self.optimizer = Nadam()
        elif self.opt == "sgd":
            self.optimizer = SGD()
        elif self.opt == "adadelta":
            self.optimizer = Adadelta()
        else:
            self.optimizer = Adam()

        # reduction='none' keeps per-example losses so loss_function can mask them.
        self.loss_object = SparseCategoricalCrossentropy(from_logits=True,reduction='none')

        self.encoder = Encoder(self.cell,self.encoder_tokens, self.embedding_dim, self.latent_dim, self.BATCH_SIZE, self.initializer,self.dropouts)
        self.decoder = Decoder(self.cell,self.decoder_tokens, self.embedding_dim, self.latent_dim, self.BATCH_SIZE, self.initializer,self.dropouts)

        for epoch in range(self.epochs):
            enc_hidden = self.encoder.initialize_hidden_state()
            total_loss = 0

            for (batch, (inp, targ)) in enumerate(self.dataset.take(self.steps_per_epoch)):
                batch_loss = self.train_step(inp, targ, enc_hidden)
                total_loss += batch_loss

            print(f'Epoch {epoch+1} Loss {total_loss/self.steps_per_epoch:.4f}   ')

    def evaluate(self,sentence_vect,attention=False):
        """Greedily decode one encoded (padded) input sequence.

        Args:
            sentence_vect: 1-D sequence of input ids, padded to `max_length_x`.
            attention: when True, also return the attention weights of every step.

        Returns:
            The predicted string (without start/end markers), or a tuple
            (string, att_plot) where att_plot has shape (max_length_y, max_length_x)
            and rows beyond the decoded length stay zero.
        """
        if attention:
            att_plot = np.zeros((self.max_length_y,self.max_length_x))
        inputs = tf.expand_dims(tf.convert_to_tensor(sentence_vect),0)
        result = ''
        # Zero initial state(s) for a batch of one.
        if self.cell == "lstm":
            hidden = [tf.zeros((1, self.latent_dim)),tf.zeros((1, self.latent_dim))]
        else:
            hidden = [tf.zeros((1, self.latent_dim))]
        enc_out, enc_hidden = self.encoder(inputs, hidden)

        dec_hidden = enc_hidden
        dec_input = tf.expand_dims([self.target_token_index['\t']], 0)

        for t in range(self.max_length_y):
            predictions, dec_hidden, attention_weights = self.decoder(dec_input, dec_hidden, enc_out)

            if attention:
                att_plot[t] = (tf.reshape(attention_weights,(-1,))).numpy()

            predicted_id = tf.argmax(predictions[0]).numpy()
            predicted_char = self.inv_target_token_index[predicted_id]

            # Stop at the end marker; it is not part of the returned string.
            if predicted_char == "\n":
                break
            result += predicted_char

            # Feed the prediction back in as the next decoder input.
            dec_input = tf.expand_dims([predicted_id], 0)

        if attention:
            return result,att_plot
        return result

    def _decode_target(self, token_ids):
        """Turn a padded target id row into text, skipping the start marker and stopping at the end marker."""
        chars = []
        for token_id in token_ids[1:]:
            char = self.inv_target_token_index[token_id]
            if char == "\n":
                break
            chars.append(char)
        return "".join(chars)

    def _word_accuracy(self, encoder_data, decoder_data):
        """Fraction of examples whose greedy prediction equals the reference exactly (0.0 if empty)."""
        total = len(decoder_data)
        if total == 0:
            return 0.0
        correct = sum(
            1
            for enc_row, dec_row in zip(encoder_data, decoder_data)
            if self._decode_target(dec_row) == self.evaluate(enc_row)
        )
        return correct/total

    def percentage_of_correct_test_predictions(self):
        """Exact-match word accuracy on the test split, as a fraction in [0, 1]."""
        return self._word_accuracy(self.encoder_test, self.decoder_test)

    def percentage_of_correct_cv_predictions(self):
        """Exact-match word accuracy on the dev split, as a fraction in [0, 1]."""
        return self._word_accuracy(self.encoder_cv, self.decoder_cv)


In [None]:
# Search space for the W&B random hyperparameter sweep.
sweep_config={
    'method': 'random',
    'metric': {
        # Must be the exact key that the training function passes to wandb.log();
        # train() logs 'validation accuracy', so a different name would leave the
        # sweep without an objective value.
        'name': 'validation accuracy',
        'goal': 'maximize'
    },
    'parameters':{
        'epochs':{
            'values':[3,5,6]
        },
        'embedding_size':{
            'values':[8,12,16,20]
        },
        'cell':{
            'values':["gru","lstm","rnn"]
        },
        'dropouts':{
            'values':[0,0.2,0.3]
        },
        'latent_dim':{
            'values':[16,32,64,128,256,512]
        },
        'batch_size':{
            'values':[32,64]
        },
        'optimizers':{
            'values':["nadam","adam","sgd","adadelta"]
        },
       'initializer':{
            'values':["orthogonal","glorot_uniform"]
        }
    }
}

In [None]:
!pip install --upgrade wandb
import wandb
!wandb login 3c967c63b099a3b2acd600aa30008e7de1ea6498

Collecting wandb
[?25l  Downloading https://files.pythonhosted.org/packages/98/5f/45439b4767334b868e1c8c35b1b0ba3747d8c21be77b79f09eed7aa3c72b/wandb-0.10.30-py2.py3-none-any.whl (1.8MB)
[K     |████████████████████████████████| 1.8MB 2.9MB/s 
[?25hCollecting subprocess32>=3.5.3
[?25l  Downloading https://files.pythonhosted.org/packages/32/c8/564be4d12629b912ea431f1a50eb8b3b9d00f1a0b1ceff17f266be190007/subprocess32-3.5.4.tar.gz (97kB)
[K     |████████████████████████████████| 102kB 8.4MB/s 
Collecting docker-pycreds>=0.4.0
  Downloading https://files.pythonhosted.org/packages/f5/e8/f6bd1eee09314e7e6dee49cbe2c5e22314ccdb38db16c9fc72d2fa80d054/docker_pycreds-0.4.0-py2.py3-none-any.whl
Collecting configparser>=3.8.1
  Downloading https://files.pythonhosted.org/packages/fd/01/ff260a18caaf4457eb028c96eeb405c4a230ca06c8ec9c1379f813caa52e/configparser-5.0.2-py3-none-any.whl
Collecting sentry-sdk>=0.4.0
[?25l  Downloading https://files.pythonhosted.org/packages/1c/4a/a54b254f67d8f4052338d

In [None]:
# Register the sweep with W&B; the returned id is what wandb.agent() needs.
sweep_id = wandb.sweep(
    sweep_config,
    project="Assignment 3 Question 5",
    entity="adi00510",
)

Create sweep with ID: 8tvl6kpd
Sweep URL: https://wandb.ai/adi00510/Assignment%203%20Question%205/sweeps/8tvl6kpd


In [None]:
def train():
    """Run one sweep trial: build, train and score a model, then log dev accuracy.

    Hyperparameters come from `wandb.config`; the defaults below are passed to
    `wandb.init` as the initial config. The exact-match word accuracy on the
    dev split is logged under the key 'validation accuracy'.
    """
    default_hyperparameters={
      'epochs':5,
      'embedding_size':16,
      'dropouts':0.3,
      'optimizers':"adam",
      'cell':"lstm",
      'latent_dim':512,
      'batch_size': 64,
      'initializer':"glorot_uniform"
       }

    wandb.init(config=default_hyperparameters)
    config=wandb.config

    model=Attention(
        cell=config.cell,
        embedding_size=config.embedding_size,
        latent_dim=config.latent_dim,
        optimizer=config.optimizers,
        dropouts=config.dropouts,
        batch_size=config.batch_size,
        epochs=config.epochs,
        initializer=config.initializer,
    )
    model.create_data()
    model.run()

    dev_accuracy=model.percentage_of_correct_cv_predictions()
    wandb.log({'validation accuracy':dev_accuracy})
    

In [None]:
# Start a sweep agent that calls train() once per hyperparameter set it receives.
# NOTE(review): no `count` is passed, so presumably the agent keeps launching runs
# until it is interrupted manually — confirm and consider bounding it.
wandb.agent(sweep_id,train)

[34m[1mwandb[0m: Agent Starting Run: vcgwyudy with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell: rnn
[34m[1mwandb[0m: 	dropouts: 0
[34m[1mwandb[0m: 	embedding_size: 12
[34m[1mwandb[0m: 	epochs: 6
[34m[1mwandb[0m: 	initializer: glorot_uniform
[34m[1mwandb[0m: 	latent_dim: 16
[34m[1mwandb[0m: 	optimizers: sgd
[34m[1mwandb[0m: Currently logged in as: [33madi00510[0m (use `wandb login --relogin` to force relogin)


Epoch 1 Loss 1.3792   
Epoch 2 Loss 1.2205   
Epoch 3 Loss 1.1529   
Epoch 4 Loss 1.1201   
Epoch 5 Loss 1.0837   
Epoch 6 Loss 1.0325   


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
validation accuracy,0.0
_runtime,882.0
_timestamp,1621518565.0
_step,0.0


0,1
validation accuracy,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: i01x7xnr with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	cell: gru
[34m[1mwandb[0m: 	dropouts: 0.2
[34m[1mwandb[0m: 	embedding_size: 12
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	initializer: glorot_uniform
[34m[1mwandb[0m: 	latent_dim: 16
[34m[1mwandb[0m: 	optimizers: adam


Epoch 1 Loss 1.1533   
Epoch 2 Loss 0.8730   
Epoch 3 Loss 0.6459   
Epoch 4 Loss 0.4716   
Epoch 5 Loss 0.3629   


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
validation accuracy,0.18652
_runtime,1113.0
_timestamp,1621519686.0
_step,0.0


0,1
validation accuracy,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: c9q1r21k with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	cell: gru
[34m[1mwandb[0m: 	dropouts: 0.3
[34m[1mwandb[0m: 	embedding_size: 12
[34m[1mwandb[0m: 	epochs: 6
[34m[1mwandb[0m: 	initializer: glorot_uniform
[34m[1mwandb[0m: 	latent_dim: 256
[34m[1mwandb[0m: 	optimizers: adam


Epoch 1 Loss 0.8128   
Epoch 2 Loss 0.2129   
Epoch 3 Loss 0.2286   
Epoch 4 Loss 0.1671   
Epoch 5 Loss 0.1490   
Epoch 6 Loss 0.1266   


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
validation accuracy,0.46138
_runtime,1245.0
_timestamp,1621520941.0
_step,0.0


0,1
validation accuracy,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: tc740d6o with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell: rnn
[34m[1mwandb[0m: 	dropouts: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	epochs: 6
[34m[1mwandb[0m: 	initializer: orthogonal
[34m[1mwandb[0m: 	latent_dim: 32
[34m[1mwandb[0m: 	optimizers: sgd


Epoch 1 Loss 1.3231   
Epoch 2 Loss 1.1619   
Epoch 3 Loss 1.0571   
Epoch 4 Loss 0.8961   
Epoch 5 Loss 0.6563   
Epoch 6 Loss 1.1078   


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
validation accuracy,0.0
_runtime,774.0
_timestamp,1621521724.0
_step,0.0


0,1
validation accuracy,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: bnz5yrvh with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	cell: lstm
[34m[1mwandb[0m: 	dropouts: 0
[34m[1mwandb[0m: 	embedding_size: 12
[34m[1mwandb[0m: 	epochs: 6
[34m[1mwandb[0m: 	initializer: glorot_uniform
[34m[1mwandb[0m: 	latent_dim: 256
[34m[1mwandb[0m: 	optimizers: sgd


Epoch 1 Loss 1.3908   
Epoch 2 Loss 1.2841   
Epoch 3 Loss 1.1405   
Epoch 4 Loss 1.0847   
Epoch 5 Loss 1.0159   
Epoch 6 Loss 0.8925   


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
validation accuracy,0.00018
_runtime,967.0
_timestamp,1621522699.0
_step,0.0


0,1
validation accuracy,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: cwc5adez with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell: gru
[34m[1mwandb[0m: 	dropouts: 0.2
[34m[1mwandb[0m: 	embedding_size: 12
[34m[1mwandb[0m: 	epochs: 6
[34m[1mwandb[0m: 	initializer: orthogonal
[34m[1mwandb[0m: 	latent_dim: 512
[34m[1mwandb[0m: 	optimizers: sgd


Epoch 1 Loss 1.3911   
Epoch 2 Loss 1.2815   
Epoch 3 Loss 1.1579   
Epoch 4 Loss 1.1243   
Epoch 5 Loss 1.0730   
Epoch 6 Loss 0.9812   


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
validation accuracy,0.0
_runtime,1325.0
_timestamp,1621524033.0
_step,0.0


0,1
validation accuracy,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: 2mhl3mml with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	cell: gru
[34m[1mwandb[0m: 	dropouts: 0.2
[34m[1mwandb[0m: 	embedding_size: 8
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	initializer: orthogonal
[34m[1mwandb[0m: 	latent_dim: 256
[34m[1mwandb[0m: 	optimizers: nadam


Epoch 1 Loss 0.7813   
Epoch 2 Loss 0.1944   
Epoch 3 Loss 0.1497   


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
validation accuracy,0.46753
_runtime,985.0
_timestamp,1621525028.0
_step,0.0


0,1
validation accuracy,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: 3pvcrlmu with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	cell: rnn
[34m[1mwandb[0m: 	dropouts: 0.2
[34m[1mwandb[0m: 	embedding_size: 8
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	initializer: orthogonal
[34m[1mwandb[0m: 	latent_dim: 32
[34m[1mwandb[0m: 	optimizers: sgd


Epoch 1 Loss 1.2731   
Epoch 2 Loss 1.0460   
Epoch 3 Loss 0.8152   
Epoch 4 Loss 0.6688   
Epoch 5 Loss 0.5587   


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
validation accuracy,0.16646
_runtime,971.0
_timestamp,1621526010.0
_step,0.0


0,1
validation accuracy,▁
_runtime,▁
_timestamp,▁
_step,▁


[34m[1mwandb[0m: Agent Starting Run: z8m83ulg with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell: gru
[34m[1mwandb[0m: 	dropouts: 0
[34m[1mwandb[0m: 	embedding_size: 12
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	initializer: orthogonal
[34m[1mwandb[0m: 	latent_dim: 64
[34m[1mwandb[0m: 	optimizers: nadam


Epoch 1 Loss 1.1383   
Epoch 2 Loss 0.9415   
Epoch 3 Loss 0.4830   
Epoch 4 Loss 0.2787   
Epoch 5 Loss 0.2265   


# 5B) Calculating word-level test accuracy for the best model from the sweep

In [None]:
# Build and train the configuration selected as the best model.
s2s = Attention(
    cell="gru",
    embedding_size=8,
    latent_dim=256,
    optimizer="nadam",
    dropouts=0.2,
    batch_size=32,
    epochs=5,
    initializer="orthogonal",
)
s2s.create_data()
s2s.run()

Epoch 1 Loss 0.7449   
Epoch 2 Loss 0.2069   
Epoch 3 Loss 0.1652   
Epoch 4 Loss 0.1413   
Epoch 5 Loss 0.1396   


In [None]:
# Exact-match word accuracy on the dev split; the method returns a fraction,
# which is scaled to a percentage for printing.
cv_acc = s2s.percentage_of_correct_cv_predictions()

print("Cross Validation Word Accuracy is",cv_acc*100)

Cross Validation Word Accuracy is 43.85007918352983


In [None]:
# Exact-match word accuracy on the test split; the method returns a fraction,
# which is scaled to a percentage for printing.
test_acc = s2s.percentage_of_correct_test_predictions()

print("Test Word Accuracy is",test_acc*100)

Test Word Accuracy is 46.05881329389246


In [None]:
import pandas as pd


def _ids_to_text(token_ids, index_to_char):
    """Decode a padded id row to text: skip the start marker, stop at the end marker."""
    chars = []
    for token_id in token_ids[1:]:
        char = index_to_char[token_id]
        if char == "\n":
            break
        chars.append(char)
    return "".join(chars)


# Decode every test example and store input / reference / prediction side by side.
# Local names deliberately avoid `input`, which would shadow the built-in for the
# rest of the notebook session.
prediction_rows = []
for encoder_row, decoder_row in zip(s2s.encoder_test, s2s.decoder_test):
    prediction_rows.append((
        _ids_to_text(encoder_row, s2s.inv_input_token_index),
        _ids_to_text(decoder_row, s2s.inv_target_token_index),
        s2s.evaluate(encoder_row),
    ))

df = pd.DataFrame(prediction_rows, columns=["Actual_Input","Actual_Output","Predicted_Output"])
df.to_csv("predictions_attention.csv",index=False)

# 5D) Plotting Attention plots.

In [None]:
!pip install --upgrade wandb
import wandb
!wandb login 3c967c63b099a3b2acd600aa30008e7de1ea6498
from wandb.keras import WandbCallback

In [None]:
# Open a W&B run in the assignment project; the attention plots are logged to it later.
wandb.init(project='Assignment 3 Question 5',entity='adi00510')

[34m[1mwandb[0m: Currently logged in as: [33madi00510[0m (use `wandb login --relogin` to force relogin)


In [None]:
def translate(sent,attention=False,print_out=True,print_plot=False):
  """Transliterate one word with the global model `s2s`.

  The word is wrapped in a leading tab and trailing newline, mapped to input
  ids and post-padded to `s2s.max_length_x` before decoding.

  Args:
    sent: word to transliterate (every character must be in the input vocabulary).
    attention: when True, also fetch the attention weights from the model.
    print_out: print input and prediction (only consulted when attention=True;
      the non-attention path always prints).
    print_plot: when attention=True, draw the heatmap via `attention_plot`.

  Returns:
    (attention weights cropped to len(prediction) rows and len(wrapped word)
    columns, prediction) when attention=True; otherwise None.
  """
  sent = "\t"+sent+"\n"
  token_ids = [s2s.input_token_index[ch] for ch in sent]
  padded_ids = sequence.pad_sequences([token_ids],maxlen=s2s.max_length_x,padding="post")[0]

  if not attention:
      pred = s2s.evaluate(padded_ids,False)
      print("Input:",sent)
      print("Output:",pred)
      return None

  pred,attention_matrix = s2s.evaluate(padded_ids,True)
  if print_out:
    print("Input:",sent)
    print("Output:",pred)

  # Keep only the rows/columns that correspond to real output/input characters.
  cropped = attention_matrix[:len(pred),:len(sent)]
  if print_plot:
    attention_plot(cropped,sent,pred)

  return cropped,pred

In [None]:
import seaborn as sb
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties as fontp

# Font used for the y tick labels of the attention plot; loaded from the relative
# path 'telugu.ttf', so that file must be present next to the notebook.
font_new =  fontp(fname='telugu.ttf')
# Extra text properties passed to set_yticklabels in attention_plot.
font_dict = {'fontsize':10}

def attention_plot(attention,actual,pred):
    """Draw an attention heatmap and collect the rendered image.

    Args:
        attention: 2-D weights passed to seaborn's heatmap.
        actual: wrapped input string; its first and last characters are replaced
            by the labels 'start' and 'end' on the x axis.
        pred: predicted string; its characters label the y axis.

    Side effects: writes "ex.png", appends the re-read image to the module-level
    list `plot` (which must already exist), and shows the figure.
    """
    fig = plt.figure(figsize=(5,5))
    ax = fig.add_subplot(1, 1, 1)
    sb.heatmap(attention,cbar=False,ax=ax)

    x_labels = ['start', *actual[1:-1], 'end']
    ax.set_xticklabels(x_labels)
    ax.set_yticklabels(list(pred),fontdict=font_dict,fontproperties=font_new)

    # Round-trip through a PNG so the rendered figure is available as an image array.
    fig.savefig("ex.png")
    plot.append(plt.imread("ex.png"))
    plt.show()


In [None]:
import random

# Number of randomly chosen test words to plot.
NUM_ATTENTION_PLOTS = 9

# Take the population size from the data instead of a hard-coded row count, so
# the indices can never run past the end of the encoded test set.
idxs = list(range(len(s2s.encoder_test)))
random.shuffle(idxs)
idxs = idxs[:NUM_ATTENTION_PLOTS]

# attention_plot() appends every rendered heatmap to this list.
plot = []
for i in idxs:
  # Decode the padded id row back to the raw word (skip start marker, stop at end
  # marker). The local name avoids shadowing the built-in `input`.
  source_word = ""
  for x in s2s.encoder_test[i][1:]:
    char = s2s.inv_input_token_index[x]
    if char == "\n":
      break
    source_word += char

  att,pred = translate(source_word,attention=True,print_out=False,print_plot=True)


wandb.log({"Question 5d": [wandb.Image(img) for img in plot]})

**Best Model**

In [None]:
# Build and train the best-model configuration.
s2s = Attention(
    cell="gru",
    embedding_size=8,
    latent_dim=256,
    optimizer="nadam",
    dropouts=0.2,
    batch_size=32,
    epochs=5,
    initializer="orthogonal",
)
s2s.create_data()
s2s.run()

Epoch 1 Loss 0.8522   
Epoch 2 Loss 0.2910   
Epoch 3 Loss 0.2299   
Epoch 4 Loss 0.1839   
Epoch 5 Loss 0.1535   


In [None]:
# Exact-match word accuracy on the dev split; the method returns a fraction,
# which is scaled to a percentage for printing.
cv_acc = s2s.percentage_of_correct_cv_predictions()

print("Cross Validation Word Accuracy is",cv_acc*100)

Cross Validation Word Accuracy is 43.85007918352983


In [None]:
# Exact-match word accuracy on the test split; the method returns a fraction,
# which is scaled to a percentage for printing.
test_acc = s2s.percentage_of_correct_test_predictions()

print("Test Word Accuracy is",test_acc*100)

Test Word Accuracy is 46.05881329389246


# Type a word to transliterate into Telugu

In [None]:
input_text = 'angeekaaram' #@param {type:"string"}
# `translate` is a module-level helper that uses the global `s2s`; the Attention
# class has no `translate` method, so it must be called directly.
translate(input_text)

Input: angeekaaram
Output: అంగీకారం


In [None]:
from IPython.display import HTML as html_print
from IPython.display import display

In [None]:
def cstr(s,flag=False, color='black'):
  """Wrap `s` in an HTML <text> tag with the given background colour.

  A truthy `flag` selects the 31px variant; otherwise the 30px variant is used.
  """
  template = (
    "<text style=color:#000;background-color:{};font-size:31px>{} </text>"
    if flag else
    "<text style=color:#000;background-color:{};font-size:30px>{} </text>"
  )
  return template.format(color, s)
	
# Render (text, colour) pairs as one line of coloured HTML.
def print_color(t):
    """Display every (text, colour) pair in `t` as a coloured HTML fragment.

    Pairs whose colour is '#f42e2e' are rendered with the flagged variant of `cstr`.
    """
    fragments = []
    for text, colour in t:
        if colour != '#f42e2e':
            fragments.append(cstr(text, color=colour))
        else:
            fragments.append(cstr(text,True,color=colour))
    display(html_print(''.join(fragments)))

# get appropriate color for value
def get_clr(value):
    """Map a weight in [0, 1] to one of 20 hex colours (first entry = low, last = high).

    Each colour covers a 0.05-wide bucket. The bucket index is clamped to the
    valid range, so a weight of exactly 1.0 (or slightly above) maps to the last
    colour instead of raising IndexError, and negative weights map to the first
    colour instead of wrapping around to the end of the list.
    """
    colors = ['#85c2e1', '#89c4e2', '#95cae5', '#99cce6', '#a1d0e8',
        '#b2d9ec', '#baddee', '#c2e1f0', '#eff7fb', '#f9e8e8',
        '#f9e8e8', '#f9d4d4', '#f9bdbd', '#f8a8a8', '#f68f8f',
        '#f47676', '#f45f5f', '#f34343', '#f33b3b', '#f42e2e']
    bucket = int((value * 100) / 5)
    bucket = max(0, min(bucket, len(colors) - 1))
    return colors[bucket]

In [None]:
def visualize(input, attention_matrix, target_char_index):
  """Show the input word coloured by the attention paid to each position.

  Row `target_char_index` of `attention_matrix` supplies one weight per label:
  "/start/", every character of `input`, then "/end/" — so that row needs at
  least len(input) + 2 entries.
  """
  weights = attention_matrix[target_char_index]
  labels = ["/start/"] + list(input) + ["/end/"]
  text_colours = [
    (label, get_clr(weights[position]))
    for position, label in enumerate(labels)
  ]
  print_color(text_colours)

def visualize_out(input,idx):
  """Show `input` with the character at position `idx` highlighted."""
  highlight_colour = '#f42e2e'
  plain_colour = '#85c2e1'
  text_colours = [
    (char, highlight_colour if position == idx else plain_colour)
    for position, char in enumerate(input)
  ]
  print_color(text_colours)


In [None]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

# Type a word to get visualization of connectivity

In [None]:
input_word = 'aditya' #@param {type:"string"}
# Decode the word and keep the cropped attention matrix for the widget below.
attention_matrix,pred = translate(input_word,attention=True,print_out=False)

def get_color_code(idx):
    """Show the input coloured by attention for output position `idx`, then the output with that position highlighted.

    Reads the notebook globals `input_word`, `attention_matrix` and `pred` at
    call time, so re-assigning them in another cell changes what this shows.
    """
    print()
    visualize(input_word,attention_matrix,idx)
    print()
    print()
    visualize_out(pred,idx)

  
# Slider over the positions of the predicted word.
interact(get_color_code, idx=(0,len(pred)-1));

interactive(children=(IntSlider(value=2, description='idx', max=5), Output()), _dom_classes=('widget-interact'…

In [None]:
word = 'angeekarinchaadu' #@param {type:"string"}
# Decode the word and keep the cropped attention matrix for the widget below.
attention_matrix,pred = translate(word,attention=True,print_out=False)

def get_color_code(idx):
    """Show the input coloured by attention for output position `idx`, then the output with that position highlighted.

    Reads the notebook globals `word`, `attention_matrix` and `pred` at call
    time, so re-assigning them in another cell changes what this shows.
    """
    visualize(word,attention_matrix,idx)
    print()
    print()
    visualize_out(pred,idx)

  
# Slider over the positions of the predicted word.
interact(get_color_code, idx=(0,len(pred)-1));