## Installing Faker to generate dates

In [2]:
!pip install Faker 

## Importing necessary modules

In [4]:
import random

import matplotlib.pyplot as plt
import numpy as np
import tensorflow.keras.backend as K
from babel.dates import format_date
from faker import Faker
from tensorflow.keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply
from tensorflow.keras.layers import RepeatVector, Dense, Activation, Lambda
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import InverseTimeDecay
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm

%matplotlib inline

## Defining helper functions for data generation and preprocessing

In [88]:
# Faker instance used to draw random calendar dates for the synthetic dataset.
fake = Faker()
# Seed Faker and the stdlib RNG (used by random.choice in generate_date) so the
# generated dataset is reproducible.
Faker.seed(1655)
random.seed(1655)
# Babel format names / CLDR patterns to sample from when rendering a date.
# 'full' is repeated on purpose: random.choice is uniform over the list, so the
# repetition makes the 'full' style proportionally more frequent.
# The year fields use lower-case 'y' (calendar year). Upper-case 'Y' is the
# week-numbering year in CLDR/Babel and can differ from the calendar year for
# dates close to 1 January / 31 December, which would make the human-readable
# text disagree with the ISO label produced by date.isoformat().
formats = ['short','medium','long', 'full','full','full','full','full','full','full','full',
           'full','full','d MMM yyy', 'd MMMM yyy','dd MMM yyy','dd MMM, yyy','d MMMM, yyy',
           'dd, MMM yyy','d MM yy','d MMMM yyy','MMMM d yyy','MMMM d, yyy','dd.MM.yy']


def generate_date():
    """
    Draw one random date and render it in two textual forms.

    Returns a (human_readable, machine_readable, date_obj) triple where
    human_readable is the date formatted with a randomly chosen entry of
    `formats` (en_US locale), lower-cased and with commas removed,
    machine_readable is date_obj.isoformat(), and date_obj is the date drawn
    from Faker. If formatting raises AttributeError, (None, None, None) is
    returned instead.
    """
    date_obj = fake.date_object()
    try:
        chosen_format = random.choice(formats)
        rendered = format_date(date_obj, format=chosen_format, locale='en_US')
        human_readable = rendered.lower().replace(',', '')
        machine_readable = date_obj.isoformat()
    except AttributeError:
        return None, None, None
    return human_readable, machine_readable, date_obj



def load_dataset(m):
    """
    Generate a synthetic date-translation dataset and its character vocabularies.

    m: the number of examples to generate

    Returns (dataset, input_vocab, output_vocab, inv_output_vocab):
      dataset          -- list of (human_readable, machine_readable) string pairs;
                          draws for which generate_date() returned None are skipped
      input_vocab      -- dict char -> index over the sorted input characters,
                          followed by the special tokens '<unk>' and '<pad>'
      output_vocab     -- dict char -> index over the sorted output characters
      inv_output_vocab -- dict index -> char, the inverse of output_vocab
    """
    dataset = []
    input_chars = set()
    output_chars = set()
    for _ in tqdm(range(m)):  # tqdm renders a progress bar over the iterations
        input_date, output_date = generate_date()[:2]
        if input_date is None:
            continue
        dataset.append((input_date, output_date))
        input_chars.update(input_date)
        output_chars.update(output_date)

    # Special tokens are appended after the sorted characters, so they take the
    # two highest indices.
    input_tokens = sorted(input_chars) + ['<unk>', '<pad>']
    input_vocab = {token: index for index, token in enumerate(input_tokens)}
    inv_output_vocab = dict(enumerate(sorted(output_chars)))
    output_vocab = {char: index for index, char in inv_output_vocab.items()}
    return dataset, input_vocab, output_vocab, inv_output_vocab



def string_to_int(string, length, vocab):
    """
    Convert a string into a fixed-length list of vocabulary indices.

    string -- input string, e.g. 'Wed 10 Jul 2007'; it is lower-cased and commas
              are removed before encoding
    length -- the number of time steps you'd like, determines if the output will be padded or cut
    vocab -- vocabulary, dictionary used to index every character of your "string"

    Returns a list of exactly `length` integers. Characters missing from `vocab`
    are mapped to the index of '<unk>'; inputs shorter than `length` are
    right-padded with the index of '<pad>'.

    Raises KeyError if a character is missing and `vocab` has no '<unk>' entry,
    or if padding is needed and `vocab` has no '<pad>' entry.
    """
    string = string.lower().replace(',', '')[:length]

    # Looked up lazily with .get() because not every vocabulary defines '<unk>';
    # it is only required when an unknown character is actually encountered.
    unk_index = vocab.get('<unk>')
    rep = []
    for char in string:
        index = vocab.get(char, unk_index)
        if index is None:
            raise KeyError(
                "character %r is not in the vocabulary and no '<unk>' token is defined" % char
            )
        rep.append(index)

    if len(rep) < length:
        rep += [vocab['<pad>']] * (length - len(rep))
    return rep


def int_to_string(ints, inv_vocab):
    """
    Map vocabulary indices back to their characters.

    ints -- list of integers representing indexes in the vocabulary
    inv_vocab -- dictionary mapping indexes to characters

    Returns a list with one character per index (not a joined string).
    """
    chars = []
    for index in ints:
        chars.append(inv_vocab[index])
    return chars



def preprocess_data(dataset, input_vocab, output_vocab, Tx, Ty):
    """
    Encode a dataset of (input, output) string pairs as index and one-hot arrays.

    dataset -- iterable of (human_readable, machine_readable) string pairs
    input_vocab / output_vocab -- char -> index dictionaries
    Tx / Ty -- fixed sequence lengths for the inputs / outputs

    Returns (X, Y, Xoh, Yoh): X and Y are the index-encoded sequences produced
    by string_to_int, Xoh and Yoh are their one-hot versions with
    len(input_vocab) and len(output_vocab) classes respectively.
    """
    sources, targets = zip(*dataset)
    X = np.array([string_to_int(text, Tx, input_vocab) for text in sources])
    Y = np.array([string_to_int(text, Ty, output_vocab) for text in targets])

    n_input_classes = len(input_vocab)
    n_output_classes = len(output_vocab)
    Xoh = np.array([to_categorical(row, num_classes=n_input_classes) for row in X])
    Yoh = np.array([to_categorical(row, num_classes=n_output_classes) for row in Y])
    return X, Y, Xoh, Yoh

## Loading dataset

In [129]:
# Sequence lengths and dataset size used by the cells below.
Tx = 30 #fixed input length: string_to_int truncates longer inputs and pads shorter ones
Ty = 10 #fixed output length: an ISO date 'YYYY-MM-DD' has 10 characters (digits and '-')
m = 20000 #training data size (number of examples requested from load_dataset)
# Generate the (human readable, ISO) pairs together with the character vocabularies.
dataset, input_vocab, output_vocab, inv_output_vocab = load_dataset(m)
# X/Y are index-encoded sequences, Xoh/Yoh their one-hot encodings.
X, Y, Xoh, Yoh = preprocess_data(dataset, input_vocab, output_vocab, Tx, Ty)

print("X.shape:", X.shape)
print("Y.shape:", Y.shape)
print("Xoh.shape:", Xoh.shape)
print("Yoh.shape:", Yoh.shape)
print("Some samples:")
# Last expression of the cell: displays the first five (input, output) pairs.
dataset[:5]

## Custom softmax function that computes the softmax along a given axis

In [130]:
def softmax(x, axis=1):  #axis 1 is time axis
    """
    Softmax of a Keras tensor along a chosen axis.

    x -- tensor with at least 2 dimensions
    axis -- axis to normalise over; only used when x has more than 2 dimensions

    For a 2-D tensor this delegates to K.softmax(x) and `axis` is ignored.
    For higher-rank tensors the softmax is computed manually along `axis`.
    Raises ValueError for tensors with fewer than 2 dimensions.
    """
    rank = K.ndim(x)
    if rank == 2:
        return K.softmax(x)
    if rank > 2:
        # Subtracting the per-axis maximum keeps exp() numerically stable; the
        # shift appears in both numerator and denominator, so it cancels out.
        shifted = x - K.max(x, axis=axis, keepdims=True)
        exps = K.exp(shifted)
        return exps / K.sum(exps, axis=axis, keepdims=True)
    raise ValueError('Cannot apply softmax to a tensor that is 1D')

## Global layers used in attention calculation

In [131]:
'''Global layers used in attention calculation'''
# These layers are created once at module level and reused by one_step_attention
# on every call, so the same layer objects (and weights) are applied each time.
repeatVecLayer = RepeatVector(Tx)  # repeats the decoder state Tx times so it lines up with the encoder steps
concatLayer = Concatenate(axis=-1)  # joins encoder states and repeated decoder state along the feature axis
denseLayer1 = Dense(10, activation = "tanh")  # first scoring layer (10 units)
denseLayer2 = Dense(1, activation = "relu")  # second scoring layer: one score per input time step
activationLayer = Activation(softmax, name='attention_weights') # custom softmax, called with its default axis=1
dotLayer = Dot(axes = 1)  # dot product over axis 1 between attention weights and encoder states

## Function to calculate attention for each decoder time step

In [132]:
def one_step_attention(h, s_prev):
    """
    Compute the attention context for one decoder step.

    h --> hidden states of encoder of shape (m, Tx, 2*h_dim)
    s_prev --> previous hidden state of decoder of shape (m, s_dim)

    Returns the context tensor of shape (m, 1, 2*h_dim): the encoder states
    combined using the attention weights computed from h and s_prev. Uses the
    module-level layers repeatVecLayer, concatLayer, denseLayer1, denseLayer2,
    activationLayer and dotLayer.
    """
    s_repeated = repeatVecLayer(s_prev)            # (m, Tx, s_dim)
    merged = concatLayer([h, s_repeated])          # (m, Tx, s_dim + 2*h_dim)
    energies = denseLayer2(denseLayer1(merged))    # (m, Tx, 10) -> (m, Tx, 1)
    alphas = activationLayer(energies)             # (m, Tx, 1), softmax over the Tx axis
    return dotLayer([alphas, h])                   # (m, 1, 2*h_dim)

## Global layers used in decoder section

In [133]:
'''Global layers used in decoder section'''
# Created once at module level so every decoder step in model() reuses the same
# layer objects.
h_dim = 32  # units per direction of the encoder LSTM (passed to model())
s_dim = 64  # units of the decoder LSTM; also the size of the s0/c0 initial states
decoder_LSTM = LSTM(s_dim, return_state = True)  # return_state=True: the call also returns the LSTM states
# Output layer with one unit per output character; uses the custom softmax
# defined earlier in this notebook (which delegates to K.softmax for 2-D inputs).
output_layer = Dense(len(output_vocab), activation=softmax)

## Defining model

In [134]:
def model(Tx, Ty, h_dim, s_dim, input_vocab_size, output_vocab_size):
    """
    Build the attention-based encoder/decoder Keras model.

    Tx -- input sequence length
    Ty -- output sequence length (number of decoder steps)
    h_dim -- units per direction of the bidirectional encoder LSTM
    s_dim -- size of the decoder state inputs s0 / c0
    input_vocab_size -- size of the one-hot input encoding
    output_vocab_size -- not used here: the output size is fixed by the
                         module-level `output_layer`; kept for interface
                         compatibility

    Returns a Model taking [X, s0, c0] and producing a list of Ty outputs, one
    per decoder step, each of shape (m, output_vocab_size).
    """
    X = Input(shape=(Tx, input_vocab_size))
    s0 = Input(shape=(s_dim,), name='s0')
    c0 = Input(shape=(s_dim,), name='c0')
    s, c = s0, c0  # s (m, s_dim)

    # Encoder. No input_shape argument is needed: in the functional API the shape
    # is taken from X (the previous value, (Tx, h_dim), did not match X anyway).
    h = Bidirectional(LSTM(h_dim, return_sequences=True))(X)

    outputs = []
    # Iterate for Ty steps, feeding each step's states into the next one.
    for _step in range(Ty):
        context = one_step_attention(h, s)
        s, _, c = decoder_LSTM(context, initial_state=[s, c])
        outputs.append(output_layer(s))  # each output is (m, output_vocab_size)

    # outputs is a list of length Ty, i.e. overall (Ty, m, output_vocab_size)
    return Model(inputs=[X, s0, c0], outputs=outputs)

## Creating model

In [135]:
# NOTE: this rebinds the name `model` from the builder function to the built
# Keras Model, so this cell cannot be re-run without first re-running the cell
# that defines the builder.
model = model(Tx, Ty, h_dim, s_dim, len(input_vocab), len(output_vocab))
# The legacy `lr=` / `decay=` keyword arguments are deprecated or rejected by
# recent tf.keras optimizers. `learning_rate=` plus an InverseTimeDecay schedule
# with decay_steps=1 reproduces the legacy behaviour:
#   lr_t = 0.005 / (1 + 0.01 * iterations)
lr_schedule = InverseTimeDecay(initial_learning_rate=0.005, decay_steps=1, decay_rate=0.01)
optimizer = Adam(learning_rate=lr_schedule, beta_1=0.9, beta_2=0.999)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
model.summary()

## Training model

In [136]:
# Zero initial decoder states, one row per training example. The row count is
# taken from Xoh itself rather than from the global `m`, so it stays correct
# even if `m` has been reassigned by another cell or load_dataset skipped some
# examples.
n_train = Xoh.shape[0]
s0 = np.zeros((n_train, s_dim))
c0 = np.zeros((n_train, s_dim))
# The model returns a list of Ty outputs, each of shape (m, output_vocab_size),
# while Yoh has shape (m, Ty, output_vocab_size). Swapping the first two axes and
# converting to a list aligns the targets with the model outputs.
Yoh_swapped = list(Yoh.swapaxes(0,1))

In [137]:
# Train on the one-hot inputs plus the zero initial decoder states; the targets
# are the per-step one-hot outputs in Yoh_swapped.
model.fit(
    x=[Xoh, s0, c0],
    y=Yoh_swapped,
    batch_size=200,
    epochs=300,
)

## Evaluating model

In [138]:
# NOTE: this overwrites the global `m` that was set to the training-set size
# earlier in the notebook.
m = 5000 #testing set size
# Only the freshly generated examples are kept; the vocabularies built during
# training are reused so the encodings match what the model was trained on.
test_dataset, _, _, _ = load_dataset(m)
_, _, Xoh_test, Yoh_test = preprocess_data(test_dataset, input_vocab, output_vocab, Tx, Ty)
# Size the zero initial states from the actual number of test examples, which
# can be smaller than `m` if load_dataset skipped any draws.
n_test = Xoh_test.shape[0]
s0 = np.zeros((n_test, s_dim))
c0 = np.zeros((n_test, s_dim))
# Align the targets with the model's list of Ty per-step outputs.
Yoh_test = list(Yoh_test.swapaxes(0,1))
pred_test = model.predict([Xoh_test, s0, c0])
evals = model.evaluate([Xoh_test, s0, c0], Yoh_test)
# assumes evaluate() returns [total loss, Ty per-output losses, Ty per-output
# accuracies] — TODO confirm for the installed Keras version. Skipping the first
# 1 + Ty entries then leaves only the accuracies (equal to the previous
# hard-coded 11 when Ty == 10).
print("accuracy: ", np.mean(evals[1 + Ty:]))

## Formatting custom date

In [139]:
# Zero initial decoder states for a single example.
s0 = np.zeros((1, s_dim))
c0 = np.zeros((1, s_dim))
test_date = "2 May 1979"
# Encode the text as indices, add a batch axis, then one-hot encode:
# (Tx,) -> (1, Tx) -> (1, Tx, len(input_vocab))
date = string_to_int(test_date, Tx, input_vocab)
date = np.array(date)
date = np.expand_dims(date, axis=0)
date = np.array(list(map(lambda x: to_categorical(x, num_classes=len(input_vocab)), date)))
# The model returns a list of Ty arrays of shape (1, vocab); stacking gives
# (Ty, 1, vocab) and argmax over the last axis gives indices of shape (Ty, 1).
pred = model.predict([date, s0, c0])
pred = np.array(pred)
pred = np.argmax(pred, axis = -1)
# Take the single batch column explicitly so each item is a scalar: calling
# int() on a 1-element array is deprecated in recent NumPy releases.
output = [inv_output_vocab[int(i)] for i in pred[:, 0]]

print("output:", ''.join(output))