###Download the Dataset

In [1]:
!!curl -O http://www.manythings.org/anki/ara-eng.zip
!!unzip ara-eng.zip

['Archive:  ara-eng.zip',
 '  inflating: _about.txt              ',
 '  inflating: ara.txt                 ']

###Import the Libraries

In [2]:
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu
import numpy as np
import string
from string import digits
import matplotlib.pyplot as plt
%matplotlib inline
import re
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from keras.layers import Input, LSTM, Embedding, Dense,Dropout
from keras.models import Model
from numpy import array
from numpy import asarray
from numpy import zeros
import keras
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import corpus_bleu

###Dataset Importing and Visualization

In [3]:
# Path to the data txt file on disk.
data_path = "ara.txt"
# open the file eng-spa.txt and read
lines= pd.read_table(data_path,  names =['source', 'target', 'comments'])
#printing sample data from lines
lines.sample(6)

Unnamed: 0,source,target,comments
6879,A dolphin is a kind of mammal.,الدلفين نوع من الثدييات.,CC-BY 2.0 (France) Attribution: tatoeba.org #6...
10702,"Though it was cold, he didn't light the fire.",لم يشعل النار مع أن الجو كان بارداً.,CC-BY 2.0 (France) Attribution: tatoeba.org #2...
6694,I'm not telling you anything.,أنا لا أخبرك أي شئ.,CC-BY 2.0 (France) Attribution: tatoeba.org #1...
10634,He extorted a large amount of money from her.,لقد ابتزّها بمبلغ كبير من المال.,CC-BY 2.0 (France) Attribution: tatoeba.org #3...
4264,I was made to go there.,أُجبرتُ على الذهاب هناك.,CC-BY 2.0 (France) Attribution: tatoeba.org #2...
9034,She put down her thoughts on paper.,دونت ما تفكر به على الورق.,CC-BY 2.0 (France) Attribution: tatoeba.org #3...


In [4]:
lines.shape

(11668, 3)

In [5]:
lines.head(3)

Unnamed: 0,source,target,comments
0,Hi.,مرحبًا.,CC-BY 2.0 (France) Attribution: tatoeba.org #5...
1,Run!,اركض!,CC-BY 2.0 (France) Attribution: tatoeba.org #9...
2,Help!,النجدة!,CC-BY 2.0 (France) Attribution: tatoeba.org #4...


In [6]:
lines[11665:11668]

Unnamed: 0,source,target,comments
11665,A man touched down on the moon. A wall came do...,هبط إنسان على سطح القمر، وأنهار حائط في برلين،...,CC-BY 2.0 (France) Attribution: tatoeba.org #3...
11666,"Ladies and gentlemen, please stand for the nat...",سيداتي و سادتي ، رجاءً قفوا للنشيد الوطني للات...,CC-BY 2.0 (France) Attribution: tatoeba.org #3...
11667,There are mothers and fathers who will lie awa...,وهناك أمهات وآباء سيظلون مستيقظين بعد أن ينام ...,CC-BY 2.0 (France) Attribution: tatoeba.org #3...


In [7]:
lines.source[11665]

'A man touched down on the moon. A wall came down in Berlin. A world was connected by our own science and imagination.'

In [8]:
lines.target[11665]

'هبط إنسان على سطح القمر، وأنهار حائط في برلين، و عالم ترابطت أجزاؤه بعلمنا وخيالنا.'

In [9]:
lines.source[11666]

'Ladies and gentlemen, please stand for the national anthem of the Russian Federation performed by the Sretensky Monastery Choir.'

In [10]:
lines.target[11666]

'سيداتي و سادتي ، رجاءً قفوا للنشيد الوطني للاتحاد الروسي باداء جوقة دير سرتينسكي .'

In [11]:
lines.source[11667]

"There are mothers and fathers who will lie awake after the children fall asleep and wonder how they'll make the mortgage, or pay their doctor's bills, or save enough for their child's college education."

In [12]:
lines.target[11667]

'وهناك أمهات وآباء سيظلون مستيقظين بعد أن ينام أطفالهم، يتساءلون عن كيف سيسددون أقساط الرهن العقاري الذي اشترَوْ به بيتهم، وكيف سيدفعون فواتير أطبائهم، أو توفير ما يحتاجونه من مال لتسديد رسوم تسجيل أبنائهم في الجامعات.'

In [13]:
len(lines)

11668

###Dataset Cleaning

In [14]:
# convert source and target text to Lowercase 
lines.source=lines.source.apply(lambda x: x.lower())
lines.target=lines.target.apply(lambda x: x.lower())
# Remove quotes from source and target text
lines.source=lines.source.apply(lambda x: re.sub("'", '', x))
lines.target=lines.target.apply(lambda x: re.sub("'", '', x))
# create a set of all special characters
special_characters= set(string.punctuation)
# Remove all the special characters
lines.source = lines.source.apply(lambda x: ''.join(char1 for char1 in x if char1 not in special_characters))
lines.target = lines.target.apply(lambda x: ''.join(char1 for char1 in x if char1 not in special_characters))
# Remove digits from source and target sentences
num_digits= str.maketrans('','', digits)
lines.source=lines.source.apply(lambda x: x.translate(num_digits))
lines.target= lines.target.apply(lambda x: x.translate(num_digits))
# Remove extra spaces
lines.source=lines.source.apply(lambda x: x.strip())
lines.target=lines.target.apply(lambda x: x.strip())
lines.source=lines.source.apply(lambda x: re.sub(" +", " ", x))
lines.target=lines.target.apply(lambda x: re.sub(" +", " ", x))
lines.source=lines.source.apply(lambda x: re.sub("[^-9A-Za-z ]", "" , x))

In [15]:
def clean_text_english(text):
    '''Clean text by removing unnecessary characters and altering the format of words.'''

    text = text.lower()
    
    text = re.sub(r"i'm", "i am", text)
    text = re.sub(r"he's", "he is", text)
    text = re.sub(r"she's", "she is", text)
    text = re.sub(r"it's", "it is", text)
    text = re.sub(r"that's", "that is", text)
    text = re.sub(r"what's", "that is", text)
    text = re.sub(r"where's", "where is", text)
    text = re.sub(r"how's", "how is", text)
    text = re.sub(r"\'ll", " will", text)
    text = re.sub(r"\'ve", " have", text)
    text = re.sub(r"\'re", " are", text)
    text = re.sub(r"\'d", " would", text)
    text = re.sub(r"\'re", " are", text)
    text = re.sub(r"won't", "will not", text)
    text = re.sub(r"can't", "cannot", text)
    text = re.sub(r"n't", " not", text)
    text = re.sub(r"n'", "ng", text)
    text = re.sub(r"'bout", "about", text)
    text = re.sub(r"'til", "until", text)
    text = re.sub(r"[-()\"#/@;:<>{}`+=~|.!?,]", "", text)
    
    return text
lines.source=lines.source.apply(clean_text_english)
lines.source=lines.source.apply(lambda x: re.sub("[.?#@%^&*()@!;:'\/!*]", "", x))

In [16]:
lines.target=lines.target.apply(lambda x: re.sub("[٠١٢٣٤٥٦٧٨٩۱۹۰]", "", x))
lines.target=lines.target.apply(lambda x: re.sub("[ـ،؛؟٫٬٠]", "", x))
lines.target=lines.target.apply(lambda x: re.sub("[abcdefghijklmnopqrstuvwxyz]", "", x))
# adapted from https://github.com/bakrianoo/aravec
# function to clean and normalize text 
def clean_text(text):
    search = ["أ","إ","آ","ة","_","-","/",".","،"," و "," يا ",'"',"ـ","'","ى","\\",'\n', '\t','&quot;','?','؟','!']
    replace = ["ا","ا","ا","ه"," "," ","","",""," و"," يا","","","","ي","",' ', ' ',' ',' ? ',' ؟ ',' ! ']  
    p_tashkeel = re.compile(r'[\u0617-\u061A\u064B-\u0652]')
    text = re.sub(p_tashkeel,"", text)
    p_longation = re.compile(r'(.)\1+')
    subst = r"\1\1"
    text = re.sub(p_longation, subst, text)
    text = text.replace('وو', 'و')
    text = text.replace('يي', 'ي')
    text = text.replace('اا', 'ا')
    
    for i in range(0, len(search)):
        text = text.replace(search[i], replace[i])
        
    text = text.strip()
    
    return text
lines.target=lines.target.apply(clean_text)

In [17]:
def remove_diacritics(text):
    arabic_diacritics = re.compile(""" ّ    | # Tashdid
                             َ    | # Fatha
                             ً    | # Tanwin Fath
                             ُ    | # Damma
                             ٌ    | # Tanwin Damm
                             ِ    | # Kasra
                             ٍ    | # Tanwin Kasr
                             ْ    | # Sukun
                             ـ     # Tatwil/Kashida
                         """, re.VERBOSE)
    text = re.sub(arabic_diacritics, '', str(text))
    return text

In [18]:
lines.target=lines.target.apply(remove_diacritics)

In [19]:
# Add start and end tokens to target sequences
lines.target = lines.target.apply(lambda x : 'START_ '+ x + ' _END')
lines.sample(6)

Unnamed: 0,source,target,comments
9742,the question is where to buy the book,START_ المشكله هي اين يمكن ان يشتري الكتاب _END,CC-BY 2.0 (France) Attribution: tatoeba.org #3...
8836,tom will probably not believe you,START_ من المحتمل ان توم لن يصدقك _END,CC-BY 2.0 (France) Attribution: tatoeba.org #9...
11003,the band upped the number of shows in their tour,START_ زادت الفرقه الموسيقيه عدد الحفلات في جو...,CC-BY 2.0 (France) Attribution: tatoeba.org #3...
4646,i have the ace of clubs,START_ لدي الاس الكوبا _END,CC-BY 2.0 (France) Attribution: tatoeba.org #2...
1675,she was promoted,START_ تم ترقيتها _END,CC-BY 2.0 (France) Attribution: tatoeba.org #3...
7526,my mother plays the piano well,START_ امي تعزف البيانو بمهاره _END,CC-BY 2.0 (France) Attribution: tatoeba.org #2...


###Tokenization

In [20]:
# Find all the source and target words and sort them
# Vocabulary of Source language
all_source_words=set()
for source in lines.source:
    for word in source.split():
        if word not in all_source_words:
            all_source_words.add(word)
# Vocabulary of Target 
all_target_words=set()
for target in lines.target:
    for word in target.split():
        if word not in all_target_words:
            all_target_words.add(word)
# sort all unique source and target words
source_words= sorted(list(all_source_words))
target_words=sorted(list(all_target_words))

In [21]:
for i in source_words:
   print(i)

a
abandon
ability
able
aboard
abomination
abortion
about
above
abroad
absence
absent
absolute
absolutely
absorb
abuse
abused
abusing
accelerated
accent
accept
accepted
accepting
accident
accidents
accompanies
accompany
accomplished
account
accurate
accused
accustomed
ace
aces
ache
achieved
achilles
acknowledge
acquaintance
acquainted
acquisition
across
act
acting
action
actions
active
actor
actress
acts
actually
adapted
add
added
addicted
address
addressed
adjust
adjusting
admire
admit
adopt
ads
adults
advance
advances
advantage
adventure
adversity
advertisements
advice
advise
advised
affair
affect
affected
afford
afraid
africa
after
afterlife
afternoon
again
against
age
agency
ago
agree
agreed
agrees
ahead
aid
aim
aimed
aint
air
airmail
airplane
airport
alarm
albert
alcohol
alert
alexandria
alike
alive
all
allergic
allergy
alley
allow
allowance
allowed
almost
alone
along
aloud
already
also
alter
alternative
although
always
am
amazes
amazing
ambassador
ambition
ambulance
america
americ

In [22]:
for i in target_words:
   print(i)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
خيبت
خيبه
خير
دائره
دائم
دائما
داخل
داخلا
داخلها
داخليه
دار
داروين
داعما
داعمات
داعي
دافئ
دافئا
دافع
دافعوا
دامت
داوم
دبلوماسي
دجاج
دخل
دخلت
دخله
دراجتك
دراجته
دراجتي
دراجه
دراسته
دراسه
درجات
درجتك
درجه
درس
درست
درسني
درسوا
دروس
دع
دعا
دعاني
دعنا
دعني
دعه
دعوت
دعوتك
دعوته
دعونا
دعوه
دعوي
دعيت
دعينا
دفئ
دفتر
دفترا
دفتري
دفع
دفعت
دفعته
دفعه
دفنت
دقائق
دقيق
دقيقه
دقيقيه
دكتورا
دليل
دليلا
دم
دما
دمج
دمر
دمرت
دمشق
دمك
دمه
دموعه
دمي
دميه
دهست
دهسي
دهنت
دهني
دواء
دور
دورا
دورك
دوري
دولار
دولارا
دولارات
دوله
دوما
دون
دونت
دونك
دونها
دير
ديسيمبر
دين
ديوننا
ديونه
ديوني
ذئب
ذا
ذات
ذاته
ذاتها
ذاك
ذاكرت
ذاكرته
ذاكره
ذاهب
ذاهبا
ذاهبه
ذاهبون
ذبلت
ذراعك
ذراعه
ذراعي
ذروتها
ذقت
ذكاءه
ذكرتني
ذكرني
ذكي
ذكيه
ذلك
ذنبي
ذهابي
ذهب
ذهبت
ذهبنا
ذهبوا
ذهنك
ذودتني
ذوقيه
ذيل
رؤيتك
رؤيته
رؤيه
رئيس
رئيسا
رئيسنا
رئيسه
رئيسي
رائحته
رائحه
رائد
رائع
رائعه
رات
راتبه
راتها
راجع
راحتك
راحه
راس
راسك
راسلت
راسه
راسها
راسي
راضيا
راغبا
راقب
راكبا
راكضا
رامي
راهبا
ر

In [23]:
#Find maximum sentence length in  the source and target data
source_length_list=[]
for l in lines.source:
    source_length_list.append(len(l.split(' ')))
max_source_length= max(source_length_list)
print(" Max length of the source sentence",max_source_length)
target_length_list=[]
for l in lines.target:
    target_length_list.append(len(l.split(' ')))
max_target_length= max(target_length_list)
print(" Max length of the target sentence",max_target_length)

 Max length of the source sentence 34
 Max length of the target sentence 38


In [24]:
# creating a word to index(word2idx) for source and target
source_word2idx= dict([(word, i+1) for i,word in enumerate(source_words)])
target_word2idx=dict([(word, i+1) for i, word in enumerate(target_words)])

In [25]:
#creating a dictionary for index to word for source and target vocabulary
source_idx2word= dict([(i, word) for word, i in  source_word2idx.items()])
print(source_idx2word)
target_idx2word =dict([(i, word) for word, i in target_word2idx.items()])



###Glove Embedding

In [39]:
#retreived from https://www.kaggle.com/thanakomsn/glove6b300dtxt
MAX_SENTENCE_LENGTH = 35
MAX_NUM_WORDS = 10000
EMBEDDING_SIZE = 300

In [40]:
embeddings_dictionary = dict()

glove_file = open(r'/content/drive/MyDrive/glove.6B.300d.txt', encoding="utf8")

for line in glove_file:
    records = line.split()
    word = records[0]
    vector_dimensions = asarray(records[1:], dtype='float32')
    embeddings_dictionary[word] = vector_dimensions
glove_file.close()

In [41]:
num_words = min(MAX_NUM_WORDS, len(source_word2idx) + 1)
embedding_matrix = zeros((num_words, EMBEDDING_SIZE))
for word, index in source_word2idx.items():
    embedding_vector = embeddings_dictionary.get(word)
    if embedding_vector is not None:
        embedding_matrix[index] = embedding_vector

In [42]:
embedding_layer = Embedding(num_words, EMBEDDING_SIZE, weights=[embedding_matrix], input_length=max_source_length)

###Train Test Split

In [43]:
#Shuffle the data
lines = shuffle(lines)

In [44]:
# Train - Test Split
X, y = lines.source, lines.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)
X_train.shape, X_test.shape

((9334,), (2334,))

##Padding

In [45]:
# Input tokens for encoder zero padded
num_encoder_tokens=len(source_words)+1

In [46]:
# Input tokens for decoder zero padded
num_decoder_tokens=len(target_words) +1

###Seq2Seq Model Definition

In [50]:
def generate_batch(X = X_train, y = y_train, batch_size = 128):
    ''' Generate a batch of data '''
    while True:
        for j in range(0, len(X), batch_size):
            encoder_input_data = np.zeros((batch_size, max_source_length),dtype='float32')
            decoder_input_data = np.zeros((batch_size, max_target_length),dtype='float32')
            decoder_target_data = np.zeros((batch_size, max_target_length, num_decoder_tokens),dtype='float32')
            for i, (input_text, target_text) in enumerate(zip(X[j:j+batch_size], y[j:j+batch_size])):
                for t, word in enumerate(input_text.split()):
                  encoder_input_data[i, t] = source_word2idx[word] 
                for t, word in enumerate(target_text.split()):
                    if t<len(target_text.split())-1:
                        decoder_input_data[i, t] = target_word2idx[word] # decoder input seq
                    if t>0:
                        # decoder target sequence (one hot encoded)
                        # does not include the START_ token
                        # Offset by one timestep
                        #print(word)
                        decoder_target_data[i, t - 1, target_word2idx[word]] = 1.
                    
            yield([encoder_input_data, decoder_input_data], decoder_target_data)

In [51]:
train_samples = len(X_train)
val_samples = len(X_test)
batch_size = 64
epochs = 100
latent_dim=300

In [52]:
# Define an input sequence and process it.
encoder_inputs = Input(shape=(None,))
#enc_emb =  Embedding(num_encoder_tokens, latent_dim, mask_zero = True)(encoder_inputs)
enc_emb =  Embedding(num_words, EMBEDDING_SIZE, weights=[embedding_matrix], input_length=max_source_length)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(enc_emb)
dropout = Dropout(rate=0.5)
encoder_outputs = dropout(encoder_outputs)
# We discard `encoder_outputs` and only keep the states.
encoder_states = [state_h, state_c]

In [53]:
# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = Input(shape=(None,))
dec_emb_layer = Embedding(num_decoder_tokens, latent_dim, mask_zero = True)
dec_emb = dec_emb_layer(decoder_inputs)
# We set up our decoder to return full output sequences,
# and to return internal states as well. We don't use the
# return states in the training model, but we will use them in inference.
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(dec_emb,
                                     initial_state=encoder_states)
dropout = Dropout(rate=0.5)
decoder_outputs = dropout(decoder_outputs)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

In [54]:
# Define the model that takes encoder and decoder input 
# to output decoder_outputs
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

###Seq2Seq Model Training

In [55]:
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['acc'])

In [56]:
train_samples = len(X_train) # Total Training samples
val_samples = len(X_test)    # Total validation or test samples
batch_size = 128
epochs = 30

In [57]:
history=model.fit_generator(generator = generate_batch(X_train, y_train, batch_size = batch_size),
                    steps_per_epoch = train_samples//batch_size,
                    epochs=epochs,
                    validation_data = generate_batch(X_test, y_test, batch_size = batch_size),
                    validation_steps = val_samples//batch_size)



Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


###Saving Seq2Seq Model and Weights

In [58]:
model.save('eng-ara-model.hdf5')

In [59]:
model.save('/content/drive/MyDrive/eng-ara-model.hdf5')

In [60]:
model = keras.models.load_model('/content/drive/MyDrive/eng-ara-model.hdf5')

In [61]:
history2=model.fit_generator(generator = generate_batch(X_train, y_train, batch_size = batch_size),
                    steps_per_epoch = train_samples//batch_size,
                    epochs=epochs,
                    validation_data = generate_batch(X_test, y_test, batch_size = batch_size),
                    validation_steps = val_samples//batch_size)



Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [62]:
model.save('/content/drive/MyDrive/eng-ara-model.hdf5')

In [63]:
model.save_weights('eng-ara-weights.hdf5')

In [64]:
model.save_weights('/content/drive/MyDrive/eng-ara-weights.hdf5')

In [65]:
model = keras.models.load_model('/content/drive/MyDrive/eng-ara-model.hdf5')

###Inference Model

In [66]:
# Encode the input sequence to get the "Context vectors"
encoder_model = Model(encoder_inputs, encoder_states)
# Decoder setup
# Below tensors will hold the states of the previous time step
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_state_input = [decoder_state_input_h, decoder_state_input_c]
# Get the embeddings of the decoder sequence
dec_emb2= dec_emb_layer(decoder_inputs)
# To predict the next word in the sequence, set the initial states to the states from the previous time step
decoder_outputs2, state_h2, state_c2 = decoder_lstm(dec_emb2, initial_state=decoder_state_input)
decoder_states2 = [state_h2, state_c2]
# A dense softmax layer to generate prob dist. over the target vocabulary
decoder_outputs2 = decoder_dense(decoder_outputs2)
# Final decoder model
decoder_model = Model(
    [decoder_inputs] + decoder_state_input,
    [decoder_outputs2] + decoder_states2)

In [67]:
def decode_sequence(input_seq):
    # Encode the input as state vectors.
    states_value = encoder_model.predict(input_seq)
    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1,1))
    # Populate the first character of 
    #target sequence with the start character.
    target_seq[0, 0] = target_word2idx['START_']
# Sampling loop for a batch of sequences
    # (to simplify, here we assume a batch of size 1).
    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)
# Sample a token
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_word =target_idx2word[sampled_token_index]
        decoded_sentence += ' '+ sampled_word
# Exit condition: either hit max length
        # or find stop character.
        if (sampled_word == '_END' or
           len(decoded_sentence) > 50):
            stop_condition = True
# Update the target sequence (of length 1).
        target_seq = np.zeros((1,1))
        target_seq[0, 0] = sampled_token_index
# Update states
        states_value = [h, c]
    return decoded_sentence

### Evaluation on Train Dataset

In [68]:
train_gen = generate_batch(X_train, y_train, batch_size = 1)
k=-1

In [69]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input Source sentence:', X_train[k:k+1].values[0])
print('Actual Target Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Target Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input Source sentence: im not going back
Actual Target Translation:  لن اعود 
Predicted Target Translation:  لن اذهب 
0.816496580927726


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [70]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: im walking with her
Actual Arabic Translation:  اني اتمشي معها 
Predicted Arabic Translation:  انا فقدت سيارتي 
0.7364279629037999


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [71]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: clean the window with a damp cloth
Actual Arabic Translation:  نظف زجاج النافذه بقطعه قماش مبلوله 
Predicted Arabic Translation:  ابق ماري ابق تحت ملء به 
0.727427152512826


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [72]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: theres no reason tom would say something like that
Actual Arabic Translation:  لا سبب يدفع توم لقول شيء كهذا 
Predicted Arabic Translation:  لا يستطيع توم ان يفعل ذلك ما يكفي 
0.7652058832556895


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [73]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: i got wet to the skin
Actual Arabic Translation:  بلغ البلل بدني 
Predicted Arabic Translation:  اخلع نحصل علي 
0.7598356856515925


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [74]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: they are very big
Actual Arabic Translation:  انهم كبار جدا 
Predicted Arabic Translation:  نحن تبدو جدا 
0.8091067115702212


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [75]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: he believes whatever i say
Actual Arabic Translation:  هو يصدق اي شئ اقوله 
Predicted Arabic Translation:  انه يريد ان يكون لا شيء 
0.7521206186172787


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [76]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: please remove your shoes before entering the house
Actual Arabic Translation:  من فضلك اخلع حذاءك قبل ان تدخل البيت 
Predicted Arabic Translation:  من فضلك اخلع حذاءك قبل ان البيت 
0.8471957049969466


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [77]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: we arent going to lose
Actual Arabic Translation:  لن نخسر 
Predicted Arabic Translation:  لن يغير ابدا 
0.7311104457090247


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [78]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: has something happened
Actual Arabic Translation:  هل حدث اي شيء 
Predicted Arabic Translation:  هل شيء ما حدث 
0.9036020036098448


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [79]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_test[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: dont leave me alone please
Actual Arabic Translation:  لا تتركني لوحدي ارجوك 
Predicted Arabic Translation:  لا تنس الباب من فضلك 
0.6904573083274563


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [80]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: tom has a stuffedup nose
Actual Arabic Translation:  انف توم محتقن 
Predicted Arabic Translation:  توم لديه كبيره 
0.7071067811865476


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [81]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: im not allowed to tell what you want to know
Actual Arabic Translation:  لم يؤذن لي باطلاعك علي ما تريد معرفته 
Predicted Arabic Translation:  لا اريد ان اقول شيئا ما لم اكن لي لك 
0.697613262053043


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [82]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: im trying to find a way to make money
Actual Arabic Translation:  احاول العثور علي طريقه لجمع الاموال 
Predicted Arabic Translation:  انا احاول ان اساعدك في الوقت 
0.7186082239261684


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [83]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: the man aimed a gun at the detectives
Actual Arabic Translation:  صوب الرجل مسدسا نحو المحققين 
Predicted Arabic Translation:  لقد لقد كان توم في الغرفه 
0.7801157731069053


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [84]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: california is famous for its fruit
Actual Arabic Translation:  اشتهرت كاليفورنيا بفواكهها 
Predicted Arabic Translation:  ليس وقت كبيره 
0.8801117367933934


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [85]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: im looking forward to your halloween party
Actual Arabic Translation:  انا اتتطلع لحفله الهالوين خاصتك 
Predicted Arabic Translation:  انا اتتطلع لحفله مع توم 
0.8144476398584994


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [86]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: i like studying languages
Actual Arabic Translation:  احب تعلم اللغات 
Predicted Arabic Translation:  احب قراءه الكتب 
0.7707713836060629


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [87]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: wheres your money
Actual Arabic Translation:  اين مالكم 
Predicted Arabic Translation:  اين اقرب 
0.7952707287670506


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [88]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: he narrowly escaped death
Actual Arabic Translation:  نجا من الموت باعجوبه 
Predicted Arabic Translation:  لقد كانت الشرطه علي قدميه 
0.7377879464668811


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [89]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_test[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: at first it is difficult
Actual Arabic Translation:  انها صعبه في البدايه 
Predicted Arabic Translation:  انها في العالم 
0.8660254037844387


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [90]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: speaking english isnt easy
Actual Arabic Translation:  التحدث بالانجليزيه ليس امرا سهلا 
Predicted Arabic Translation:  الانجليزيه ليست الانجليزيه 
0.7730551756939454


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [91]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: because hes sick he cant come
Actual Arabic Translation:  لن يستطيع المجيء لانه مريض 
Predicted Arabic Translation:  لن يكون من الممكن ان يكون يكون ذلك كثيرا 
0.6389431042462724


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [92]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_test[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: stop teasing your brother
Actual Arabic Translation:  توقفي عن غيظ اخيك 
Predicted Arabic Translation:  توقف عن غيظ اخيك 
0.7259795291154771


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [93]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: i do love you
Actual Arabic Translation:  احبك 
Predicted Arabic Translation:  انا احبك 
0.8408964152537145


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [94]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: she rarely goes out on sundays
Actual Arabic Translation:  نادرا ما تخرج ايام الاحد 
Predicted Arabic Translation:  كل ما يكون علي توم في السرير 
0.7186082239261684


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [95]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: you shouldve started by now
Actual Arabic Translation:  كان عليك ان تكون قد بدات الان 
Predicted Arabic Translation:  كان عليك ان تكون في المنزل 
0.7529586373193689


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [96]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)


Input English sentence: my father bought me a bicycle
Actual Arabic Translation:  اشتري لي ابي دراجه 
Predicted Arabic Translation:  ابي ابي ابي علي البيانو 
0.668740304976422


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [97]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: why dont we do that right now
Actual Arabic Translation:  لم لا نقوم بذلك للتو 
Predicted Arabic Translation:  لم لا تنس ذلك بذلك 
0.81903625881272


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [98]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_test[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: bring me a cup of coffee will you
Actual Arabic Translation:  احضر لي فنجانا من القهوه 
Predicted Arabic Translation:  احضر لي كاسا من القهوه 
0.7071067811865476


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [99]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: my son is a rebellious teenager
Actual Arabic Translation:  ابني مراهق متمرد 
Predicted Arabic Translation:  امي امي 
0.816496580927726


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [100]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_test[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: we feel good about it
Actual Arabic Translation:  نتفائل به 
Predicted Arabic Translation:  نحن نحن لديك بك 
0.7364279629037999


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [101]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_train[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: may i interrupt
Actual Arabic Translation:  هل لي ان اقاطع 
Predicted Arabic Translation:  هل يمكنني ان اساعدك 
0.7598356856515925


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [102]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_test[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: this is toms favorite book
Actual Arabic Translation:  هذا كتاب توم المفضل 
Predicted Arabic Translation:  توم هو في هذا المنزل 
0.7765453555044466


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [103]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: dont lie
Actual Arabic Translation:  اياك والكذب 
Predicted Arabic Translation:  لا تلمسني 


In [104]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: i had my brother repair my bicycle
Actual Arabic Translation:  طلبت من اخي اصلاح دراجتي 
Predicted Arabic Translation:  طلبت علي ان اخي طبيبا 


In [105]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: but youre not there
Actual Arabic Translation:  لكنك لست هناك 
Predicted Arabic Translation:  لن يكن هذا شيء 


In [106]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: water freezes at zero degrees celsius doesnt it
Actual Arabic Translation:  يتجمد الماء عند صفر درجه مئويه اليس كذلك 
Predicted Arabic Translation:  لا يستطيع ابي في الجزء ماذا يفعل هذا 


In [107]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: i wonder where tom is and who hes with
Actual Arabic Translation:  انا اتساءل اين توم ومع من 
Predicted Arabic Translation:  انا لا اعرف توم في بوسطن 


In [108]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: i need to charge my cell phone
Actual Arabic Translation:  علي شحن هاتفي الجوال 
Predicted Arabic Translation:  احتاج الي مساعدتك 


In [109]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: you can skate safely on this side of the lake
Actual Arabic Translation:  يمكنك التزلج بامان علي هذا الجانب من البحيره 
Predicted Arabic Translation:  يمكنك ان تدرس في المحطه لا يمكنك او هذا 


In [110]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: she was at the crime scene
Actual Arabic Translation:  انها كانت في موقع الجريمه 
Predicted Arabic Translation:  كان علي وشك في المنزل 


In [111]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: tom doesnt hate you mary
Actual Arabic Translation:  توم لا يكرهك ياماري 
Predicted Arabic Translation:  توم لا يستطيع ان يكون ماري 


In [112]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: thats a good question
Actual Arabic Translation:  هذا سؤال جيد 
Predicted Arabic Translation:  هذا جيد جدا 


In [113]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: please keep this secret
Actual Arabic Translation:  من فضلك ابق ذلك سرا 
Predicted Arabic Translation:  من فضلك ابق هذه الصفحه 


In [114]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: he has been warned on several occasions
Actual Arabic Translation:  قد انذر مرات عديده 
Predicted Arabic Translation:  لقد كان علي ان تقود توم 


In [115]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: they kept it secret that they were in love
Actual Arabic Translation:  ابقو حبهما سرا 
Predicted Arabic Translation:  لقد لقد كنت كنت في الوقت 


In [116]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: if we leave now we should make it
Actual Arabic Translation:  ان غادرنا الان فسنصل علي الوقت 
Predicted Arabic Translation:  علينا ان اكون الوقت ان تكون اكثر من الوقت 


In [117]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: i just wish i knew how to speak french
Actual Arabic Translation:  اتمنتي فقط لو استطعت تكلم الفرنسيه 
Predicted Arabic Translation:  اريد ان اعرف كيف تكون في الفرنسيه 


In [118]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: it was at school
Actual Arabic Translation:  كان في المدرسه 
Predicted Arabic Translation:  كان في المنزل 


In [119]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: the rain is wonderful
Actual Arabic Translation:  المطر رائع 
Predicted Arabic Translation:  توقف عن غيظ ذلك 


In [120]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: do you have a car
Actual Arabic Translation:  هل عندك سياره 
Predicted Arabic Translation:  هل لديك سياره 


In [121]:
k+=1
(input_seq, actual_output), _ = next(train_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_train[k:k+1].values[0])
print('Actual Arabic Translation:', y_train[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: can you see the difference
Actual Arabic Translation:  هل بامكانك ان تري الفرق 
Predicted Arabic Translation:  ايمكنك ان تخبرني معي 


### Evaluation on Test Dataset

In [122]:
val_gen = generate_batch(X_test, y_test, batch_size = 1)
k=-1

In [123]:
test_gen = generate_batch(X_test, y_test, batch_size = 1)
k=10
k+=1
(input_seq, actual_output), _ = next(test_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input Source sentence:', X_test[k:k+1].values[0])
print('Actual Target Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Target Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_test[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input Source sentence: he thinks i love her
Actual Target Translation:  هو يعتقد انني احبها 
Predicted Target Translation:  لديه ثلاثه ثلاثه 
0.7259795291154771


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [124]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_test[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: fish is cheap today
Actual Arabic Translation:  السمك رخيص اليوم 
Predicted Arabic Translation:  لديه ثلاثه ثلاثه 
0.6865890479690392


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [125]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_test[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: tom felt strong
Actual Arabic Translation:  توم شعر بالقوه 
Predicted Arabic Translation:  لا يمكنني ان تكون هذه المساله 
0.6893409630302637


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [126]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_test[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: i think thats what happened
Actual Arabic Translation:  اظن ان هذا ما حدث 
Predicted Arabic Translation:  انها سعيده 
0.8034284189446518


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [127]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: my uncle gave me a camera
Actual Arabic Translation:  اعطاني عمي كاميرا 
Predicted Arabic Translation:  كان توم وماري جدا 


In [128]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_test[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: youre doing it wrong
Actual Arabic Translation:  انت تفعله بطريقه خاطئه 
Predicted Arabic Translation:  دعونا نتقاسم التلفاز 
0.7765453555044466


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [129]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_test[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: i was at a movie theater
Actual Arabic Translation:  كنت في دور عرض للسينما 
Predicted Arabic Translation:  انها تحب كره المضرب 
0.8091067115702212


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [130]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_test[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: their trip was postponed because of the rain
Actual Arabic Translation:  تاجلت رحلتهم بسبب المطر 
Predicted Arabic Translation:  توم ماري ماري في ماري ما 
0.6622152291011697


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [131]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: what do you want now
Actual Arabic Translation:  ما الذي تريده الان 
Predicted Arabic Translation:  الجميع 


In [132]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_test[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: the internet is very useful for knowing the circumstances of each part of the world
Actual Arabic Translation:  الانترنت مفيد جدا لمعرفه ظروف كل جزء من العالم 
Predicted Arabic Translation:  هل سبق ان تقود سياره كبيره 
0.7916963878457504


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [133]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_test[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: i have a car
Actual Arabic Translation:  املك سياره 
Predicted Arabic Translation:  انها ان وسيم 
0.8091067115702212


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [134]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_test[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)

Input English sentence: none of my classmates live near here
Actual Arabic Translation:  لا احد من زملائي يعيشون بالقرب من هنا 
Predicted Arabic Translation:  ما الذي تفعله 
0.8265168183793802


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [135]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: stop it right now
Actual Arabic Translation:  اوقف هذا حالا 
Predicted Arabic Translation:  انه يحب الجميع 


In [136]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: i have no words to express my gratitude
Actual Arabic Translation:  الكلمات لا يمكنها التعبير عن شكري بما يكفي 
Predicted Arabic Translation:  الصين بلد جميل 


In [137]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: this way sir
Actual Arabic Translation:  من هنا سيدي 
Predicted Arabic Translation:  توم يحب السفر 


In [138]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: there is no reason why i shouldnt do it
Actual Arabic Translation:  لا يوجد سبب لئلا افعله 
Predicted Arabic Translation:  اعتقد انه كان ما حدث 


In [139]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: we dont know where well live next year
Actual Arabic Translation:  نحن لا نعرف اين سنعيش في العام المقبل 
Predicted Arabic Translation:  لي لي امي امي 


In [140]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: i cant live without you
Actual Arabic Translation:  لا استطيع العيش من دونك 
Predicted Arabic Translation:  انت انت محق 


In [141]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: im tired of watching tv
Actual Arabic Translation:  انا تعب من مشاهده التلفاز 
Predicted Arabic Translation:  كنت في المنزل جديده 


In [142]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: i had better ask her
Actual Arabic Translation:  من الافضل ان اسئلها 
Predicted Arabic Translation:  بسبب العاصفه بسبب توم 


In [143]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: i want to look rich
Actual Arabic Translation:  اريد ان ابدو غنيا 
Predicted Arabic Translation:  ماذا تريد الان 


In [144]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: would you like to go to the zoo this afternoon
Actual Arabic Translation:  هل تريد ان تذهب الي حديقه الحيوان بعد ظهر هذا اليوم 
Predicted Arabic Translation:  هذه بلد هي العالم جدا 


In [145]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: i dont believe that tom is the killer
Actual Arabic Translation:  لا اصدق ان توم هو القاتل 
Predicted Arabic Translation:  لدي سياره 


In [146]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: you have a bright future
Actual Arabic Translation:  لديك مستقبل واعد 
Predicted Arabic Translation:  لا احد في اي شيء في نفس شيء 


In [147]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: is it possible that its already eight oclock
Actual Arabic Translation:  هل من الممكن انها بالفعل الساعه الثامنه 
Predicted Arabic Translation:  سوف يكون مجددا 


In [148]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: that child is only four but he can already count to
Actual Arabic Translation:  بامكان هذا الطفل ان يعد الي مئه مع انه ما زال لديه اربع سنوات 
Predicted Arabic Translation:  لا يمكنني ان اصدق عن هذه المشكله 


In [149]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: its only a theory
Actual Arabic Translation:  انها مجرد نظريه 
Predicted Arabic Translation:  هذه القلعه جميله 


In [150]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: im really tired
Actual Arabic Translation:  انا متعب جدا 
Predicted Arabic Translation:  لا يوجد اي شيء تريد ان فعل ذلك 


In [151]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: where is the book
Actual Arabic Translation:  اين الكتاب 
Predicted Arabic Translation:  لا اعرف في ما الذي تعمل 


In [152]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: few people know about the plan
Actual Arabic Translation:  القليل يعرف اي شيء عن الخطه 
Predicted Arabic Translation:  لا يمكنني ان تكون غبيا 


In [153]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: i really didnt mean to hurt you
Actual Arabic Translation:  لم اقصد ان اوذيك 
Predicted Arabic Translation:  انا مشغول جدا 


In [154]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: tom paid for the tickets
Actual Arabic Translation:  دفع توم ثمن التذاكر 
Predicted Arabic Translation:  لم يكن لدي الكثير من 


In [155]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: i think youre right
Actual Arabic Translation:  اظن انك محق 
Predicted Arabic Translation:  اريد ان اكون 


In [156]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: is it rainy
Actual Arabic Translation:  هل الجو ممطر 
Predicted Arabic Translation:  هل يمكنني ان تخبرني هذا الكتاب جديده 


In [157]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: im sorry but i dont want to talk about it
Actual Arabic Translation:  المعذره لا اريد التحدث عن الموضوع 
Predicted Arabic Translation:  لا اصدق ان توم هو ذلك 


In [158]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: i know i dont have any chance of winning
Actual Arabic Translation:  اعلم بانه ليس لدي اي فرصه للفوز 
Predicted Arabic Translation:  انت لديك لديك 


In [159]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: can you suggest a better idea
Actual Arabic Translation:  ايمكنك ان تقترح فكره افضل 
Predicted Arabic Translation:  هل لديك اي شيء علي ان يكون توم 


In [160]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: this drawing dates back to the fifteenth century
Actual Arabic Translation:  هذا الرسم يعود تاريخه الي القرن الخامس عشر 
Predicted Arabic Translation:  كل ما هو يستطيع ان يكون توم في الفرنسيه 


In [161]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: how much longer is it going to take
Actual Arabic Translation:  كم من الوقت سوف تستغرق بعد 
Predicted Arabic Translation:  انه ليس هذه كبيره 


In [162]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: she dived into the swimming pool
Actual Arabic Translation:  غطست في المسبح 
Predicted Arabic Translation:  انا سعيد جدا 


In [163]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: its recommended that you dont write your passwords down where others might see them
Actual Arabic Translation:  من الافضل الا تكتب كلمات المرور كي لا يراها الغير 
Predicted Arabic Translation:  اين هي 


In [164]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: i didnt even recognize tom
Actual Arabic Translation:  انا حتي لم اعرف انه توم 
Predicted Arabic Translation:  القليل من دون واحد 


In [165]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: please write with a pen
Actual Arabic Translation:  من فضلك اكتب بقلم حبر 
Predicted Arabic Translation:  لم اكن اعرف كيف اقول 


In [166]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: im going through changes
Actual Arabic Translation:  امر بتغيرات 
Predicted Arabic Translation:  توم كان في الخارج من قبل 


In [167]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: this question is one of great importance
Actual Arabic Translation:  هذا سؤال مهم جدا 
Predicted Arabic Translation:  اظن انك لا استطيع 


In [168]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: turn down the tv
Actual Arabic Translation:  اخفض صوت التلفاز 
Predicted Arabic Translation:  هل الجو بارد 


In [169]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: tom often goes to boston
Actual Arabic Translation:  يذهب توم الي بوسطن كثيرا 
Predicted Arabic Translation:  انا لا اريد ان انسي هذا لا شئ 


In [170]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: i bet everyone is waiting
Actual Arabic Translation:  انا متاكد ان الجميع ينتظر 
Predicted Arabic Translation:  لا اعرف لدي اي شيء من قبل ان لا احب عنه 


In [171]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])

Input English sentence: that book had a lot of pages
Actual Arabic Translation:  صفحات ذلك الكتاب كثيره 
Predicted Arabic Translation:  ايمكنك ان تخبرني كيف تقود سياره 


In [172]:
k+=1
(input_seq, actual_output), _ = next(val_gen)
decoded_sentence = decode_sequence(input_seq)
print('Input English sentence:', X_test[k:k+1].values[0])
print('Actual Arabic Translation:', y_test[k:k+1].values[0][6:-4])
print('Predicted Arabic Translation:', decoded_sentence[:-4])
score1 = sentence_bleu(y_test[k:k+1].values[0][6:-4],decoded_sentence[:-4])
print(score1)


Input English sentence: what kind of a person are you
Actual Arabic Translation:  اي نوع من الاشخاص انت 
Predicted Arabic Translation:  القطه هذه السياح الي العالم 
0.6744322250214191


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


###BLUE score

In [173]:
actual,predicted=list(),list()
val_gen = generate_batch(X_test, y_test, batch_size = 1)
for k in range(len(X_test)):
  (input_seq, actual_output), _ = next(val_gen)
  decoded_sentence = decode_sequence(input_seq)
  actual.append(y_test[k:k+1].values[0][6:-4])
  predicted.append(decoded_sentence[:-4])

In [174]:
#Blue score
bs=0
for i in range(len(actual)):
  score1 = sentence_bleu(actual[i],predicted[i])
  bs=bs+score1
bs=bs/len(actual)
print('Bleu score on Test set:', bs)

Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


Bleu score on Test set: 0.7759777657294306


In [175]:
  # calculate BLEU score with weights
	print('BLEU-1: %f' % corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0)))
	print('BLEU-2: %f' % corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0)))
	print('BLEU-3: %f' % corpus_bleu(actual, predicted, weights=(0.3, 0.3, 0.3, 0)))
	print('BLEU-4: %f' % corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25)))

Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


BLEU-1: 0.347446
BLEU-2: 0.589445
BLEU-3: 0.728226
BLEU-4: 0.767753


###References

Some parts of the code were taken from the following references:
1-https://towardsdatascience.com/how-to-implement-seq2seq-lstm-model-in-keras-shortcutnlp-6f355f3e5639
2-https://github.com/motazsaad/process-arabic-text/blob/master/clean_arabic_text.py
3-https://towardsdatascience.com/implementing-neural-machine-translation-using-keras-8312e4844eb8
4-https://keras.io/examples/nlp/lstm_seq2seq/
5-https://machinelearningmastery.com/develop-neural-machine-translation-system-keras/
6-https://www.kaggle.com/thanakomsn/glove6b300dtxt
7-https://towardsdatascience.com/word-level-english-to-marathi-neural-machine-translation-using-seq2seq-encoder-decoder-lstm-model-1a913f2dc4a7