In [18]:
import tensorflow_hub as hub
import tensorflow_text as text

# Kaggle Models handles for the uncased English BERT encoder (L-12, H-768, A-12)
# and for the uncased English text preprocessor that is used together with it below.
encoder_URL = "https://www.kaggle.com/models/tensorflow/bert/frameworks/TensorFlow2/variations/bert-en-uncased-l-12-h-768-a-12/versions/2"
preprocessor = 'https://kaggle.com/models/tensorflow/bert/frameworks/TensorFlow2/variations/en-uncased-preprocess/versions/3'
# Wrap the preprocessing model as a Keras layer; it is called directly on raw strings.
# NOTE(review): `tensorflow_text` is imported but never referenced by name; presumably it is
# needed only for its import side effect (registering ops used by the preprocessor) - confirm.
text_preprocessor = hub.KerasLayer(preprocessor) # First layer of the NN

In [19]:
# Two example sentences of different lengths to push through the preprocessor.
testing_sent = ['hello world', 'wow the world is just beautiful']
# The preprocessor returns a dict of tensors; display its keys to see what the encoder will receive.
processed_text = text_preprocessor(testing_sent)
processed_text.keys()

dict_keys(['input_mask', 'input_type_ids', 'input_word_ids'])

In [20]:
processed_text['input_mask']

# Each tokenized sentence is laid out as: [CLS] your_string [SEP], followed by padding.
# input_mask is 1 at every position that holds a real token (including [CLS] and [SEP])
# and 0 at every padding position, e.g. 'hello world' -> 4 ones (2 words + 2 special tokens).
# The sequence length is 128, which leaves at most 126 positions for the sentence's own tokens.

<tf.Tensor: shape=(2, 128), dtype=int32, numpy=
array([[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
      dtype=int32)>

In [21]:
processed_text['input_word_ids']
# These are the vocabulary ids of the tokens in each sentence; e.g. the id for 'world' is 2088.
# In the output, 101 and 102 are the ids wrapping each sentence ([CLS] and [SEP]) and 0 is padding.
# In other words, the preprocessor tokenizes the text and returns it in numerical form.

<tf.Tensor: shape=(2, 128), dtype=int32, numpy=
array([[  101,  7592,  2088,   102,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0, 

In [25]:
# Load the BERT encoder as a Keras layer and run it on the preprocessed batch from above.
bert = hub.KerasLayer(encoder_URL)
# The encoder returns a dict of outputs; display its keys to see which outputs are available.
bert_model_with_preprocessed = bert(processed_text)
bert_model_with_preprocessed.keys()

dict_keys(['encoder_outputs', 'sequence_output', 'default', 'pooled_output'])

In [28]:
bert_model_with_preprocessed['pooled_output']
# 'pooled_output' holds one embedding (vector) per sentence.
# Shape (2, 768): each of the 2 sentences is represented by 768 features
# that capture the meaning behind the sentence.

<tf.Tensor: shape=(2, 768), dtype=float32, numpy=
array([[ 0.46143854,  0.16888136,  0.9770442 , ..., -0.9247109 ,
         0.32970682,  0.99962413],
       [-0.18566011, -0.4565327 ,  0.991772  , ..., -0.9822351 ,
         0.31540486,  0.9982238 ]], dtype=float32)>

In [30]:
bert_model_with_preprocessed['sequence_output']
# 'sequence_output' holds one embedding per token position in each sentence,
# so every word is represented by numbers that take the meaning behind the word into account.
# Shape analysis: (2, 128, 768)
#    2   -> the number of sentences
#    128 -> the token positions in each sentence (the maximum sequence length)
#    768 -> the feature values produced to represent each token position

<tf.Tensor: shape=(2, 128, 768), dtype=float32, numpy=
array([[[ 1.38968721e-01,  3.92372489e-01,  2.20877841e-01, ...,
         -7.50904530e-02, -1.59458995e-01, -3.02657783e-01],
        [-4.81071249e-02,  7.19874620e-01,  4.84751046e-01, ...,
          1.99355274e-01,  1.33780956e-01, -1.23020828e-01],
        [ 6.01866782e-01,  9.01288390e-02,  1.09629035e-01, ...,
          5.26783057e-04,  2.54782945e-01, -1.13370605e-01],
        ...,
        [ 2.21234679e-01,  2.39344776e-01,  3.30342650e-01, ...,
         -2.52432913e-01, -1.81307316e-01, -1.54258817e-01],
        [ 1.02187499e-01,  4.07734334e-01,  2.37250060e-01, ...,
         -1.88559338e-01, -1.58265829e-01, -1.90354176e-02],
        [ 4.62288797e-01,  7.18413115e-01, -1.38182297e-01, ...,
         -9.43192188e-03,  1.22147448e-01, -4.43378448e-01]],

       [[-1.13890581e-02,  3.06114499e-02,  6.04448676e-01, ...,
         -1.62474178e-02,  3.43062103e-01, -4.55624461e-01],
        [ 8.25801611e-01, -1.35105819e-01,  5.15

In [36]:
bert_model_with_preprocessed['encoder_outputs'][-1] == bert_model_with_preprocessed['sequence_output']
# Element-wise comparison of the last entry of 'encoder_outputs' with 'sequence_output'.
# Every displayed entry is True, i.e. the last encoder output is the same as the sequence output.
# NOTE(review): reducing this to a single boolean would be easier to read than the full (2, 128, 768) tensor.

<tf.Tensor: shape=(2, 128, 768), dtype=bool, numpy=
array([[[ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        ...,
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True]],

       [[ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        ...,
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True]]])>