<a href="https://colab.research.google.com/github/yiwenwangANU/Tensorflow_Certificate/blob/main/Tensorflow_Certificate_Model_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data

In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers

In [None]:
# Fetch the Shakespeare corpus through the Keras download helper and keep
# the path it returns for the read step.
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)

In [None]:
# Read the corpus as bytes and decode explicitly so the text is always
# interpreted as UTF-8.
with open(path_to_file, 'rb') as corpus_file:
    raw_data = corpus_file.read().decode('utf-8')

In [None]:
# Unique characters of the corpus. Sorted because the iteration order of a
# set of strings can differ between Python processes (hash randomization);
# an unsorted vocabulary would give different character ids after a kernel
# restart.
vocab = sorted(set(raw_data))
len(vocab)

65

In [None]:
# Split the whole corpus into a 1-D string tensor with one character per entry.
all_chars = tf.strings.unicode_split(input=raw_data, input_encoding='UTF-8')
all_chars[:100]

<tf.Tensor: shape=(100,), dtype=string, numpy=
array([b'F', b'i', b'r', b's', b't', b' ', b'C', b'i', b't', b'i', b'z',
       b'e', b'n', b':', b'\n', b'B', b'e', b'f', b'o', b'r', b'e', b' ',
       b'w', b'e', b' ', b'p', b'r', b'o', b'c', b'e', b'e', b'd', b' ',
       b'a', b'n', b'y', b' ', b'f', b'u', b'r', b't', b'h', b'e', b'r',
       b',', b' ', b'h', b'e', b'a', b'r', b' ', b'm', b'e', b' ', b's',
       b'p', b'e', b'a', b'k', b'.', b'\n', b'\n', b'A', b'l', b'l', b':',
       b'\n', b'S', b'p', b'e', b'a', b'k', b',', b' ', b's', b'p', b'e',
       b'a', b'k', b'.', b'\n', b'\n', b'F', b'i', b'r', b's', b't', b' ',
       b'C', b'i', b't', b'i', b'z', b'e', b'n', b':', b'\n', b'Y', b'o',
       b'u'], dtype=object)>

In [None]:
# Forward lookup: character -> integer id.
chars_to_ids = layers.StringLookup(vocabulary=vocab)
# Inverse lookup, built from the forward layer's own vocabulary so that ids
# map back to the same characters.
ids_to_chars = layers.StringLookup(
    invert=True,
    vocabulary=chars_to_ids.get_vocabulary(),
)
# Encode the full corpus, then preview the first ids and the total length.
all_ids = chars_to_ids(all_chars)
(all_ids[:100], len(all_ids))

(<tf.Tensor: shape=(100,), dtype=int64, numpy=
 array([38, 10, 29, 46, 11,  1, 62, 10, 11, 10, 39, 56,  2, 42, 40, 48, 56,
        49, 16, 29, 56,  1, 59, 56,  1, 23, 29, 16, 60, 56, 56, 34,  1,  6,
         2, 21,  1, 49, 57, 29, 11,  9, 56, 29, 35,  1,  9, 56,  6, 29,  1,
        17, 56,  1, 46, 23, 56,  6, 43, 26, 40, 40, 64, 51, 51, 42, 40,  7,
        23, 56,  6, 43, 35,  1, 46, 23, 56,  6, 43, 26, 40, 40, 38, 10, 29,
        46, 11,  1, 62, 10, 11, 10, 39, 56,  2, 42, 40, 30, 16, 57])>, 1115394)

In [None]:
# Stream the encoded corpus one character id at a time.
ids_dataset = tf.data.Dataset.from_tensor_slices(tensors=all_ids)
ids_dataset

<TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>

In [None]:
seq_length = 100
# Group ids into chunks of seq_length + 1; the trailing incomplete chunk is
# dropped so every element has the same length.
batched_dataset = ids_dataset.batch(
    batch_size=seq_length + 1,
    drop_remainder=True,
)
batched_dataset

<BatchDataset element_spec=TensorSpec(shape=(101,), dtype=tf.int64, name=None)>

In [None]:
def input_target_split(sequence):
    """Split a sequence into an (input, target) pair shifted by one position.

    The input is everything except the last element and the target is
    everything except the first, so target[i] is the element that follows
    input[i].
    """
    model_input = sequence[:-1]
    shifted_target = sequence[1:]
    return model_input, shifted_target

In [None]:
# Turn every chunk into an (input, target) pair.
dataset = batched_dataset.map(map_func=input_target_split)
dataset

<MapDataset element_spec=(TensorSpec(shape=(100,), dtype=tf.int64, name=None), TensorSpec(shape=(100,), dtype=tf.int64, name=None))>

In [None]:
# Decode the first (input, target) pair back to text to check the one-step shift.
for inputs, target in dataset.take(1):
    for id_sequence in (inputs, target):
        print(tf.strings.reduce_join(ids_to_chars(id_sequence)))

tf.Tensor(b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou', shape=(), dtype=string)
tf.Tensor(b'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou ', shape=(), dtype=string)


In [None]:
# Group the (input, target) pairs into batches of 64 and prefetch so upcoming
# batches are prepared while the current one is being consumed.
training_batches = dataset.batch(64)
prefetched_dataset = training_batches.prefetch(tf.data.AUTOTUNE)
prefetched_dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(None, 100), dtype=tf.int64, name=None), TensorSpec(shape=(None, 100), dtype=tf.int64, name=None))>

# Model

In [None]:
# Number of entries in the lookup layer's vocabulary.
vocab_size = len(chars_to_ids.get_vocabulary())
# Embedding vector size and number of LSTM units.
embedding_dims, rnn_units = 256, 1024

In [None]:
class Model_0(tf.keras.Model):
    """Character-level language model: Embedding -> LSTM -> Dense logits.

    Args:
        vocab_size: number of distinct ids; used as the embedding input
            dimension and as the width of the output logits.
        embedding_dims: size of each embedding vector.
        rnn_units: number of LSTM units.
    """

    def __init__(self, vocab_size, embedding_dims, rnn_units):
        super().__init__()
        self.embed = layers.Embedding(input_dim=vocab_size,
                                      output_dim=embedding_dims,
                                      name='embed')
        # return_sequences: emit an output for every time step.
        # return_state: also return the final hidden and cell states so a
        # caller can feed them back in on the next call.
        self.LSTM = layers.LSTM(units=rnn_units,
                                return_sequences=True,
                                return_state=True,
                                name='LSTM')
        self.Dense = layers.Dense(units=vocab_size, name='Dense')

    def call(self, inputs, return_state=False, fm_state=None, fc_state=None):
        """Compute per-step logits for a batch of id sequences.

        Args:
            inputs: integer ids, expected shape (batch, seq).
            return_state: when truthy, also return the final LSTM states.
            fm_state: optional initial hidden (memory) state.
            fc_state: optional initial cell (carry) state.

        Returns:
            The logits, or ``(logits, fm_state, fc_state)`` when
            ``return_state`` is truthy.
        """
        x = self.embed(inputs)  # (batch, seq, embedding_dims)

        # Use identity checks: `== None` on a tensor goes through the
        # overloaded equality operator instead of testing for a missing value.
        if fm_state is None and fc_state is None:
            # No state supplied: the LSTM creates its own zero-filled state.
            initial_state = None
        else:
            # Only one state supplied: fill the other with zeros rather than
            # silently discarding the one the caller passed.
            if fm_state is None:
                fm_state = tf.zeros_like(fc_state)
            if fc_state is None:
                fc_state = tf.zeros_like(fm_state)
            initial_state = [fm_state, fc_state]

        # x: (batch, seq, rnn_units)
        x, fm_state, fc_state = self.LSTM(x, initial_state=initial_state)
        outputs = self.Dense(x)  # (batch, seq, vocab_size)
        if return_state:
            return outputs, fm_state, fc_state
        return outputs


In [None]:
# Build the model from the hyperparameters defined earlier in the notebook.
model = Model_0(vocab_size, embedding_dims, rnn_units)

In [None]:
# Run the model on one batch and sample a character at every position of the
# first sequence.
for inputs, _ in prefetched_dataset.take(1):
    print(tf.strings.reduce_join(ids_to_chars(inputs[0])))
    first_sequence_logits = model(inputs)[0]
    # Draw one id per time step from the predicted distribution instead of
    # taking the argmax.
    sampled_ids = tf.random.categorical(first_sequence_logits, 1)
    predicted_ids = tf.squeeze(sampled_ids)
    print(tf.strings.reduce_join(ids_to_chars(predicted_ids)))

tf.Tensor(b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou', shape=(), dtype=string)
tf.Tensor(b'VF$armHDkPM&h:,L!Rlrl::N&,rC$l;m\nn:pt&cRFH,WnTxdsnpb!yMiirpZ-V3eEb.hw-WEEpJuq?bMeFfAbkJc:QqYGADDv\nUn', shape=(), dtype=string)


In [None]:
# The model outputs raw logits, so the loss is configured with from_logits=True.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
adam_optimizer = tf.keras.optimizers.Adam()
model.compile(loss=loss_fn, optimizer=adam_optimizer)

In [None]:
# Train for 20 passes over the batched dataset and keep the History object.
history = model.fit(x=prefetched_dataset, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# Repeat the per-position sampling check on the first sequence of one batch,
# now with the trained model.
for inputs, _ in prefetched_dataset.take(1):
    print(tf.strings.reduce_join(ids_to_chars(inputs[0])))
    first_sequence_logits = model(inputs)[0]
    sampled_ids = tf.random.categorical(first_sequence_logits, 1)
    predicted_ids = tf.squeeze(sampled_ids)
    print(tf.strings.reduce_join(ids_to_chars(predicted_ids)))

tf.Tensor(b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou', shape=(), dtype=string)
tf.Tensor(b'orst Hitizen:\nFugore terpeoueed snd trrther  goar me.spaak,\n\nENl \nHeeak  sheak \n\nMirst Sitizen:\nPou ', shape=(), dtype=string)


In [None]:
# Step-by-step walkthrough of a single inference call on a seed string.
inputs = 'ROMEO:'
input_ids = chars_to_ids(tf.strings.unicode_split(inputs, input_encoding='UTF-8'))  # (seq,)
# With return_state=True the model returns three values (logits plus the two
# LSTM states); unpacking into two names raises ValueError.
output, fm_state, fc_state = model(tf.expand_dims(input_ids, axis=0), return_state=True)  # output: (batch, seq, vocab)
output = tf.squeeze(output, axis=0)  # (seq, vocab)
pred_ids = tf.squeeze(tf.random.categorical(output, 1), axis=-1)  # (seq,)
pred_chars = ids_to_chars(pred_ids)  # (seq,)
# The prediction at the last position is the character that follows the seed.
last_char = pred_chars[-1]

In [None]:
def predict_next_word(model=model, inputs='ROMEO:', fm_state=None, fc_state=None):
    """Sample the character that follows `inputs`.

    Args:
        model: model called with ``return_state=True``; it must return
            ``(logits, fm_state, fc_state)``.
        inputs: seed text, a Python string or a scalar string tensor.
        fm_state: optional LSTM hidden state carried over from a previous call.
        fc_state: optional LSTM cell state carried over from a previous call.

    Returns:
        ``(next_word, fm_state, fc_state)``: the sampled character as a scalar
        string tensor, plus the updated LSTM states to pass to the next call.
    """
    input_ids = chars_to_ids(tf.strings.unicode_split(inputs, input_encoding='UTF-8'))  # (seq,)
    logits, fm_state, fc_state = model(tf.expand_dims(input_ids, axis=0),
                                       return_state=True,
                                       fm_state=fm_state,
                                       fc_state=fc_state)  # logits: (1, seq, vocab)
    # Only the last time step predicts the next character, so sample from that
    # row alone instead of sampling every position and discarding the rest.
    last_logits = logits[:, -1, :]  # (1, vocab)
    next_id = tf.squeeze(tf.random.categorical(last_logits, 1), axis=-1)  # (1,)
    next_word = ids_to_chars(next_id)[0]  # scalar string
    return next_word, fm_state, fc_state

In [None]:
# Smoke test: sample one character after the seed 'R' and show the returned states.
predict_next_word(model=model, inputs='R')

(<tf.Tensor: shape=(), dtype=string, numpy=b'b'>,
 <tf.Tensor: shape=(1, 1024), dtype=float32, numpy=
 array([[ 0.06281871, -0.05333113, -0.03771185, ..., -0.01375718,
          0.03989689, -0.1401    ]], dtype=float32)>,
 <tf.Tensor: shape=(1, 1024), dtype=float32, numpy=
 array([[ 0.40222946, -0.15128659, -0.06906644, ..., -0.0177927 ,
          0.04723381, -0.5599148 ]], dtype=float32)>)

In [None]:
def make_prediction(model=model, initial_inputs='ROMEO:', fm_state=None, fc_state=None, steps=1000):
    """Generate text by sampling `steps` characters after `initial_inputs`.

    Args:
        model: model forwarded to `predict_next_word` on every step.
        initial_inputs: seed string; it is also the first piece of the result.
        fm_state: optional LSTM hidden state to start from.
        fc_state: optional LSTM cell state to start from.
        steps: number of characters to sample.

    Returns:
        Scalar string tensor holding the seed followed by the sampled characters.
    """
    output = [initial_inputs]
    next_word = initial_inputs
    for _ in range(steps):
        # Pass `model` through explicitly; otherwise predict_next_word falls
        # back to its own default and this function's `model` argument is
        # ignored. After the first step only the newly sampled character is
        # fed back; the returned states are passed along to the next call.
        next_word, fm_state, fc_state = predict_next_word(model=model,
                                                          inputs=next_word,
                                                          fm_state=fm_state,
                                                          fc_state=fc_state)
        output.append(next_word)
    return tf.strings.reduce_join(output)

In [None]:
# Generate text with the default seed and settings, then decode the resulting
# byte string for display.
predictions = make_prediction()
generated_text = predictions.numpy().decode('utf-8')
print(generated_text)

ROMEO:
Poor some better, I say, I hardly
the mounted with the sea to the first
Who deserves intit the womb at his deposing?

Will I:
What, how now! who's as him toncume?

QUEEN ELIZABETH:
What, how she shall be wetchmen innocent!

USTiNTIUS:
Cannot but sweet and rose:
Whilst I't! aw! then for me send thy word.

CAMFID:
Nevel, thusses hold; my son Petruchio
Petrour or extermil.

HORTENSIO:
Farewell; and see at thy sword and learn!
O, peace! I have pissed towards:
saw now, sir; well, sirPall and now go melt to have
A little gnazed up mes-ecced.

BRUTUS:
O, sir, and bring to bear which he slew
From him that heavy with Kemporr'd,
When he shall have me in myself.

ANTONIO:
Hath eit regain'd how I'll cun, or told you out:
What, prove is on enter thing we would say they
Were chairls they love to love; he shall decive
As bying a chambel and some seven pent
To take her strive Been Cray. But, sirr, sir!

SEBASTIAN:
How now, sir! have not andiving began?

BIRTAPO:
Ripinion, not a carm, with thems