<a href="https://colab.research.google.com/github/yiwenwangANU/Machine_Learning/blob/main/Tensorflow_Certificate_Model_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Data

In [19]:
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers

In [20]:
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

In [21]:
with open(path_to_file, 'rb') as f:
  raw_data = f.read().decode('utf-8')

In [22]:
vocab = list(set(raw_data))
len(vocab)

65

In [23]:
all_chars = tf.strings.unicode_split(raw_data, input_encoding='UTF-8')
all_chars[:100]

<tf.Tensor: shape=(100,), dtype=string, numpy=
array([b'F', b'i', b'r', b's', b't', b' ', b'C', b'i', b't', b'i', b'z',
       b'e', b'n', b':', b'\n', b'B', b'e', b'f', b'o', b'r', b'e', b' ',
       b'w', b'e', b' ', b'p', b'r', b'o', b'c', b'e', b'e', b'd', b' ',
       b'a', b'n', b'y', b' ', b'f', b'u', b'r', b't', b'h', b'e', b'r',
       b',', b' ', b'h', b'e', b'a', b'r', b' ', b'm', b'e', b' ', b's',
       b'p', b'e', b'a', b'k', b'.', b'\n', b'\n', b'A', b'l', b'l', b':',
       b'\n', b'S', b'p', b'e', b'a', b'k', b',', b' ', b's', b'p', b'e',
       b'a', b'k', b'.', b'\n', b'\n', b'F', b'i', b'r', b's', b't', b' ',
       b'C', b'i', b't', b'i', b'z', b'e', b'n', b':', b'\n', b'Y', b'o',
       b'u'], dtype=object)>

In [24]:
chars_to_ids = layers.StringLookup(vocabulary=vocab)
ids_to_chars = layers.StringLookup(vocabulary=chars_to_ids.get_vocabulary(), invert=True)
all_ids = chars_to_ids(all_chars)
all_ids[:100],len(all_ids)

(<tf.Tensor: shape=(100,), dtype=int64, numpy=
 array([ 6, 48, 52, 37, 31, 43, 22, 48, 31, 48, 39, 58,  8, 12, 36, 18, 58,
        57, 59, 52, 58, 43, 44, 58, 43, 17, 52, 59,  5, 58, 58, 62, 43, 28,
         8, 35, 43, 57, 29, 52, 31, 23, 58, 52, 49, 43, 23, 58, 28, 52, 43,
        27, 58, 43, 37, 17, 58, 28, 53,  2, 36, 36, 46, 42, 42, 12, 36, 24,
        17, 58, 28, 53, 49, 43, 37, 17, 58, 28, 53,  2, 36, 36,  6, 48, 52,
        37, 31, 43, 22, 48, 31, 48, 39, 58,  8, 12, 36, 26, 59, 29])>, 1115394)

In [25]:
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)
ids_dataset

<TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>

In [26]:
seq_length = 100
batched_dataset = ids_dataset.batch(seq_length+1, drop_remainder=True)
batched_dataset

<BatchDataset element_spec=TensorSpec(shape=(101,), dtype=tf.int64, name=None)>

In [27]:
def input_target_split(sequence):
  return sequence[:-1], sequence[1:]

In [28]:
dataset = batched_dataset.map(input_target_split)
dataset

<MapDataset element_spec=(TensorSpec(shape=(100,), dtype=tf.int64, name=None), TensorSpec(shape=(100,), dtype=tf.int64, name=None))>

In [29]:
for inputs, target in dataset.take(1):
  print(tf.strings.reduce_join(ids_to_chars(inputs)))
  print(tf.strings.reduce_join(ids_to_chars(target)))

tf.Tensor(b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou', shape=(), dtype=string)
tf.Tensor(b'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou ', shape=(), dtype=string)


In [30]:
###############################forget###############################
prefetched_dataset = dataset.batch(64).prefetch(tf.data.AUTOTUNE) 
prefetched_dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(None, 100), dtype=tf.int64, name=None), TensorSpec(shape=(None, 100), dtype=tf.int64, name=None))>

#Model

In [31]:
vocab_size = len(chars_to_ids.get_vocabulary())
embedding_dims = 256
rnn_units = 1024

In [32]:
class Model_0(tf.keras.Model):

    def __init__(self, vocab_size, embedding_dims, rnn_units):
        super(Model_0, self).__init__()
        self.embed = layers.Embedding(input_dim=vocab_size,
                                      output_dim=embedding_dims,
                                      name='embed')
        self.GRU = layers.GRU(units=rnn_units, ###############################?????###############################
                                return_sequences=True,
                                return_state=True,
                                name='GRU')
        self.Dense = layers.Dense(units=vocab_size, name='Dense')

    def call(self, inputs, return_state=False, state=None):
        x = self.embed(inputs)  #(batch, 100, embed)
        if(state==None):
          state = self.GRU.get_initial_state(x)
        x, state = self.GRU(x, initial_state=state)  #(batch, seq, vocab)
        outputs = self.Dense(x)
        if(return_state==True):
          return outputs, state
        else:
          return outputs


In [33]:
model = Model_0(vocab_size=vocab_size,
                embedding_dims=embedding_dims,
                rnn_units=rnn_units)

In [34]:
for inputs, _ in prefetched_dataset.take(1):
  print(tf.strings.reduce_join(ids_to_chars(inputs[0])))
  ###############################forget###############################
  predicted_ids = tf.squeeze(tf.random.categorical(model(inputs)[0], 1))
  print(tf.strings.reduce_join(ids_to_chars(predicted_ids)))

tf.Tensor(b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou', shape=(), dtype=string)
tf.Tensor(b"$3rvch'UeFUS3-.OF!H$rsyQMvyTZwIZTo3!XBkupzFXqW3:q'E.l,!OitVkQ;rs'R'w,hCVtY-lgpRtlnFQhTIhbnQFVK!bCH.B", shape=(), dtype=string)


In [35]:
model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              optimizer=tf.keras.optimizers.Adam())

In [36]:
history = model.fit(prefetched_dataset,
                    epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [37]:
for inputs, _ in prefetched_dataset.take(1):
  print(tf.strings.reduce_join(ids_to_chars(inputs[0])))
  predicted_ids = tf.squeeze(tf.random.categorical(model(inputs)[0], 1))
  print(tf.strings.reduce_join(ids_to_chars(predicted_ids)))

tf.Tensor(b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou', shape=(), dtype=string)
tf.Tensor(b'Frst Gotizen:\nWecore;tenfeeveed isd ourther  yoad me speak.\n\nBNl:\ngaeak  gpeak \n\nBLrst Citizen:\nAou ', shape=(), dtype=string)


In [50]:
inputs='ROMEO:'
#()
input_ids = chars_to_ids(tf.strings.unicode_split(inputs, input_encoding='UTF-8')) #(seq)
output, state = model(tf.expand_dims(input_ids,axis=0), return_state=True) #(batch, seq, vocab)
output = tf.squeeze(output, axis=0) #(seq, vocab)
pred_ids = tf.squeeze(tf.random.categorical(output, 1), axis=-1) #(seq)
pred_chars = ids_to_chars(pred_ids)  #(seq)
last_char = pred_chars[-1]

<tf.Tensor: shape=(6,), dtype=string, numpy=array([b'I', b'M', b'E', b'O', b':', b'\n'], dtype=object)>

In [68]:
def predict_next_word(model=model, inputs='ROMEO:', state=None):
  input_ids = chars_to_ids(tf.strings.unicode_split(inputs, input_encoding='UTF-8')) #(seq)
  output, state = model(tf.expand_dims(input_ids,axis=0),
                        return_state=True,
                        state=state) #(batch, seq, vocab)  ###############################forget###############################
  output = tf.squeeze(output, axis=0) #(seq, vocab)
  pred_ids = tf.squeeze(tf.random.categorical(output, 1), axis=-1) #(seq)
  pred_chars = ids_to_chars(pred_ids)  #(seq)
  next_word = pred_chars[-1]
  return next_word, state

In [69]:
predict_next_word(inputs='R')

(<tf.Tensor: shape=(), dtype=string, numpy=b'E'>,
 <tf.Tensor: shape=(1, 1024), dtype=float32, numpy=
 array([[ 0.16130322,  0.32743314,  0.11106565, ..., -0.34415296,
          0.6390653 ,  0.0230303 ]], dtype=float32)>)

In [70]:
def make_prediction(model=model, initial_inputs='ROMEO:', state=None, steps=1000):
  output = [initial_inputs]
  next_word=initial_inputs
  for i in range(steps):
    next_word, state = predict_next_word(inputs=next_word,
                                         state=state)
    output.append(next_word)
  return tf.strings.reduce_join(output)

In [72]:
predictions = make_prediction()
print(predictions.numpy().decode('utf-8'))

ROMEO:
Here is every man of his good sweet Manacl; is, there diest
Into the mirthwation of a book;
And if I warrant thee, hortensio has needful,
the cedral at the ganey a fet your wedding-day.
Borrog and Serioved villain!

WARWICK:
When I show 'tis for traimon! nay, the tymant's chamber; oath
in field up seeds how her not to denied.

PETRUCHIO:
The bridegroom young Word, do you know her father trembling dark,
And woman'd loss she ampured saking.

ANTONIO:
A Gedler, dream'd!

a pieck of reason,
Five suste think it the moon shines for thee, for thou say'st o' the tyrant.

First Servant:
Patience!

AUTOLYCUS:
Well, for, ere we may paintly?

BRUTUS:
Go too! of such affair!

KATHARINA:
Tark here, my son cell the duke as Vardial?

KATHARINA:
What is't your dute?

CLAUDOO:
O, ale! am Conquest there?

Pedant:
She will get she went discontinent
But might suit: determine!

PETRUCHIO:
Come, come, tell me, Somerset: as foolish sou King Edward,
Is nothing on the plebate can have them very fair.

SA