**Praktikum 2**

**Generator Teks dengan RNN**

In [1]:
# Import library
import tensorflow as tf
import numpy as np
import os
import time

In [2]:
# Download the Shakespeare dataset
path_to_file = tf.keras.utils.get_file('shakespeare.txt','https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [3]:
# Read the file in binary mode ('rb') and decode the bytes as UTF-8.
# The `with` block closes the file handle as soon as the read has finished,
# instead of leaving it open until garbage collection.
with open(path_to_file, 'rb') as shakespeare_file:
    text = shakespeare_file.read().decode(encoding='utf-8')

# The length of the text is the number of characters it contains
print(f'Length of text: {len(text)} characters')

Length of text: 1115394 characters


In [4]:
# Print the first 250 characters of the text
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [5]:
# Collect the unique characters that occur in the text, in sorted order
vocab = sorted(set(text))

# Print the number of unique characters
print(f'{len(vocab)} unique characters')

65 unique characters


In [6]:
# Process the text: vectorize it

# Example strings
example_texts = ['abcdefg', 'xyz']

# Split each string into its Unicode characters
chars = tf.strings.unicode_split(example_texts, input_encoding='UTF-8')

# Display the resulting characters
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [7]:
# Build a StringLookup layer that converts characters to numeric IDs
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab),  # The characters to index
    mask_token=None  # No mask token is used
)

In [8]:
# Convert the Unicode characters to numeric IDs
ids = ids_from_chars(chars)

# Display the resulting numeric IDs
ids

<tf.RaggedTensor [[40, 41, 42, 43, 44, 45, 46], [63, 64, 65]]>

In [9]:
# Build an inverted StringLookup layer that converts numeric IDs back to characters
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(),  # Reuse the vocabulary of the forward lookup
    invert=True,  # invert=True maps IDs back to characters
    mask_token=None  # No mask token is used
)

In [10]:
tf.strings.reduce_join(chars, axis=-1).numpy()

array([b'abcdefg', b'xyz'], dtype=object)

In [11]:
def text_from_ids(ids):
  """Look up the characters for `ids` and join them along the last axis."""
  looked_up_chars = chars_from_ids(ids)
  joined = tf.strings.reduce_join(looked_up_chars, axis=-1)
  return joined

In [12]:
# Prediction

# Build the training set and targets
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
all_ids

<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([19, 48, 57, ..., 46,  9,  1])>

In [13]:
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [14]:
# Show the first 10 IDs of the dataset as characters.
# The loop variable is named `char_id` so that it does not overwrite the
# `ids` tensor assigned in an earlier cell.
for char_id in ids_dataset.take(10):
  print(chars_from_ids(char_id).numpy().decode('utf-8'))

F
i
r
s
t
 
C
i
t
i


In [15]:
seq_length = 100

In [16]:
# batch() groups the individual character IDs into sequences of the desired size.
sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)

for seq in sequences.take(1):
  print(chars_from_ids(seq))

tf.Tensor(
[b'F' b'i' b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':'
 b'\n' b'B' b'e' b'f' b'o' b'r' b'e' b' ' b'w' b'e' b' ' b'p' b'r' b'o'
 b'c' b'e' b'e' b'd' b' ' b'a' b'n' b'y' b' ' b'f' b'u' b'r' b't' b'h'
 b'e' b'r' b',' b' ' b'h' b'e' b'a' b'r' b' ' b'm' b'e' b' ' b's' b'p'
 b'e' b'a' b'k' b'.' b'\n' b'\n' b'A' b'l' b'l' b':' b'\n' b'S' b'p' b'e'
 b'a' b'k' b',' b' ' b's' b'p' b'e' b'a' b'k' b'.' b'\n' b'\n' b'F' b'i'
 b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':' b'\n' b'Y'
 b'o' b'u' b' '], shape=(101,), dtype=string)


In [17]:
# It is easier to read if the tokens are joined back into strings
for seq in sequences.take(5):
    print(text_from_ids(seq).numpy())

b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
b'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
b"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
b"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
b'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


In [18]:
# Untuk pelatihan, memerlukan kumpulan data pasangan (input, label)

# Fungsi yang mengambil urutan sebagai masukan
def split_input_target(sequence):
  """Split a sequence into an (input, target) pair shifted by one position.

  The input is the sequence without its last element and the target is the
  sequence without its first element, so target[i] follows input[i].
  """
  return sequence[:-1], sequence[1:]

In [19]:
split_input_target(list("Tensorflow"))

(['T', 'e', 'n', 's', 'o', 'r', 'f', 'l', 'o'],
 ['e', 'n', 's', 'o', 'r', 'f', 'l', 'o', 'w'])

In [20]:
dataset = sequences.map(split_input_target)

In [21]:
for input_example, target_example in dataset.take(1):
  print("Input :", text_from_ids(input_example).numpy())
  print("Target:", text_from_ids(target_example).numpy())

Input : b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Target: b'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [22]:
# Build the training batches

# Batch size used during training
BATCH_SIZE = 64

# Buffer size used to shuffle the dataset.
# tf.data is designed to work with possibly infinite sequences, so it does not
# try to shuffle the entire sequence in memory. Instead, it maintains a buffer
# in which it shuffles elements.
BUFFER_SIZE = 10000

# Shuffle, batch, and prefetch the dataset
dataset = (
    dataset
    .shuffle(BUFFER_SIZE)  # Shuffle the sequences
    .batch(BATCH_SIZE, drop_remainder=True)  # Drop the final batch if it is smaller than BATCH_SIZE
    .prefetch(tf.data.AUTOTUNE)  # tf.data.AUTOTUNE replaces the deprecated tf.data.experimental.AUTOTUNE alias
)

# Display the configured dataset
dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

In [23]:
# Build the model

# Number of entries in the StringLookup layer's vocabulary
vocab_size = len(ids_from_chars.get_vocabulary())

# Embedding dimension
embedding_dim = 256

# Number of RNN (Recurrent Neural Network) units
rnn_units = 1024

In [24]:
# Mendefinisikan kelas model khusus MyModel
class MyModel(tf.keras.Model):
  """Character-level sequence model: Embedding -> GRU -> Dense.

  The Dense layer has `vocab_size` outputs and no activation, so the model
  returns one vector of unnormalized scores per input position.
  """

  def __init__(self, vocab_size, embedding_dim, rnn_units):
    """Create the three layers of the model.

    Args:
      vocab_size: Number of token IDs; used as the embedding input size and
        as the number of outputs of the final Dense layer.
      embedding_dim: Size of each embedding vector.
      rnn_units: Number of units in the GRU layer.
    """
    # Call the base initializer with no arguments. `super()` already binds
    # `self`; passing it again would hand the model to
    # tf.keras.Model.__init__ as an extra positional argument.
    super().__init__()

    # Embedding layer that converts numeric IDs into embedding vectors
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)

    # GRU (Gated Recurrent Unit) layer that returns both the full output
    # sequence and its final state
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)

    # Dense (fully connected) layer with vocab_size outputs
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    """Run the model on a batch of token IDs.

    Args:
      inputs: Token IDs fed to the embedding layer.
      states: Initial GRU state; when None, the GRU's own initial state is used.
      return_state: When True, also return the final GRU state.
      training: Forwarded to every layer call.

    Returns:
      The Dense layer output, or an `(output, states)` tuple when
      `return_state` is True.
    """
    x = inputs

    # Apply the embedding layer
    x = self.embedding(x, training=training)

    if states is None:
      # No state supplied: ask the GRU layer for its initial state
      states = self.gru.get_initial_state(x)

    # Run the GRU layer starting from `states`
    x, states = self.gru(x, initial_state=states, training=training)

    # Run the dense layer on the GRU output
    x = self.dense(x, training=training)

    if return_state:
      # Return both the output and the GRU state
      return x, states
    else:
      # Return only the output
      return x

In [25]:
model = MyModel(
    vocab_size=vocab_size,  # Size of the vocabulary
    embedding_dim=embedding_dim,  # Embedding dimension
    rnn_units=rnn_units  # Number of units in the GRU layer
)

In [26]:
# Test the model on a single batch and check the output shape
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 66) # (batch_size, sequence_length, vocab_size)


In [27]:
model.summary()

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  16896     
                                                                 
 gru (GRU)                   multiple                  3938304   
                                                                 
 dense (Dense)               multiple                  67650     
                                                                 
Total params: 4022850 (15.35 MB)
Trainable params: 4022850 (15.35 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [28]:
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices= tf.squeeze(sampled_indices, axis=-1).numpy()

In [29]:
sampled_indices

array([31, 40, 24, 14, 63, 21, 27, 59, 16,  7, 52,  2, 64,  8,  3, 53, 14,
       47, 22, 49, 27,  0, 19, 48, 38, 36, 29, 49, 38, 39, 12,  1, 43, 58,
       33, 17, 25, 33, 43, 12, 17, 56, 45, 58, 53, 60, 18, 60, 20, 58, 45,
       23, 51, 57, 45,  4, 56, 19, 19, 37, 60, 62, 36, 58, 24, 22, 22, 29,
       53, 20, 25, 39, 39, 58, 31, 58, 59, 46, 38, 28, 36,  2, 10, 16, 58,
       17,  4, 11, 26, 56,  6,  1,  9, 64, 47, 63, 29, 57, 30, 29])

In [30]:
# Inspect the text predicted by the untrained model
print("Input:\n", text_from_ids(input_example_batch[0]).numpy())
print()
print("Next Char Predictions:\n", text_from_ids(sampled_indices).numpy())

Input:
 b"lding-anchor lost,\nAnd half our sailors swallow'd in the flood?\nYet lives our pilot still. Is't meet"

Next Char Predictions:
 b"RaKAxHNtC,m y-!nAhIjN[UNK]FiYWPjYZ;\ndsTDLTd;DqfsnuEuGsfJlrf$qFFXuwWsKIIPnGLZZsRstgYOW 3CsD$:Mq'\n.yhxPrQP"


In [31]:
# Train the model

# Loss function used for training; from_logits=True tells it to treat the
# model outputs as unnormalized logits
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [32]:
example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss)

Prediction shape:  (64, 100, 66)  # (batch_size, sequence_length, vocab_size)
Mean loss:         tf.Tensor(4.1888885, shape=(), dtype=float32)


In [33]:
# Sanity check: for the untrained model, the exponential of the mean loss
# should be approximately equal to the vocabulary size
tf.exp(example_batch_mean_loss).numpy()

65.949455

In [34]:
# Configure the training procedure
model.compile(optimizer='adam', loss=loss)

In [35]:
# Konfigurasi Checkpoints

# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [36]:
# Run the training process
EPOCHS = 10

In [37]:
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [38]:
# Generate Teks
class OneStep(tf.keras.Model):
  """Wraps a model and the two lookup layers to sample one character per call.

  Stores a sampling temperature and a logit mask that keeps the "[UNK]"
  token from being generated.
  """

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    """Store the model and lookup layers and build the "[UNK]" mask.

    Args:
      model: Model called with `inputs`, `states` and `return_state=True`.
      chars_from_ids: Layer that maps token IDs to characters.
      ids_from_chars: Layer that maps characters to token IDs.
      temperature: Value the logits are divided by before sampling.
    """
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Create a mask to prevent "[UNK]" from being generated.
    skip_ids = self.ids_from_chars(['[UNK]'])[:, None]
    sparse_mask = tf.SparseTensor(
        # Put a -inf at each bad index.
        values=[-float('inf')]*len(skip_ids),
        indices=skip_ids,
        # Match the shape to the vocabulary
        dense_shape=[len(ids_from_chars.get_vocabulary())])
    self.prediction_mask = tf.sparse.to_dense(sparse_mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    """Sample the next character for each input string.

    Args:
      inputs: String tensor; each string is split into UTF-8 characters.
      states: State passed through to the wrapped model (None on the first call).

    Returns:
      A `(predicted_chars, states)` tuple: the sampled characters and the
      state returned by the wrapped model.
    """
    # Convert strings to token IDs.
    input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
    input_ids = self.ids_from_chars(input_chars).to_tensor()

    # Run the model.
    # predicted_logits.shape is [batch, char, next_char_logits]
    predicted_logits, states = self.model(inputs=input_ids, states=states,
                                          return_state=True)
    # Only use the last prediction.
    predicted_logits = predicted_logits[:, -1, :]
    predicted_logits = predicted_logits/self.temperature
    # Apply the prediction mask: prevent "[UNK]" from being generated.
    predicted_logits = predicted_logits + self.prediction_mask

    # Sample the output logits to generate token IDs.
    predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
    predicted_ids = tf.squeeze(predicted_ids, axis=-1)

    # Convert from token ids to characters
    predicted_chars = self.chars_from_ids(predicted_ids)

    # Return the characters and model state.
    return predicted_chars, states

In [39]:
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [40]:
# Generate text from a single seed string and time the run
start = time.time()
states = None  # No state yet; the model falls back to the GRU's initial state
next_char = tf.constant(['ROMEO:'])  # Seed text
result = [next_char]

# Sample 1000 characters, feeding each sampled character and the returned
# state back into the next step
for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Join the seed and all sampled characters into one string
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

ROMEO:
Not a little father, awlitted him; and come into
me that is not his adointance, woman,
I had it meet the time to all hibsely
For contration. Thine eyem,
Brothering the whole nostims?

JULIET:
He will should sow on.

ROMEO:
Nay, ruther comes hence, that skids you so she kingly you of
this mustering. 
First Murderer:
Condemn'd you, not my gan't world; despair
I have in happy thrives of malice scorns,
My father do that do I but bewn shadows,
I'll fall for over was heart the other
And been the beed have more from heathemiest:
Thou husbann'd impatience, shall not
have a shore of them veins, diemer that
I cannot dust we becue to save
The Lords, of gold Becknow hope: my takents of love
Meanning my hator am he looks upon the whole.
Here in smilones, even to draw mock thee face?

HENRY BOLINGBROKE:
What, ho! I? that who,
Stanley, I peas more particle! and moder more than woo
My good lord, althat and thy shape was hungers to't.

PETRUCHIO:
Retumes a giand we last none bada: speak of her a

In [41]:
# Generate text for a batch of five identical seed strings and time the run
start = time.time()
states = None  # No state yet; the model falls back to the GRU's initial state
next_char = tf.constant(['ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:'])
result = [next_char]

# Sample 1000 characters per seed, feeding each step's output and state
# back into the next step
for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Join the seeds and sampled characters element-wise into one string per seed
result = tf.strings.join(result)
end = time.time()
print(result, '\n\n' + '_'*80)
print('\nRun time:', end - start)

tf.Tensor(
[b"ROMEO:\nwhen he forgetsions you till?\n\nClown:\nWe will answear the dete! more incline, kiss the other\nRedain the thing and ride to move the times soft\nWhich my fatter her brother Clifford, and, blunt show\nWe shall have matter with the fride.'\n\nAUTOLYCUS:\nI see there shall forget the mock he hast too,\nWhom's point, inclanifated town, never sit\nwith me begin the other like here almost Rovers:\nMerely not to sweeter to in, and letters earm, I am fools,\nAgainst our particaster! his!\n\nhere England's day no laster be entreated shadour tongues\nAnd young Broken provokes above Green borne\nI am do believe but to call me thinks,\nSome lodgen names are committed to any horse:\nUpon a hair! Do in the king: for whom you, know\nMy shopon'd a loyal opacinuing:\nThe wonders whose intent of honour, by ank you\nSo less expeed my impose on's battle all the weeds on.\nProvide me too late? forget my love!\n\nCLARENCE:\nI'll see the light of flatesting earth to have out report\nC

In [42]:
# Export the generator model and load it back
tf.saved_model.save(one_step_model, 'one_step')
one_step_reloaded = tf.saved_model.load('one_step')



In [43]:
# Generate 100 characters with the reloaded model to check that the export works
states = None
next_char = tf.constant(['ROMEO:'])
result = [next_char]

for n in range(100):
  next_char, states = one_step_reloaded.generate_one_step(next_char, states=states)
  result.append(next_char)

print(tf.strings.join(result)[0].numpy().decode("utf-8"))

ROMEO:
Draw, nor wars? what, little rest, ensies down from the verge
Our hatred hath needs, or how he ofte


**TUGAS**

In [44]:
class CustomTraining(MyModel):
    """MyModel variant whose train_step performs an explicit GradientTape update."""

    @tf.function
    def train_step(self, inputs):
        """Run one optimization step on an (inputs, labels) pair.

        Returns:
            A dict with the loss of this step under the key 'loss'.
        """
        features, targets = inputs

        # Record the forward pass so the loss can be differentiated
        with tf.GradientTape() as tape:
            logits = self(features, training=True)
            step_loss = self.loss(targets, logits)

        # Differentiate the loss w.r.t. the trainable variables and let the
        # optimizer apply the update
        variables = self.trainable_variables
        grads = tape.gradient(step_loss, variables)
        self.optimizer.apply_gradients(zip(grads, variables))

        return {'loss': step_loss}

Kode di atas mengimplementasikan metode `train_step` mengikuti konvensi `train_step` pada Keras. Langkah ini bersifat opsional, tetapi dengan pendekatan ini kita dapat menyesuaikan perilaku langkah pelatihan sambil tetap menggunakan metode `compile` dan `fit` dari model Keras.

In [45]:
# Model custom training
# NOTE: this rebinds `model` to a new, untrained CustomTraining instance;
# `one_step_model` was built from the earlier model object.
model = CustomTraining(
     vocab_size=len(ids_from_chars.get_vocabulary()),
     embedding_dim=embedding_dim,
     rnn_units=rnn_units)

# Compile the model
optimizer = tf.keras.optimizers.Adam()
loss_function = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer=optimizer, loss=loss_function)

# Train on the dataset for one epoch; fit() uses the train_step defined above
epochs = 1
model.fit(dataset, epochs=epochs)



<keras.src.callbacks.History at 0x7f986dd097e0>

Atau, jika ingin memahami cara kerjanya lebih dalam, kita bisa membuat custom training loop sendiri.

In [46]:
EPOCHS = 10
mean = tf.metrics.Mean()

# Training loop over epochs
for epoch in range(EPOCHS):
    start = time.time()

    # Reset the mean metric so each epoch reports its own average loss
    mean.reset_states()

    # Iterate over batches in the dataset
    for batch_n, (inp, target) in enumerate(dataset):
        # Perform a training step and get the loss
        logs = model.train_step([inp, target])

        # Update the mean metric with the current batch loss
        mean.update_state(logs['loss'])

        # Print batch loss every 50 batches
        if batch_n % 50 == 0:
            template = f"Epoch {epoch+1} Batch {batch_n} Loss {logs['loss']:.4f}"
            print(template)

    # Save (checkpoint) the model every 5 epochs.
    # This runs once per epoch, after the batch loop has finished; saving
    # inside the batch loop would rewrite the same checkpoint on every batch.
    if (epoch + 1) % 5 == 0:
        model.save_weights(checkpoint_prefix.format(epoch=epoch))

    # Print epoch loss and time taken
    print()
    print(f'Epoch {epoch+1} Loss: {mean.result().numpy():.4f}')
    print(f'Time taken for 1 epoch {time.time() - start:.2f} sec')
    print("_" * 80)

# Save the final model weights after all epochs
model.save_weights(checkpoint_prefix.format(epoch=epoch))

Epoch 1 Batch 0 Loss 2.1636
Epoch 1 Batch 50 Loss 2.0160
Epoch 1 Batch 100 Loss 1.9310
Epoch 1 Batch 150 Loss 1.8849

Epoch 1 Loss: 1.9884
Time taken for 1 epoch 11.82 sec
________________________________________________________________________________
Epoch 2 Batch 0 Loss 1.7970
Epoch 2 Batch 50 Loss 1.7763
Epoch 2 Batch 100 Loss 1.6790
Epoch 2 Batch 150 Loss 1.6709

Epoch 2 Loss: 1.7106
Time taken for 1 epoch 10.35 sec
________________________________________________________________________________
Epoch 3 Batch 0 Loss 1.6104
Epoch 3 Batch 50 Loss 1.5943
Epoch 3 Batch 100 Loss 1.5675
Epoch 3 Batch 150 Loss 1.5169

Epoch 3 Loss: 1.5498
Time taken for 1 epoch 11.27 sec
________________________________________________________________________________
Epoch 4 Batch 0 Loss 1.4895
Epoch 4 Batch 50 Loss 1.4622
Epoch 4 Batch 100 Loss 1.4680
Epoch 4 Batch 150 Loss 1.4238

Epoch 4 Loss: 1.4509
Time taken for 1 epoch 11.30 sec
_____________________________________________________________________

**Perbedaan dengan praktikum 2:**

Pada Praktikum 2, digunakan pendekatan pelatihan yang lebih umum dan sederhana dengan metode model.fit yang sudah terintegrasi dengan TensorFlow. Metode ini mengelola sebagian besar aspek pelatihan, seperti perhitungan loss, perhitungan gradien, dan pembaruan bobot model secara otomatis.

Sementara itu, pada kode tugas praktikum, diterapkan pendekatan pelatihan yang lebih spesifik dan kompleks. Dalam pendekatan ini, terdapat definisi metode train_step dalam model turunan yang mengatur dengan jelas perhitungan loss, perhitungan gradien, dan pembaruan bobot model. Selain itu, objek tf.metrics.Mean digunakan untuk menghitung rata-rata loss selama pelatihan. Pendekatan ini memberikan lebih banyak kemampuan untuk mengendalikan dan menyesuaikan pelatihan model, yang sangat berguna untuk tugas-tugas yang membutuhkan penyesuaian yang spesifik. Dengan demikian, perbedaan utama terletak pada tingkat kontrol yang lebih tinggi dan lebih banyak opsi kustomisasi dalam pendekatan pelatihan pada tugas praktikum.