In [12]:
import tensorflow as tf
import numpy as np
import time, os, logging
import sample_tf2, model_tf2, encoder

In [13]:
# Take the root logger (no name given) and lower its threshold to DEBUG.
logger = logging.getLogger(name=None)
logger.setLevel(level=logging.DEBUG)

In [14]:
# Ask TensorFlow for the GPU device name; the recorded output of this cell
# is '/device:GPU:0'.
tf.test.gpu_device_name()

'/device:GPU:0'

In [15]:
# Notebook-level settings.
# NOTE(review): neither name is referenced by the cells visible in this
# notebook — confirm whether they are still needed.
CHECKPOINT_ROOT = "./checkpoint"
SEQ_LEN = 2 ** 10  # 1024

In [16]:
# Build the TF2 GPT-2 model from the '117M' entry of the hyper-parameter table.
hparams = model_tf2.HPARAMS['117M']
gpt2 = model_tf2.GPT2(hparams)

In [17]:
# Tiny 2x2 integer batch that is fed to the model in the next cell
# (presumably token ids — TODO confirm against model_tf2).
dummy_batch = np.array([[35, 789], [98, 69]])
X = tf.convert_to_tensor(dummy_batch)
# mask = model_tf2.create_look_ahead_mask(2)

In [18]:
# Run the model once on the dummy batch. The call returns three values; the
# first two are kept as `logits` and `presents`, the third is discarded.
# NOTE(review): presumably this first call is also what creates the model's
# variables so that summary() in the next cell has something to report —
# confirm against model_tf2.
logits, presents, _ = gpt2(X, None)

In [19]:
# Print the per-layer parameter table of the freshly built model.
gpt2.summary()

Model: "gpt2_tf2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      multiple                  39383808  
_________________________________________________________________
decoder (Decoder)            multiple                  85056000  
_________________________________________________________________
look_up_1 (LookUp)           multiple                  38597376  
Total params: 124,439,808
Trainable params: 124,439,808
Non-trainable params: 0
_________________________________________________________________


In [8]:
# List every trainable variable (name, shape, dtype) so the TF2 names can be
# compared with the checkpoint names loaded further down.
trainable = gpt2.trainable_variables
print("Total {} trainable variables.".format(len(trainable)))
for var in trainable:
    print(var.name, var.shape, var.dtype)

Total 148 trainable variables.
gpt2_tf2/wpe:0 (1024, 768) <dtype: 'float32'>
gpt2_tf2/wte:0 (50257, 768) <dtype: 'float32'>
gpt2_tf2/decoder/h0/attn/c_attn/kernel:0 (768, 2304) <dtype: 'float32'>
gpt2_tf2/decoder/h0/attn/c_attn/bias:0 (2304,) <dtype: 'float32'>
gpt2_tf2/decoder/h0/attn/c_proj/kernel:0 (768, 768) <dtype: 'float32'>
gpt2_tf2/decoder/h0/attn/c_proj/bias:0 (768,) <dtype: 'float32'>
gpt2_tf2/decoder/h0/mlp/c_fc/kernel:0 (768, 3072) <dtype: 'float32'>
gpt2_tf2/decoder/h0/mlp/c_fc/bias:0 (3072,) <dtype: 'float32'>
gpt2_tf2/decoder/h0/mlp/c_proj/kernel:0 (3072, 768) <dtype: 'float32'>
gpt2_tf2/decoder/h0/mlp/c_proj/bias:0 (768,) <dtype: 'float32'>
gpt2_tf2/decoder/h0/ln_1/gamma:0 (768,) <dtype: 'float32'>
gpt2_tf2/decoder/h0/ln_1/beta:0 (768,) <dtype: 'float32'>
gpt2_tf2/decoder/h0/ln_2/gamma:0 (768,) <dtype: 'float32'>
gpt2_tf2/decoder/h0/ln_2/beta:0 (768,) <dtype: 'float32'>
gpt2_tf2/decoder/h1/attn/c_attn/kernel:0 (768, 2304) <dtype: 'float32'>
gpt2_tf2/decoder/h1/attn/c_at

## Load from TF1 Checkpoint

In [9]:
# Location of the TF1 checkpoint to convert; the next cell passes it to
# tf.train.list_variables and tf.train.load_variable.
ckpt_directory = "../models/117M"

In [10]:
# Read every variable of the TF1 checkpoint into memory.
#
# Checkpoint names are scoped as "model/..." (see the names printed by this
# cell). The scope is stripped so the remaining path can be matched against
# the TF2 variable names later on.
CKPT_SCOPE_PREFIX = "model/"

ckpt_vars = tf.train.list_variables(ckpt_directory)
names = []
tensors = []

for name, shape in ckpt_vars:
    print("Loading TF weight {} with shape {}".format(name, shape))
    tensor = tf.train.load_variable(ckpt_directory, name)
    # Strip the scope only when it is really there. Blindly dropping the
    # first six characters would silently mangle any variable that is not
    # stored under "model/".
    if name.startswith(CKPT_SCOPE_PREFIX):
        scoped_name = name[len(CKPT_SCOPE_PREFIX):]
    else:
        scoped_name = name
    # The key is the str() of the list of path components, e.g.
    # "['h0', 'attn', 'c_attn', 'w']"; the assignment cell builds its lookup
    # keys in exactly the same form.
    names.append(str(scoped_name.split("/")))
    tensors.append(tensor)
assert len(names) == len(tensors)
print("{} vars loaded from ckpt {}".format(len(ckpt_vars), ckpt_directory))

Loading TF weight model/h0/attn/c_attn/b with shape [2304]
Loading TF weight model/h0/attn/c_attn/w with shape [1, 768, 2304]
Loading TF weight model/h0/attn/c_proj/b with shape [768]
Loading TF weight model/h0/attn/c_proj/w with shape [1, 768, 768]
Loading TF weight model/h0/ln_1/b with shape [768]
Loading TF weight model/h0/ln_1/g with shape [768]
Loading TF weight model/h0/ln_2/b with shape [768]
Loading TF weight model/h0/ln_2/g with shape [768]
Loading TF weight model/h0/mlp/c_fc/b with shape [3072]
Loading TF weight model/h0/mlp/c_fc/w with shape [1, 768, 3072]
Loading TF weight model/h0/mlp/c_proj/b with shape [768]
Loading TF weight model/h0/mlp/c_proj/w with shape [1, 3072, 768]
Loading TF weight model/h1/attn/c_attn/b with shape [2304]
Loading TF weight model/h1/attn/c_attn/w with shape [1, 768, 2304]
Loading TF weight model/h1/attn/c_proj/b with shape [768]
Loading TF weight model/h1/attn/c_proj/w with shape [1, 768, 768]
Loading TF weight model/h1/ln_1/b with shape [768]
Lo

In [11]:
# Lookup table: stringified checkpoint path -> loaded tensor.
ckpt_vmap = {key: value for key, value in zip(names, tensors)}

In [12]:
# Translation of the last component of a TF2 variable name (kernel, bias,
# gamma, beta) to the single-letter suffix used by the TF1 checkpoint.
# Both "bias" and "beta" map to "b".
vname_mapping = dict(
    kernel="w",
    bias="b",
    gamma="g",
    beta="b",
)

In [13]:
# Copy every TF1 checkpoint tensor into the matching TF2 variable.
#
# A TF2 name such as "gpt2_tf2/decoder/h0/attn/c_attn/kernel:0" is turned
# into the checkpoint key for "h0/attn/c_attn/w" by:
#   1. dropping the ":<index>" suffix and the leading "<model name>/" scope,
#   2. dropping a leading "decoder" component,
#   3. translating the last component through vname_mapping.
# The scope is derived from gpt2.name instead of a hard-coded character
# count, so the loop keeps working if the model is given another name.
model_scope = gpt2.name + "/"
for v in gpt2.trainable_variables:
    var_path = v.name.rsplit(":", 1)[0]
    if var_path.startswith(model_scope):
        var_path = var_path[len(model_scope):]
    tf2_vname = var_path.split("/")
    if tf2_vname[0] == "decoder":
        tf2_vname = tf2_vname[1:]
    if tf2_vname[-1] in vname_mapping:
        tf2_vname[-1] = vname_mapping[tf2_vname[-1]]
    ckpt_key = str(tf2_vname)
    if ckpt_key not in ckpt_vmap:
        # Same exception type as the plain dict lookup, but it names the
        # variable that could not be matched.
        raise KeyError(
            "No checkpoint tensor found for {} (looked up as {})"
            .format(v.name, ckpt_key))
    # Checkpoint kernels carry an extra unit axis (e.g. [1, 768, 2304]);
    # squeeze removes it so the shapes can be compared directly.
    tf2_vvalue = np.squeeze(ckpt_vmap[ckpt_key])
    assert v.shape == tf2_vvalue.shape, \
        "{} has different shape: gpt2_tf2 {} vs gpt2_tf1 {}" \
        .format(v.name, str(v.shape), str(tf2_vvalue.shape))
    v.assign(tf2_vvalue)

## Test loaded gpt2_tf2 model

In [14]:
# Build the encoder whose encode()/decode() methods are used in the cells
# below to convert between text and ids.
# NOTE(review): presumably the arguments are the model name and the models
# directory — confirm against encoder.get_encoder.
enc = encoder.get_encoder("117M", "../models")

In [15]:
# Encode the prompt, convert it to an int32 tensor and add a leading axis of
# size 1 in front of the encoded sequence.
prompt_ids = enc.encode("Can we be friends?")
prompt_tensor = tf.convert_to_tensor(prompt_ids, dtype=tf.int32)
context = prompt_tensor[tf.newaxis, ...]

In [16]:
# Generate a continuation of the prompt with the converted model.
# NOTE(review): no random seed is set in the visible cells, and the two
# recorded generations in this notebook differ, so the text is not expected
# to be reproducible between runs.
sampling_options = dict(length=200, top_p=0, top_k=40, batch_size=1)
output = sample_tf2.sample_sequence(
    gpt2_model=gpt2,
    context=context,
    **sampling_options
)

In [17]:
# Take the first sampled sequence, convert it to a NumPy array and decode it
# back to text; the decoded string is the cell's displayed output.
enc.decode(output[0].numpy())

"Can we be friends?\n\n\n[22]\n\nYou're a guy, you're a guy.\n\nBut the world needs your man.\n\nYou want me to get married?\n\n[3]\n\nYou are like a lot more than a lot of guys.\n\n[31]\n\nHe's not as smart as a pretty\n\n[3]\n\nBut like a lot more than a lot of guys.\n\n[31]\n\nThat's for sure. I'm like a lot more than a lot of guys.\n\n[31]\n\nI got a bit of a lass.\n\n[31]\n\nIt's very hard for a lot of guys to do.\n\n[31]\n\nI feel a little bit like a lot of guys.\n\n[33]\n\nHow you guys like a lot more than a lot of guys is\n\npretty cool I feel a little bit like…\n\nGah"

## Save model weights

In [19]:
# Persist the converted weights as an HDF5 file.
# The target directory is created first: nothing earlier in the notebook
# creates "../models/117M_tf2", and the HDF5 writer does not create missing
# parent directories itself.
weights_path = "../models/117M_tf2/pretrained_weights.h5"
os.makedirs(os.path.dirname(weights_path), exist_ok=True)
gpt2.save_weights(weights_path)

---
# Load the saved weights into a fresh model

In [1]:
import tensorflow as tf
import numpy as np
import time, os, logging
import sample_tf2, model_tf2, encoder

In [2]:
# Build a second, untrained GPT-2 instance with the '117M' hyper-parameters;
# the saved weights are loaded into it a few cells below.
hparams = model_tf2.HPARAMS['117M']
gpt2_loaded = model_tf2.GPT2(hparams)

In [3]:
# Tiny 2x2 integer batch used only to call the model once
# (presumably token ids — TODO confirm against model_tf2).
dummy_batch = np.array([[35, 789], [98, 69]])
X = tf.convert_to_tensor(dummy_batch)

In [4]:
# Call the fresh model once on the dummy batch before loading weights.
# NOTE(review): presumably needed because a subclassed Keras model only
# creates its variables on first call, and HDF5 weights cannot be loaded
# before those variables exist — confirm against model_tf2.
# The values displayed by this cell come from the model before load_weights
# has run.
gpt2_loaded(X, None)

(<tf.Tensor: shape=(2, 2, 50257), dtype=float32, numpy=
 array([[[-0.8403097 ,  0.63453525,  0.07967057, ..., -0.10889839,
           0.85170555,  0.8436543 ],
         [-0.3696621 ,  0.6419816 , -0.18931939, ...,  0.36509135,
           1.0463271 , -0.19214429]],
 
        [[-0.39902788,  0.5571195 ,  0.05451128, ..., -0.8557698 ,
          -0.0439152 ,  0.18174838],
         [-0.20500249,  0.73908824,  0.29277343, ..., -0.00735546,
           0.5092921 , -0.09561932]]], dtype=float32)>,
 <tf.Tensor: shape=(2, 12, 2, 12, 2, 64), dtype=float32, numpy=
 array([[[[[[-7.12434798e-02, -1.84988201e-01,  3.80043268e-01, ...,
              3.41989934e-01,  7.28234172e-01,  1.31256014e-01],
            [-1.03298712e+00,  6.30757689e-01, -7.59895802e-01, ...,
              4.98690188e-01, -2.98087478e-01,  2.68680334e-01]],
 
           [[-9.56493616e-03,  2.11605728e-01, -1.09561317e-01, ...,
             -5.15036762e-01,  2.61269659e-01, -4.64162678e-02],
            [-7.83998489e-01, -3.1193

In [5]:
# Load the weights from the same HDF5 path that the save_weights cell
# earlier in this notebook wrote to.
gpt2_loaded.load_weights("../models/117M_tf2/pretrained_weights.h5")

In [7]:
# Print the per-layer parameter table of the reloaded model; the recorded
# totals match those of the converted model above (124,439,808 parameters).
gpt2_loaded.summary()

Model: "gpt2_tf2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        multiple                  39383808  
_________________________________________________________________
decoder (Decoder)            multiple                  85056000  
_________________________________________________________________
look_up (LookUp)             multiple                  38597376  
Total params: 124,439,808
Trainable params: 124,439,808
Non-trainable params: 0
_________________________________________________________________


## Test loaded gpt2_tf2 model

In [8]:
# Build the encoder whose encode()/decode() methods are used in the cells
# below to convert between text and ids.
# NOTE(review): presumably the arguments are the model name and the models
# directory — confirm against encoder.get_encoder.
enc = encoder.get_encoder("117M", "../models")

In [9]:
# Encode the prompt, convert it to an int32 tensor and add a leading axis of
# size 1 in front of the encoded sequence.
prompt_ids = enc.encode("Can we be friends?")
prompt_tensor = tf.convert_to_tensor(prompt_ids, dtype=tf.int32)
context = prompt_tensor[tf.newaxis, ...]

In [10]:
# Generate a continuation of the prompt with the reloaded model.
# NOTE(review): no random seed is set in the visible cells, and the two
# recorded generations in this notebook differ, so the text is not expected
# to be reproducible between runs.
sampling_options = dict(length=200, top_p=0, top_k=40, batch_size=1)
output = sample_tf2.sample_sequence(
    gpt2_model=gpt2_loaded,
    context=context,
    **sampling_options
)

In [11]:
# Take the first sampled sequence, convert it to a NumPy array and decode it
# back to text; the decoded string is the cell's displayed output.
enc.decode(output[0].numpy())

'Can we be friends?\n\n\nThe moment\n\nThe moment was also known as the "in the moment". We are all connected by the moment. The moment is the moment. The moment is everything that is happening within every moment. The moment is the moment for you.\n\nThe moment was that happened before. The moment is the moment when you think, the moment is a moment you live by feeling. The moment is the moment of the moment without being, being the moment that you feel the moment before. The moment is the moment that you feel, feeling.\n\nThe moment you feel the moment is the moment when you have nothing. The moment is the moment when you believe something but you realize it doesn\'t exist, are completely lost. The moment is the moment that you let go of. The moment is the moment that you realize you don\'t exist that nothing exists. The moment is the moment when you come up with a plan, your thought and actions make you realize that reality exists.'