In [1]:
# %load_ext autoreload
# %autoreload 2

In [2]:
!{sys.executable} -m pip install --upgrade tensorflow-gpu tensorflow_datasets tensorflow_text



In [3]:
import sys

# Use this if running this notebook from within its place in the truera repository.
sys.path.insert(0, "..")

from IPython.display import display
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds
import tensorflow_text as text 

In [4]:
print("Version: ", tf.__version__)
print("Eager mode: ", tf.executing_eagerly())
print("Hub version: ", hub.__version__)
print("GPU is", "available" if tf.config.list_physical_devices("GPU") else "NOT AVAILABLE")

Version:  2.8.0
Eager mode:  True
Hub version:  0.12.0
GPU is available


In [5]:
preprocessor = hub.KerasLayer(
    "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
)
encoder = hub.KerasLayer(
    "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4",
    trainable=True
)

In [6]:
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string)
encoder_inputs = preprocessor(text_input, training=True)
print(encoder_inputs)
outputs = encoder(encoder_inputs)
pooled_output = outputs["pooled_output"]      # [batch_size, 768].
sequence_output = outputs["sequence_output"]  # [batch_size, seq_length, 768].

{'input_type_ids': <KerasTensor: shape=(None, 128) dtype=int32 (created by layer 'keras_layer')>, 'input_mask': <KerasTensor: shape=(None, 128) dtype=int32 (created by layer 'keras_layer')>, 'input_word_ids': <KerasTensor: shape=(None, 128) dtype=int32 (created by layer 'keras_layer')>}


In [7]:
[tw.name for tw in encoder.trainable_weights]

['word_embeddings/embeddings:0',
 'position_embedding/embeddings:0',
 'type_embeddings/embeddings:0',
 'embeddings/layer_norm/gamma:0',
 'embeddings/layer_norm/beta:0',
 'transformer/layer_0/self_attention/query/kernel:0',
 'transformer/layer_0/self_attention/query/bias:0',
 'transformer/layer_0/self_attention/key/kernel:0',
 'transformer/layer_0/self_attention/key/bias:0',
 'transformer/layer_0/self_attention/value/kernel:0',
 'transformer/layer_0/self_attention/value/bias:0',
 'transformer/layer_0/self_attention/attention_output/kernel:0',
 'transformer/layer_0/self_attention/attention_output/bias:0',
 'transformer/layer_0/self_attention_layer_norm/gamma:0',
 'transformer/layer_0/self_attention_layer_norm/beta:0',
 'transformer/layer_0/intermediate/kernel:0',
 'transformer/layer_0/intermediate/bias:0',
 'transformer/layer_0/output/kernel:0',
 'transformer/layer_0/output/bias:0',
 'transformer/layer_0/output_layer_norm/gamma:0',
 'transformer/layer_0/output_layer_norm/beta:0',
 'trans

In [8]:
preprocessor(tf.constant(["word word word"]))

{'input_type_ids': <tf.Tensor: shape=(1, 128), dtype=int32, numpy=
 array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
       dtype=int32)>,
 'input_mask': <tf.Tensor: shape=(1, 128), dtype=int32, numpy=
 array([[1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 

In [9]:
embedding_weights_list = [tw for tw in encoder.trainable_weights if "word_embeddings" in tw.name]
assert len(embedding_weights_list) == 1

In [10]:
embedding_weights = embedding_weights_list[0]

In [18]:
with tf.GradientTape(persistent=True) as gt:
    gt.watch(embedding_weights)

    inputs = preprocessor(tf.constant(["word one word two word"]))
    outputs = encoder(inputs)
    output = tf.reduce_sum(outputs['sequence_output'])


In [19]:
grad = gt.gradient(outputs, embedding_weights)
grad

<tensorflow.python.framework.indexed_slices.IndexedSlices at 0x7f63e3bee750>

In [20]:
grad.indices

<tf.Tensor: shape=(128,), dtype=int32, numpy=
array([ 101, 2773, 2028, 2773, 2048, 2773,  102,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0], dtype=int32)>

In [21]:
grad.values.shape

TensorShape([128, 768])

In [22]:
grad.values[1]

<tf.Tensor: shape=(768,), dtype=float32, numpy=
array([-1.25007851e+02, -1.16616158e+02,  4.70853271e+02,  7.50748749e+01,
       -1.41797577e+02,  3.54099182e+02,  6.12814102e+01, -2.27727142e+02,
        2.12141602e+02,  1.58753143e+02, -2.50591125e+02,  8.39112473e+01,
        2.54393024e+01,  2.45204391e+02, -1.79735508e+01, -7.83793182e+01,
        1.69526321e+02, -3.80983124e+02, -2.30037811e+02,  1.25235664e+02,
        1.10139015e+02, -2.44703064e+02,  1.14823318e+02,  2.01582642e+02,
       -3.26937790e+01, -1.17500191e+02, -1.96446091e+02,  2.36987686e+02,
        4.09021393e+02, -3.59425259e+00, -2.85753784e+02, -1.92004532e+02,
        2.05063843e+02, -8.95009003e+01, -1.44399338e+02,  4.08184692e+02,
        6.16020508e+01, -2.84077637e+02, -3.11698742e+01, -1.50711761e+02,
       -6.22343407e+01,  3.16134003e+02, -9.59540100e+01,  2.02207993e+02,
       -1.96841640e+01,  1.62697281e+02,  2.61211945e+02,  4.65689812e+01,
       -3.49468414e+02, -1.50411469e+02, -1.37040222

In [23]:
grad.values[1] - grad.values[3]

<tf.Tensor: shape=(768,), dtype=float32, numpy=
array([-1.63791214e+02, -2.96862106e+01,  2.37953033e+02, -2.36763306e+01,
        9.02274933e+01, -6.28977661e+01,  9.78566818e+01,  4.04305115e+01,
       -8.64241333e+01,  8.92923279e+01, -1.31505280e+01,  3.17405396e+01,
       -1.85641968e+02, -1.12369843e+01,  1.79926739e+01,  2.91771393e+01,
        5.22227402e+01, -4.59380798e+01, -9.42918701e+01, -6.48062057e+01,
       -6.60432358e+01, -7.02353821e+01, -3.01473999e+01,  7.32581940e+01,
       -1.62444534e+01, -1.42260437e+01, -1.79798721e+02,  3.48776245e+01,
       -2.03058472e+01,  1.13589907e+01,  1.19159149e+02, -1.25444031e+02,
        9.39758759e+01,  6.46292725e+01, -2.16967743e+02, -4.48544617e+01,
       -9.76019287e+01,  7.02752991e+01, -1.44512299e+02, -1.16094818e+01,
        1.30071602e+01, -3.82174377e+01,  7.39243164e+01, -1.10922699e+01,
       -3.86641617e+01, -1.36322632e+01,  7.25027771e+01,  1.80844822e+01,
        1.31689758e+02, -2.20262756e+01, -1.77777390

In [32]:
# graph retrieval using @tf.function

@tf.function
def evaluate_pipeline(texts):
    inputs = preprocessor(texts)
    outputs = encoder(inputs)
    return tf.reduce_sum(outputs['pooled_output'])

In [33]:
evaluate_pipeline(tf.constant(["this is a sentence"]))

<tf.Tensor: shape=(), dtype=float32, numpy=6.03055>

In [34]:
evaluate_pipeline

<tensorflow.python.eager.def_function.Function at 0x7ff21db35e10>

In [35]:
f = evaluate_pipeline.get_concrete_function(tf.constant(['this is a sentence']))
f

<ConcreteFunction evaluate_pipeline(texts) at 0x7FF21DB351D0>

In [36]:
f.graph

<tensorflow.python.framework.func_graph.FuncGraph at 0x7ff2cc116be0>

In [39]:
for op in f.graph.get_operations():
    print(op.name)
    for ins in op.inputs:
        print("-->", ins)
    for outs in op.outputs:
        print("<--", outs)

texts
<-- Tensor("texts:0", shape=(1,), dtype=string)
keras_layer/99662
<-- Tensor("keras_layer/99662:0", shape=(), dtype=resource)
keras_layer/99664
<-- Tensor("keras_layer/99664:0", shape=(), dtype=int64)
keras_layer/StatefulPartitionedCall
--> Tensor("texts:0", shape=(1,), dtype=string)
--> Tensor("keras_layer/99662:0", shape=(), dtype=resource)
--> Tensor("keras_layer/99664:0", shape=(), dtype=int64)
<-- Tensor("keras_layer/StatefulPartitionedCall:0", shape=(1, 128), dtype=int32)
<-- Tensor("keras_layer/StatefulPartitionedCall:1", shape=(1, 128), dtype=int32)
<-- Tensor("keras_layer/StatefulPartitionedCall:2", shape=(1, 128), dtype=int32)
keras_layer_1/99669
<-- Tensor("keras_layer_1/99669:0", shape=(), dtype=resource)
keras_layer_1/99671
<-- Tensor("keras_layer_1/99671:0", shape=(), dtype=resource)
keras_layer_1/99673
<-- Tensor("keras_layer_1/99673:0", shape=(), dtype=resource)
keras_layer_1/99675
<-- Tensor("keras_layer_1/99675:0", shape=(), dtype=resource)
keras_layer_1/99677
<

In [40]:
op = f.graph.get_operation_by_name("keras_layer_1/StatefulPartitionedCall")
# These are probably pooled output and the per token outputs.clear
some_embeddings = op.outputs

In [41]:
with tf.GradientTape(persistent=True) as gt:
    gt.watch(some_embeddings)
    out = f(tf.constant(["this is a sentence"]))

In [42]:
print(gt.gradient(out, some_embeddings))













[None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, Non

In [43]:
imported = tf.saved_model.load("/home/piotrm/tfhub_modules/d760773f85f64fc84ae0b47310f7cfe3bcec4868")
f = imported.signatures['serving_default']
print(f)

ConcreteFunction signature_wrapper(*, input_word_ids, input_mask, input_type_ids)
  Args:
    input_mask: int32 Tensor, shape=(None, None)
    input_type_ids: int32 Tensor, shape=(None, None)
    input_word_ids: int32 Tensor, shape=(None, None)
  Returns:
    {'bert_encoder': <1>, 'bert_encoder_1': <2>, 'bert_encoder_10': <3>, 'bert_encoder_11': <4>, 'bert_encoder_12': <5>, 'bert_encoder_13': <6>, 'bert_encoder_14': <7>, 'bert_encoder_2': <8>, 'bert_encoder_3': <9>, 'bert_encoder_4': <10>, 'bert_encoder_5': <11>, 'bert_encoder_6': <12>, 'bert_encoder_7': <13>, 'bert_encoder_8': <14>, 'bert_encoder_9': <15>}
      <1>: float32 Tensor, shape=(None, 768)
      <2>: float32 Tensor, shape=(None, None, 768)
      <3>: float32 Tensor, shape=(None, None, 768)
      <4>: float32 Tensor, shape=(None, None, 768)
      <5>: float32 Tensor, shape=(None, None, 768)
      <6>: float32 Tensor, shape=(None, 768)
      <7>: float32 Tensor, shape=(None, None, 768)
      <8>: float32 Tensor, shape=(None, 

In [44]:
for op in f.graph.get_operations():
    print(op.name, op.outputs)

input_mask [<tf.Tensor 'input_mask:0' shape=(None, None) dtype=int32>]
input_type_ids [<tf.Tensor 'input_type_ids:0' shape=(None, None) dtype=int32>]
input_word_ids [<tf.Tensor 'input_word_ids:0' shape=(None, None) dtype=int32>]
unknown [<tf.Tensor 'unknown:0' shape=() dtype=resource>]
unknown_0 [<tf.Tensor 'unknown_0:0' shape=() dtype=resource>]
unknown_1 [<tf.Tensor 'unknown_1:0' shape=() dtype=resource>]
unknown_2 [<tf.Tensor 'unknown_2:0' shape=() dtype=resource>]
unknown_3 [<tf.Tensor 'unknown_3:0' shape=() dtype=resource>]
unknown_4 [<tf.Tensor 'unknown_4:0' shape=() dtype=resource>]
unknown_5 [<tf.Tensor 'unknown_5:0' shape=() dtype=resource>]
unknown_6 [<tf.Tensor 'unknown_6:0' shape=() dtype=resource>]
unknown_7 [<tf.Tensor 'unknown_7:0' shape=() dtype=resource>]
unknown_8 [<tf.Tensor 'unknown_8:0' shape=() dtype=resource>]
unknown_9 [<tf.Tensor 'unknown_9:0' shape=() dtype=resource>]
unknown_10 [<tf.Tensor 'unknown_10:0' shape=() dtype=resource>]
unknown_11 [<tf.Tensor 'unkno

In [45]:
for capture in f.captured_inputs:
    print(capture)

tf.Tensor(<Resource Tensor>, shape=(), dtype=resource)
tf.Tensor(<Resource Tensor>, shape=(), dtype=resource)
tf.Tensor(<Resource Tensor>, shape=(), dtype=resource)
tf.Tensor(<Resource Tensor>, shape=(), dtype=resource)
tf.Tensor(<Resource Tensor>, shape=(), dtype=resource)
tf.Tensor(<Resource Tensor>, shape=(), dtype=resource)
tf.Tensor(<Resource Tensor>, shape=(), dtype=resource)
tf.Tensor(<Resource Tensor>, shape=(), dtype=resource)
tf.Tensor(<Resource Tensor>, shape=(), dtype=resource)
tf.Tensor(<Resource Tensor>, shape=(), dtype=resource)
tf.Tensor(<Resource Tensor>, shape=(), dtype=resource)
tf.Tensor(<Resource Tensor>, shape=(), dtype=resource)
tf.Tensor(<Resource Tensor>, shape=(), dtype=resource)
tf.Tensor(<Resource Tensor>, shape=(), dtype=resource)
tf.Tensor(<Resource Tensor>, shape=(), dtype=resource)
tf.Tensor(<Resource Tensor>, shape=(), dtype=resource)
tf.Tensor(<Resource Tensor>, shape=(), dtype=resource)
tf.Tensor(<Resource Tensor>, shape=(), dtype=resource)
tf.Tensor(

In [46]:
emb = f.graph.variables[0]

In [47]:
emb

<tf.Variable 'word_embeddings/embeddings:0' shape=(30522, 768) dtype=float32, numpy=
array([[-0.01018257, -0.06154883, -0.02649689, ..., -0.01985357,
        -0.03720997, -0.00975152],
       [-0.01170495, -0.06002603, -0.03233192, ..., -0.01681456,
        -0.04009988, -0.0106634 ],
       [-0.01975381, -0.06273633, -0.03262176, ..., -0.01650258,
        -0.04198876, -0.00323178],
       ...,
       [-0.02176224, -0.0556396 , -0.01346345, ..., -0.00432698,
        -0.0151355 , -0.02489496],
       [-0.04617237, -0.05647721, -0.00192082, ...,  0.01568751,
        -0.01387033, -0.00945213],
       [ 0.00145601, -0.08208051, -0.01597912, ..., -0.00811687,
        -0.04746607,  0.07527421]], dtype=float32)>

In [50]:
opu = f.graph.get_operation_by_name("unknown_1")

In [51]:
opu.op_def

name: "Placeholder"
output_arg {
  name: "output"
  type_attr: "dtype"
}
attr {
  name: "dtype"
  type: "type"
}
attr {
  name: "shape"
  type: "shape"
  default_value {
    shape {
      unknown_rank: true
    }
  }
}

In [52]:
for op in f.graph.get_operations():
    if op.type != "StatefulPartitionedCall": continue
    print(op.name, op.type)
    print(dir(op))
    #print(op.values())
    for inp in op.inputs:
        print("--> ", inp)
    for outp in op.outputs:
        print("<-- ", outp)
    # print(op.f)
    # print(op)
    #print(op)
    #for inp in op.inputs:
    #    print("  ", inp.name)
    #if emb in op.inputs:
    #    print(op)

StatefulPartitionedCall StatefulPartitionedCall
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__tf_tensor__', '__weakref__', '_add_control_input', '_add_control_inputs', '_add_outputs', '_add_while_inputs', '_c_op', '_clear_attr', '_colocation_code_locations', '_colocation_dict', '_control_flow_context', '_control_flow_post_processing', '_control_outputs', '_device_assignments', '_device_code_locations', '_get_attr_bool', '_get_attr_int', '_get_attr_type', '_get_control_flow_context', '_gradient_function', '_graph', '_id', '_id_value', '_input_types', '_inputs_val', '_is_stateful', '_original_op', '_output_types', '_outputs', '_remove_all_control_inputs', '_set_attr', '_set_attr_with_buf', '_set_control_fl

In [54]:
t = f.graph.get_tensor_by_name("input_word_ids:0")
op = t.consumers()[0]
op.name

'StatefulPartitionedCall'

In [55]:
op.op_def.SerializeToString()

b'\n\x17StatefulPartitionedCall\x12\x0b\n\x04args2\x03Tin\x1a\x0e\n\x06output2\x04Tout"\x13\n\x03Tin\x12\nlist(type)(\x01"\x14\n\x04Tout\x12\nlist(type)(\x01"\t\n\x01f\x12\x04func"\x14\n\x06config\x12\x06string\x1a\x02\x12\x00"\x1a\n\x0cconfig_proto\x12\x06string\x1a\x02\x12\x00"\x1b\n\rexecutor_type\x12\x06string\x1a\x02\x12\x00\x88\x01\x01\xa8\x01\x01'

In [56]:
opf = op.op_def.attr[2]

In [57]:
opf

name: "f"
type: "func"

In [58]:
dir(opf)

['ByteSize',
 'Clear',
 'ClearExtension',
 'ClearField',
 'CopyFrom',
 'DESCRIPTOR',
 'DiscardUnknownFields',
 'Extensions',
 'FindInitializationErrors',
 'FromString',
 'HasExtension',
 'HasField',
 'IsInitialized',
 'ListFields',
 'MergeFrom',
 'MergeFromString',
 'ParseFromString',
 'RegisterExtension',
 'SerializePartialToString',
 'SerializeToString',
 'SetInParent',
 'UnknownFields',
 'WhichOneof',
 '_CheckCalledFromGeneratedFile',
 '_SetListener',
 '__class__',
 '__deepcopy__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__unicode__',
 '_extensions_by_name',
 '_extensions_by_number',
 'allowed_values',
 'default_value',
 'description',
 'has_minimum',


In [None]:
dir(op)

In [None]:
for op in op._graph.get_operations():
    print(op.name)

In [None]:
tensor = f.captured_inputs[0]
tensor

In [None]:
og = f.graph.outer_graph

for op in og.collections:
    print(op)
    for thing in og.get_collection_ref(op):
        print(thing)

In [None]:
og.as_graph_def()

In [None]:
# Some info about DT_RESOURCE
# - https://www.youtube.com/watch?v=uaRO0AV6Tto
# StatefulPartitionedCall
# - https://www.tensorflow.org/api_docs/python/tf/raw_ops/StatefulPartitionedCall
# - https://github.com/onnx/tensorflow-onnx/issues/755

In [None]:
f.graph.get_operation_by_name("word_embeddings/embeddings")

In [None]:
input = tf.constant(["hello there"])

vs = []

with tf.GradientTape(persistent=True) as gt:
    encoded = preprocessor(input)

    for v in encoder.variables:
        if "word_embedding" not in v.name: continue
        print("watching", v.name)
        gt.watch(v)

    output = tf.math.reduce_sum(encoder(encoded)['pooled_output'])

In [None]:
d = gt.gradient(output, vs)
print(d)

In [None]:
@tf.function
def run_model(text):
    encoder_inputs = preprocessor(text)
    outputs = encoder(encoder_inputs)
    return outputs["pooled_output"]

In [None]:
g = run_model.get_concrete_function(input).graph

In [None]:
# g.get_tensor_by_name("word_embeddings/embeddings:0")
g.get_tensor_by_name("transformer/layer_0:0")

In [None]:
encoder_inputs['input_mask'].g

In [None]:
for c in g.collections:
    print(c, g.get_collection_ref(c))

In [None]:
for c in g.outer_graph.collections:
    print(c, g.outer_graph.get_collection_ref(c))

In [None]:
g.outer_graph.as_graph_def()

In [None]:
g.get_operation_by_name("keras_layer_1/55213").outputs

In [None]:
for op in g.get_operations():
    if "word_embedding" in op.name:
        print(op)


In [None]:
vs = g.get_collection_ref(('__varscope',))[0]

In [None]:
vs.current_scope.trainable_variable

In [None]:
model = tf.keras.Model(text_input, sequence_output)
model_encoder = tf.keras.Model(text_input, encoder_inputs)

In [None]:
input = tf.constant(["hello there"])
model_encoder(input)

In [None]:
g = encoder.build(input_shape=input.shape)

In [None]:
from tensorflow.python.framework.ops import disable_eager_execution

disable_eager_execution()

In [None]:
g = out1.graph
all_op_outputs = []
for op in g.get_operations():
    try:
        for o in op.outputs:
            if o.dtype != tf.float32: continue
            all_op_outputs.append(o)
    except:
        print(f"failed to get tensor for output of {op.name}")

In [None]:
with tf.GradientTape(persistent=True) as gt:
    inputs = preprocessor(tf.constant(["this is a sentence"]))

    gt.watch(all_op_outputs)

    outputs = encoder(inputs, training=True)

    print(outputs['pooled_output'][0].graph == g)

    gt.watch(outputs)

grads = gt.gradient(tf.reduce_sum(outputs['encoder_outputs'][2]), all_op_outputs)
print(grads)

In [None]:
inputs = preprocessor(tf.constant(["this is a sentence"]))
outputs = encoder(inputs, training=True)

for v1 in all_op_outputs:
    for v2 in all_op_outputs:
        if v1 == v2: continue
        grads_lazy = tf.gradients(v1, v2)#tf.reduce_sum(outputs['pooled_output']), vs)

        if v1.graph != v2.graph:
            raise ValueError("vars' graphs are not the same!")

        if grads_lazy[0] is not None:
            print(v1, v2, grads_lazy)
        else:
            print(".", end='')