In [None]:

import numpy as np
import tensorflow as tf

"""
Adapted from https://github.com/gpeyre/SinkhornAutoDiff
and from https://github.com/dfdazac/wassdistance/blob/master/layers.py
and from https://github.com/michaelsdr/sinkformers/blob/main/nlp-tutorial/text-classification-transformer/sinkhorn.py
"""

def shape_list(x, out_type=tf.int32):
  """Deal with dynamic shape in tensorflow cleanly."""
  static = x.shape.as_list()
  dynamic = tf.shape(x, out_type=out_type)
  return [dynamic[i] if s is None else s for i, s in enumerate(static)]

def sinkhorn_distance(input_tensor, eps, max_iter, 
                  reduction='none',
                  stopThr=1e-2):
  
  C = input_tensor
  C_shape = shape_list(C)

  x_points = C_shape[-2]
  y_points = C_shape[-1]
  batch_size = C_shape[0]
    
  # both marginals are fixed with equal weights
  mu = 1.0 / x_points * tf.ones((batch_size, x_points))
  nu = 1.0 / y_points * tf.ones((batch_size, y_points))

  u = tf.zeros_like(mu)
  v = tf.zeros_like(nu)

  cpt = tf.constant(0)
  err = tf.constant(1.0)

  c = lambda cpt, u, v, err: tf.logical_and(cpt < max_iter, err > stopThr)

  def M( C, u, v):
    "Modified cost for logarithmic updates"
    "$M_{ij} = (-c_{ij} + u_i + v_j) / \epsilon$"
    return (-C + tf.expand_dims(u, -1) + tf.expand_dims(v, -2) )/eps

  def loop_func(cpt, u, v, err):
    u1 = tf.identity(u)  # useful to check the update

    cpt = cpt + 1

    u = eps * (tf.log(mu+1e-8) - tf.reduce_logsumexp(M(C, u, v), axis=-1)) + u
    v = eps * (tf.log(nu+1e-8) - tf.reduce_logsumexp(tf.transpose(M(C, u, v), [0, 2, 1]), axis=-1)) + v

    err = tf.reduce_mean(tf.reduce_sum(tf.abs(u - u1), axis=-1))

    return cpt, u, v, err

  _, u_final, v_final, _ = tf.while_loop(c, loop_func, loop_vars=[cpt, u, v, err])
  U, V = tf.identity(u_final), tf.identity(v_final)

  # Transport plan pi = diag(a)*K*diag(b)
  pi = tf.exp(M(C, U, V))

  cost = tf.reduce_sum(pi * C, axis=(-2, -1))

  return pi, C, U, V, cost



In [None]:
eps = 1.0
max_iter = 10
stopThr = 1e-10

In [None]:
def _cost_matrix(x, y, p=2):
    "Returns the matrix of $|x_i-y_j|^p$."
    x_col = tf.expand_dims(x, axis=-2)
    y_lin = tf.expand_dims(y, axis=-3)
    C = tf.reduce_sum((tf.abs(x_col - y_lin)) ** p, -1)
    return C

In [None]:
x = np.random.random((1, 10, 32)).astype(np.float32)
y = np.random.random((1, 16, 32)).astype(np.float32)

In [None]:
C = _cost_matrix(tf.constant(x), tf.constant(y), p=2)

[pi, C_, U, V, final_cost] = sinkhorn_distance(C, eps, max_iter, 
                  reduction='none',
                  stopThr=1e-10)

In [None]:
sess = tf.Session()
resp = sess.run([pi, C, U, V, final_cost])

In [None]:
resp[0].sum(axis=-2)

In [None]:
import torch
import torch.nn as nn

# Adapted from https://github.com/gpeyre/SinkhornAutoDiff
class SinkhornDistance(nn.Module):
    r"""
    Given two empirical measures each with :math:`P_1` locations
    :math:`x\in\mathbb{R}^{D_1}` and :math:`P_2` locations :math:`y\in\mathbb{R}^{D_2}`,
    outputs an approximation of the regularized OT cost for point clouds.
    Args:
        eps (float): regularization coefficient
        max_iter (int): maximum number of Sinkhorn iterations
        reduction (string, optional): Specifies the reduction to apply to the output:
            'none' | 'mean' | 'sum'. 'none': no reduction will be applied,
            'mean': the sum of the output will be divided by the number of
            elements in the output, 'sum': the output will be summed. Default: 'none'
    Shape:
        - Input: :math:`(N, P_1, D_1)`, :math:`(N, P_2, D_2)`
        - Output: :math:`(N)` or :math:`()`, depending on `reduction`
    """
    def __init__(self, eps, max_iter, reduction='none'):
        super(SinkhornDistance, self).__init__()
        self.eps = eps
        self.max_iter = max_iter
        self.reduction = reduction

    def forward(self, x, y):
        # The Sinkhorn algorithm takes as input three variables :
        C = self._cost_matrix(x, y)  # Wasserstein cost function
        x_points = x.shape[-2]
        y_points = y.shape[-2]
        if x.dim() == 2:
            batch_size = 1
        else:
            batch_size = x.shape[0]

        # both marginals are fixed with equal weights
        mu = torch.empty(batch_size, x_points, dtype=torch.float,
                         requires_grad=False).fill_(1.0 / x_points).squeeze()
        nu = torch.empty(batch_size, y_points, dtype=torch.float,
                         requires_grad=False).fill_(1.0 / y_points).squeeze()

        u = torch.zeros_like(mu)
        v = torch.zeros_like(nu)
        # To check if algorithm terminates because of threshold
        # or max iterations reached
        actual_nits = 0
        # Stopping criterion
        thresh = 1e-1

        # Sinkhorn iterations
        for i in range(self.max_iter):
            u1 = u  # useful to check the update
            u = self.eps * (torch.log(mu+1e-8) - torch.logsumexp(self.M(C, u, v), dim=-1)) + u
            v = self.eps * (torch.log(nu+1e-8) - torch.logsumexp(self.M(C, u, v).transpose(-2, -1), dim=-1)) + v
            err = (u - u1).abs().sum(-1).mean()

            actual_nits += 1
            if err.item() < thresh:
                break

        U, V = u, v
        # Transport plan pi = diag(a)*K*diag(b)
        pi = torch.exp(self.M(C, U, V))
        # Sinkhorn distance
        cost = torch.sum(pi * C, dim=(-2, -1))

        if self.reduction == 'mean':
            cost = cost.mean()
        elif self.reduction == 'sum':
            cost = cost.sum()

        return cost, pi, C

    def M(self, C, u, v):
        "Modified cost for logarithmic updates"
        "$M_{ij} = (-c_{ij} + u_i + v_j) / \epsilon$"
        return (-C + u.unsqueeze(-1) + v.unsqueeze(-2)) / self.eps

    @staticmethod
    def _cost_matrix(x, y, p=2):
        "Returns the matrix of $|x_i-y_j|^p$."
        x_col = x.unsqueeze(-2)
        y_lin = y.unsqueeze(-3)
        C = torch.sum((torch.abs(x_col - y_lin)) ** p, -1)
        return C

    @staticmethod
    def ave(u, u1, tau):
        "Barycenter subroutine, used by kinetic acceleration through extrapolation."
        return tau * u + (1 - tau) * u1

In [None]:
sink = SinkhornDistance(eps=eps, max_iter=max_iter)

In [None]:

cost, pi, C = sink.forward(torch.tensor(x), torch.tensor(y))

In [None]:
pi.sum()

In [None]:
from tokenizers import (ByteLevelBPETokenizer,
      CharBPETokenizer,
      SentencePieceBPETokenizer,
      BertWordPieceTokenizer)

vocab = '/data/xuht/uncased_L-12_H-768_A-12_ilm_v1/vocab_uncased_en.txt'

chinese_bpe_tokenizer = BertWordPieceTokenizer(
    vocab, 
    lowercase=True)

In [None]:
help(chinese_bpe_tokenizer.decode)

In [None]:
sess.run(tf.range(10))

In [None]:
np.exp(-1000)

In [None]:
sess.run(tf.not_equal([1.,2.,3.,0.], 0))

In [None]:
import sklearn.preprocessing
a = [1,0,3]
label_binarizer = sklearn.preprocessing.LabelBinarizer()
label_binarizer.fit(range(max(a)+1))
b = label_binarizer.transform(a)
print('{0}'.format(b))

In [None]:
label = np.random.randint(1, 4, size=[2,5])

In [None]:
label_tf = tf.one_hot(label, depth=4)

In [None]:
one_hot_label = sess.run(label_tf)

In [None]:
label

In [None]:
(one_hot_label.sum(axis=0) !=0)*1

In [None]:
one_hot_label

In [None]:
24*8

In [None]:
def _generate_relative_positions_matrix_t5(length, max_relative_position,
                                        num_buckets=32,
                                        bidirectional=True):
  
  """
  https://github.com/bojone/bert4keras/blob/master/bert4keras/layers.py
  https://github.com/tensorflow/mesh/blob/master/mesh_tensorflow/transformer/transformer_layers.py
  # _relative_position_bucket
  https://gist.github.com/huchenxucs/c65524185e8e35c4bcfae4059f896c16
  """

  tf.logging.info("** apply all distance mat **")
  range_vec = tf.range(length)

  q_idxs = tf.expand_dims(range_vec, 1)
  v_idxs = tf.expand_dims(range_vec, 0)

  distance_mat = v_idxs - q_idxs  
  # range_mat = tf.reshape(tf.tile(range_vec, [length]), [length, length])
  # distance_mat = range_mat - tf.transpose(range_mat)
    
  num_buckets = num_buckets
  max_distance = max_relative_position
  ret = 0
  n = -distance_mat
  if bidirectional:
    num_buckets //= 2
    ret += tf.cast(tf.less(n, 0), 'int32') * num_buckets
    n = tf.abs(n)
  else:
    n = tf.maximum(n, 0)
  # now n is in the range [0, inf)
  max_exact = num_buckets // 2
  is_small = tf.less(n, max_exact)
  val_if_large = max_exact + tf.cast(
      tf.log(tf.cast(n, dtype=tf.float32) / max_exact) /
      tf.log(max_distance / max_exact) * (num_buckets - max_exact),
      'int32',
  )
  val_if_large = tf.minimum(val_if_large, num_buckets - 1)
  tf_switch = (tf.cast(is_small, dtype=tf.int32)) * n + (1-tf.cast(is_small, dtype=tf.int32)) * val_if_large
  ret += tf_switch #tf.switch(is_small, n, val_if_large)
  # ret += tf.where(is_small, n, val_if_large)

  return ret

length=64
max_relative_position=32
num_buckets=32
bidirectional=True

ret_bi = _generate_relative_positions_matrix_t5(length, max_relative_position,
                                        num_buckets=num_buckets,
                                        bidirectional=bidirectional)

ret_uni = _generate_relative_positions_matrix_t5(length, max_relative_position,
                                        num_buckets=num_buckets,
                                        bidirectional=False)

ret = sess.run([ret_bi, ret_uni])



In [None]:
ret[0]

In [None]:
ret[1][-1]

In [None]:
def _relative_position_bucket_(relative_position, bidirectional=True, num_buckets=32, max_distance=128):
        """
        Adapted from Mesh Tensorflow:
        https://github.com/tensorflow/mesh/blob/0cb87fe07da627bf0b7e60475d59f95ed6b5be3d/mesh_tensorflow/transformer/transformer_layers.py#L593
        Translate relative position to a bucket number for relative attention.
        The relative position is defined as memory_position - query_position, i.e.
        the distance in tokens from the attending position to the attended-to
        position.  If bidirectional=False, then positive relative positions are
        invalid.
        We use smaller buckets for small absolute relative_position and larger buckets
        for larger absolute relative_positions.  All relative positions >=max_distance
        map to the same bucket.  All relative positions <=-max_distance map to the
        same bucket.  This should allow for more graceful generalization to longer
        sequences than the model has been trained on.
        Args:
            relative_position: an int32 Tensor
            bidirectional: a boolean - whether the attention is bidirectional
            num_buckets: an integer
            max_distance: an integer
        Returns:
            a Tensor with the same shape as relative_position, containing int32
            values in the range [0, num_buckets)
        """
        ret = 0
        n = -relative_position
        if bidirectional:
            num_buckets //= 2
            ret += (n < 0).to(torch.long) * num_buckets  # mtf.to_int32(mtf.less(n, 0)) * num_buckets
            n = torch.abs(n)
        else:
            n = torch.max(n, torch.zeros_like(n))
        # now n is in the range [0, inf)

        # half of the buckets are for exact increments in positions
        max_exact = num_buckets // 2
        is_small = n < max_exact

        # The other half of the buckets are for logarithmically bigger bins in positions up to max_distance
        val_if_large = max_exact + (
            torch.log(n.float() / max_exact) / math.log(max_distance / max_exact) * (num_buckets - max_exact)
        ).to(torch.long)
        val_if_large = torch.min(val_if_large, torch.full_like(val_if_large, num_buckets - 1))

        ret += torch.where(is_small, n, val_if_large)
        return ret

In [None]:
import torch

In [None]:
context_position = torch.arange(64, dtype=torch.long)[:, None]
memory_position = torch.arange(64, dtype=torch.long)[None, :]
relative_position = memory_position - context_position  # shape (qlen, klen)
resp = _relative_position_bucket_(relative_position, bidirectional=False, num_buckets=32, max_distance=32)

In [None]:
resp[0]

In [None]:
ret[0][25]

In [None]:
(1-segment_ids) * resp[25].numpy()

In [None]:
segment_ids

In [None]:
context_position = torch.arange(64, dtype=torch.long)[:, None]
memory_position = torch.arange(64, dtype=torch.long)[None, :]
relative_position = memory_position - context_position  # shape (qlen, klen)
s1 = _relative_position_bucket_(relative_position, bidirectional=True, num_buckets=32, max_distance=32)

In [None]:
context_position = torch.arange(64, dtype=torch.long)[:, None]
memory_position = torch.arange(64, dtype=torch.long)[None, :]
relative_position = memory_position - context_position  # shape (qlen, klen)
s3 = _relative_position_bucket_(relative_position, bidirectional=False, num_buckets=32, max_distance=32)

In [None]:
context_position = torch.arange(32, dtype=torch.long)[:, None]
memory_position = torch.arange(32, dtype=torch.long)[None, :]
relative_position = memory_position - context_position  # shape (qlen, klen)
s2 = _relative_position_bucket_(relative_position, bidirectional=True, num_buckets=32, max_distance=32)

In [None]:
s1[31]

In [None]:
s2[-1]

In [None]:
s3[32]

In [None]:
a1 = s1 * (1-segment_ids[None, :]) * (1-segment_ids[:, None]) + s3 * (segment_ids[:, None])

In [None]:

segment_ids = [0]*25+[1]*39

segment_mask = tf.cast(np.array([segment_ids, segment_ids]), dtype=tf.int32)
relative_positions_matrix_bi = tf.constant(ret[0])
relative_positions_matrix_uni = tf.constant(ret[1])

# handle mixture of bi and uni-direction relative position
# [1, seq_len, seq_len]
relative_positions_matrix_bi = tf.expand_dims(relative_positions_matrix_bi, axis=0)
relative_positions_matrix_uni = tf.expand_dims(relative_positions_matrix_uni, axis=0)

# s1 * (1-segment_ids[None, :]) * (1-segment_ids[:, None]) + s3 * (segment_ids[:, None])
# [batch, seq_len, seq_len]
relative_positions_matrix = relative_positions_matrix_bi * (1-tf.expand_dims(segment_mask, axis=1)) * (1-tf.expand_dims(segment_mask, axis=-1)) + relative_positions_matrix_uni * (tf.expand_dims(segment_mask, axis=-1))
  

In [None]:
final = sess.run(relative_positions_matrix)

In [None]:
63356*768-21228*512*4

In [56]:
import numpy as np
import tensorflow as tf
init_np = np.random.random((4, 2, 3))
update_np = np.random.random((1, 2, 3))

In [70]:
graph = tf.Graph()
with graph.as_default():
    
    with tf.variable_scope("test", reuse=tf.AUTO_REUSE):
        queue = tf.get_variable('queue', 
                      [4, 2, 3], 
                      dtype=tf.float32,
                      initializer=tf.constant_initializer(-1e10),
                      trainable=False)
    
    
    sess = tf.Session()
    queue_op = queue.assign(tf.concat([tf.constant((update_np+np.random.random((1, 2, 3))).astype(np.float32)), queue[:-1, :, :]], axis=0))
    with tf.control_dependencies([queue_op]):
        loss = tf.reduce_sum(queue)
    #     p = queue + 1
        f = tf.identity(queue)
        Z = tf.reduce_logsumexp(queue, axis=0)

    sess.run(tf.global_variables_initializer())

In [71]:
with graph.as_default():
    print(sess.run(loss))
#     print(sess.run(f))
#     print(sess.run(loss))

-180000000000.0


In [64]:
a = [1,2,3]
for i in [4,5,6]:
    a = [i]+a[:-1]
    print(a)
print(a)

[4, 1, 2]
[5, 4, 1]
[6, 5, 4]
[6, 5, 4]


In [None]:
queue_mask = tf.cast(tf.not_equal(queue, 0), dtype=tf.float32)
Z = tf.reduce_logsumexp(queue-(1-queue_mask)*1e10, axis=0)


In [107]:
b = np.random.random((2, 3)).astype(np.float32)

In [108]:
a = np.array([[1.0,0.0], [0.0,1.0]]).astype(np.float32)
with graph.as_default():
    resp = (sess.run(tf.einsum('nc,ck->nck', tf.constant(a), tf.constant(b))))

In [109]:
a

array([[1., 0.],
       [0., 1.]], dtype=float32)

In [110]:
b

array([[0.9684313 , 0.755596  , 0.557807  ],
       [0.70687485, 0.7513403 , 0.15060379]], dtype=float32)

In [111]:
np.log(np.exp(resp).sum(axis=1))

array([[1.2902815 , 1.1406752 , 1.0104504 ],
       [1.1077851 , 1.1377815 , 0.77128166]], dtype=float32)

In [113]:
resp.sum(axis=1)

array([[0.9684313 , 0.755596  , 0.557807  ],
       [0.70687485, 0.7513403 , 0.15060379]], dtype=float32)

In [112]:
with graph.as_default():
    print(sess.run(tf.reduce_logsumexp(tf.einsum('nc,ck->nck', tf.constant(a), tf.constant(b)), axis=1)))

[[1.2902817 1.1406751 1.0104502]
 [1.107785  1.1377815 0.7712816]]


In [95]:
with graph.as_default():
    print(sess.run(tf.nn.softmax(tf.nn.l2_normalize(np.array([[1., 2., 3., 0.]]), axis=-1))))

[[0.20927122 0.27338809 0.3571492  0.16019149]]


In [114]:
def get_tensor(reader, src_name):
  tensor = reader.get_tensor(src_name)
  return tensor, src_name

reader = tf.train.load_checkpoint('/data/xuht/roberta_base/roberta_base/roberta_base.ckpt')
var_values, var_dtypes = {}, {}
import numpy as np
for (name, _) in tf.train.list_variables('/data/xuht/roberta_base/roberta_base/roberta_base.ckpt'):
  # skip global_step and optimizer states in src ckpt if not FLAGS.retain_all

  tensor, tgt_name = get_tensor(reader, name)
  var_values[tgt_name] = np.array(tensor).astype(np.float32)
  var_dtypes[tgt_name] = tensor.dtype

In [115]:
var_dtypes

{'additional_emb': dtype('float32'),
 'lm_head/bias': dtype('float32'),
 'lm_head/decoder/bias': dtype('float32'),
 'lm_head/decoder/kernel': dtype('float32'),
 'lm_head/dense/bias': dtype('float32'),
 'lm_head/dense/kernel': dtype('float32'),
 'lm_head/layer_norm/bias': dtype('float32'),
 'lm_head/layer_norm/kernel': dtype('float32'),
 'roberta/embeddings/LayerNorm/beta': dtype('float32'),
 'roberta/embeddings/LayerNorm/gamma': dtype('float32'),
 'roberta/embeddings/position_embeddings': dtype('float32'),
 'roberta/embeddings/position_ids': dtype('int64'),
 'roberta/embeddings/token_type_embeddings': dtype('float32'),
 'roberta/embeddings/word_embeddings': dtype('float32'),
 'roberta/encoder/layer_0/attention/output/LayerNorm/beta': dtype('float32'),
 'roberta/encoder/layer_0/attention/output/LayerNorm/gamma': dtype('float32'),
 'roberta/encoder/layer_0/attention/output/dense/bias': dtype('float32'),
 'roberta/encoder/layer_0/attention/output/dense/kernel': dtype('float32'),
 'roberta

In [124]:
var_values['additional_emb']

array([[-0.00720883, -0.03820204, -0.00852567, ..., -0.0142212 ,
         0.00777425,  0.02203818],
       [-0.00958817,  0.01995133, -0.00972462, ..., -0.00903561,
         0.02401158, -0.01534905],
       [ 0.01020542, -0.01633411, -0.00990577, ..., -0.01082997,
        -0.00271305,  0.00275441],
       ...,
       [ 0.01659554,  0.01053478, -0.00044641, ..., -0.00437227,
        -0.00624541,  0.02083383],
       [-0.0223963 ,  0.00948278,  0.00989949, ..., -0.00331239,
         0.01337678,  0.00422556],
       [-0.00702481,  0.00379791, -0.00395953, ..., -0.00576761,
        -0.02389995,  0.00698871]], dtype=float32)

In [122]:
var_values['lm_head/decoder/kernel']

array([[ 0.14758301, -0.03649902,  0.07531738, ..., -0.00227928,
         0.01724243, -0.00158501],
       [ 0.015625  ,  0.00759125, -0.01183319, ..., -0.00222015,
         0.00807953, -0.015625  ],
       [-0.034729  , -0.08728027, -0.01800537, ...,  0.11743164,
        -0.0098114 , -0.03549194],
       ...,
       [ 0.03044128,  0.05044556, -0.03068542, ...,  0.03768921,
         0.00956726,  0.00836182],
       [ 0.06228638, -0.05960083,  0.03071594, ..., -0.09197998,
         0.10803223, -0.01832581],
       [ 0.12585449, -0.01449585,  0.03317261, ...,  0.01206207,
         0.03421021,  0.0168457 ]], dtype=float32)

In [123]:
var_values['roberta/embeddings/word_embeddings']

array([[ 0.14758301, -0.03649902,  0.07531738, ..., -0.00227928,
         0.01724243, -0.00158501],
       [ 0.015625  ,  0.00759125, -0.01183319, ..., -0.00222015,
         0.00807953, -0.015625  ],
       [-0.034729  , -0.08728027, -0.01800537, ...,  0.11743164,
        -0.0098114 , -0.03549194],
       ...,
       [ 0.01659554,  0.01053478, -0.00044641, ..., -0.00437227,
        -0.00624541,  0.02083383],
       [-0.0223963 ,  0.00948278,  0.00989949, ..., -0.00331239,
         0.01337678,  0.00422556],
       [-0.00702481,  0.00379791, -0.00395953, ..., -0.00576761,
        -0.02389995,  0.00698871]], dtype=float32)

In [137]:
def read_tsv(input_file, quotechar=None, max_lines=None):
  """Reads a tab separated value file."""
  with tf.io.gfile.GFile(input_file, "r") as f:
    reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
    lines = []
    for i, line in enumerate(reader):
      if max_lines and i >= max_lines:
        break
      lines.append(line)
    return lines
import csv

lines = read_tsv('/data/xuht/glue/MNLI/dev_matched.tsv', max_lines=100)

In [140]:
# This file comes originally from https://github.com/google-research/bert/blob/master/tokenization.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import re
import unicodedata
import six
import tensorflow as tf


def validate_case_matches_checkpoint(do_lower_case, init_checkpoint):
  """Checks whether the casing config is consistent with the checkpoint name."""

  # The casing has to be passed in by the user and there is no explicit check
  # as to whether it matches the checkpoint. The casing information probably
  # should have been stored in the bert_config.json file, but it's not, so
  # we have to heuristically detect it to validate.

  if not init_checkpoint:
    return

  m = re.match("^.*?([A-Za-z0-9_-]+)/bert_model.ckpt", init_checkpoint)
  if m is None:
    return

  model_name = m.group(1)

  lower_models = [
      "uncased_L-24_H-1024_A-16", "uncased_L-12_H-768_A-12",
      "multilingual_L-12_H-768_A-12", "chinese_L-12_H-768_A-12"
  ]

  cased_models = [
      "cased_L-12_H-768_A-12", "cased_L-24_H-1024_A-16",
      "multi_cased_L-12_H-768_A-12"
  ]

  is_bad_config = False
  if model_name in lower_models and not do_lower_case:
    is_bad_config = True
    actual_flag = "False"
    case_name = "lowercased"
    opposite_flag = "True"

  if model_name in cased_models and do_lower_case:
    is_bad_config = True
    actual_flag = "True"
    case_name = "cased"
    opposite_flag = "False"

  if is_bad_config:
    raise ValueError(
        "You passed in `--do_lower_case=%s` with `--init_checkpoint=%s`. "
        "However, `%s` seems to be a %s model, so you "
        "should pass in `--do_lower_case=%s` so that the fine-tuning matches "
        "how the model was pre-training. If this error is wrong, please "
        "just comment out this check." % (actual_flag, init_checkpoint,
                                          model_name, case_name, opposite_flag))


def convert_to_unicode(text):
  """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
  if six.PY3:
    if isinstance(text, str):
      return text
    elif isinstance(text, bytes):
      return text.decode("utf-8", "ignore")
    else:
      raise ValueError("Unsupported string type: %s" % (type(text)))
  elif six.PY2:
    if isinstance(text, str):
      return text.decode("utf-8", "ignore")
    elif isinstance(text, unicode):
      return text
    else:
      raise ValueError("Unsupported string type: %s" % (type(text)))
  else:
    raise ValueError("Not running on Python2 or Python 3?")


def printable_text(text):
  """Returns text encoded in a way suitable for print or `tf.logging`."""

  # These functions want `str` for both Python2 and Python3, but in one case
  # it's a Unicode string and in the other it's a byte string.
  if six.PY3:
    if isinstance(text, str):
      return text
    elif isinstance(text, bytes):
      return text.decode("utf-8", "ignore")
    else:
      raise ValueError("Unsupported string type: %s" % (type(text)))
  elif six.PY2:
    if isinstance(text, str):
      return text
    elif isinstance(text, unicode):
      return text.encode("utf-8")
    else:
      raise ValueError("Unsupported string type: %s" % (type(text)))
  else:
    raise ValueError("Not running on Python2 or Python 3?")


def load_vocab(vocab_file):
  """Loads a vocabulary file into a dictionary."""
  vocab = collections.OrderedDict()
  index = 0
  with tf.gfile.GFile(vocab_file, "r") as reader:
    while True:
      token = convert_to_unicode(reader.readline())
      if not token:
        break
      token = token.strip()
      vocab[token] = index
      index += 1
  return vocab


def convert_by_vocab(vocab, items):
  """Converts a sequence of [tokens|ids] using the vocab."""
  output = []
  for item in items:
    output.append(vocab[item])
  return output


def convert_tokens_to_ids(vocab, tokens):
  return convert_by_vocab(vocab, tokens)


def convert_ids_to_tokens(inv_vocab, ids):
  return convert_by_vocab(inv_vocab, ids)


def whitespace_tokenize(text):
  """Runs basic whitespace cleaning and splitting on a piece of text."""
  text = text.strip()
  if not text:
    return []
  tokens = text.split()
  return tokens


class FullTokenizer(object):
  """Runs end-to-end tokenziation."""

  def __init__(self, vocab_file, do_lower_case=True):
    self.vocab = load_vocab(vocab_file)
    self.inv_vocab = {v: k for k, v in self.vocab.items()}
    self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
    self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)

  def tokenize(self, text):
    split_tokens = []
    for token in self.basic_tokenizer.tokenize(text):
      for sub_token in self.wordpiece_tokenizer.tokenize(token):
        split_tokens.append(sub_token)

    return split_tokens

  def convert_tokens_to_ids(self, tokens):
    return convert_by_vocab(self.vocab, tokens)

  def convert_ids_to_tokens(self, ids):
    return convert_by_vocab(self.inv_vocab, ids)


class BasicTokenizer(object):
  """Runs basic tokenization (punctuation splitting, lower casing, etc.)."""

  def __init__(self, do_lower_case=True):
    """Constructs a BasicTokenizer.

    Args:
      do_lower_case: Whether to lower case the input.
    """
    self.do_lower_case = do_lower_case

  def tokenize(self, text):
    """Tokenizes a piece of text."""
    text = convert_to_unicode(text)
    text = self._clean_text(text)

    # This was added on November 1st, 2018 for the multilingual and Chinese
    # models. This is also applied to the English models now, but it doesn't
    # matter since the English models were not trained on any Chinese data
    # and generally don't have any Chinese data in them (there are Chinese
    # characters in the vocabulary because Wikipedia does have some Chinese
    # words in the English Wikipedia.).
    text = self._tokenize_chinese_chars(text)

    orig_tokens = whitespace_tokenize(text)
    split_tokens = []
    for token in orig_tokens:
      if self.do_lower_case:
        token = token.lower()
        token = self._run_strip_accents(token)
      split_tokens.extend(self._run_split_on_punc(token))

    output_tokens = whitespace_tokenize(" ".join(split_tokens))
    return output_tokens

  def _run_strip_accents(self, text):
    """Strips accents from a piece of text."""
    text = unicodedata.normalize("NFD", text)
    output = []
    for char in text:
      cat = unicodedata.category(char)
      if cat == "Mn":
        continue
      output.append(char)
    return "".join(output)

  def _run_split_on_punc(self, text):
    """Splits punctuation on a piece of text."""
    chars = list(text)
    i = 0
    start_new_word = True
    output = []
    while i < len(chars):
      char = chars[i]
      if _is_punctuation(char):
        output.append([char])
        start_new_word = True
      else:
        if start_new_word:
          output.append([])
        start_new_word = False
        output[-1].append(char)
      i += 1

    return ["".join(x) for x in output]

  def _tokenize_chinese_chars(self, text):
    """Adds whitespace around any CJK character."""
    output = []
    for char in text:
      cp = ord(char)
      if self._is_chinese_char(cp):
        output.append(" ")
        output.append(char)
        output.append(" ")
      else:
        output.append(char)
    return "".join(output)

  def _is_chinese_char(self, cp):
    """Checks whether CP is the codepoint of a CJK character."""
    # This defines a "chinese character" as anything in the CJK Unicode block:
    #   https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
    #
    # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
    # despite its name. The modern Korean Hangul alphabet is a different block,
    # as is Japanese Hiragana and Katakana. Those alphabets are used to write
    # space-separated words, so they are not treated specially and handled
    # like the all of the other languages.
    if ((cp >= 0x4E00 and cp <= 0x9FFF) or  #
        (cp >= 0x3400 and cp <= 0x4DBF) or  #
        (cp >= 0x20000 and cp <= 0x2A6DF) or  #
        (cp >= 0x2A700 and cp <= 0x2B73F) or  #
        (cp >= 0x2B740 and cp <= 0x2B81F) or  #
        (cp >= 0x2B820 and cp <= 0x2CEAF) or
        (cp >= 0xF900 and cp <= 0xFAFF) or  #
        (cp >= 0x2F800 and cp <= 0x2FA1F)):  #
      return True

    return False

  def _clean_text(self, text):
    """Performs invalid character removal and whitespace cleanup on text."""
    output = []
    for char in text:
      cp = ord(char)
      if cp == 0 or cp == 0xfffd or _is_control(char):
        continue
      if _is_whitespace(char):
        output.append(" ")
      else:
        output.append(char)
    return "".join(output)


class WordpieceTokenizer(object):
  """Runs WordPiece tokenziation."""

  def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=200):
    self.vocab = vocab
    self.unk_token = unk_token
    self.max_input_chars_per_word = max_input_chars_per_word

  def tokenize(self, text):
    """Tokenizes a piece of text into its word pieces.

    This uses a greedy longest-match-first algorithm to perform tokenization
    using the given vocabulary.

    For example:
      input = "unaffable"
      output = ["un", "##aff", "##able"]

    Args:
      text: A single token or whitespace separated tokens. This should have
        already been passed through `BasicTokenizer.

    Returns:
      A list of wordpiece tokens.
    """

    text = convert_to_unicode(text)

    output_tokens = []
    for token in whitespace_tokenize(text):
      chars = list(token)
      if len(chars) > self.max_input_chars_per_word:
        output_tokens.append(self.unk_token)
        continue

      is_bad = False
      start = 0
      sub_tokens = []
      while start < len(chars):
        end = len(chars)
        cur_substr = None
        while start < end:
          substr = "".join(chars[start:end])
          if start > 0:
            substr = "##" + substr
          if substr in self.vocab:
            cur_substr = substr
            break
          end -= 1
        if cur_substr is None:
          is_bad = True
          break
        sub_tokens.append(cur_substr)
        start = end

      if is_bad:
        output_tokens.append(self.unk_token)
      else:
        output_tokens.extend(sub_tokens)
    return output_tokens


def _is_whitespace(char):
  """Checks whether `chars` is a whitespace character."""
  # \t, \n, and \r are technically contorl characters but we treat them
  # as whitespace since they are generally considered as such.
  if char == " " or char == "\t" or char == "\n" or char == "\r":
    return True
  cat = unicodedata.category(char)
  if cat == "Zs":
    return True
  return False


def _is_control(char):
  """Checks whether `chars` is a control character."""
  # These are technically control characters but we count them as whitespace
  # characters.
  if char == "\t" or char == "\n" or char == "\r":
    return False
  cat = unicodedata.category(char)
  if cat in ("Cc", "Cf"):
    return True
  return False


def _is_punctuation(char):
  """Checks whether `chars` is a punctuation character."""
  cp = ord(char)
  # We treat all non-letter/number ASCII as punctuation.
  # Characters such as "^", "$", and "`" are not in the Unicode
  # Punctuation class but we treat them as punctuation anyways, for
  # consistency.
  if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
      (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
    return True
  cat = unicodedata.category(char)
  if cat.startswith("P"):
    return True
  return False


In [215]:
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Copyright Tor Vergata, University of Rome. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Data processor for the QC dataset

import os
import csv
import tensorflow as tf
from collections import OrderedDict
import random
import numpy as np

SEED = 0
np.random.seed(SEED)
tf.set_random_seed(SEED)
random.seed(SEED)

class InputExample(object):
  """A single training/test example for simple sequence classification."""

  def __init__(self, guid, text_a, text_b=None, label=None, 
              label_mask=None):
    """Constructs a InputExample.

    Args:
      guid: Unique id for the example.
      text_a: string. The untokenized text of the first sequence. For single
        sequence tasks, only this sequence must be specified.
      text_b: (Optional) string. The untokenized text of the second sequence.
        Only must be specified for sequence pair tasks.
      label: (Optional) string. The label of the example. This should be
        specified for train and dev examples, but not for test examples.
    """
    self.guid = guid
    self.text_a = text_a
    self.text_b = text_b
    self.label = label
    self.label_mask = label_mask


class PaddingInputExample(object):
  """Fake example so the num input examples is a multiple of the batch size.

  When running eval/predict on the TPU, we need to pad the number of examples
  to be a multiple of the batch size, because the TPU requires a fixed batch
  size. The alternative is to drop the last batch, which is bad because it means
  the entire output data won't be generated.

  We use this class instead of `None` because treating `None` as padding
  battches could cause silent errors.
  """


class InputFeatures(object):
  """A single set of features of data."""

  def __init__(self,
               input_ids,
               input_mask,
               segment_ids,
               label_id,
               label_mask=1,
               is_real_example=True):
    self.input_ids = input_ids
    self.input_mask = input_mask
    self.segment_ids = segment_ids
    self.label_id = label_id
    self.is_real_example = is_real_example
    self.label_mask = label_mask


class DataProcessor(object):
  """Base class for data converters for sequence classification data sets."""
  
  def __init__(self, double_unordered):
    self.double_unordered = True

  def get_examples(self, data_dir, split):
    """See base class."""
    return self._create_examples(
        self._read_tsv(os.path.join(data_dir, split + ".tsv")), split)

  def _create_examples(self, lines, split):
    pass 

  def get_labeled_examples(self, input_examples, label_ratio):
    """Gets a collection of `InputExample`s for the train set."""
    label_dict = OrderedDict({})
    for index, example in enumerate(input_examples):
      if example.label not in label_dict:
        label_dict[example.label] = []
      label_dict[example.label].append(index)

    labeled_examples = []
    for label in label_dict:
      label_count = len(label_dict[label])
      label_example = label_dict[label][0:int(label_count*label_ratio)]
      for index in label_example:
        input_examples[index].label_mask = 1
        labeled_examples.append(input_examples[index])

    random.shuffle(labeled_examples)
    return labeled_examples

  def get_unlabeled_examples(self, input_examples, label_ratio):
    """Gets a collection of `InputExample`s for the dev set."""
    label_dict = OrderedDict({})
    for index, example in enumerate(input_examples):
      if example.label not in label_dict:
        label_dict[example.label] = []
      label_dict[example.label].append(index)

    unlabeled_examples = []
    for label in label_dict:
      label_count = len(label_dict[label])
      unlabel_example = label_dict[label][int(label_count*label_ratio):]
      for index in unlabel_example:
        input_examples[index].label_mask = 0
        unlabeled_examples.append(input_examples[index])
    random.shuffle(unlabeled_examples)
    return unlabeled_examples

  def get_labels(self):
    """Gets the list of labels for this data set."""
    raise NotImplementedError()

  def _get_dummy_label(self):
    raise NotImplementedError()

  @classmethod
  def _read_tsv(cls, input_file, quotechar=None):
    """Reads a tab separated value file."""
    with tf.gfile.GFile(input_file, "r") as f:
      reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
      lines = []
      for line in reader:
        lines.append(line)
      random.shuffle(lines)
      return lines

  def _load_glue(self, lines, split, text_a_loc, text_b_loc, label_loc,
                 name,
                 skip_first_line=False, eid_offset=0, swap=False):
    examples = []
    for (i, line) in enumerate(lines):
      try:
        if i == 0 and skip_first_line:
          continue
        eid = i - (1 if skip_first_line else 0) + eid_offset
        text_a = convert_to_unicode(line[text_a_loc])
        if text_b_loc is None:
          text_b = None
        else:
          text_b = convert_to_unicode(line[text_b_loc])
        if "test" in split or "diagnostic" in split:
          label = self._get_dummy_label()
        else:
          label = convert_to_unicode(line[label_loc])
        if swap:
          text_a, text_b = text_b, text_a
        examples.append(InputExample(guid=eid, text_a=text_a, text_b=text_b, label=label))
      except Exception as ex:
        tf.logging.info("Error constructing example from line", i,
                  "for task", name + ":", ex)
        tf.logging.info("Input causing the error:", line)
    random.shuffle(examples)
    return examples

class QcFineProcessor(DataProcessor):
  """Processor for the MultiNLI data set (GLUE version)."""

  def __init__(self, double_unordered):
    super(QcFineProcessor, self).__init__(double_unordered)

  def get_labeled_examples(self, data_dir):
      """See base class."""
      return self._create_examples(os.path.join(data_dir, "labeled.tsv"), "train")

  def get_unlabeled_examples(self, data_dir):
      """See base class."""
      return self._create_examples(os.path.join(data_dir, "unlabeled.tsv"), "train")

  def get_test_examples(self, data_dir):
      """See base class."""
      return self._create_examples(os.path.join(data_dir, "test.tsv"), "test")

  def get_labels(self):
      """See base class."""
      return ["UNK_UNK", "ABBR_abb", "ABBR_exp", "DESC_def", "DESC_desc", "DESC_manner", "DESC_reason", "ENTY_animal", "ENTY_body", "ENTY_color", "ENTY_cremat", "ENTY_currency", "ENTY_dismed", "ENTY_event", "ENTY_food", "ENTY_instru", "ENTY_lang", "ENTY_letter", "ENTY_other", "ENTY_plant", "ENTY_product", "ENTY_religion", "ENTY_sport", "ENTY_substance", "ENTY_symbol", "ENTY_techmeth", "ENTY_termeq", "ENTY_veh", "ENTY_word", "HUM_desc", "HUM_gr", "HUM_ind", "HUM_title", "LOC_city", "LOC_country", "LOC_mount", "LOC_other", "LOC_state", "NUM_code", "NUM_count", "NUM_date", "NUM_dist", "NUM_money", "NUM_ord", "NUM_other", "NUM_perc", "NUM_period", "NUM_speed", "NUM_temp", "NUM_volsize", "NUM_weight"]

  def _create_examples(self, input_file, split):
      """Creates examples for the training and dev sets."""
      examples = []

      with tf.gfile.GFile(input_file, "r") as f:
          contents = f.read()
          file_as_list = contents.splitlines()
          for line in file_as_list[1:]:
              split = line.split(" ")
              question = ' '.join(split[1:])

              guid = "%s-%s" % (split, tokenization.convert_to_unicode(line))
              text_a = tokenization.convert_to_unicode(question)
              inn_split = split[0].split(":")
              label = inn_split[0] + "_" + inn_split[1]
              examples.append(InputExample(guid=guid, text_a=text_a, text_b=None, label=label))

      random.shuflle(examples)
      return examples

  def _get_dummy_label(self):
    labels = self.get_labels()
    return labels[0]

class SST(DataProcessor):
  """Stanford Sentiment Treebank."""
  def __init__(self, double_unordered):
    super(SST, self).__init__(double_unordered)

  def get_labels(self):
    """See base class."""
    return ["0", "1"]

  def _get_dummy_label(self):
    labels = self.get_labels()
    return labels[0]

  def get_test_splits(self):
    return ["test"]

  def get_dev_splits(self):
    return ["dev"]

  def _create_examples(self, lines, split):
    if "test" in split:
      return self._load_glue(lines, split, 1, None, None, True)
    else:
      return self._load_glue(lines, split, 0, None, 1, True)

class MNLI(DataProcessor):
  """Processor for the MultiNLI data set (GLUE version)."""
  def __init__(self, double_unordered):
    super(MNLI, self).__init__(double_unordered)

  def get_labels(self):
    """See base class."""
    return ["contradiction", "entailment", "neutral"]

  def _get_dummy_label(self):
    labels = self.get_labels()
    return labels[0]

  def _create_examples(self, lines, split):
    if split == "diagnostic":
      return self._load_glue(lines, split, 1, 2, None, True)
    else:
      return self._load_glue(lines, split, 8, 9, -1, True)

  def get_test_splits(self):
    return ["test_matched", "test_mismatched", "diagnostic"]

  def get_dev_splits(self):
    return ["dev_matched", "dev_mismatched"]

class WNLI(DataProcessor):
  """Processor for the MultiNLI data set (GLUE version)."""
  def __init__(self, double_unordered):
    super(WNLI, self).__init__(double_unordered)

  def get_labels(self):
    """See base class."""
    return ["0", "1"]

  def _get_dummy_label(self):
    labels = self.get_labels()
    return labels[0]

  def get_test_splits(self):
    return ["test"]

  def get_dev_splits(self):
    return ["dev"]

  def _create_examples(self, lines, split):
    if "test" in split:
      return self._load_glue(lines, split, 1, 2, None, True)
    else:
      return self._load_glue(lines, split, 1, 2, -1, True)

class MRPC(DataProcessor):
  """Processor for the MRPC data set (GLUE version)."""
  def __init__(self, double_unordered=True):
    super(MRPC, self).__init__(double_unordered)

  def get_labels(self):
    """See base class."""
    return ["0", "1"]

  def _get_dummy_label(self):
    labels = self.get_labels()
    return labels[0]

  def get_test_splits(self):
    return ["test"]

  def get_dev_splits(self):
    return ["dev"]

  def _create_examples(self, lines, split):
    examples = []
    examples += self._load_glue(lines, split, 3, 4, 0, True)
    if self.double_unordered and split == "train":
      examples += self._load_glue(
          lines, split, 3, 4, 0, True, len(examples), True)
    return examples

class COLA(DataProcessor):
  """Processor for the CoLA data set (GLUE version)."""
  def __init__(self, double_unordered=False):
    super(COLA, self).__init__(double_unordered)

  def get_labels(self):
    """See base class."""
    return ["0", "1"]

  def _get_dummy_label(self):
    labels = self.get_labels()
    return labels[0]

  def get_test_splits(self):
    return ["test"]

  def get_dev_splits(self):
    return ["dev"]

  def _create_examples(self, lines, split):
    return self._load_glue(lines, split, 1 if split == "test" else 3,
                           None, 1, split == "test")

class QQP(DataProcessor):
  """Processor for the CoLA data set (GLUE version)."""
  def __init__(self, double_unordered=True):
    super(QQP, self).__init__(double_unordered)

  def get_labels(self):
    """See base class."""
    return ["0", "1"]

  def _get_dummy_label(self):
    labels = self.get_labels()
    return labels[0]

  def get_test_splits(self):
    return ["test"]

  def get_dev_splits(self):
    return ["dev"]

  def _create_examples(self, lines, split):
    return self._load_glue(lines, split, 1 if split == "test" else 3,
                           2 if split == "test" else 4, 5, True)

class RTE(DataProcessor):
  """Recognizing Textual Entailment."""
  def __init__(self, double_unordered=False):
    super(RTE, self).__init__(double_unordered)
  
  def _create_examples(self, lines, split):
    return self._load_glue(lines, split, 1, 2, 3, True)

  def _get_dummy_label(self):
    labels = self.get_labels()
    return labels[0]

  def get_labels(self):
    """See base class."""
    return ["entailment", "not_entailment"]

  def get_test_splits(self):
    return ["test"]

  def get_dev_splits(self):
    return ["dev"]
 
class QNLI(DataProcessor):
  """Question NLI."""
  def __init__(self, double_unordered=False):
    super(QNLI, self).__init__(double_unordered)
  
  def _create_examples(self, lines, split):
    return self._load_glue(lines, split, 1, 2, 3, True)

  def _get_dummy_label(self):
    labels = self.get_labels()
    return labels[0]

  def get_labels(self):
    return ["entailment", "not_entailment"]

  def get_test_splits(self):
    return ["test"]

  def get_dev_splits(self):
    return ["dev"]


In [216]:
qnli = QNLI(False)

In [217]:
import os
print(os.listdir('/data/xuht/glue/'))
# exampls = qnli.get_examples('/data/xuht/glue/QNLI/', 'test' )

['CoLA', '.DS_Store', 'SST-2', 'diagnostic', 'WNLI', 'QQP', 'MRPC', 'MNLI', 'STS-B', 'QNLI', 'RTE']


In [218]:
task_mapping = {
    'CoLA':COLA,
    'STS-B': SST,
    'WNLI': WNLI,
    'QQP': QQP,
    'MRPC': MRPC,
    'MNLI': MNLI,
    'QNLI': QNLI,
    'RTE': RTE
}
for task in os.listdir('/data/xuht/glue/'):
    if task not in task_mapping:
        continue
    api = task_mapping[task](False)
    train_examples = api.get_examples('/data/xuht/glue/'+task, 'train' )
    unlabeled = api.get_unlabeled_examples(train_examples, 0.1)
    labeled = api.get_labeled_examples(train_examples, 0.1)
    for dev in api.get_dev_splits():
        dev_examples = api.get_examples('/data/xuht/glue/'+task, dev )
    for test in api.get_test_splits():
        test_examples = api.get_examples('/data/xuht/glue/'+task, dev )
    
        
    print("** Succeeded in testing:%s **"%(task))


** Succeeded in testing:CoLA **
** Succeeded in testing:WNLI **
** Succeeded in testing:QQP **
** Succeeded in testing:MRPC **
** Succeeded in testing:MNLI **
** Succeeded in testing:STS-B **
** Succeeded in testing:QNLI **
** Succeeded in testing:RTE **


In [219]:
len(unlabeled), len(labeled), len(train_examples)

(2243, 248, 2491)

In [225]:
unlabeled[0].text_a, unlabeled[0].text_b, unlabeled[0].label, unlabeled[0].guid

('Even while accepting the Russian plan, IMF Managing Director Michel Camdessus noted that the efficiency of Russia\'s State Taxation Service "is declining rapidly."',
 'Michel Camdessus is managing director of IMF.',
 'entailment',
 1998)

In [226]:
for example in train_examples:
    if example.guid == 1998:
        print(example)
        break

<__main__.InputExample object at 0x7f57bccf95c0>


In [230]:
example.text_a, example.text_b, example.label_mask

('Even while accepting the Russian plan, IMF Managing Director Michel Camdessus noted that the efficiency of Russia\'s State Taxation Service "is declining rapidly."',
 'Michel Camdessus is managing director of IMF.',
 0)