In [None]:
import tensorflow as tf

In [None]:
import os
import sys
sys.path.insert(0, "/Users/xinran.he/GitProjects/mahjong")

from model.embedding_layer import EmbeddingSharedWeights
from model.input_ops import get_feature_seq_embedding
from model.dataset import input_function
from model.params import PARAMETERS

In [None]:
# Path of the gzip-compressed TFRecord file to read. Set the
# MAHJONG_TRAINING_DATA environment variable to use a different file; the
# fallback is the original hard-coded local checkout path.
TRAINING_DATA_PATTERN = os.environ.get(
    "MAHJONG_TRAINING_DATA",
    "/Users/xinran.he/GitProjects/mahjong/data/tfrecord/20180101.gz",
)

In [None]:
# Build a fresh graph on top of the packaged `input_function` pipeline and
# evaluate the feature / position / hai embeddings once.
tf.reset_default_graph()
with tf.Session() as sess:
    features, labels = input_function([TRAINING_DATA_PATTERN], True, PARAMETERS)()

    # Densify each sparse sequence once and reuse the tensor below.
    pos_seq = tf.sparse.to_dense(features["pos_seq"])
    hai_seq = tf.sparse.to_dense(features["hai_seq"])

    # Each layer gets its own name; the hai layer previously reused "pos_emb".
    # NOTE(review): assumes the first argument is the layer / variable-scope
    # name -- confirm against model.embedding_layer.EmbeddingSharedWeights.
    pos_emb_layer = EmbeddingSharedWeights("pos_emb", 40, 32)
    hai_emb_layer = EmbeddingSharedWeights("hai_emb", 71, 32)
    feature_emb = get_feature_seq_embedding(features)
    pos_emb = pos_emb_layer(pos_seq)
    hai_emb = hai_emb_layer(hai_seq)

    sess.run(tf.initializers.global_variables())
    # results = [dense hai_seq, feature_emb, pos_emb, hai_emb]
    results = sess.run([hai_seq, feature_emb, pos_emb, hai_emb])
    #f, l = sess.run([features, labels])
    #c_emb = sess.run(context_embedding_mtx)

In [None]:
def get_parse_spec():
    """Build the feature spec used to parse one serialized tf.Example.

    Returns:
        dict mapping feature name to a parsing config:
          * ``hai_seq``, ``pos_seq``, ``feature_seq``: variable-length int64;
          * ``label``: fixed-length float32 of shape [14];
          * ``current_field``, ``round``, ``center_field``, ``center_oya``:
            fixed-length int64 of shape [1];
          * ``player<pid>_{oya,field,riichi,claim,order,score}`` for
            pid in 0..2: fixed-length int64 of shape [1].
    """
    parse_spec = {
        "hai_seq": tf.VarLenFeature(tf.int64),
        "pos_seq": tf.VarLenFeature(tf.int64),
        "feature_seq": tf.VarLenFeature(tf.int64),
        "label": tf.FixedLenFeature([14], tf.float32),
    }

    scalar_keys = ["current_field", "round", "center_field", "center_oya"]
    # `range` rather than the Python-2-only `xrange`, so the function runs
    # unchanged on both Python 2 and Python 3.
    for pid in range(3):
        for suffix in ("oya", "field", "riichi", "claim", "order", "score"):
            scalar_keys.append("player%d_%s" % (pid, suffix))

    for key in scalar_keys:
        parse_spec[key] = tf.FixedLenFeature([1], tf.int64)
    return parse_spec

In [None]:
def _embedding_column(key, num_values, num_dim):
    """Return an embedding column over the identity-categorical feature `key`.

    Args:
        key: name of the input feature.
        num_values: number of buckets passed to
            `categorical_column_with_identity`.
        num_dim: dimension of the embedding.
    """
    return tf.feature_column.embedding_column(
        tf.feature_column.categorical_column_with_identity(key, num_values),
        num_dim)

def _numerical_column(key):
    """Return a numeric feature column that reads the feature named `key`."""
    numeric = tf.feature_column.numeric_column(key)
    return numeric

def get_feature_columns():
    """Create every feature column used to build the player context embedding.

    Returns:
        dict mapping feature name to its feature column:
          * numeric columns for ``center_oya`` and each player's ``oya`` and
            ``riichi``;
          * individual embedding columns for ``round`` and each player's
            ``claim``, ``order`` and ``score``;
          * one embedding of dimension 2 shared by ``current_field``,
            ``center_field`` and every ``player<pid>_field``.
    """
    # `range` rather than the Python-2-only `xrange`; reusable on 2 and 3.
    player_ids = range(3)
    columns = {}

    # numerical columns
    columns["center_oya"] = _numerical_column("center_oya")
    for pid in player_ids:
        for suffix in ("oya", "riichi"):
            key = "player%d_%s" % (pid, suffix)
            columns[key] = _numerical_column(key)

    # individual embedding columns: (suffix, num_values, num_dim)
    columns["round"] = _embedding_column("round", 4, 2)
    player_embeddings = (("claim", 4, 2), ("order", 7, 4), ("score", 161, 15))
    for pid in player_ids:
        for suffix, num_values, num_dim in player_embeddings:
            key = "player%d_%s" % (pid, suffix)
            columns[key] = _embedding_column(key, num_values, num_dim)

    # shared embedding columns
    field_keys = ["current_field", "center_field"]
    field_keys.extend("player%d_field" % pid for pid in player_ids)
    categorical_columns = [
        tf.feature_column.categorical_column_with_identity(key, 4)
        for key in field_keys
    ]
    shared_embedding_columns = tf.feature_column.shared_embedding_columns(
        categorical_columns, 2)
    # The i-th returned column is paired with the i-th input key, the same
    # index mapping the original code spelled out by hand.
    columns.update(zip(field_keys, shared_embedding_columns))
    return columns

In [None]:
def _tfrecord_parse_fn(example_proto):
    """Parse one serialized example into a ``(features, label)`` pair.

    The spec comes from get_parse_spec(). The label tensor is returned on its
    own and also remains in the feature dict under the "label" key.
    """
    example = tf.parse_single_example(example_proto, get_parse_spec())
    label = example["label"]
    return example, label

In [None]:
def get_context_embedding_mtx(parsed_features):
    """Build the flattened lookup table of per-player context embeddings.

    For each player 0..2 the four common columns and that player's six
    columns are fed through ``tf.feature_column.input_layer``, giving one
    2-D tensor per player. An all-zero tensor of the same shape is placed
    in front of them and the four are stacked per example and flattened.

    Args:
        parsed_features: dict of batched feature tensors containing the keys
            used by get_feature_columns().

    Returns:
        Float tensor of shape ``(batch * 4, dim)`` (NOT ``(batch, 4, dim)``).
        For example ``b``, row ``4 * b`` is all zeros and rows ``4 * b + 1``
        to ``4 * b + 3`` hold the vectors of players 0, 1 and 2. With the
        columns from get_feature_columns(), ``dim`` is
        1 + 2 + 2 + 2 + 1 + 1 + 2 + 4 + 15 + 2 = 32.
    """
    all_columns = get_feature_columns()
    common_keys = ["center_oya", "round", "current_field", "center_field"]
    player_suffixes = ["oya", "riichi", "claim", "order", "score", "field"]

    per_player = []
    # `range` rather than the Python-2-only `xrange`.
    for pid in range(3):
        keys = common_keys + ["player%d_%s" % (pid, suffix) for suffix in player_suffixes]
        columns = [all_columns[key] for key in keys]
        per_player.append(tf.feature_column.input_layer(parsed_features, columns))

    zero_row = tf.zeros_like(per_player[0], dtype=tf.float32)
    # Stacking on axis 1 yields (batch, 4, dim) directly, which is what the
    # former stack-on-axis-0 followed by transpose(perm=[1, 0, 2]) produced.
    context_embedding_mtx = tf.stack([zero_row] + per_player, axis=1)
    # Collapse the (batch, 4) leading dimensions into a single row index.
    embedding_dim = tf.shape(context_embedding_mtx)[2]
    return tf.reshape(context_embedding_mtx, [-1, embedding_dim])

In [None]:
def get_feature_seq_embedding(features):
    """Look up the context embedding for every position of ``feature_seq``.

    get_context_embedding_mtx() returns a table with 4 consecutive rows per
    example. Each non-zero entry of example ``b`` in ``feature_seq`` is
    shifted by ``4 * b`` so it indexes that example's rows. Zero entries are
    left unshifted and therefore all resolve to row 0 of the table, which is
    an all-zero row.

    NOTE(review): this definition shadows the ``get_feature_seq_embedding``
    imported from ``model.input_ops`` earlier in the notebook.

    Args:
        features: dict of batched features; ``features["feature_seq"]`` must
            be a ``tf.SparseTensor`` of int64 indices.

    Returns:
        Tensor with the shape of the densified ``feature_seq`` plus a trailing
        embedding dimension.
    """
    context_embedding_mtx = get_context_embedding_mtx(features)
    sparse_seq = features["feature_seq"]
    dense_seq = tf.sparse.to_dense(sparse_seq)

    # One offset per example: 0, 4, 8, ... (int64, because dense_shape is).
    row_offset = tf.range(0, sparse_seq.dense_shape[0]) * 4
    # tf.cast replaces the deprecated tf.to_int64; the result is identical.
    nonzero_mask = tf.cast(tf.not_equal(dense_seq, 0), tf.int64)
    flat_index = dense_seq + nonzero_mask * tf.expand_dims(row_offset, axis=1)
    return tf.nn.embedding_lookup(context_embedding_mtx, flat_index)

In [None]:
# Path of the gzip-compressed TFRecord file to read (the same constant is also
# assigned in an earlier cell). Set the MAHJONG_TRAINING_DATA environment
# variable to use a different file; the fallback is the original hard-coded
# local checkout path.
TRAINING_DATA_PATTERN = os.environ.get(
    "MAHJONG_TRAINING_DATA",
    "/Users/xinran.he/GitProjects/mahjong/data/tfrecord/20180101.gz",
)

In [None]:
# Read the TFRecord file directly with the parse / embedding helpers defined
# in this notebook and evaluate the embeddings for one batch of 2 examples.
tf.reset_default_graph()
with tf.Session() as sess:
    dataset = tf.data.TFRecordDataset(TRAINING_DATA_PATTERN, compression_type="GZIP")
    dataset = dataset.map(_tfrecord_parse_fn).batch(2)
    features, labels = dataset.make_one_shot_iterator().get_next()

    # Densify each sparse sequence once and reuse the tensor below.
    pos_seq = tf.sparse.to_dense(features["pos_seq"])
    hai_seq = tf.sparse.to_dense(features["hai_seq"])

    # Each layer gets its own name; the hai layer previously reused "pos_emb".
    # NOTE(review): assumes the first argument is the layer / variable-scope
    # name -- confirm against model.embedding_layer.EmbeddingSharedWeights.
    pos_emb_layer = EmbeddingSharedWeights("pos_emb", 40, 32)
    hai_emb_layer = EmbeddingSharedWeights("hai_emb", 71, 32)
    feature_emb = get_feature_seq_embedding(features)
    pos_emb = pos_emb_layer(pos_seq)
    hai_emb = hai_emb_layer(hai_seq)

    sess.run(tf.initializers.global_variables())
    # Fetch everything in a single run so all values come from the same batch
    # of the one-shot iterator.
    # results = [dense hai_seq, feature_emb, pos_emb, hai_emb]
    results = sess.run([hai_seq, feature_emb, pos_emb, hai_emb])
    #f, l = sess.run([features, labels])
    #c_emb = sess.run(context_embedding_mtx)

In [None]:
# `results` is the list fetched by the session cell:
# [dense hai_seq, feature_emb, pos_emb, hai_emb].
hais = results[3]
print(results[0])
# Embedding rows of the first example at sequence positions 10 and 11.
print(hais[0, 10, :])
print(hais[0, 11, :])

In [None]:
# NOTE(review): in the visible cells `context_seq_embedding` is only bound as
# a local inside get_feature_seq_embedding, so this raises NameError on a
# fresh kernel unless it is assigned elsewhere.
print(context_seq_embedding[4, :])

In [None]:
# NOTE(review): `c_emb` is only assigned by a commented-out `sess.run` line in
# the session cells, so this raises NameError on a fresh kernel.
print(c_emb.shape)

In [None]:
# NOTE(review): `f` is only assigned by a commented-out
# `sess.run([features, labels])` line in the session cells, so this raises
# NameError on a fresh kernel.
print(f["player0_oya"])

In [None]:
# NOTE(review): `f` is only assigned by a commented-out
# `sess.run([features, labels])` line in the session cells, so this raises
# NameError on a fresh kernel.
print(f["feature_seq"])

In [None]:
# NOTE(review): `f` is only assigned by a commented-out
# `sess.run([features, labels])` line in the session cells, so this raises
# NameError on a fresh kernel.
print(f["pos_seq"])

In [None]:
tf.reset_default_graph()
with tf.Session() as sess: