In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import beta
%matplotlib inline  

In [2]:
n = 5001 # number of observations in the LB
T = 500 # number of submission 
batch = 20 
p_y = 0.5 # prior distribution for y, true labels

In [3]:
# submission matrix. hyper-parameters
Phat_uniform = np.random.rand(n,T)
Phat_beta = np.maximum(np.minimum(beta.rvs(0.5,0.5,size=(n,T)),1-1e-4),1e-4)

In [4]:
def GeneratorFun(batch,Phat,p_y,nStandardScaler=5000):
    # return a simulator for simulating Y and LB-scores
    # nStandardScaler is the number of obs used to standardize score
    n = Phat.shape[0]
    logP = np.log(Phat)
    log1_P = np.log(1-Phat)
    Y = np.random.rand(nStandardScaler,n)>p_y
    score = (np.dot(Y,logP) + np.dot((1-Y),log1_P))/n    
    mean_s, std_s = np.mean(score,0), np.std(score,0)
    mean_y, std_y = np.mean(Y,0), np.std(Y,0)
    def Generator():
        Y = np.random.rand(batch,n)>p_y
        score = (np.dot(Y,logP) + np.dot((1-Y),log1_P))/n
        score = (score - mean_s)/std_s
        Y = (Y - mean_y)/std_y
        return Y.astype(np.float32),score
    return Generator

In [5]:
gen1 = GeneratorFun(batch,Phat_uniform,p_y)

Build graph for masked autoencoder for distribution estimation

In [6]:
learning_rate = 1e-2
epoch = 100
depth = 3
hiddenLayersShape = [T+n,2*n,n,n]
actFun = [tf.nn.relu,tf.nn.relu,tf.nn.sigmoid]

In [7]:
tf.reset_default_graph()
score = tf.placeholder(tf.float32, [batch, T], name='score')
Y = tf.placeholder(tf.float32, [batch, n], name='Y')
masks = [tf.placeholder(tf.float32, [hiddenLayersShape[i],hiddenLayersShape[i+1]]) for i in range(depth)]

In [8]:
X = tf.concat([score,Y],1)

In [9]:
for i in range(depth):
    weights = tf.Variable(tf.truncated_normal([hiddenLayersShape[i],hiddenLayersShape[i+1]],
                            stddev=1.0 / np.sqrt(hiddenLayersShape[i])),name='weights_'+str(i))
    biases = tf.Variable(tf.zeros([hiddenLayersShape[i+1]]),
                         name='biases_'+str(i))
    X = actFun[i](tf.matmul(X, weights*masks[i]) + biases)

In [10]:
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=Y,logits=X))

In [11]:
train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

<br>

Sample the masks

In [12]:
masks_np,masks_num = [],[]

In [13]:
masks_num.append(np.array(T*[0]+range(1,n+1)))
low = 0
high = n-1

In [14]:
for i in range(depth-1):
    masks_num.append(np.random.randint(low,high,hiddenLayersShape[i+1]))
    low = np.min(masks_num[-1])

In [15]:
masks_num.append(np.array(range(n)))

In [16]:
for i in range(depth):
    masks_np.append((np.reshape(masks_num[i],[-1,1])<=masks_num[i+1]).astype(np.float32))

Training

In [17]:
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

In [18]:
base_dict = {m:m_np for m,m_np in zip(masks,masks_np)}

In [19]:
for i in range(epoch):
    y_np,score_np = gen1()
    feed_dict = {score:score_np, Y:y_np}
    feed_dict.update(base_dict)
    _ = sess.run([train_op],feed_dict)

    if i%10 == 0:
        y_np,score_np = gen1()
        feed_dict = {score:score_np, Y:y_np}
        feed_dict.update(base_dict)
        loss_val = sess.run([loss],feed_dict)

        print "iteration:{}, Val loss:{}".format(i,loss_val[0])

iteration:0, Val loss:[0.73246992]
iteration:10, Val loss:[0.69504511]
iteration:20, Val loss:[0.69408321]
iteration:30, Val loss:[0.69398314]
iteration:40, Val loss:[0.69369155]
iteration:50, Val loss:[0.69365662]
iteration:60, Val loss:[0.69355166]
iteration:70, Val loss:[0.69373757]
iteration:80, Val loss:[0.69364828]
iteration:90, Val loss:[0.6935277]
