In [1]:
import numpy as np
import scipy as sp
import pandas as pd
from sys import path
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [2]:
from hsvi.tensorflow import Hierarchy_SVI
from hsvi.tensorflow.distributions import Gamma, Normal,TransformedDistribution,Bernoulli

In [3]:
# Shorthand alias for TensorFlow's contrib distributions module.
# NOTE(review): nothing in the visible cells references `ds`, and `tf.contrib`
# only exists in TensorFlow 1.x -- confirm it is still needed before upgrading.
ds = tf.contrib.distributions

In [4]:
def simulator(N,M,T,r_per_p,seed=None):
    """Simulate reviewer scores from a thresholded quality/bias model.

    Every paper receives ``r_per_p`` reviews, each from a reviewer drawn
    uniformly at random (with replacement) out of ``M`` reviewers.  Reviewer
    ``j`` uses the thresholds ``1 + bias[j], ..., T + bias[j]``; the score of
    a review is the number of thresholds that the paper quality reaches.

    Parameters
    ----------
    N : int
        Number of papers.
    M : int
        Number of reviewers.
    T : int
        Number of score thresholds (scores range over ``0..T``).
    r_per_p : int
        Number of reviews per paper.
    seed : int or None, optional
        Seed for a private ``numpy.random.RandomState``.  With the default
        ``None`` the global ``np.random`` state is used, exactly as before.

    Returns
    -------
    quality : ndarray, shape (N,)
        Latent quality of each paper.
    bias : ndarray, shape (M, 1)
        Latent bias of each reviewer.
    y : ndarray of float32, shape (N * r_per_p, T)
        ``y[i, t] == 1`` when review ``i`` passes threshold ``t``, else 0.
    reviews : pandas.DataFrame
        One row per review with columns ``pid``, ``rid`` and ``score``.
    """
    # The np.random module and a RandomState expose the same sampling API.
    rng = np.random if seed is None else np.random.RandomState(seed)

    paper_ids = np.repeat(np.arange(N), r_per_p)        # paper id of each review
    # Draw order (reviewers, quality, bias) is kept so that seeding the global
    # state reproduces the same numbers as earlier versions of this function.
    reviewer_ids = rng.choice(M, size=r_per_p*N)        # reviewer id of each review
    quality = rng.normal(T/2+1, 1., size=N)
    bias = rng.normal(loc=0., scale=0.5, size=[M,1])

    # Per-review thresholds via broadcasting: (1, T) + (R, 1) -> (R, T).
    base_thresholds = np.arange(1, T+1, dtype=np.float32)[np.newaxis, :]
    thresholds = base_thresholds + bias[reviewer_ids]
    delta = quality[paper_ids][:, np.newaxis] - thresholds

    y = (delta >= 0).astype(np.float32)

    reviews = pd.DataFrame(
        {'pid': paper_ids, 'rid': reviewer_ids, 'score': y.sum(axis=1)},
        columns=['pid','rid','score'],
    )
    return quality, bias, y, reviews

In [5]:
# Data source for the whole notebook: 'simulation' generates synthetic reviews,
# 'file' reads them from disk.
data_type = 'simulation'
# Later cells branch with `if data_type == 'simulation': ... else: ...`, so a
# misspelt value would silently select the file branch; fail early instead.
if data_type not in ('simulation', 'file'):
    raise ValueError("data_type must be 'simulation' or 'file', got %r" % (data_type,))

## Load and preprocess data

In [6]:
if data_type == 'simulation':
    N = 2000     # number of papers
    M = 100      # number of reviewers
    T = 4        # number of score levels
    r_per_p = 4  # number of reviews per submission
    true_quality, true_bias, y_data, reviews = simulator(N,M,T,r_per_p)
    R = reviews.shape[0]  # number of reviews (defined in both branches)
    id_map = pd.DataFrame(index=reviews.pid)
    id_map['id'] = reviews.pid.values
    # one row per review from the simulator; transposed to (T, R) like the file branch
    y_data = y_data.transpose()
else:
    data = pd.read_csv('./review_data.csv')
    r_per_p = 2  # number of reviews per submission (Rev1*/Rev2* columns)
    # The review table is laid out as two consecutive rows per paper, which is
    # only well defined when every paper appears exactly once.
    assert data.PaperID.is_unique, 'PaperID values must be unique'

    ### form each entry as paper ID, reviewer ID, and score given by the reviewer to the paper ###
    # Row-major flattening of the (N, 2) blocks yields paper0/rev1, paper0/rev2,
    # paper1/rev1, ... which lines up with np.repeat(PaperID, 2).
    reviews = pd.DataFrame(
        {
            'pid': np.repeat(data.PaperID.values, r_per_p),
            'rid': data[['Rev1ID','Rev2ID']].values.reshape(-1),
            'score': data[['Rev1Score','Rev2Score']].values.reshape(-1),
        },
        columns=['pid','rid','score'],
    )

    ### transform paper ID and reviewer ID to numbers ###
    # IDs are a one-character prefix followed by a 1-based number.
    reviews['pid'] = reviews.pid.map(lambda x: int(x[1:])-1)
    reviews['rid'] = reviews.rid.map(lambda x: int(x[1:])-1)
    ### generate mapping from pid to consecutive ID ###
    pid = data.PaperID.map(lambda x: int(x[1:])-1)
    id_map = pd.DataFrame(index=pid)
    id_map['id'] = data.index.values

    ### define hyper-parameters according to the data set ###
    N = data.shape[0]            # number of papers
    R = reviews.shape[0]         # number of reviews
    M = reviews.rid.nunique()    # number of reviewers
    T = int(reviews.score.max()) # number of score levels
    # Reviewer ids are used directly as row indices into M reviewer parameters
    # by the model cell, so they have to cover exactly 0..M-1.
    assert reviews.rid.between(0, M-1).all(), \
        'reviewer ids must be the consecutive integers 0..M-1'

    ### generate observations of y ###
    # y_data[i, t] = 1 if score of review i is at least t+1, else 0
    y_data = np.ones((R,T))*np.arange(T)+1
    y_data = (y_data <= reviews.score.values.reshape(-1,1)).astype(dtype=np.int32)
    y_data = y_data.transpose()


In [7]:
# Optimisation schedule used by the training cell:
#   niter      -- number of outer training iterations
#   local_iter -- number of 'paper'-scope updates per outer iteration
niter, local_iter = 3000, 1

## Define the Reviewer-Bias IRT model

In [8]:
# Priors: a standard normal for the reviewer bias, and a normal centred on the
# empirical mean score for the N paper qualities.
p_bias = Normal(0.,1.) # prior of bias
m = reviews.score.mean().astype(np.float32) # empirical mean of score level 
p_quality = Normal(loc=m,scale=2.,sample_shape=[N]) # prior mean set to empirical mean

# Baseline thresholds 1..T, repeated once per reviewer -> array of shape (M, T).
theta0 = np.array(np.arange(T)+1,ndmin=2,dtype=np.float32)
theta0= np.repeat(theta0,M,axis=0)

# Position in the quality vector for every review: simulated paper ids are
# used as-is, file paper ids are translated through id_map.
if data_type == 'simulation':
    idx = reviews.pid.values
else:
    idx = id_map.loc[reviews.pid.values,'id'].values

with tf.variable_scope('reviewer'):
    # Variational posterior over the M reviewer biases (softplus keeps the scale positive).
    q_bias = Normal(loc=tf.Variable(tf.random_normal([M],0.,0.2)),scale=tf.nn.softplus(tf.Variable(tf.ones([M])))) # posterior of bias
    # Thresholds shifted by the reviewer bias; loc is presumably (T, M) after
    # broadcasting the transposed theta0 against q_bias -- TODO confirm.
    theta = Normal(loc=tf.transpose(theta0)+q_bias,scale=1.) # generate theta by bias
with tf.variable_scope('paper'):
    # Variational posterior over paper quality; its loc variable is initialised from p_quality.
    q_quality = Normal(loc=tf.Variable(p_quality),scale=tf.nn.softplus(tf.Variable(tf.ones_like(p_quality))),name='qquality') # posterior of quality
    score_quality = tf.gather(q_quality,idx) # quality to each paper 

# Thresholds of the reviewer behind each review, gathered by reviewer id.
# NOTE(review): reviews.rid values are used directly as row indices, which
# assumes they lie in 0..M-1 -- confirm for file data.
roft = tf.gather(tf.transpose(theta.loc),reviews.rid.values)
# d is the gap between paper quality and each threshold; its scale combines the
# threshold scale and the per-review quality scale in quadrature.
d = Normal(score_quality-tf.transpose(roft), tf.sqrt(tf.square(theta.scale)+tf.square(tf.gather(q_quality.scale,idx))))
# A threshold is passed with probability 1 - cdf_d(0).
y = Bernoulli(1.-d.cdf(0.))

## Define the inference method for the model

In [9]:
# Hierarchical SVI with two scopes, 'reviewer' and 'paper'.  Each scope pairs
# its prior with its variational posterior, and both scopes are conditioned on
# the same observations (y bound to y_data).  train_size is the total number
# of reviews, N papers times r_per_p reviews each.
inference = Hierarchy_SVI(
    latent_vars={
        'reviewer': {p_bias: q_bias},
        'paper': {p_quality: q_quality},
    },
    data={
        'reviewer': {y: y_data},
        'paper': {y: y_data},
    },
    train_size=N*r_per_p,
)

start init hsvi
reviewer KLqp
paper KLqp
config optimizer in scope reviewer
config optimizer in scope paper


## Training process

In [10]:
# The session is left open on purpose: the result cells below read the fitted
# parameters through `sess.run`.
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
tf.global_variables_initializer().run(session=sess)
# Named loop counters instead of `_` / `__`: those names are IPython's
# output-history variables and should not be rebound in a notebook.
for step in range(niter):
    # `local_iter` updates of the paper scope, then one reviewer update.
    for _local_step in range(local_iter):
        inference.update(scope='paper',sess=sess)
    info_dict = inference.update(scope='reviewer',sess=sess)
    # Report the reviewer-scope loss on the first step and every 500 steps.
    if step == 0 or (step+1)%500 == 0:
        print(info_dict['loss'])

0.6331052
0.6095075
0.6047498
0.60610044
0.60532326
0.6044391
0.6059532


## Check results

In [11]:
### inferred bias of each reviewer ###
rbias = sess.run(q_bias.loc)  # posterior mean of the bias, one entry per reviewer
# Build the table in one constructor call rather than assigning attributes
# onto an empty frame.
rb = pd.DataFrame({'RVID': np.arange(M), 'bias': rbias}, columns=['RVID','bias'])
if data_type == 'simulation':
    # The simulator returns the bias as an (M, 1) column vector; flatten it
    # explicitly instead of relying on pandas to coerce a 2-D array.
    rb['true_bias'] = np.ravel(true_bias)
rb

Unnamed: 0,RVID,bias,true_bias
0,0,-0.874883,-0.570505
1,1,0.442532,0.221209
2,2,0.850857,0.597326
3,3,1.406914,0.977837
4,4,-1.127512,-0.881207
...,...,...,...
95,95,0.488256,0.397564
96,96,-0.303279,0.068354
97,97,-0.061103,-0.067042
98,98,-0.745966,-0.252379


In [12]:
### inferred quality of papers ###
quality = sess.run(q_quality.loc)  # posterior mean of the quality, one entry per paper
if data_type == 'simulation':
    qlt = pd.DataFrame(
        {'PID': np.arange(N), 'quality': quality, 'true_quality': true_quality},
        columns=['PID','quality','true_quality'],
    )
else:
    qlt = pd.DataFrame(
        {'PID': id_map.index.values, 'quality': quality},
        columns=['PID','quality'],
    )
### compare the quality with average score ###
# One grouped mean over all reviews, looked up per paper, replaces the
# per-paper `.loc` scan (which was quadratic in the number of papers).
avg_score_by_pid = pd.to_numeric(reviews['score']).groupby(reviews['pid']).mean()
qlt['avg_score'] = qlt['PID'].map(avg_score_by_pid).astype(np.float64)

qlt.head()

Unnamed: 0,PID,quality,true_quality,avg_score
0,0,2.835375,3.523021,3.0
1,1,2.168483,2.088381,1.5
2,2,2.347447,2.486145,1.75
3,3,1.520859,0.610126,0.25
4,4,3.207958,4.661201,3.75
