In [1]:
import numpy as np
import scipy as sp
import pandas as pd
from sys import path
import tensorflow as tf
# Only let TensorFlow log messages of severity ERROR through.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [2]:
from hsvi.tensorflow import Hierarchy_SVI
from hsvi.tensorflow.distributions import Gamma, Normal,TransformedDistribution,Bernoulli

In [3]:
# Short alias for TensorFlow's contrib distributions module.
# NOTE(review): `ds` is not referenced by any other cell visible in this notebook, and
# `tf.contrib` only exists in TensorFlow 1.x -- confirm whether this alias is still needed.
ds = tf.contrib.distributions

## Load and preprocess data

In [4]:
# Load the raw review table; the path is relative to the notebook's working directory.
# Later cells read the columns PaperID, Rev1ID, Rev1Score, Rev2ID and Rev2Score from it.
data = pd.read_csv('./review_data.csv')
data.head()

Unnamed: 0,PaperID,Rev1ID,Rev1Score,Rev2ID,Rev2Score
0,p1,r3,2,r10,3
1,p2,r3,3,r12,3
2,p3,r3,2,r10,4
3,p4,r3,2,r13,3
4,p6,r4,2,r6,3


In [5]:
### Reshape the wide table (one row per paper, two reviews per row) into long form:
### one row per review holding paper ID, reviewer ID and the score the reviewer gave.
# Built in a single vectorised step rather than a per-paper masked assignment.
# Row-major ravel() yields (Rev1, Rev2) of the first paper, then (Rev1, Rev2) of the
# second paper, and so on, which lines up with np.repeat(PaperID, 2). This is linear
# in the number of papers, keeps `score` integer-typed instead of `object`, and does
# not depend on every PaperID being unique.
reviews = pd.DataFrame({
    'pid': np.repeat(data.PaperID.values, 2),
    'rid': data[['Rev1ID', 'Rev2ID']].values.ravel(),
    'score': data[['Rev1Score', 'Rev2Score']].values.ravel(),
})

### Turn labels such as 'p12' / 'r3' into zero-based integers (11 / 2):
### drop the one-letter prefix, parse the number, subtract one.
reviews['pid'] = reviews.pid.map(lambda x: int(x[1:]) - 1)
reviews['rid'] = reviews.rid.map(lambda x: int(x[1:]) - 1)

reviews.head()

Unnamed: 0,pid,rid,score
0,0,2,2
1,0,9,3
2,1,2,3
3,1,11,3
4,2,2,2


In [6]:
### Lookup table from zero-based paper number to consecutive row position in `data` ###
# The index is the parsed paper number (label minus its one-letter prefix, minus one);
# the single column 'id' is the row's position in `data`.
pid = data.PaperID.map(lambda label: int(label[1:]) - 1)
id_map = pd.DataFrame({'id': data.index.values}, index=pid)
id_map

Unnamed: 0_level_0,id
PaperID,Unnamed: 1_level_1
0,0
1,1
2,2
3,3
5,4
...,...
228,214
229,215
230,216
231,217


In [7]:
### Problem sizes derived from the data set ###
S = data.shape[0]               # number of submissions (one row of `data` per paper)
R = reviews.shape[0]            # number of reviews (one row of `reviews` per review)
J = len(reviews.rid.unique())   # number of distinct reviewers
T = reviews.score.values.max()  # number of score levels, taken as the highest score observed
r_per_s = 2                     # number of reviews per submission

# The model cell uses `reviews.rid` directly as positions into a reviewer tensor of
# size J (tf.gather(..., reviews.rid.values)). That is only valid when the zero-based
# reviewer IDs fall in [0, J); a gap in the reviewer numbering would index out of range
# (an error on CPU, silently zero-filled on GPU). Fail early and loudly instead.
assert reviews.rid.min() >= 0 and reviews.rid.max() < J, (
    "reviewer IDs are not consecutive: max zero-based id %d but only %d distinct reviewers; "
    "remap reviewer IDs to 0..J-1 before building the model" % (reviews.rid.max(), J)
)

In [8]:
niter = 500 # number of outer training iterations (passes of the loop in the training cell)
local_iter = 2 # number of 'paper'-scope updates run before each 'reviewer'-scope update

## Define the Reviewer-Bias IRT model

In [9]:
# --- Priors -------------------------------------------------------------------
# Prior on reviewer bias: Normal with loc 0 and scale 0.5.
bias = Normal(0.,.5)
# Empirical mean of all observed scores, cast to float32.
m = reviews.score.mean().astype(np.float32)
# Prior on paper quality: Normal centred on the empirical mean score, requested with sample_shape=[S].
quality = Normal(loc=m,scale=1.,sample_shape=[S])

# Base thresholds: the row vector [[1, 2, ..., T]] (ndmin=2 makes it shape (1, T)),
# then repeated along axis 0 so every one of the J reviewers starts from the same row -> (J, T).
theta0 = np.array(np.arange(T)+1,ndmin=2,dtype=np.float32)
theta0= np.repeat(theta0,J,axis=0)

with tf.variable_scope('reviewer'):
    # Variational posterior of the bias: trainable loc initialised from random normal draws of
    # shape [J]; scale is softplus of a trainable vector initialised to ones (keeps it positive).
    q_bias = Normal(loc=tf.Variable(tf.random_normal([J])),scale=tf.nn.softplus(tf.Variable(tf.ones([J]))))
    # Per-reviewer thresholds: the transposed base thresholds plus the reviewer's bias.
    # NOTE(review): presumably broadcasts (T, J) + (J,) so each reviewer's bias shifts all T
    # of their thresholds -- confirm how hsvi's Normal behaves when added to a tensor.
    theta = Normal(loc=tf.transpose(theta0)+q_bias,scale=1.)
with tf.variable_scope('paper'):
    # Variational posterior of paper quality: loc is a trainable variable initialised from the
    # prior `quality`; scale is softplus of a trainable tensor of ones shaped like `quality`.
    q_quality = Normal(loc=tf.Variable(quality),scale=tf.nn.softplus(tf.Variable(tf.ones_like(quality))),name='qquality')
    # For every review, pick the quality entry of the reviewed paper: id_map translates the
    # paper number in `reviews.pid` to that paper's row position in `data`.
    score_quality = tf.gather(q_quality,id_map.loc[reviews.pid.values,'id'].values)
    # Fixed precision of the score noise; the scale below is sqrt(1 / precision) = 0.5.
    score_precision = 4.
    # Latent score of each review, centred on the quality of the reviewed paper.
    score = Normal(loc=score_quality,scale=tf.sqrt(1./score_precision))

# Threshold locations of the reviewer who wrote each review (gathered by reviewer index).
# NOTE(review): assumes every value in reviews.rid is a valid position in [0, J) -- confirm.
roft = tf.gather(tf.transpose(theta.loc),reviews.rid.values)
# Difference between the latent score and each threshold: loc is the difference of the two
# locs and scale combines the two scales in quadrature (sqrt of the sum of squares).
d = Normal(score.loc-tf.transpose(roft), tf.sqrt(tf.square(theta.scale)+tf.square(score.scale)))
# Observation model: Bernoulli whose parameter is 1 - CDF_d(0), i.e. the probability that d exceeds 0.
# NOTE(review): presumably the first positional argument of hsvi's Bernoulli is `probs` -- confirm.
y = Bernoulli(1.-d.cdf(0.))

In [10]:
### Build the binary observations of y ###
# Entry [t, r] is 1 when score level t+1 does not exceed the score given in review r,
# so a review with score k has ones in its first k levels and zeros afterwards.
score_levels = np.arange(1, T + 1)                    # 1, 2, ..., T
given_scores = reviews.score.values[:, np.newaxis]    # one row per review
y_data = (score_levels[np.newaxis, :] <= given_scores).astype(np.int32).T
y_data[:, :5]

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [0, 1, 1, 1, 0],
       [0, 0, 0, 0, 0]], dtype=int32)

## Define inference method for the model

In [11]:
# Set up hierarchical SVI with two scopes, 'reviewer' and 'paper'.
# In each scope the prior is paired with its variational posterior, and both scopes
# are given the same observations `y_data` for the Bernoulli variable `y`.
inference = Hierarchy_SVI(
    latent_vars={
        'reviewer': {bias: q_bias},
        'paper': {quality: q_quality},
    },
    data={
        'reviewer': {y: y_data},
        'paper': {y: y_data},
    },
)

start init hsvi
reviewer KLqp
paper KLqp
config optimizer in scope reviewer
config optimizer in scope paper


## Training process

In [12]:
# Create the session (soft placement lets ops fall back to another device when the
# requested one is unavailable) and initialise every variable of the graph.
# The session is deliberately left open: the result cells read the fitted values through it.
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
tf.global_variables_initializer().run(session=sess)

# Alternate updates: `local_iter` updates of the 'paper' scope, then one update of the
# 'reviewer' scope, repeated `niter` times.
# The loop counters have real names instead of `_` / `__`: assigning to `_` in a
# notebook overwrites IPython's "last output" variable and stops it from being updated.
for step in range(niter):
    for _local_step in range(local_iter):
        inference.update(scope='paper', sess=sess)
    info_dict = inference.update(scope='reviewer', sess=sess)
    # Report the loss of the 'reviewer' update on the first step and every 100th step.
    if step == 0 or (step + 1) % 100 == 0:
        print(info_dict['loss'])

0.58878285
0.5358701
0.507748
0.5134309
0.5162466
0.5052854


## Check results

In [13]:
### Inferred bias of each reviewer, listed from lowest to highest ###
rbias = sess.run(q_bias.loc)
# One argsort gives both the ranking and the sorted values.
order = np.argsort(rbias)
rb = pd.DataFrame({
    'RVID': order + 1,       # back to one-based reviewer numbers
    'bias': rbias[order],
})
rb

Unnamed: 0,RVID,bias
0,1,-0.533729
1,12,-0.501524
2,10,-0.405264
3,6,-0.343962
4,5,-0.295789
5,8,-0.280845
6,7,-0.206306
7,2,-0.197351
8,14,-0.170828
9,4,-0.160773


In [14]:
### Inferred quality of papers ###
# Stored under a new name so that the prior `quality` used by the model and inference
# cells is not overwritten; rebinding it to a NumPy array would break re-running them.
quality_est = sess.run(q_quality.loc)

### Compare the inferred quality with the average score of each paper ###
# One grouped mean replaces the per-paper masked loop. The cast to float keeps this
# working even when the `score` column is stored with dtype `object`.
avg_score_by_pid = reviews.score.astype(float).groupby(reviews.pid).mean()

# id_map's index holds the zero-based paper numbers in the same order as the entries
# of the quality vector (its 'id' column is the position used to gather them).
paper_numbers = id_map.index.values
qlt = pd.DataFrame({
    'PID': paper_numbers + 1,   # back to one-based paper numbers
    'quality': quality_est,
    'avg_score': avg_score_by_pid.reindex(paper_numbers).values,
})
qlt.head()

Unnamed: 0,PID,quality,avg_score
0,1,2.979479,2.5
1,2,3.061986,3.0
2,3,3.280523,3.0
3,4,2.945215,2.5
4,6,3.019066,2.5
