In [1]:
require "hdf5"
require "optim"
require "nn"

In [3]:
f = hdf5.open("data.hdf5", "r")

X_train = f:read("X_train"):all()
Y_train = f:read("Y_train"):all()
X_valid = f:read("X_valid"):all()
Y_valid = f:read("Y_valid"):all()
X_test = f:read("X_test"):all()
nwords = f:read("nwords"):all()[1]
nclasses = f:read("nclasses"):all()[1]
nfeatures = f:read("nfeatures"):all()[1]

--sentences
X_valid_sen = f:read("X_valid_sen"):all()
X_test_sen = f:read("X_test_sen"):all()

In [4]:
--Fits the count-based model with given smoothing parameters
    --X : sequence features
    --Y : sequence labels
    --alpha1 : additive alpha for class counts
    --alpha2 : additive alpha for class-conditional feature counts
function fit(X, Y, alpha1, alpha2)
    --count matrix of class transitions: p(y_i|y_{i-1},\theta)
    local C_trans = torch.ones(nclasses,nclasses)*alpha1
    --count matrix of class-conditional features: p(x_i|y_i,\theta)
    local C_emi = torch.ones(nclasses, nwords)*alpha2
    
    for i = 2,X:size(1) do
        local y_curr = Y[i]
        local y_prev = Y[i-1]
        local x_curr = X[i]
        C_trans[y_prev][y_curr] = C_trans[y_prev][y_curr] + 1
        C_emi[y_curr][x_curr] = C_emi[y_curr][x_curr] + 1
    end
    return C_trans, C_emi
end

--Returns a distribution over the various tags
    --y_prev : previous class tag
    --x_curr : current feature 
    --C_trans : transition count matrix
    --C_emi : emission count matrix
function predict_distri(y_prev, x_curr, C_trans, C_emi)
    --compute transition and emission distributions
    local trans = C_trans[y_prev]/torch.sum(C_trans[y_prev])
    local emi = C_emi[{{},{x_curr,x_curr}}]/torch.sum(C_emi[{{},{x_curr,x_curr}}])
    return torch.log(trans) + torch.log(emi)
end

In [5]:
--fits the count-based model with given smoothing parameters
ct, ce = fit(X_train, Y_train, 1, 1)

In [7]:
p = predict_distri(2, 15, ct, ce)

In [8]:
p

 -0.6424
 -6.3617
-13.5395
-13.5395
-12.8463
-13.5395
-13.5395
 -9.0968
-13.5395
[torch.DoubleTensor of size 9]



In [17]:
X_test_sen:size()

 1646
  110
[torch.LongStorage of size 2]



In [18]:
X_train:size()

 52616
[torch.LongStorage of size 1]



In [20]:
ce:size()

     9
 13428
[torch.LongStorage of size 2]

