In [10]:
require "hdf5"
require "optim"
require "nn"

In [11]:
-- Load every dataset from data.hdf5 into memory.
-- The variables are deliberately global so that later notebook cells can use them.
f = hdf5.open("data.hdf5", "r")

-- training / validation / test sequences, read in full with :all()
X_train = f:read("X_train"):all()
Y_train = f:read("Y_train"):all()
X_valid = f:read("X_valid"):all()
Y_valid = f:read("Y_valid"):all()
X_test = f:read("X_test"):all()

-- size constants: each is stored as a tensor, the first element is the value
nwords = f:read("nwords"):all()[1]
nclasses = f:read("nclasses"):all()[1]
nfeatures = f:read("nfeatures"):all()[1]

--sentences (one row per sentence)
X_valid_sen = f:read("X_valid_sen"):all()
X_test_sen = f:read("X_test_sen"):all()

-- everything has been copied into memory, so release the file handle
f:close()

In [15]:
--Fits the count-based model with given smoothing parameters
--X : sequence features (X[i] is used directly as a column index into the emission counts)
--Y : sequence labels (Y[i] is used directly as a class index)
--alpha1 : additive alpha for class transition counts
--alpha2 : additive alpha for class-conditional feature counts
--Returns two log-probability matrices, both transposed with respect to the
--count matrices:
--  transition[y_curr][y_prev] = log p(y_curr | y_prev)
--  emission[x][y]             = log p(x | y)
--Relies on the globals nclasses and nwords.
function fit(X, Y, alpha1, alpha2)
    --count matrix of class transitions: p(y_i|y_{i-1},\theta)
    local C_trans = torch.ones(nclasses,nclasses)*alpha1
    --count matrix of class-conditional features: p(x_i|y_i,\theta)
    local C_emi = torch.ones(nclasses, nwords)*alpha2

    for i = 1,X:size(1) do
        local y_curr = Y[i]
        local x_curr = X[i]
        --every position emits a feature, including the very first one
        C_emi[y_curr][x_curr] = C_emi[y_curr][x_curr] + 1
        --a transition needs a predecessor, so it only exists from i = 2 on
        if i > 1 then
            local y_prev = Y[i-1]
            C_trans[y_prev][y_curr] = C_trans[y_prev][y_curr] + 1
        end
    end

    --normalise each row in place so that it sums to one
    C_trans:cdiv((C_trans:sum(2)):expand(C_trans:size(1),C_trans:size(2)))
    C_emi:cdiv((C_emi:sum(2)):expand(C_emi:size(1),C_emi:size(2)))
    --log() is in place; t() returns a transposed view of the same storage
    return C_trans:log():t(), C_emi:log():t()
end

-- Edge log-scores for one timestep: emission score plus transition score.
-- Corresponds to the vector y in the lecture notes.
-- observations: sequence of observations (observations[i] indexes a row of emission)
-- emission: matrix whose row for an observation holds one score per class
-- transition: nclasses x nclasses matrix of transition scores
-- i: timestep for the computed score
-- Returns an nclasses x nclasses matrix; the emission score of a class is
-- repeated along its whole row before the transition matrix is added.
function score_hmm(observations, emission, transition, i)
    local word = observations[i]
    -- reshape (rather than view) copies if the selected row is not contiguous
    local emission_column = emission[word]:reshape(nclasses, 1)
    local emission_grid = emission_column:expand(nclasses, nclasses)
    return emission_grid + transition
end

In [18]:
-- Viterbi algorithm.
-- observations: a sequence of observations, represented as integers.
--   The sequence length is taken to be the number of nonzero entries, and the
--   first that many entries are decoded (assumes zeros only appear as
--   trailing padding -- TODO confirm against the data preparation).
-- logscore: the edge scoring function over classes and observations in a
--   history-based model, called as logscore(observations, emission, transition, i)
--   and expected to return an nclasses x nclasses matrix indexed [current][previous]
-- transition: transition log-scores, forwarded untouched to logscore
-- emission: emission log-scores; emission[observation] must give one score per class
-- Returns a 1-D tensor with the highest-scoring class for each timestep
-- (an empty tensor when there are no nonzero observations).
-- Relies on the global nclasses.
function viterbi(observations, logscore, transition, emission)
    local nonzero = torch.nonzero(observations)
    if nonzero:nElement() == 0 then
        -- nothing to decode; avoids calling size(1) on an empty tensor
        return torch.Tensor()
    end
    local n = nonzero:size(1)

    -- log of an initial distribution that puts all its mass on class 1,
    -- sized by nclasses instead of a hard-coded 9 entries
    local initial = torch.Tensor(nclasses, 1):fill(-math.huge)
    initial[1][1] = 0

    local max_table = torch.Tensor(n, nclasses)
    -- row 1 is never read because the first timestep has no predecessor
    local backpointer_table = torch.zeros(n, nclasses)

    -- first timestep
    -- the initial most likely paths are the initial state distribution
    local maxes = initial + emission[observations[1]]:reshape(nclasses, 1)
    max_table[1] = maxes

    -- remaining timesteps ("forwarding" the maxes)
    for i=2,n do
        -- precompute edge scores
        local y = logscore(observations, emission, transition, i)
        -- add the best score of every previous class along the columns
        local scores = y + maxes:view(1, nclasses):expand(nclasses, nclasses)
        -- best previous class for every current class
        local backpointers
        maxes, backpointers = scores:max(2)
        -- record
        max_table[i] = maxes
        backpointer_table[i] = backpointers
    end

    -- follow backpointers to recover max path
    local classes = torch.Tensor(n)
    local _, best = maxes:max(1)
    classes[n] = best[1][1]
    for i=n,2,-1 do
        classes[i-1] = backpointer_table[{i, classes[i]}]
    end
    return classes
end

In [21]:
-- Fit with add-one smoothing, then decode one validation sentence.
ct, ce = fit(X_train, Y_train, 1, 1)
index = 2
-- cut the chosen sentence down to as many entries as it has nonzero values
local padded = X_valid_sen[index]
local length = torch.nonzero(padded):size(1)
sen = padded:sub(1, length)
local decoded = viterbi(sen, score_hmm, ct, ce)
print(decoded)
--print(viterbi(X_valid_sen[20], score_hmm, ct, ce))

 1
 3
 1
[torch.DoubleTensor of size 3]



In [95]:
-- Display the first validation sentence, keeping only its first k entries
-- where k is the number of nonzero values in that row.
X_valid_sen[1]:sub(1,torch.nonzero(X_valid_sen[1]):size(1))

    1
 1964
  630
 8763
 6403
 1924
 1617
 8764
 2093
 8765
 8766
   11
[torch.DoubleTensor of size 12]



In [97]:
-- Display the first validation sentence exactly as stored (full row, untrimmed).
X_valid_sen[1]

    1
 1964
  630
 8763
 6403
 1924
 1617
 8764
 2093
 8765
 8766
   11
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
    0
[torch.DoubleTensor of size 63]



In [None]:
--Scores every tag for one position, in log space.
--y_prev : previous class tag (selects a row of C_trans)
--x_curr : current feature (selects a column of C_emi)
--C_trans : transition count matrix
--C_emi : emission count matrix
--Returns log(normalised transition row) + log(normalised emission column).
--NOTE(review): the emission column is normalised over classes for the given
--feature, not over features for each class -- confirm this is intended.
function predict_distri(y_prev, x_curr, C_trans, C_emi)
    --transition counts out of y_prev, scaled to sum to one
    local trans_counts = C_trans[y_prev]
    local trans = trans_counts / trans_counts:sum()

    --counts of x_curr under every class (kept as a single column), scaled to sum to one
    local emi_counts = C_emi:narrow(2, x_curr, 1)
    local emi = emi_counts / emi_counts:sum()

    return torch.log(trans) + torch.log(emi)
end

--Returns a log-probability table for a given single sentence and labels
--sen : a single sentence with word features; a 0 entry marks the end of the sentence
--labels : corresponding target labels for a sentence
--C_trans : transition count matrix
--C_emi : emission count matrix
--Returns an nclasses x sen:size(1) matrix. Column j holds the scores from
--predict_distri for position j; columns at and after the first 0 entry keep
--their initial value of 1.
--Relies on the global nclasses.
function predict_table(sen, labels, C_trans, C_emi)
    local length = sen:size(1)
    --named log_probs so the standard `table` library is not shadowed
    local log_probs = torch.ones(length, nclasses)
    for j=1,length do
        local feat = sen[j]
        --end of sentence
        if feat == 0 then
            break
        end
        --NOTE(review): position 1 has no predecessor; class 1 is assumed to be
        --the start state, as in the initial distribution of the 4-argument
        --viterbi defined earlier in this file -- confirm.
        local y_prev = 1
        if j > 1 then
            y_prev = labels[j-1]
        end
        log_probs[j] = predict_distri(y_prev, feat, C_trans, C_emi)
    end
    return log_probs:t()
end

-- Unimplemented stub: takes a probability table and currently returns nothing.
-- NOTE(review): this is a global definition with the same name as the
-- 4-argument viterbi(observations, logscore, transition, emission) defined
-- earlier in this file. Executing this cell replaces that implementation, so
-- any later 4-argument viterbi call would return nil.
function viterbi(proba_table)
    
end