In [1]:
require "hdf5"
require "optim"
require "nn"

In [2]:
-- Open the dataset read-only; every array below is read fully into memory.
f = hdf5.open("data.hdf5", "r")

-- Returns the complete contents of the dataset called `name` in the open file.
local function load_all(name)
    return f:read(name):all()
end

X_train = load_all("X_train")
Y_train = load_all("Y_train")
X_valid = load_all("X_valid")
Y_valid = load_all("Y_valid")
X_test = load_all("X_test")
-- the size entries are stored as tensors; element 1 is taken as the value
nwords = load_all("nwords")[1]
nclasses = load_all("nclasses")[1]

-- C is a short alias for nclasses so that code written against either name works
C = nclasses
nfeatures = load_all("nfeaturesHMM")[1]

-- sentence-level datasets (one row per sentence)
X_valid_sen = load_all("X_valid_sen")
X_test_sen = load_all("X_test_sen")

In [29]:
--Fits the count-based HMM with additive smoothing.
--X : sequence features; X[i][1] is read as the word index of token i
--Y : sequence labels; Y[i] is used directly as a class index
--    (assumes Y is a 1-D tensor -- TODO confirm against the data file)
--alpha1 : additive alpha for class-transition counts
--alpha2 : additive alpha for class-conditional feature (emission) counts
--Returns two log-probability matrices, both transposed for lookup:
--  first  : indexed [y_curr][y_prev], log p(y_curr | y_prev)
--  second : indexed [x][y],           log p(x | y)
--Matrix dimensions come from the globals nclasses and nwords.
function hmm_fit(X, Y, alpha1, alpha2)
    --count matrix of class transitions: p(y_i|y_{i-1},\theta)
    local C_trans = torch.ones(nclasses, nclasses) * alpha1
    --count matrix of class-conditional features: p(x_i|y_i,\theta)
    local C_emi = torch.ones(nclasses, nwords) * alpha2
    for i = 1, X:size(1) do
        local y_curr = Y[i]
        local x_curr = X[i][1]
        -- every token contributes an emission count, the first one included
        C_emi[y_curr][x_curr] = C_emi[y_curr][x_curr] + 1
        -- a transition needs a predecessor, so the first token adds none
        if i > 1 then
            local y_prev = Y[i-1]
            C_trans[y_prev][y_curr] = C_trans[y_prev][y_curr] + 1
        end
    end
    -- normalise each row in place so that it sums to one
    C_trans:cdiv((C_trans:sum(2)):expand(C_trans:size(1), C_trans:size(2)))
    C_emi:cdiv((C_emi:sum(2)):expand(C_emi:size(1), C_emi:size(2)))
    -- log() is in place; t() returns the transposed view
    return C_trans:log():t(), C_emi:log():t()
end

-- Edge log-scores (transition plus emission) for one timestep.
-- Corresponds to the vector y in the lecture notes.
-- observations: sequence of observation indices
-- i: timestep for the computed score
-- trans: C x C transition log-scores
-- emi: emission log-scores, indexed by observation first
-- Returns a freshly allocated C x C tensor.
function score_hmm(observations, i, trans, emi)
    local word = observations[i]
    -- emission scores of this word as a column, repeated across all columns
    local emission_column = emi[word]:reshape(C, 1)
    local emission_grid = emission_column:expand(C, C)
    -- NOTE: the sum allocates a new Tensor
    return emission_grid + trans
end

-- Viterbi algorithm.
-- observations: a sequence of observations, represented as integers
-- logscore: the edge scoring function over classes and observations in a
--           history-based model; called as logscore(observations, i, trans, emi)
--           and expected to return a C x C tensor
-- trans, emi: passed through unchanged to logscore
-- Returns a tensor of length n holding the highest-scoring class per timestep.
-- Relies on the globals nclasses and C for the number of classes.
function viterbi(observations, logscore, trans, emi)
    -- initial state distribution: almost all mass on class 8, with a small
    -- floor on every other class so that the log below stays finite
    local initial = torch.zeros(nclasses,1) + .000001
    initial[8] = 1.0
    initial = initial / torch.sum(initial)

    local n = observations:size(1)
    local max_table = torch.Tensor(n, C)
    -- row 1 of the backpointer table is never written or read
    local backpointer_table = torch.Tensor(n, C)
    -- first timestep
    -- the initial most likely paths are the initial state distribution;
    -- the emission of the first observation is not added here
    -- NOTE: another unnecessary Tensor allocation here
    local maxes, backpointers = torch.log(initial):max(2)
    max_table[1] = maxes
    -- remaining timesteps ("forwarding" the maxes)
    for i=2,n do
        -- precompute edge scores (kept local so nothing leaks into globals)
        local y = logscore(observations, i, trans, emi)
        -- add the best score of each previous class along the columns
        local scores = y + maxes:view(1, C):expand(C, C)
        -- compute new maxes (NOTE: another unnecessary Tensor allocation here)
        maxes, backpointers = scores:max(2)
        -- record
        max_table[i] = maxes
        backpointer_table[i] = backpointers
    end
    -- follow backpointers to recover max path
    local classes = torch.Tensor(n)
    -- the argmax over the final scores is the last class of the best path
    maxes, classes[n] = maxes:max(1)
    for i=n,2,-1 do
        classes[i-1] = backpointer_table[{i, classes[i]}]
    end
    return classes:sub(1,-1)
end

-- Returns a tensor of predicted tags, one row per sequence, zero-padded on the
-- right to the width of X.
-- X : tensor of sequences by word features; each row is assumed to hold its
--     non-zero word indices first, followed by zero padding
-- ct, ce : transition and emission log-scores, passed on to viterbi
-- Rows with no non-zero entry are left as all zeros in the result.
function predict_tags(X, ct, ce)
    local predictions = torch.zeros(X:size(1), X:size(2))
    for i=1,X:size(1) do
        local row = X[i]
        local nonzero_idx = torch.nonzero(row)
        -- an all-zero row yields an empty index tensor, on which size(1)
        -- would raise; there is nothing to decode for such a row
        if nonzero_idx:nElement() > 0 then
            -- the number of non-zero entries is taken as the sentence length
            local sen = (row:sub(1, nonzero_idx:size(1))):squeeze()

            local p = viterbi(sen, score_hmm, ct, ce)
            predictions[{{i,i},{1,p:size(1)}}] = p
        end
    end
    return predictions
end

-- Lays a flat tag sequence out as one row per sentence, in the same
-- zero-padded shape as the global X_valid_sen (and hence as p_tags).
-- valid : correct tags in sequence; a tag equal to 8 opens a new row
-- Returns a tensor with the dimensions of X_valid_sen.
function format_valid(valid)
    local n_rows, n_cols = X_valid_sen:size(1), X_valid_sen:size(2)
    local y_tags = torch.zeros(n_rows, n_cols)
    local row, col = 0, 0
    for k = 1, valid:size(1) do
        -- tag 8 marks the start of the next sentence: advance the row and
        -- restart at its first column
        if valid[k] == 8 then
            row = row + 1
            col = 1
        end
        y_tags[row][col] = valid[k]
        col = col + 1
    end
    return y_tags
end

-- Intended to return the predicted fscore for supervised data.
-- The computation is not implemented yet: the body only sets up a counter
-- and the function currently returns nothing.
-- p_tags : tensor of predicted tags for each sequence
-- y_tags : labeled tags for each sequence
function predict_fscore(p_tags, y_tags)
    -- TODO: implement the f-score; this counter is not used so far
    local sentence_index = 0
end

-- Writes to file in Kaggle prediction format
-- p : prediction tensor, one row of tag indices per sequence; column 1 is
--     skipped and a 0 ends the row
-- fname : output filename, created inside the "predictions/" directory
-- Each output line is "<row>,<labels>": tags other than 1 ("O") are written
-- as NAME-position, and directly adjacent positions with the same tag are
-- chained onto one entry (e.g. "PER-3-4 LOC-7").
-- Raises an error carrying the system message if the file cannot be opened.
function kaggle_format(p, fname)
    -- tag index -> label name; built once instead of once per row
    local tag_names = {[1]="O",[2]="PER",[3]="LOC",[4]="ORG",[5]="MISC",[6]="MISC",[7]="LOC"}
    -- assert turns a failed open (e.g. missing directory) into a clear error
    local f = assert(io.open("predictions/" .. fname, "w"))
    f:write("ID,Labels\n")
    for i=1,p:size(1) do
        -- pieces of the label string, joined once per row
        local pieces = {}
        local prev_tag = 0
        local prev_idx = 0
        for j=2,p:size(2) do
            local tag = p[i][j]
            if tag == 0 then
                break
            end
            if tag ~= 1 then
                -- reported positions are shifted by one because column 1 is skipped
                local pos = tostring(j-1)
                if prev_tag == 0 then
                    -- first entry of the row: no leading separator
                    pieces[#pieces + 1] = tag_names[tag] .. "-" .. pos
                elseif tag == prev_tag and prev_idx == j-1 then
                    -- same tag on the directly preceding column: extend the entry
                    pieces[#pieces + 1] = "-" .. pos
                else
                    pieces[#pieces + 1] = " " .. tag_names[tag] .. "-" .. pos
                end
                prev_tag = tag
                prev_idx = j
            end
        end
        f:write(i .. "," .. table.concat(pieces) .. "\n")
    end
    f:close()
end

In [34]:
-- Fit the HMM on the training data with both smoothing alphas set to 1.
ct, ce = hmm_fit(X_train, Y_train, 1, 1)
-- Decode every validation sentence with Viterbi.
p_tags = predict_tags(X_valid_sen, ct, ce)
-- Lay the validation labels out one row per sentence, matching p_tags.
y_tags = format_valid(Y_valid)

In [33]:
-- Write the predictions to a timestamped CSV file under predictions/.
kaggle_format(p_tags, "pred_" .. tostring(os.time()) .. "_hmm.csv")




In [11]:
-- NOTE(review): predict_fscore documents its second argument as per-sequence
-- tags, but Y_valid is passed here; y_tags may have been intended -- confirm.
predict_fscore(p_tags, Y_valid)




In [36]:
-- Show the first row of the formatted validation labels.
y_tags:sub(1,1)

Columns 1 to 26
 8  1  1  4  1  1  1  1  1  1  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0

Columns 27 to 52
 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0

Columns 53 to 63
 0  0  0  0  0  0  0  0  0  0  0
[torch.DoubleTensor of size 1x63]



In [37]:
-- Show the first row of the predicted tags for comparison.
p_tags:sub(1,1)

Columns 1 to 26
 8  1  1  1  1  1  1  1  1  1  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0

Columns 27 to 52
 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0

Columns 53 to 63
 0  0  0  0  0  0  0  0  0  0  0
[torch.DoubleTensor of size 1x63]

