In [1]:
require("optim")
require('hdf5')

<h3>Multinomial Logistic Regression - LBFGS Minibatch - L2 Norm</h3>

In [37]:
neval = 0

function ml(W, X, Y)
    local W = W:reshape(Y:size(2), X:size(2)+1)
    
    --intercept
    local b = W:sub(1, W:size(1), W:size(2),W:size(2)):t()
    
    --coefficient
    W = W:sub(1, W:size(1),1,W:size(2)-1)
    
    --XW^T
    local p = X*W:t()
    
    --XW^T + b
    p:add(b:expand(b,p:size(1),b:size(2)))
    
    local arr = p:clone()
    arr = arr:t()
    
    --predicted Z scores for y_hat
    local vmax = arr:max(1)
    local evmax = torch.expand(vmax,arr:size(1),vmax:size(2))
    arr:csub(evmax)
    arr:exp()
    arr = arr:sum(1)
    arr:log()
    arr:add(vmax)
    arr = arr:t()
    arr:expand(arr, arr:size(1), p:size(2))
    p:csub(arr)
    
    --L2 regularization
    local norm = W:reshape(W:size(1)*W:size(2), 1)
    
    local loss = (torch.sum(torch.cmul(Y,p))*-1) + 1.0 *0.5 * torch.dot(norm, norm)
    
    p:exp()
    
    return loss, p, W
end

function mlg(W, X, Y, bsize)

    local bsize = 1000
    
    --random ordering of ints [1,nexamples] and take first bsize
    local idx = torch.randperm(X:size(1)):sub(1,bsize)
    
    --training minibatches
    local X_batch = torch.Tensor(bsize, X:size(2))
    local Y_batch = torch.Tensor(bsize, Y:size(2))
    
    for i=1,bsize do
        X_batch[i] = X[idx[i]]
        Y_batch[i] = Y[idx[i]]
    end

    --initialize gradient
    local grad = torch.zeros(Y_batch:size(2), X_batch:size(1)+1)
    
    --calculate loss, updated weight matrix
    local loss, p, W = ml(W, X_batch, Y_batch)
    local diff = torch.csub(p,Y_batch)
 
    local grad = diff:t()*X_batch

    grad:add(W)
    grad = grad:cat(torch.zeros(grad:size(1),1), 2)
    grad:sub(1, grad:size(1), grad:size(2), grad:size(2)):add(diff:sum(1))
    neval = neval + 1
    print(neval, loss)
    return loss, grad:reshape(grad:size(1)*grad:size(2), 1)
end


function fit(X, Y, rate, iter, lX, batch)
    --Weight matrix must be passed in as vector
    local W = torch.zeros(Y:size(2) * (X:size(2)+1), 1)   
    
    --define local function for optimization
    local func = function(W)
        loss, grad = mlg(W, X, Y, batch)
        return loss, grad
    end
    
    --optimization parameters
    local state = {learningRate = rate, maxIter=iter, tolX=lX}
    
    --LBFGS with no line search, therefore specify learning rate
    W, f_hist, currentFuncEval = optim.lbfgs(func, W, state)
    
    W = W:reshape(Y:size(2), X:size(2)+1)
    
    --intercept
    b = W:sub(1, W:size(1), W:size(2), W:size(2))
    
    --coefficients
    W = W:sub(1, W:size(1), 1, W:size(2)-1)
    
    return W, b
end

function predict(X, W, b)
    local b = b:t()
    return (X*W:t()):add(b:expand(b, X:size(1), b:size(2)))
end

function predict_score(ypred, ytrue)
    local c = 0
    for i=1,ypred:size(1) do
        if ypred[i][1] == ytrue[i][1] then
            c = c + 1       
        end
    end
    return c/ypred:size(1)
end

<h3>Create Document Word Matrix and One Hot Encoding</h3>

In [3]:
--feature weight: counts
function createDocWordMatrix(vocab, max_sent_len, sparseMatrix)
    docword = torch.zeros(sparseMatrix:size(1), vocab)
    for i=1,sparseMatrix:size(1) do
        for j=1, max_sent_len do
            local idx = (sparseMatrix[i][j])
            if idx ~= 0 then
                docword[i][idx] = 1 + docword[i][idx]
            end
        end
    end
    return docword
end
 
function onehotencode(classes, target)
    onehot = torch.zeros(target:size(1), classes)
    for i=1,target:size(1) do
        onehot[i][target[i]] = 1
    end
    return onehot
end

function write2file(fname, pred) 
    f = io.open(fname, "w")
    f:write("ID,Category\n")
    for i=1,pred:size(1) do
        f:write(tostring(i) .. "," .. tostring(pred[i][1]) .. "\n")
    end
    f:close()
end

In [23]:
f = hdf5.open("SST1.hdf5", "r")

X_train = f:read("train_input"):all()
Y_train = f:read("train_output"):all()
X_valid = f:read("valid_input"):all()
Y_valid = f:read("valid_output"):all()
X_test = f:read("test_input"):all()
nclasses = f:read('nclasses'):all():long()[1]
nfeatures = f:read('nfeatures'):all():long()[1]

f:close()

In [24]:
X_train =createDocWordMatrix(nfeatures, 53, X_train)
Y_train = onehotencode(nclasses, Y_train)
X_test = createDocWordMatrix(nfeatures, 53, X_valid)
Y_test = onehotencode(nclasses, Y_valid)

In [38]:
start_time = os.time()
W, b = fit(X_train, Y_train, 0.1, 1, 100)
end_time = os.time()
print(end_time - start_time)

[string "neval = 0..."]:52: bad argument #2 to 'sub' (number expected, got nil)
stack traceback:
	[C]: in function 'sub'
	[string "neval = 0..."]:52: in function 'mlg'
	[string "neval = 0..."]:87: in function 'opfunc'
	/Users/Vincent/torch/install/share/lua/5.1/optim/lbfgs.lua:66: in function 'lbfgs'
	[string "neval = 0..."]:95: in function 'fit'
	[string "start_time = os.time()..."]:2: in main chunk
	[C]: in function 'xpcall'
	/Users/Vincent/torch/install/share/lua/5.1/itorch/main.lua:179: in function </Users/Vincent/torch/install/share/lua/5.1/itorch/main.lua:143>
	/Users/Vincent/torch/install/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
	...s/Vincent/torch/install/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
	...s/Vincent/torch/install/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	...s/Vincent/torch/install/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
	/Users/Vincent/torch/install/share/lua/5.1/itorch/main.lua:350: in main chunk
	[C]: in function 'require'
	(command line):1: in main chunk
	[C]: at 0x010d120bd0: 

In [28]:
Y_pred = predict(X_test, W, b)
_, Y_pred = torch.max(Y_pred, 2)
_,Y_true = torch.max(Y_test, 2)
acc_score = predict_score(Y_pred, Y_true)
print(acc_score)

0.37511353315168	


In [22]:
write2file("MNB_7.csv", Y_pred)


