In [29]:
require 'nn'
require 'image'
require 'cutorch'
require 'cunn'
require 'optim'
require 'csvigo'

In [30]:
-- Make FloatTensor the default type, so every plain torch.Tensor(...) created
-- later in this notebook is a FloatTensor.
torch.setdefaulttensortype('torch.FloatTensor')




Prepare data:

In [9]:
--- Parse a CSV file into a 2-D tensor.
-- The first CSV row is skipped (the loop below starts at row 2), so the
-- tensor has one row per remaining CSV row and one column per field.
-- @param path  path of the CSV file, passed straight to csvigo.load
-- @return a (#rows - 1) x (#fields) tensor
local function load_csv_tensor(path)
    local raw_data = csvigo.load{path = path, mode = 'raw'}
    assert(#raw_data >= 2, 'no data rows found in ' .. tostring(path))

    local data = torch.Tensor(#raw_data - 1, #raw_data[1])

    -- CSV row i lands in tensor row i - 1 because row 1 is skipped.
    for i = 2, #raw_data do
        data[i - 1] = torch.Tensor(raw_data[i])
    end

    return data
end

--- Load the labelled training set.
-- Column 1 is used as the label and every other column as a predictor.
-- @param path  path of the training CSV
-- @return table with fields:
--   labels      1-D tensor of class indices (label 0 is stored as 10)
--   predictors  predictor columns, standardized with their own mean/std
--   mean, std   the statistics of the RAW predictors that were used for the
--               standardization, kept so other data can be scaled identically
--   size        function returning the number of samples
function load_train_dataset(path)
    local data = load_csv_tensor(path)

    local labels = data[{{}, 1}]
    -- Class indices must start at 1, so label 0 is remapped to class 10.
    labels[torch.eq(labels, 0)] = 10

    -- Compute the statistics once on the raw values and remember them: after
    -- normalization the predictors have mean ~0 and std ~1, so the original
    -- values can no longer be recovered from `predictors`.
    local pixels = data[{{}, {2, -1}}]
    local mean, std = pixels:mean(), pixels:std()
    local predictors = normalize(pixels, mean, std)

    return {labels = labels,
            predictors = predictors,
            mean = mean,
            std = std,
            size = function() return labels:size(1) end}
end

--- Load the unlabelled test set and scale it like the training set.
-- Every column of the CSV is treated as a predictor.
-- @param path        path of the test CSV
-- @param train_data  table returned by load_train_dataset; its `mean`/`std`
--                    fields are used when present, otherwise the statistics
--                    are computed from `train_data.predictors`
-- @return table with fields:
--   predictors  standardized predictor tensor
--   size        function returning the number of samples
function load_test_dataset(path, train_data)
    local data = load_csv_tensor(path)

    -- Prefer the raw training statistics; fall back to the predictors only
    -- for train_data tables that do not carry them.
    local mean = train_data.mean
    if mean == nil then mean = train_data.predictors:mean() end
    local std = train_data.std
    if std == nil then std = train_data.predictors:std() end

    local predictors = normalize(data, mean, std)

    return {predictors = predictors,
            size = function() return predictors:size(1) end}
end

--- Standardize `data` as (data - mean) / std.
-- @param data  value supporting :mean(), :std(), subtraction and division
-- @param mean  optional centre; defaults to data:mean()
-- @param std   optional scale; defaults to data:std()
-- @return the shifted and scaled data
function normalize(data, mean, std)
    local center = mean
    if not center then
        center = data:mean()
    end

    local scale = std
    if not scale then
        scale = data:std()
    end

    local centered = data - center
    return centered / scale
end

-- Load the training set first: its table is passed to load_test_dataset so
-- the test predictors can be normalized using the training data.
train_data = load_train_dataset('~/Downloads/train.csv')
test_data = load_test_dataset('~/Downloads/test.csv', train_data)

<csv>	parsing file: /Users/vitillo/Downloads/train.csv	


<csv>	parsing done	


<csv>	parsing file: /Users/vitillo/Downloads/test.csv	


<csv>	parsing done	


Setup model:

In [10]:
--- Build the convolutional classifier and its loss.
-- @param use_cuda  when truthy, both the network and the criterion are
--                  converted with :cuda()
-- @return model (nn.Sequential ending in LogSoftMax over 10 outputs)
-- @return criterion (nn.ClassNLLCriterion)
function setup_model(use_cuda)
    -- Layers listed in forward order. Each sample is first viewed as a
    -- 1 x 28 x 28 image and finally flattened to 16*4*4 values for the
    -- linear layer.
    local layers = {
        nn.View(1, 28, 28),
        nn.SpatialConvolutionMM(1, 8, 5, 5, 1, 1, 2),
        nn.ReLU(),
        nn.SpatialMaxPooling(2, 2, 2, 2),

        nn.SpatialConvolutionMM(8, 16, 5, 5, 1, 1, 2),
        nn.ReLU(),
        nn.SpatialMaxPooling(3, 3, 3, 3),

        nn.View(16 * 4 * 4),
        nn.Linear(16 * 4 * 4, 10),
        nn.LogSoftMax(),
    }

    local net = nn.Sequential()
    for _, layer in ipairs(layers) do
        net:add(layer)
    end

    local loss = nn.ClassNLLCriterion()
    if not use_cuda then
        return net, loss
    end

    net:cuda()
    loss:cuda()
    return net, loss
end

-- Build the network and loss without the CUDA conversion (use_cuda = false).
model, criterion = setup_model(false)

In [13]:
-- Replace the current model with the one saved in "mnist_model", but only
-- when that file exists; otherwise the existing `model` is left untouched
-- instead of raising on a missing file.
do
    local saved = io.open("mnist_model", "r")
    if saved then
        saved:close()
        model = torch.load("mnist_model")  -- load pre-trained model if available
    end
end

Train!

In [31]:
-- Class names handed to optim.ConfusionMatrix in train_epoch. The digit '0'
-- is listed last, in position 10, matching the loader that stores label 0
-- as class 10.
classes = {'1','2','3','4','5','6','7','8','9','0'}

--- Collect one mini-batch of samples in shuffled order.
-- @param train_data  table with `predictors`, `labels` and a `size` function
-- @param shuffle     indexable permutation; shuffle[i] is the sample to use
--                    at position i
-- @param t           first position of the batch
-- @param batchSize   maximum number of samples; the last batch may be shorter
-- @return inputs   list of predictor entries
-- @return targets  list of the matching labels
function minibatch(train_data, shuffle, t, batchSize)
    -- Clamp the end of the batch to the dataset size.
    local last = math.min(t + batchSize - 1, train_data:size())
    local inputs, targets = {}, {}
    local count = 0

    for pos = t, last do
        local sample_index = shuffle[pos]
        local sample = train_data.predictors[sample_index]
        -- NOTE(review): `useCuda` is a global that is not set anywhere in
        -- the visible code, so this branch is skipped unless it is defined
        -- elsewhere. Confirm.
        if useCuda then
            sample = sample:cuda()
        end

        count = count + 1
        inputs[count] = sample
        targets[count] = train_data.labels[sample_index]
    end

    return inputs, targets
end
--- Run one epoch of mini-batch SGD over `train` and print progress.
-- Reads the globals `model`, `criterion` and `classes`, and keeps the epoch
-- counter in the global `epoch` so that it persists across calls.
-- @param train  dataset table with `predictors`, `labels` and `size`
function train_epoch(train)
    epoch = epoch or 1

    local time = sys.clock()
    -- Visit the samples in a fresh random order each epoch.
    local shuffle = torch.randperm(train:size())
    local parameters, gradParameters = model:getParameters()
    local confusion = optim.ConfusionMatrix(classes)

    local batchSize = 25
    -- NOTE(review): this table is rebuilt on every call. If optim.sgd keeps
    -- its momentum / decay bookkeeping in it, that state restarts each
    -- epoch. Confirm this is intended.
    local optimState = {
        learningRate = 1e-2,
        weightDecay = 0.001,
        momentum = 0.9,
        learningRateDecay = 1e-7
    }

    print("==> online epoch # " .. epoch .. ' [batchSize = ' .. batchSize .. ']')

    for t = 1,train:size(),batchSize do
        -- Kept local so the batch does not leak into the global namespace.
        local inputs, targets = minibatch(train, shuffle, t, batchSize)

        -- Closure evaluated by optim.sgd: returns the mean loss of the batch
        -- and the mean gradient with respect to the flattened parameters.
        local feval = function(x)
            -- get new parameters
            if x ~= parameters then
                parameters:copy(x)
            end

            -- reset gradients
            gradParameters:zero()

            -- f is the average of all criterions
            local f = 0

            -- evaluate function for complete mini batch
            for i = 1,#inputs do
                -- estimate f
                local output = model:forward(inputs[i])
                local err = criterion:forward(output, targets[i])
                f = f + err

                -- estimate df/dW
                local df_do = criterion:backward(output, targets[i])
                model:backward(inputs[i], df_do)

                -- update confusion
                confusion:add(output, targets[i])
            end

            -- normalize gradients and f(X)
            gradParameters:div(#inputs)
            f = f/#inputs

            -- return f and df/dW
            return f,gradParameters
        end

        optim.sgd(feval, parameters, optimState)
    end

    -- time taken, averaged per sample
    time = sys.clock() - time
    time = time / train:size()
    print("==> time to learn 1 sample = " .. (time*1000) .. ' ms')

    confusion:updateValids()
    print("==> average row correct: " .. 100*confusion.averageValid)
    confusion:zero()

    epoch = epoch + 1
end

In [32]:
-- Train for 25 epochs; each call prints its own timing and accuracy lines.
for i=1,25 do
    train_epoch(train_data)
end

==> online epoch # 1 [batchSize = 25]	


==> time to learn 1 sample = 0.69993802479335 ms	
==> average row correct: 99.317809343338	


==> online epoch # 2 [batchSize = 25]	


==> time to learn 1 sample = 0.69234964393434 ms	
==> average row correct: 99.273147583008	


==> online epoch # 3 [batchSize = 25]	


==> time to learn 1 sample = 0.69453826404753 ms	
==> average row correct: 99.358031749725	


==> online epoch # 4 [batchSize = 25]	


==> time to learn 1 sample = 0.6919089498974 ms	


==> average row correct: 99.380862116814	


==> online epoch # 5 [batchSize = 25]	


==> time to learn 1 sample = 0.6865386679059 ms	
==> average row correct: 99.387073516846	


==> online epoch # 6 [batchSize = 25]	


==> time to learn 1 sample = 0.68470028468541 ms	
==> average row correct: 99.36172246933	


==> online epoch # 7 [batchSize = 25]	


==> time to learn 1 sample = 0.68103707404364 ms	
==> average row correct: 99.394195079803	


==> online epoch # 8 [batchSize = 25]	


==> time to learn 1 sample = 0.68137807505471 ms	
==> average row correct: 99.443085193634	


==> online epoch # 9 [batchSize = 25]	


==> time to learn 1 sample = 0.69025640828269 ms	
==> average row correct: 99.412466883659	


==> online epoch # 10 [batchSize = 25]	


==> time to learn 1 sample = 0.69083471525283 ms	
==> average row correct: 99.45777118206	


==> online epoch # 11 [batchSize = 25]	


==> time to learn 1 sample = 0.68706343287513 ms	


==> average row correct: 99.468392729759	


==> online epoch # 12 [batchSize = 25]	


==> time to learn 1 sample = 0.6895212389174 ms	
==> average row correct: 99.506705999374	


==> online epoch # 13 [batchSize = 25]	


==> time to learn 1 sample = 0.68250135580699 ms	
==> average row correct: 99.505968689919	


==> online epoch # 14 [batchSize = 25]	


==> time to learn 1 sample = 0.69503411792573 ms	
==> average row correct: 99.563407301903	


==> online epoch # 15 [batchSize = 25]	


==> time to learn 1 sample = 0.70589340300787 ms	
==> average row correct: 99.498865604401	


==> online epoch # 16 [batchSize = 25]	


==> time to learn 1 sample = 0.6864667847043 ms	


==> average row correct: 99.54827606678	


==> online epoch # 17 [batchSize = 25]	


==> time to learn 1 sample = 0.68727793012347 ms	
==> average row correct: 99.463312029839	


==> online epoch # 18 [batchSize = 25]	


==> time to learn 1 sample = 0.69235971428099 ms	
==> average row correct: 99.563852548599	


==> online epoch # 19 [batchSize = 25]	


==> time to learn 1 sample = 0.68873650119418 ms	
==> average row correct: 99.528200030327	


==> online epoch # 20 [batchSize = 25]	


==> time to learn 1 sample = 0.69373460043044 ms	
==> average row correct: 99.516069293022	


==> online epoch # 21 [batchSize = 25]	


==> time to learn 1 sample = 0.68445726235708 ms	
==> average row correct: 99.559218287468	


==> online epoch # 22 [batchSize = 25]	


==> time to learn 1 sample = 0.69092528592973 ms	
==> average row correct: 99.577621817589	


==> online epoch # 23 [batchSize = 25]	


==> time to learn 1 sample = 0.68878504775819 ms	


==> average row correct: 99.578521847725	


==> online epoch # 24 [batchSize = 25]	


==> time to learn 1 sample = 0.69230171044668 ms	


==> average row correct: 99.616076350212	


==> online epoch # 25 [batchSize = 25]	


==> time to learn 1 sample = 0.69467242558797 ms	
==> average row correct: 99.605869054794	


Test model:

In [7]:
-- Serialize the current model to the file "mnist_model", the same file name
-- the torch.load cell reads.
torch.save("mnist_model", model)




In [33]:
--- Return the digit whose score is largest.
-- Finds the 1-based position of the largest entry (the first one wins on
-- ties) and reports position 10 as digit 0.
-- @param tensor  1-D indexable value whose :size()[1] is its length
-- @return the position of the maximum, or 0 when that position is 10
function max_tensor_index(tensor)
    local count = tensor:size()[1]
    local best = 1

    -- Position 1 is the initial candidate, so scanning starts at 2.
    for idx = 2, count do
        if tensor[idx] > tensor[best] then
            best = idx
        end
    end

    if best == 10 then
        return 0
    end
    return best
end

--- Predict a digit for every row of the test set.
-- Uses the global `model` for the forward pass and max_tensor_index to turn
-- its output into a digit.
-- @param test_data  table whose `predictors` holds one sample per row
-- @return list where entry i is the predicted digit for row i
function test_model(test_data)
    -- Local on purpose: the predictions are returned, not written to a
    -- global named `result`.
    local result = {}

    for i = 1, test_data.predictors:size()[1] do
        local output = max_tensor_index(model:forward(test_data.predictors[i]))
        result[i] = output
    end

    return result
end


-- Classify every test row; result[i] is the predicted digit for row i.
result = test_model(test_data)

In [34]:
--- Write predictions as a two-column CSV.
-- The file starts with the header "ImageId,Label"; each following line holds
-- the 1-based position of a prediction and its value.
-- @param path    destination file, opened in write mode
-- @param result  list of predictions
-- Raises an error carrying the io.open message when the file cannot be
-- opened.
function write_result(path, result)
    -- assert() turns a failed open into a descriptive error instead of a
    -- nil-index failure on the first write.
    local file = assert(io.open(path, 'w'))
    file:write("ImageId,Label\n")

    for i = 1, #result do
        file:write(i .. ',' .. result[i] .. '\n')
    end
    file:close()
end

-- Write the predictions to result.csv in the ImageId,Label format.
write_result('result.csv', result)