In [None]:
require 'torch'
require 'nn'
require 'image'
-- cltorch.setDevice(1)

In [None]:
-- One-time setup: uncomment the next two lines to download and unpack the
-- dataset archive (presumably the source of the .t7 files loaded below).
-- os.execute('wget -c https://s3.amazonaws.com/torch7/data/cifar10torchsmall.zip')
-- os.execute('unzip cifar10torchsmall.zip')
-- Load the serialized training set; later cells read its `data` and `label` fields.
trainset = torch.load('cifar10-train.t7')

In [None]:
-- Load the serialized test set; later cells read its `data` and `label` fields.
testset = torch.load('cifar10-test.t7')
-- Human-readable class names, looked up by label value and by predicted index.
classes = {'airplane', 'automobile', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck'}

In [None]:
-- Give the training set the interface the trainer uses:
--   trainset[i]     -> {input, target} pair for sample i
--   trainset:size() -> number of samples
setmetatable(trainset, {
    __index = function(t, i)
        -- Only numeric keys denote samples. Any other missing key must stay
        -- nil: forwarding it to the tensors would hand back a bogus table,
        -- and a missing `data` field would make `t.data` re-enter this
        -- function until the stack overflows.
        if type(i) ~= 'number' then
            return nil
        end
        return {t.data[i], t.label[i]}
    end
})
trainset.data = trainset.data:double() -- convert the data from a ByteTensor to a DoubleTensor.

-- Number of samples, i.e. the extent of the first dimension of the data tensor.
function trainset:size()
    return self.data:size(1)
end

In [None]:
-- Per-channel normalisation of the training images.
-- The statistics are kept in the globals `mean` and `stdv` so that the test
-- set can later be normalised with exactly the same values.
mean = {}
stdv = {}
for channel = 1, 3 do
    -- Sub-tensor covering one channel of every training image. The in-place
    -- calls below operate on it directly, as the original per-call indexing did.
    local plane = trainset.data[{ {}, {channel}, {}, {} }]

    local channel_mean = plane:mean()
    mean[channel] = channel_mean
    print('Channel ' .. channel .. ', Mean: ' .. channel_mean)
    plane:add(-channel_mean) -- centre the channel on zero

    local channel_std = plane:std() -- measured after centring
    stdv[channel] = channel_std
    print('Channel ' .. channel .. ', Standard Deviation: ' .. channel_std)
    plane:div(channel_std) -- scale to unit standard deviation
end

In [None]:
-- Training loss: negative log-likelihood criterion. It is paired with the
-- nn.LogSoftMax output layer of `net`, which produces log-probabilities.
criterion = nn.ClassNLLCriterion()

In [None]:
-- Convolutional classifier: two convolution + max-pooling stages followed by
-- three fully connected layers and a log-softmax output over 10 classes.
-- NOTE(review): there is no nonlinearity (e.g. nn.ReLU) between the layers,
-- so the three nn.Linear layers compose to a single affine map; confirm this
-- is intentional.
net = nn.Sequential()
    :add(nn.SpatialConvolution(3, 6, 5, 5))  -- 3 input channels -> 6 feature maps, 5x5 kernel
    :add(nn.SpatialMaxPooling(2, 2, 2, 2))   -- max over 2x2 windows, stride 2
    :add(nn.SpatialConvolution(6, 16, 5, 5)) -- 6 -> 16 feature maps, 5x5 kernel
    :add(nn.SpatialMaxPooling(2, 2, 2, 2))
    :add(nn.View(16*5*5))                    -- flatten the 16x5x5 feature maps into a vector
    :add(nn.Linear(16*5*5, 120))             -- fully connected layers
    :add(nn.Linear(120, 84))
    :add(nn.Linear(84, 10))                  -- one output per entry in `classes`
    :add(nn.LogSoftMax())                    -- log-probabilities, as the NLL criterion expects

In [None]:
-- Stochastic gradient descent trainer that fits `net` using `criterion`.
shorttrainer = nn.StochasticGradient(net, criterion)
shorttrainer.learningRate = 0.001 -- step size of each gradient update
shorttrainer.maxIteration = 5 -- just do 5 epochs of training.

In [None]:
-- Run the training loop over the training set, which was given sample
-- indexing (trainset[i]) and a size() method in an earlier cell.
shorttrainer:train(trainset)

In [None]:
-- Normalise the test images with the statistics measured on the training set
-- (`mean` and `stdv`), so both sets go through the same transformation.
testset.data = testset.data:double() -- byte values -> doubles before arithmetic
for channel = 1, 3 do
    -- Sub-tensor covering one channel of every test image; modified in place.
    local plane = testset.data[{ {}, {channel}, {}, {} }]
    plane:add(-mean[channel]) -- subtract the training mean
    plane:div(stdv[channel])  -- divide by the training standard deviation
end

In [None]:
-- Inspect test samples 200..300: display each image, then print the predicted
-- class, the true class and the six highest-scoring classes with their scores
-- as percentages.
for sample = 200, 300 do
    local input = testset.data[sample]
    itorch.image(input)

    local expected = testset.label[sample]
    -- The network outputs log-probabilities; exp() turns them into
    -- probabilities in place.
    local scores = net:forward(input)
    scores:exp()

    -- `true` requests descending order, so ranked[1] is the predicted class.
    local probabilities, ranked = torch.sort(scores, true)
    print("Got: ", classes[ranked[1]])
    print("Expected: ", classes[expected])
    -- Disabled accuracy tally (needs a `correct` counter set up before the loop):
    --    if expected == ranked[1] then
    --        correct = correct + 1
    --    end
    for rank = 1, 6 do
        print(classes[ranked[rank]], probabilities[rank] * 100)
    end
end

In [None]:
-- Load the PNG from the working directory, requesting depth 1 (a single
-- channel) and a float tensor.
images = image.load('dont_push_button.png',1,'float')

In [None]:
-- Show the dimensions of the loaded image tensor.
print(images:size())

In [None]:
-- Rescale the loaded image to 256x256 pixels with image.scale(src, width,
-- height). The result is stored under its own name so that `images` keeps
-- the original resolution.
-- NOTE(review): assumes a 256x256 rescale of `images` is what was intended.
images_256 = image.scale(images, 256, 256)