From 36e07615dedc1af2784c4af5b32dd189133f2f56 Mon Sep 17 00:00:00 2001
From: y0ast
Date: Sat, 18 Oct 2014 11:45:50 +0200
Subject: [PATCH] move to torch7 adagrad

---
 .gitignore   |  1 +
 binaryva.lua | 86 +++++++++++++++++++++++++++++-----------------------
 load.lua     | 17 ++++++-----
 to-do.txt    |  5 ---
 4 files changed, 58 insertions(+), 51 deletions(-)
 delete mode 100644 to-do.txt

diff --git a/.gitignore b/.gitignore
index 731cf21..33d8176 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 params
+save
diff --git a/binaryva.lua b/binaryva.lua
index 088e56c..9e55651 100644
--- a/binaryva.lua
+++ b/binaryva.lua
@@ -3,10 +3,10 @@ require 'sys'
 require 'torch'
 require 'nn'
 require 'xlua'
+require 'optim'
 
 --Packages necessary for SGVB
 require 'Reparametrize'
-require 'BCECriterion'
 require 'KLDCriterion'
 
 --Custom Linear module to support different reset function
@@ -15,22 +15,16 @@ require 'LinearVA'
 --For loading data files
 require 'load'
 
---For saving weights and biases
-require 'hdf5'
-require 'adagrad'
-
-data = load28('datasets/mnist.hdf5')
+data = load32()
 
 dim_input = data.train:size(2)
-dim_hidden = 2
+dim_hidden = 10
 hidden_units_encoder = 400
 hidden_units_decoder = 400
 
 batchSize = 100
-learningRate = 0.03
-adaGradInitRounds = 10
 
 torch.manualSeed(1)
 
 --Does not seem to do anything
@@ -63,28 +57,16 @@ va:add(nn.Sigmoid())
 
 --Binary cross entropy term
 BCE = nn.BCECriterion()
+BCE.sizeAverage = false
 KLD = nn.KLDCriterion()
 
-opfunc = function(batch)
-    va:zeroGradParameters()
-
-    f = va:forward(batch)
-    err = BCE:forward(f, batch)
-    df_dw = BCE:backward(f, batch)
-    va:backward(batch,df_dw)
-
-    KLDerr = KLD:forward(va:get(1).output, batch)
-    de_dw = KLD:backward(va:get(1).output, batch)
-    encoder:backward(batch,de_dw)
+parameters, gradients = va:getParameters()
+config = {
+    learningRate = -0.03,
+}
-
-    lowerbound = err + KLDerr
-    weights, grads = va:parameters()
-
-    return weights, grads, lowerbound
-end
-
-h = adaGradInit(data.train, opfunc, adaGradInitRounds)
+state = {}
 
 epoch = 0
@@ -94,7 +76,11 @@ while true do
     local time = sys.clock()
     local shuffle = torch.randperm(data.train:size(1))
 
-    for i = 1, data.train:size(1), batchSize do
+    --Make sure batches are always batchSize
+    local N = data.train:size(1) - (data.train:size(1) % batchSize)
+    local N_test = data.test:size(1) - (data.test:size(1) % batchSize)
+
+    for i = 1, N, batchSize do
         local iend = math.min(data.train:size(1),i+batchSize-1)
         xlua.progress(iend, data.train:size(1))
 
@@ -106,20 +92,44 @@ while true do
             k = k + 1
         end
 
-        batchlowerbound = adaGradUpdate(batch, opfunc)
-        lowerbound = lowerbound + batchlowerbound
+        local opfunc = function(x)
+            if x ~= parameters then
+                parameters:copy(x)
+            end
+
+            va:zeroGradParameters()
+
+            local f = va:forward(batch)
+            local err = BCE:forward(f, batch)
+            local df_dw = BCE:backward(f, batch)
+            va:backward(batch,df_dw)
+
+            local KLDerr = KLD:forward(va:get(1).output, batch)
+            local de_dw = KLD:backward(va:get(1).output, batch)
+            encoder:backward(batch,de_dw)
+
+            lowerbound = err + KLDerr
+
+            return lowerbound, gradients
+        end
+
+        x, batchlowerbound = optim.adagrad(opfunc, parameters, config, state)
+        print(batchlowerbound[1])
+        lowerbound = lowerbound + batchlowerbound[1]
     end
 
-    print("\nEpoch: " .. epoch .. " Lowerbound: " .. lowerbound/data.train:size(1) .. " time: " .. sys.clock() - time)
-    if epoch % 2 == 0 then
-        local myFile = hdf5.open('params/epoch_' .. epoch .. '.hdf5', 'w')
+    print("\nEpoch: " .. epoch .. " Lowerbound: " .. lowerbound/N .. " time: " .. sys.clock() - time)
 
-        myFile:write('wtanh', va:get(3).weight)
-        myFile:write('btanh', va:get(3).bias)
-        myFile:write('wsig', va:get(5).weight)
-        myFile:write('bsig', va:get(5).bias)
+    if lowerboundlist then
+        lowerboundlist = torch.cat(lowerboundlist,torch.Tensor(1,1):fill(lowerbound/N),1)
+    else
+        lowerboundlist = torch.Tensor(1,1):fill(lowerbound/N)
+    end
 
-        myFile:close()
+    if epoch % 2 == 0 then
+        torch.save('save/parameters.t7', parameters)
+        torch.save('save/state.t7', state)
+        torch.save('save/lowerbound.t7', torch.Tensor(lowerboundlist))
     end
 end
diff --git a/load.lua b/load.lua
index 840cee9..f9da5c3 100644
--- a/load.lua
+++ b/load.lua
@@ -1,16 +1,17 @@
-require 'hdf5'
-
 function load32()
-    train = torch.load('datasets/train_32x32.t7', 'ascii')
-    test = torch.load('datasets/test_32x32.t7', 'ascii')
+    data = {}
+    data.train = torch.load('datasets/train_32x32.t7', 'ascii').data
+    data.test = torch.load('datasets/test_32x32.t7', 'ascii').data
 
     --Convert training data to floats
-    train.data = train.data:double()
-    test.data = test.data:double()
+    data.train = data.train:double()
+    data.test = data.test:double()
 
     --Rescale to 0..1 and invert
-    train.data:div(255):resize(60000,1024)
-    test.data:div(255):resize(10000,1024)
+    data.train:div(255):resize(60000,1024)
+    data.test:div(255):resize(10000,1024)
+
+    return data
 end
 
 function load28(path)
diff --git a/to-do.txt b/to-do.txt
deleted file mode 100644
index 64216b5..0000000
--- a/to-do.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-- Adaptive learning rate? Come up with a way to increase learning at start
-- Write plotting code for lowerbound
-- more consistent naming for criterion/parts of model
-- get test lowerbound and save
-- Add tests with Jacobian
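
Note on the torch7 optim convention the patch adopts: optim.adagrad(opfunc, x, config, state)
takes a closure opfunc(x) that returns the objective evaluated at x together with the gradient
tensor, where x and the gradient tensor come from module:getParameters(); adagrad accumulates
its per-parameter squared-gradient history in the state table across calls, and it returns the
updated parameters plus a table {f(x)}. The patch sets learningRate = -0.03, presumably because
its closure returns the variational lower bound, which is to be maximized, so the negated rate
turns adagrad's descent step into ascent. Below is a minimal sketch of the convention; the
linear model, MSE criterion and random data are hypothetical stand-ins, not part of the patch.

require 'torch'
require 'nn'
require 'optim'

--Hypothetical stand-ins for the autoencoder and BCE/KLD criteria in the patch
local model = nn.Linear(10, 1)
local criterion = nn.MSECriterion()
local parameters, gradients = model:getParameters()

local input  = torch.randn(5, 10)
local target = torch.randn(5, 1)

--opfunc(x) must return the objective at x and the gradient tensor
local opfunc = function(x)
    if x ~= parameters then
        parameters:copy(x)
    end
    model:zeroGradParameters()

    local output = model:forward(input)
    local loss = criterion:forward(output, target)
    model:backward(input, criterion:backward(output, target))

    return loss, gradients
end

local config = {learningRate = 0.03}
local state = {}  --adagrad keeps its squared-gradient history in here

for i = 1, 5 do
    --parameters is updated in place; fs[1] is the loss before the update
    local _, fs = optim.adagrad(opfunc, parameters, config, state)
    print(string.format('step %d: loss %.6f', i, fs[1]))
end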