From 36e07615dedc1af2784c4af5b32dd189133f2f56 Mon Sep 17 00:00:00 2001
From: y0ast
Date: Sat, 18 Oct 2014 11:45:50 +0200
Subject: [PATCH] move to torch7 adagrad

---
 .gitignore   |  1 +
 binaryva.lua | 86 +++++++++++++++++++++++++++++-----------------------
 load.lua     | 17 ++++++-----
 to-do.txt    |  5 ---
 4 files changed, 58 insertions(+), 51 deletions(-)
 delete mode 100644 to-do.txt

diff --git a/.gitignore b/.gitignore
index 731cf21..33d8176 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 params
+save
diff --git a/binaryva.lua b/binaryva.lua
index 088e56c..9e55651 100644
--- a/binaryva.lua
+++ b/binaryva.lua
@@ -3,10 +3,10 @@ require 'sys'
 require 'torch'
 require 'nn'
 require 'xlua'
+require 'optim'
 
 --Packages necessary for SGVB
 require 'Reparametrize'
-require 'BCECriterion'
 require 'KLDCriterion'
 
 --Custom Linear module to support different reset function
@@ -15,22 +15,16 @@ require 'LinearVA'
 --For loading data files
 require 'load'
 
---For saving weights and biases
-require 'hdf5'
-require 'adagrad'
-
-data = load28('datasets/mnist.hdf5')
+data = load32()
 
 dim_input = data.train:size(2)
-dim_hidden = 2
+dim_hidden = 10
 hidden_units_encoder = 400
 hidden_units_decoder = 400
 
 batchSize = 100
-learningRate = 0.03
-adaGradInitRounds = 10
 
 torch.manualSeed(1)
 
 --Does not seem to do anything
@@ -63,28 +57,16 @@ va:add(nn.Sigmoid())
 
 --Binary cross entropy term
 BCE = nn.BCECriterion()
+BCE.sizeAverage = false
 KLD = nn.KLDCriterion()
 
-opfunc = function(batch)
-    va:zeroGradParameters()
-
-    f = va:forward(batch)
-    err = BCE:forward(f, batch)
-    df_dw = BCE:backward(f, batch)
-    va:backward(batch,df_dw)
-
-    KLDerr = KLD:forward(va:get(1).output, batch)
-    de_dw = KLD:backward(va:get(1).output, batch)
-    encoder:backward(batch,de_dw)
+parameters, gradients = va:getParameters()
+config = {
+    learningRate = -0.03,
+}
-
-    lowerbound = err + KLDerr
-    weights, grads = va:parameters()
-
-    return weights, grads, lowerbound
-end
-
-h = adaGradInit(data.train, opfunc, adaGradInitRounds)
+state = {}
 
 epoch = 0
@@ -94,7 +76,11 @@ while true do
     local time = sys.clock()
     local shuffle = torch.randperm(data.train:size(1))
 
-    for i = 1, data.train:size(1), batchSize do
+    --Make sure batches are always batchSize
+    local N = data.train:size(1) - (data.train:size(1) % batchSize)
+    local N_test = data.test:size(1) - (data.test:size(1) % batchSize)
+
+    for i = 1, N, batchSize do
         local iend = math.min(data.train:size(1),i+batchSize-1)
         xlua.progress(iend, data.train:size(1))
 
@@ -106,20 +92,44 @@ while true do
             k = k + 1
         end
 
-        batchlowerbound = adaGradUpdate(batch, opfunc)
-        lowerbound = lowerbound + batchlowerbound
+        local opfunc = function(x)
+            if x ~= parameters then
+                parameters:copy(x)
+            end
+
+            va:zeroGradParameters()
+
+            local f = va:forward(batch)
+            local err = BCE:forward(f, batch)
+            local df_dw = BCE:backward(f, batch)
+            va:backward(batch,df_dw)
+
+            local KLDerr = KLD:forward(va:get(1).output, batch)
+            local de_dw = KLD:backward(va:get(1).output, batch)
+            encoder:backward(batch,de_dw)
+
+            lowerbound = err + KLDerr
+
+            return lowerbound, gradients
+        end
+
+        x, batchlowerbound = optim.adagrad(opfunc, parameters, config, state)
+        print(batchlowerbound[1])
+        lowerbound = lowerbound + batchlowerbound[1]
     end
 
-    print("\nEpoch: " .. epoch .. " Lowerbound: " .. lowerbound/data.train:size(1) .. " time: " .. sys.clock() - time)
-    if epoch % 2 == 0 then
-        local myFile = hdf5.open('params/epoch_' .. epoch .. '.hdf5', 'w')
+    print("\nEpoch: " .. epoch .. " Lowerbound: " .. lowerbound/N .. " time: " .. sys.clock() - time)
 
-        myFile:write('wtanh', va:get(3).weight)
-        myFile:write('btanh', va:get(3).bias)
-        myFile:write('wsig', va:get(5).weight)
-        myFile:write('bsig', va:get(5).bias)
+    if lowerboundlist then
+        lowerboundlist = torch.cat(lowerboundlist,torch.Tensor(1,1):fill(lowerbound/N),1)
+    else
+        lowerboundlist = torch.Tensor(1,1):fill(lowerbound/N)
+    end
 
-        myFile:close()
+    if epoch % 2 == 0 then
+        torch.save('save/parameters.t7', parameters)
+        torch.save('save/state.t7', state)
+        torch.save('save/lowerbound.t7', torch.Tensor(lowerboundlist))
     end
 end
diff --git a/load.lua b/load.lua
index 840cee9..f9da5c3 100644
--- a/load.lua
+++ b/load.lua
@@ -1,16 +1,17 @@
-require 'hdf5'
-
 function load32()
-    train = torch.load('datasets/train_32x32.t7', 'ascii')
-    test = torch.load('datasets/test_32x32.t7', 'ascii')
+    data = {}
+    data.train = torch.load('datasets/train_32x32.t7', 'ascii').data
+    data.test = torch.load('datasets/test_32x32.t7', 'ascii').data
 
     --Convert training data to floats
-    train.data = train.data:double()
-    test.data = test.data:double()
+    data.train = data.train:double()
+    data.test = data.test:double()
 
     --Rescale to 0..1 and invert
-    train.data:div(255):resize(60000,1024)
-    test.data:div(255):resize(10000,1024)
+    data.train:div(255):resize(60000,1024)
+    data.test:div(255):resize(10000,1024)
+
+    return data
 end
 
 function load28(path)
diff --git a/to-do.txt b/to-do.txt
deleted file mode 100644
index 64216b5..0000000
--- a/to-do.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-- Adaptive learning rate? Come up with a way to increase learning at start
-- Write plotting code for lowerbound
-- more consistent naming for criterion/parts of model
-- get test lowerbound and save
-- Add tests with Jacobian
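
Note on the torch7 optim convention the patch adopts: optim.adagrad(opfunc, x, config, state)
takes a closure opfunc(x) that returns the objective evaluated at x together with the gradient
tensor, where x and the gradient tensor come from module:getParameters(); adagrad accumulates
its per-parameter squared-gradient history in the state table across calls, and it returns the
updated parameters plus a table {f(x)}. The patch sets learningRate = -0.03, presumably because
its closure returns the variational lower bound, which is to be maximized, so the negated rate
turns adagrad's descent step into ascent. Below is a minimal sketch of the convention; the
linear model, MSE criterion and random data are hypothetical stand-ins, not part of the patch.

require 'torch'
require 'nn'
require 'optim'

--Hypothetical stand-ins for the autoencoder and BCE/KLD criteria in the patch
local model = nn.Linear(10, 1)
local criterion = nn.MSECriterion()
local parameters, gradients = model:getParameters()

local input  = torch.randn(5, 10)
local target = torch.randn(5, 1)

--opfunc(x) must return the objective at x and the gradient tensor
local opfunc = function(x)
    if x ~= parameters then
        parameters:copy(x)
    end
    model:zeroGradParameters()

    local output = model:forward(input)
    local loss = criterion:forward(output, target)
    model:backward(input, criterion:backward(output, target))

    return loss, gradients
end

local config = {learningRate = 0.03}
local state = {}  --adagrad keeps its squared-gradient history in here

for i = 1, 5 do
    --parameters is updated in place; fs[1] is the loss before the update
    local _, fs = optim.adagrad(opfunc, parameters, config, state)
    print(string.format('step %d: loss %.6f', i, fs[1]))
end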