In [1]:
import os
import sys

sys.path.append('../')
from Linear import Linear
from ReLU import ReLU
from Model import Model
from Criterion import Criterion

import torch
import torchfile
import numpy as np

In [5]:
class SGDOptimizer():
    
    def __init__(self, model, lr, momentum=0.0, decay=None):
        
        self.model = model
        
        self.lr0 = lr
        self.lr = lr
        
        self.momentum = momentum
        self.init_momentum_params()
        
        print(decay)
        self.decay = decay
        
        self.num_steps = 0
        
    def decay_lr(self):
        """
        exponential decay of learning rate
        """
        if self.decay is not None:
            self.lr = self.lr0 * np.exp(self.dacay * self.num_steps)
    
    def init_momentum_params(self):
        """
        initialize zero values for retained grads
        """
        self.vW = []
        self.vB = []
        for layer in self.model.Layers:
            if layer.has_params:
                self.vW.append(np.zeros_like(layer.W))
                self.vB.append(np.zeros_like(layer.B))
    
    def step(self):
        index = 0
        for layer in self.model.Layers:
            if layer.has_params:
                self.vW[index] = self.momentum * self.vW[index] - self.lr * layer.gradW
                self.vB[index] = self.momentum * self.vB[index] - self.lr * layer.gradB
                layer.W += self.vW[index]
                layer.B += self.vB[index]
                index += 1
        
        self.num_steps += 1
        self.decay_lr()
        
model = Model([
    Linear(20, 10),
    ReLU(),
    Linear(10, 5),
    ReLU(),
    Linear(5, 2)]
)
optimizer = SGDOptimizer(model, lr=0.001, decay=1e-5)
loss = Criterion()

batch_size = 4
num_classes = 2

inp = np.random.rand(batch_size, 20)
target = np.random.randint(num_classes, size=(batch_size, 1)) + 1

# zero the parameter gradients
model.clearGradParam()

# forward + backward
out = model.forward(inp)
print(loss.forward(out, target))
gradInput = loss.backward(out, target)
model.backward(inp, gradInput)

# optimize
optimizer.step()

1e-05
0.8332343330796017


AttributeError: 'SGDOptimizer' object has no attribute 'dacay'

In [None]:
sample_dir = '/Users/vinayak/pro/acads/SEM8/CS763/Assignment3/info/'

w = torchfile.load(os.path.join(sample_dir, 'W_sample_1.bin'))
b = torchfile.load(os.path.join(sample_dir, 'B_sample_1.bin'))
inp = torchfile.load(os.path.join(sample_dir, 'input_sample_1.bin'))
grad_output = torchfile.load(os.path.join(sample_dir, 'gradOutput_sample_2.bin'))
output = torchfile.load(os.path.join(sample_dir, 'output_sample_1.bin'))
print(w[0].T.shape == (192, 10))
print(b[0].shape == (10,))
inp = inp.reshape(-1, (192))
print(inp.shape)
print(grad_output.shape)