In [3]:
# Load IPython's autoreload extension; mode 2 re-imports edited modules so
# changes to imported project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
from unoai.imports import *
from unoai.data.datasets import *
from unoai.train import *

In [5]:
import shutil
import gc
import contextlib
import timeit

In [6]:
# Short alias for TensorFlow's file-system API; used later to create the model directory.
gfile = tf.io.gfile


In [7]:
# Directory handed to get_cifar10 as `ds_dir`.
DATASET_LOC = "datasets/cifar10"
# Output directory for this run; a later cell deletes and recreates it.
MODEL_LOC = "models/cifar10"
# Batch size handed to get_cifar10 and used to derive the number of steps per epoch.
BATCH_SIZE = 512
# Presumably the image height/width and channel count.
# NOTE(review): neither value is referenced in the cells visible here.
RESOLUTION = (32, 32)
NUM_CHANNELS = 3
# Number of training examples; used to derive the number of steps per epoch.
NUM_TRAIN = 50000

In [8]:
# NOTE(review): presumably the SGD momentum coefficient; confirm it reaches the optimizer.
MOMENTUM = 0.9
# NOTE(review): not referenced in the cells visible here; confirm where weight decay is applied.
WEIGHT_DECAY = 0.000125
# Peak learning rate of the one-cycle schedule (`lr_max`) and the optimizer's initial learning rate.
LEARNING_RATE = 0.9
# Total number of training epochs.
EPOCHS = 15
# Number of epochs spent in the warm-up phase of the learning-rate schedule.
WARMUP = 5

In [9]:
# Start every run from an empty model directory.
# The check and the delete go through gfile as well, matching the gfile.makedirs
# call below; os.path / shutil only understand local paths, so a non-local
# MODEL_LOC would previously have skipped the cleanup silently.
if gfile.isdir(MODEL_LOC):
    gfile.rmtree(MODEL_LOC)
gfile.makedirs(MODEL_LOC)

In [10]:
# Load the train/test datasets, batched with BATCH_SIZE and with normalize=True.
# NOTE(review): get_cifar10 comes from one of the unoai star-imports (presumably
# unoai.data.datasets); the exact type of the returned datasets is not visible here.
train_data, test_data = get_cifar10(ds_dir=DATASET_LOC, batch_size=BATCH_SIZE, normalize=True)

In [11]:
def init_pytorch(shape, dtype=tf.float32, partition_info=None):
  """Kernel initializer drawing uniformly from [-1/sqrt(fan), 1/sqrt(fan)].

  `fan` is the product of every dimension of `shape` except the last one.
  (The name suggests this mirrors PyTorch's default layer init -- not verified here.)

  Args:
    shape: shape of the tensor to create.
    dtype: dtype of the returned tensor.
    partition_info: accepted for initializer-signature compatibility; unused.

  Returns:
    A tensor of the requested shape filled with uniform random values.
  """
  fan_in = np.prod(shape[:-1])
  limit = 1 / math.sqrt(fan_in)
  return tf.random.uniform(shape, minval=-limit, maxval=limit, dtype=dtype)

class ConvBN(tf.keras.Model):
  """3x3 convolution (no bias) followed by batch normalization and ReLU.

  Args:
    c_out: number of convolution filters (output channels).
    virtual_batch_size: forwarded to `BatchNormalization`. `None` (the default)
      normalizes over the whole batch; an integer enables "ghost" batch norm
      over sub-batches of that size, which per the Keras docs must evenly
      divide the actual batch size at execution time.
  """
  def __init__(self, c_out, virtual_batch_size=None):
    super().__init__()
    self.conv = tf.keras.layers.Conv2D(filters=c_out, kernel_size=3, padding="SAME", kernel_initializer=init_pytorch, use_bias=False)
    # Pass the caller's value through; it used to be hard-coded to None, which
    # silently ignored the `virtual_batch_size` argument.
    self.bn = tf.keras.layers.BatchNormalization(momentum=0.9, epsilon=1e-5, virtual_batch_size=virtual_batch_size)

  def call(self, inputs):
    """Apply conv -> batch norm -> ReLU to `inputs`."""
    return tf.nn.relu(self.bn(self.conv(inputs)))

class ConvPoolBNAct(tf.keras.Model):
  """3x3 convolution (no bias) -> max-pool -> batch normalization -> ReLU.

  Pooling happens before normalization, so batch norm runs on the pooled
  (smaller) feature map.

  Args:
    c_out: number of convolution filters (output channels).
    virtual_batch_size: forwarded to `BatchNormalization`. `None` (the default)
      normalizes over the whole batch; an integer enables "ghost" batch norm
      over sub-batches of that size, which per the Keras docs must evenly
      divide the actual batch size at execution time.
  """
  def __init__(self, c_out, virtual_batch_size=None):
    super().__init__()
    self.conv = tf.keras.layers.Conv2D(filters=c_out, kernel_size=3, padding="SAME", kernel_initializer=init_pytorch, use_bias=False)
    # Pass the caller's value through; it used to be hard-coded to None, which
    # silently ignored the `virtual_batch_size` argument.
    self.bn = tf.keras.layers.BatchNormalization(momentum=0.9, epsilon=1e-5, virtual_batch_size=virtual_batch_size)
    self.pool = tf.keras.layers.MaxPool2D()

  def call(self, inputs):
    """Apply conv -> max-pool -> batch norm -> ReLU to `inputs`."""
    return tf.nn.relu(self.bn(self.pool(self.conv(inputs))))

class ResBlk(tf.keras.Model):
  """Down-sampling block with an optional two-layer residual branch.

  The forward pass runs `ConvPoolBNAct` and, when `res` is true, adds the
  output of two stacked `ConvBN` layers back onto that result.

  Args:
    c_out: number of output channels for every convolution in the block.
    pool: pooling layer; stored on the instance but not used by `call`.
    res: whether to add the residual branch.

  Note: `self.conv_bn` is also constructed but is not used by `call`.
  """
  def __init__(self, c_out, pool, res = False):
    super().__init__()
    # Both main-path sub-layers are constructed with 8 as their virtual_batch_size argument.
    self.conv_bn = ConvBN(c_out, 8)
    self.conv_pool_bn_act = ConvPoolBNAct(c_out, 8)
    self.pool = pool
    self.res = res
    if res:
      # Residual-branch layers only exist when the branch is enabled.
      self.res1 = ConvBN(c_out)
      self.res2 = ConvBN(c_out)

  def call(self, inputs):
    """Return the block output for `inputs`."""
    out = self.conv_pool_bn_act(inputs)
    if not self.res:
      return out
    branch = self.res2(self.res1(out))
    return out + branch

class DavidNet(tf.keras.Model):
  """Small residual CNN producing 10 scaled logits.

  Layout: ConvBN(c) -> ResBlk(3c, residual) -> ResBlk(6c) -> ResBlk(9c, residual)
  -> global max-pool -> bias-free Dense(10), with the result multiplied by `weight`.

  Args:
    c: base channel count.
    weight: constant factor applied to the Dense output.
  """
  def __init__(self, c=32, weight=0.125):
    super().__init__()
    # One pooling layer instance is handed to all three blocks.
    shared_pool = tf.keras.layers.MaxPooling2D()
    self.init_conv_bn = ConvBN(c)
    self.blk1 = ResBlk(c*3, shared_pool, res = True)
    self.blk2 = ResBlk(c*6, shared_pool)
    self.blk3 = ResBlk(c*9, shared_pool, res = True)
    self.pool = tf.keras.layers.GlobalMaxPool2D()
    self.linear = tf.keras.layers.Dense(10, kernel_initializer=init_pytorch, use_bias=False)
    self.weight = weight

  def call(self, x):
    """Run the network on `x` and return the scaled Dense output."""
    features = self.init_conv_bn(x)
    for block in (self.blk1, self.blk2, self.blk3):
      features = block(features)
    pooled = self.pool(features)
    logits = self.linear(pooled)
    return logits * self.weight

In [13]:
class CosineAnnealer:
    """Stateful half-cosine interpolation from `start` to `end` over `steps` calls.

    Each call to `step()` advances an internal counter `n` and returns the
    value for that position; the first call therefore returns the value at
    n == 1, not `start` itself.
    """

    def __init__(self, start, end, steps):
        self.start, self.end, self.steps = start, end, steps
        self.n = 0

    def step(self):
        """Advance one step and return the annealed value."""
        self.n += 1
        progress = self.n / self.steps
        # cos(pi * progress) + 1 falls from 2 to 0 as progress goes 0 -> 1.
        cosine_term = np.cos(np.pi * progress) + 1
        half_span = (self.start - self.end) / 2.
        return self.end + half_span * cosine_term

class LinearAnnealer:
    """Stateful linear interpolation from `start` to `end` over `steps` calls.

    The per-step increment is fixed at construction. Each call to `step()`
    advances an internal counter `n` and returns `start + incr * n`, so the
    value equals `end` after exactly `steps` calls and keeps extrapolating
    past `end` if called more often.
    """

    def __init__(self, start, end, steps):
        self.start, self.end, self.steps = start, end, steps
        self.incr = (self.end - self.start) / self.steps
        self.n = 0

    def step(self):
        """Advance one step and return the annealed value."""
        self.n += 1
        offset = self.incr * self.n
        return self.start + offset

class OneCycleScheduler(tf.keras.callbacks.Callback):
    """Two-phase ("one cycle") learning-rate / momentum schedule, updated every batch.

    Phase 1 moves the learning rate linearly from `lr_min` to `lr_max` while
    momentum moves from `mom_max` to `mom_min`; phase 2 moves the learning rate
    from `lr_max` to `final_lr` while momentum returns to `mom_max`.

    Args:
        lr_max: peak learning rate.
        steps: total number of training batches covered by the schedule.
        lr_min: initial learning rate; defaults to `lr_max / div_factor`.
        final_lr: learning rate at the end of phase 2; defaults to
            `lr_max / (div_factor * 1e4)`.
        warmup_steps: length of phase 1 in batches; defaults to
            `steps * phase_1_pct`.
        mom_min: lowest momentum (reached at the end of phase 1).
        mom_max: highest momentum (used at the start and at the end).
        phase_1_pct: fraction of `steps` used for phase 1 when `warmup_steps`
            is not given.
        div_factor: divisor used to derive the default `lr_min` / `final_lr`.
        optimizer: optional optimizer to drive directly. When omitted the
            callback falls back to `self.model.optimizer`, which is only
            available if the training loop attaches a model that has one.

    Attributes:
        lrs, moms: values observed at the start of every training batch.
    """
    def __init__(self, lr_max, steps, lr_min=None, final_lr=None, warmup_steps=None, mom_min=0.85, mom_max=0.95, phase_1_pct=0.3, div_factor=25., optimizer=None):
        super(OneCycleScheduler, self).__init__()
        if lr_min is None:       lr_min = lr_max / div_factor
        if final_lr is None:     final_lr = lr_max / (div_factor * 1e4)
        if warmup_steps is None: phase_1_steps = steps * phase_1_pct
        else:                    phase_1_steps = warmup_steps
        phase_2_steps = steps - phase_1_steps

        self.phase_1_steps = phase_1_steps
        self.phase_2_steps = phase_2_steps
        self.phase = 0
        self.step = 0

        # phases[p] == [learning-rate annealer, momentum annealer] for phase p.
        self.phases = [[LinearAnnealer(lr_min, lr_max, phase_1_steps), LinearAnnealer(mom_max, mom_min, phase_1_steps)],
                [LinearAnnealer(lr_max, final_lr, phase_2_steps), LinearAnnealer(mom_min, mom_max, phase_2_steps)]]

        self.lrs = []
        self.moms = []

        self._optimizer = optimizer
        # Hyper-parameter names we have already warned about (warn once each).
        self._warned = set()

    def on_train_begin(self, logs=None):
        """Reset the schedule to its first step and apply the starting values."""
        self.phase = 0
        self.step = 0
        # Rewind the annealers as well, so a second training run does not
        # continue from where the previous one stopped.
        for lr_annealer, mom_annealer in self.phases:
            lr_annealer.n = 0
            mom_annealer.n = 0

        self.set_lr(self.lr_schedule().start)
        self.set_momentum(self.mom_schedule().start)

    def on_train_batch_begin(self, batch, logs=None):
        """Record the values the upcoming batch will be trained with."""
        self.lrs.append(self.get_lr())
        self.moms.append(self.get_momentum())

    def on_train_batch_end(self, batch, logs=None):
        """Advance the schedule by one batch and apply the new values."""
        self.step += 1
        # Switch only once phase 1 has taken all of its steps. With `>=` the
        # warm-up stopped one step short of lr_max and phase 2 took one step
        # too many, overshooting final_lr (negative for final_lr close to 0).
        if self.step > self.phase_1_steps:
            self.phase = 1

        self.set_lr(self.lr_schedule().step())
        self.set_momentum(self.mom_schedule().step())

    def on_epoch_begin(self, epoch, logs=None):
        print(f"LR: {self.get_lr()}")

    def _resolve_optimizer(self):
        """Return the explicit optimizer if one was given, else the model's.

        Raises AttributeError when neither is reachable (e.g. no model attached).
        """
        if self._optimizer is not None:
            return self._optimizer
        return self.model.optimizer

    def _warn_unavailable(self, name):
        """Emit a single warning per hyper-parameter that cannot be accessed."""
        if name in self._warned:
            return
        import warnings
        self._warned.add(name)
        warnings.warn(
            f"OneCycleScheduler cannot access optimizer attribute '{name}'; "
            "its schedule is not being applied. Pass `optimizer=` or make sure "
            "the callback's model has an optimizer.",
            RuntimeWarning,
        )

    def get_lr(self):
        """Return the optimizer's current learning rate, or None if unavailable."""
        try:
            return tf.keras.backend.get_value(self._resolve_optimizer().lr)
        except AttributeError:
            self._warn_unavailable("lr")
            return None

    def get_momentum(self):
        """Return the optimizer's current momentum, or None if unavailable."""
        try:
            return tf.keras.backend.get_value(self._resolve_optimizer().momentum)
        except AttributeError:
            self._warn_unavailable("momentum")
            return None

    def set_lr(self, lr):
        """Set the optimizer's learning rate (best effort; warns once on failure)."""
        try:
            tf.keras.backend.set_value(self._resolve_optimizer().lr, lr)
        except AttributeError:
            self._warn_unavailable("lr")

    def set_momentum(self, mom):
        """Set the optimizer's momentum (best effort; warns once on failure)."""
        try:
            tf.keras.backend.set_value(self._resolve_optimizer().momentum, mom)
        except AttributeError:
            self._warn_unavailable("momentum")

    def lr_schedule(self):
        """Return the learning-rate annealer of the current phase."""
        return self.phases[self.phase][0]

    def mom_schedule(self):
        """Return the momentum annealer of the current phase."""
        return self.phases[self.phase][1]

    def plot(self):
        """Plot the recorded learning rates and momenta side by side."""
        ax = plt.subplot(1, 2, 1)
        ax.plot(self.lrs)
        ax.set_title('Learning Rate')
        ax = plt.subplot(1, 2, 2)
        ax.plot(self.moms)
        ax.set_title('Momentum')

In [None]:
# Length of the schedule in optimizer steps (batches): the last, partial batch
# of an epoch counts as a full step, hence the ceil.
steps_per_epoch = np.ceil(NUM_TRAIN / BATCH_SIZE)
steps = steps_per_epoch * EPOCHS
warmup_steps = steps_per_epoch * WARMUP
lr_min = 1e-8
lr_schedule = OneCycleScheduler(lr_max=LEARNING_RATE, steps=steps, lr_min=lr_min, final_lr=lr_min, warmup_steps=warmup_steps)

# Keras SGD only enables its momentum update when constructed with momentum > 0,
# so MOMENTUM has to be passed here for the scheduler's momentum changes to have
# any effect. `learning_rate` replaces the deprecated `lr` keyword.
optimizer = tf.keras.optimizers.SGD(learning_rate=LEARNING_RATE, momentum=MOMENTUM)
loss_fn = tf.nn.sparse_softmax_cross_entropy_with_logits

callbacks = [lr_schedule]

# NOTE(review): the recorded output of this cell shows "LR: None", i.e. the
# scheduler could not read a learning rate from an optimizer. Confirm that
# train_model_custom attaches a model with an optimizer to its callbacks.
model = train_model_custom(train_ds=train_data, test_ds=test_data,
                           epochs=EPOCHS, model_fn=DavidNet,
                           opt=optimizer, loss_fn=loss_fn, callbacks=callbacks)

lr_schedule.plot()

  0%|          | 0/15 [00:00<?, ?it/s]

LR: None


  0%|          | 0/2 [00:00<?, ?it/s]

KeyboardInterrupt: 