# Load libraries

In [1]:
# Clone the OctConv-TFKeras repository and move its top-level *.py files into the
# working directory (presumably where the `models` module imported later comes from).
# NOTE(review): only *.py files are moved; everything else stays under OctConv-TFKeras/.
!git clone https://github.com/koshian2/OctConv-TFKeras
!mv OctConv-TFKeras/*.py ./

Cloning into 'OctConv-TFKeras'...
remote: Enumerating objects: 6, done.[K
remote: Counting objects: 100% (6/6), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 66 (delta 1), reused 0 (delta 0), pack-reused 60[K
Unpacking objects: 100% (66/66), done.


In [2]:
# Pin TensorFlow to 1.13.1; the training cell uses tf.contrib (TPU / cluster resolver) APIs.
!pip install tensorflow==1.13.1

Collecting tensorflow==1.13.1
[?25l  Downloading https://files.pythonhosted.org/packages/77/63/a9fa76de8dffe7455304c4ed635be4aa9c0bacef6e0633d87d5f54530c5c/tensorflow-1.13.1-cp36-cp36m-manylinux1_x86_64.whl (92.5MB)
[K     |████████████████████████████████| 92.5MB 1.3MB/s 
Collecting tensorboard<1.14.0,>=1.13.0 (from tensorflow==1.13.1)
[?25l  Downloading https://files.pythonhosted.org/packages/0f/39/bdd75b08a6fba41f098b6cb091b9e8c7a80e1b4d679a581a0ccd17b10373/tensorboard-1.13.1-py3-none-any.whl (3.2MB)
[K     |████████████████████████████████| 3.2MB 30.7MB/s 
[?25hCollecting tensorflow-estimator<1.14.0rc0,>=1.13.0 (from tensorflow==1.13.1)
[?25l  Downloading https://files.pythonhosted.org/packages/bb/48/13f49fc3fa0fdf916aa1419013bb8f2ad09674c275b4046d5ee669a46873/tensorflow_estimator-1.13.0-py2.py3-none-any.whl (367kB)
[K     |████████████████████████████████| 368kB 41.5MB/s 
Collecting mock>=2.0.0 (from tensorflow-estimator<1.14.0rc0,>=1.13.0->tensorflow==1.13.1)
  Downloading

# Train OctConv Wide ResNet
* alpha = 0 -> normal wide res-net
* alpha > 0 -> OctConv wide res-net

Training takes about 2 hours.

In [11]:
import tensorflow as tf
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler, History
from tensorflow.contrib.tpu.python.tpu import keras_support
from models import *

from keras.datasets import cifar10
from keras.utils import to_categorical
import pickle, os, time
import matplotlib.pyplot as plt

In [None]:
def lr_scheduler(epoch):
    """Return the learning rate to use for the given epoch index.

    The rate starts at 0.1 and is divided by 5 once for every milestone
    (100, 150, 200) that `epoch` has reached or passed.
    """
    rate = 0.1
    for milestone in (100, 150, 200):
        if epoch >= milestone:
            rate = rate / 5.0
    return rate

In [2]:
# Load the CIFAR-10 train and test splits as (images, labels) pairs.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Convert the labels with to_categorical, as required by the
# "categorical_crossentropy" loss used when the model is compiled.
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

# Training generator: rescale by 1/255 and augment with horizontal flips and
# width/height shift ranges of 4/32.
train_gen = ImageDataGenerator(
    rescale=1.0/255,
    horizontal_flip=True,
    width_shift_range=4.0/32.0,
    height_shift_range=4.0/32.0,
)

# Test generator: rescaling only, no augmentation.
test_gen = ImageDataGenerator(rescale=1.0/255)

# Restrict TensorFlow logging to FATAL messages.
tf.logging.set_verbosity(tf.logging.FATAL)

In [3]:
# Choose the architecture: alpha <= 0 builds the plain Wide ResNet, any positive
# alpha builds the OctConv Wide ResNet with that alpha.
alpha = 0.75
if alpha <= 0:
    model = create_normal_wide_resnet()
else:
    model = create_octconv_wide_resnet(alpha)
model.compile(SGD(0.1, momentum=0.9), "categorical_crossentropy", ["acc"])
model.summary()

# Convert to a TPU model only when a TPU address is available in the environment.
# Indexing os.environ directly raised a bare KeyError when COLAB_TPU_ADDR was
# missing; now the cell falls back to training on the default device instead.
tpu_addr = os.environ.get("COLAB_TPU_ADDR")
if tpu_addr:
    tpu_grpc_url = "grpc://" + tpu_addr
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu_grpc_url)
    strategy = keras_support.TPUDistributionStrategy(tpu_cluster_resolver)
    model = tf.contrib.tpu.keras_to_tpu_model(model, strategy=strategy)
else:
    print("COLAB_TPU_ADDR is not set; training on the default device instead of a TPU.")

batch_size = 128
scheduler = LearningRateScheduler(lr_scheduler)
hist = History()

# Time the whole training run so the elapsed seconds can be stored with the metrics.
# NOTE(review): both step counts use floor division, so a final partial batch of the
# training or validation data is skipped in each epoch.
start_time = time.time()
model.fit_generator(train_gen.flow(X_train, y_train, batch_size, shuffle=True),
                    steps_per_epoch=X_train.shape[0]//batch_size,
                    validation_data=test_gen.flow(X_test, y_test, batch_size, shuffle=False),
                    validation_steps=X_test.shape[0]//batch_size,
                    callbacks=[scheduler, hist], max_queue_size=5, epochs=200)
elapsed = time.time() - start_time
print(elapsed)

# Store the per-epoch metrics collected by the History callback plus the wall-clock time.
history = hist.history
history["elapsed"] = elapsed

# NOTE(review): the Results cell reads "results/wrn_cifar_alpha_{alpha}_history",
# not the file written here; confirm how the two are meant to relate.
with open(f"octconv_alpha_{alpha}.pkl", "wb") as fp:
    pickle.dump(history, fp)

Using TensorFlow backend.


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
average_pooling2d (AveragePooli (None, 16, 16, 3)    0           input_1[0][0]                    
__________________________________________________________________________________________________
oct_conv2d (OctConv2D)          [(None, 32, 32, 4),  864         input_1[0][0]                    
                                                                 average_pooling2d[0][0]          
__________________________________________________________________________________________________
batch_normalization_v1 (BatchNo

KeyboardInterrupt: ignored

# Results

In [13]:
# Print the best validation accuracy stored in each saved training history.
# The loop variable is intentionally not named `alpha`, so running this cell does
# not overwrite the `alpha` that configures training.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted history files.
for result_alpha in [0, 0.125, 0.25, 0.5, 0.75]:
    with open(f"results/wrn_cifar_alpha_{result_alpha}_history", "rb") as fp:
        data = pickle.load(fp)
    print(f"For alpha = {result_alpha}:\n Max test accuracy = {max(data['val_acc']):.04}\n")

For alpha = 0:
 Max test accuracy = 0.8822

For alpha = 0.125:
 Max test accuracy = 0.9464

For alpha = 0.25:
 Max test accuracy = 0.9453

For alpha = 0.5:
 Max test accuracy = 0.9364

For alpha = 0.75:
 Max test accuracy = 0.925

