# AlexNet

## Models

![](alexnet.png)

- Uses local response normalization and ReLU activations rather than sigmoid, which makes the network much easier to train
- Heavy data augmentation


In [1]:
import os

import tensorflow as tf
from run_model import run_model_fn, per_device_batch_size
from utils.cifar10_utils import input_fn
from models_utils import conv2d_fixed_padding

In [2]:
# Directory holding the CIFAR-10 data handed to input_fn. Set the
# CIFAR10_DATA_DIR environment variable to run on another machine; the
# fallback is the original hard-coded location.
DATA_DIR = os.environ.get("CIFAR10_DATA_DIR",
                          "/home/karen/workspace/data/cifar10_data")
# Global batch size; converted with per_device_batch_size() before use.
BATCH_SIZE = 128
# Passed to input_fn as num_epochs.
NUM_EPOCHS = 1000

In [18]:
def alexnet_model(inputs, is_training, use_batchnorm, data_format, name="alexnet",
                  num_classes=1000, dropout_rate=0.5):
    """Build an AlexNet-style classifier graph and return its logits.

    Layer stack: conv 11x11/4 -> pool -> [BN] -> pad -> conv 5x5 -> pool -> [BN]
    -> three 3x3 convs -> pool -> flatten -> two 4096-unit dense layers with
    ReLU and dropout -> linear dense layer with ``num_classes`` outputs.

    Args:
        inputs: Rank-4 image batch whose layout matches ``data_format``
            (NCHW for "channels_first", NHWC for "channels_last").
        is_training: Whether the graph is built for training; forwarded to
            batch normalization and dropout.
        use_batchnorm: When truthy, batch normalization follows the first two
            pooling layers; otherwise those layers are skipped.
        data_format: "channels_first" or "channels_last". Applied consistently
            to convolutions, pooling, padding and the batch-norm axis.
        name: Currently unused; kept so existing callers keep working.
        num_classes: Width of the final logits layer. Defaults to 1000 (the
            original hard-coded value); pass 10 for CIFAR-10.
        dropout_rate: Drop probability applied after each hidden dense layer
            while training.

    Returns:
        A ``[batch, num_classes]`` tensor of unnormalized logits.
    """
    channels_first = data_format == "channels_first"
    # batch_normalization must normalize over the channel axis, which is 1 for
    # NCHW and 3 for NHWC (the layer's default of -1 is only right for NHWC).
    channel_axis = 1 if channels_first else 3

    def _max_pool(tensor):
        # Pooling defaults to channels_last, so the layout is passed explicitly.
        return tf.layers.max_pooling2d(tensor, pool_size=3, strides=2,
                                       data_format=data_format)

    def _maybe_batch_norm(tensor):
        if not use_batchnorm:
            return tensor
        return tf.layers.batch_normalization(tensor, axis=channel_axis,
                                             training=is_training)

    def _conv_relu(tensor, filters, kernel_size, strides):
        tensor = conv2d_fixed_padding(tensor, filters, kernel_size, strides, data_format)
        # ReLU is idempotent, so this stays correct even if the helper already
        # applies one. NOTE(review): confirm whether conv2d_fixed_padding activates.
        return tf.nn.relu(tensor)

    inputs = _conv_relu(inputs, 96, 11, 4)
    inputs = _max_pool(inputs)
    inputs = _maybe_batch_norm(inputs)

    # Zero-pad the two spatial dimensions only. Batch and channel dimensions
    # must stay untouched: padding the batch axis would append all-zero samples
    # and make the logits disagree with the labels.
    # NOTE(review): if conv2d_fixed_padding already pads for the 5x5 kernel,
    # this explicit padding enlarges the feature map by 4 -- confirm intent.
    if channels_first:
        paddings = [[0, 0], [0, 0], [2, 2], [2, 2]]
    else:
        paddings = [[0, 0], [2, 2], [2, 2], [0, 0]]
    inputs = tf.pad(inputs, paddings, "CONSTANT")
    inputs = _conv_relu(inputs, 256, 5, 1)
    inputs = _max_pool(inputs)
    inputs = _maybe_batch_norm(inputs)

    inputs = _conv_relu(inputs, 384, 3, 1)
    inputs = _conv_relu(inputs, 384, 3, 1)
    inputs = _conv_relu(inputs, 256, 3, 1)

    inputs = _max_pool(inputs)

    inputs = tf.layers.flatten(inputs)

    # Without a non-linearity the stacked dense layers would collapse into a
    # single linear map, so each hidden layer gets ReLU followed by dropout.
    for _ in range(2):
        inputs = tf.layers.dense(inputs, 4096, activation=tf.nn.relu)
        inputs = tf.layers.dropout(inputs, rate=dropout_rate, training=is_training)

    # The final layer stays linear: it produces logits.
    return tf.layers.dense(inputs, num_classes)

In [19]:
def input_train_fn():
    """Return the training input pipeline for a single-GPU run.

    Takes no arguments so it can be handed to ``Estimator.train`` directly.
    Delegates to ``input_fn`` in training mode, reading from ``DATA_DIR`` for
    ``NUM_EPOCHS`` epochs with the batch size computed by
    ``per_device_batch_size(BATCH_SIZE, 1)``.
    """
    gpu_count = 1
    return input_fn(
        is_training=True,
        data_dir=DATA_DIR,
        batch_size=per_device_batch_size(BATCH_SIZE, gpu_count),
        num_epochs=NUM_EPOCHS,
        num_gpus=gpu_count,
    )

In [20]:
def model_fn(features, labels, mode):
    """Estimator ``model_fn`` that forwards to ``run_model_fn`` for AlexNet.

    The estimator-supplied ``features``, ``labels`` and ``mode`` are passed
    through unchanged, followed by the fixed settings for this experiment:
    the ``alexnet_model`` builder, the run name "classic_alexnet_bn", two
    boolean flags and the "channels_first" data format.
    NOTE(review): the meaning of the two booleans is defined by
    ``run_model_fn``'s signature, which is not visible here -- confirm.
    """
    return run_model_fn(
        features, labels, mode,
        alexnet_model, "classic_alexnet_bn", True,
        False, "channels_first",
    )

In [21]:
# distribution = tf.contrib.distribute.OneDeviceStrategy('device:GPU:0')
# Session settings: let TensorFlow grow its GPU memory allocation on demand
# rather than configuring the session with default GPU options.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
run_config = tf.estimator.RunConfig(session_config=config)

# Estimator wrapping model_fn; checkpoints and summaries are written under
# ./tmp/classic_alexnet_bn.
classifier = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir="./tmp/classic_alexnet_bn",
    config=run_config,
    params=None,
)

INFO:tensorflow:Using config: {'_model_dir': './tmp/classic_alexnet_bn', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': gpu_options {
  allow_growth: true
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f4a34449470>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [22]:
# Short smoke run: train with no extra hooks, capped at max_steps=10.
classifier.train(input_fn=input_train_fn, hooks=[], max_steps=10)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.


ResourceExhaustedError: OOM when allocating tensor of shape [4096,4096] and type float
	 [[Node: dense_1/kernel/Momentum/Initializer/zeros = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [4096,4096] values: [0 0 0]...>, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]

Caused by op 'dense_1/kernel/Momentum/Initializer/zeros', defined at:
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/asyncio/base_events.py", line 1432, in _run_once
    handle._run()
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2909, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-22-c83682a52376>", line 2, in <module>
    max_steps=10)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py", line 363, in train
    loss = self._train_model(input_fn, hooks, saving_listeners)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py", line 843, in _train_model
    return self._train_model_default(input_fn, hooks, saving_listeners)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py", line 856, in _train_model_default
    features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py", line 831, in _call_model_fn
    model_fn_results = self._model_fn(features=features, **kwargs)
  File "<ipython-input-20-902563834681>", line 3, in <lambda>
    False, "channels_first")
  File "/home/karen/workspace/notes/CNN/SampleCNNs/run_model.py", line 126, in run_model_fn
    minimize_op = optimizer.minimize(loss, global_step)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/training/optimizer.py", line 424, in minimize
    name=name)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/training/optimizer.py", line 600, in apply_gradients
    self._create_slots(var_list)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/training/momentum.py", line 77, in _create_slots
    self._zeros_slot(v, "momentum", self._name)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/training/optimizer.py", line 1150, in _zeros_slot
    new_slot_variable = slot_creator.create_zeros_slot(var, op_name)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/training/slot_creator.py", line 181, in create_zeros_slot
    colocate_with_primary=colocate_with_primary)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/training/slot_creator.py", line 155, in create_slot_with_initializer
    dtype)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/training/slot_creator.py", line 65, in _create_slot_var
    validate_shape=validate_shape)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 1317, in get_variable
    constraint=constraint)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 1079, in get_variable
    constraint=constraint)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 425, in get_variable
    constraint=constraint)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 394, in _true_getter
    use_resource=use_resource, constraint=constraint)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 786, in _get_single_variable
    use_resource=use_resource)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 2220, in variable
    use_resource=use_resource)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 2210, in <lambda>
    previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 2193, in default_variable_creator
    constraint=constraint)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 235, in __init__
    constraint=constraint)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 343, in _init_from_args
    initial_value(), name="initial_value", dtype=dtype)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 770, in <lambda>
    shape.as_list(), dtype=dtype, partition_info=partition_info)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/ops/init_ops.py", line 99, in __call__
    return array_ops.zeros(shape, dtype)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1626, in zeros
    output = fill(shape, constant(zero, dtype=dtype), name=name)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 2717, in fill
    "Fill", dims=dims, value=value, name=name)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3392, in create_op
    op_def=op_def)
  File "/home/karen/anaconda3/envs/develop/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1718, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor of shape [4096,4096] and type float
	 [[Node: dense_1/kernel/Momentum/Initializer/zeros = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [4096,4096] values: [0 0 0]...>, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]


### Innovations

1. First use of ReLU
2. Used local response normalization (LRN) layers
3. Heavy data augmentation
4. Dropout 0.5
5. Batch size 128
6. SGD with momentum 0.9
7. Learning rate: 1e-2, divided by 10 manually when validation accuracy plateaus
8. L2 weight decay 5e-4
9. Ensemble of 7 CNNs