In [47]:
import os
# Choose the GPU via environment variables; this cell runs before the cell that imports TensorFlow.
# NOTE(review): with PCI_BUS_ID ordering, "1" presumably means the second card by bus position;
# inside this process the selected card is then listed as GPU:0 — confirm on the host.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="1"

In [48]:
import tensorflow as tf

In [49]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

## AutoKeras doesn't use Network Morphism
https://github.com/keras-team/autokeras/issues/981

In [50]:
import numpy as np
from tensorflow.keras.datasets import cifar10

import autokeras as ak

import keras_tuner

In [51]:
# Load CIFAR-10 and sanity-check the array shapes and the label layout.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print(x_train.shape)  # (50000, 32, 32, 3)
print(y_train.shape)  # (50000, 1)
print(y_train[:3])  # [[6] [9] [9]] -- labels arrive as a column, one class id per row

(50000, 32, 32, 3)
(50000, 1)
[[6]
 [9]
 [9]]


In [52]:
# Initialize the image classifier.
### the model size is not well..
# clf = ak.ImageClassifier(overwrite=True, max_trials=40, max_model_size=500000,
#                         project_name='autokeras_c10_v0', num_classes=10)

# hp.Choice('units', [8, 16, 32])
# nl = keras_tuner.engine.hyperparameters.Choice()

In [53]:
import keras_tuner.engine.hyperparameters as hyp

In [54]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Add, GlobalAveragePooling2D,\
    Dense, Flatten, Conv2D, Lambda,	Input, BatchNormalization, Activation, ZeroPadding2D,\
    MaxPooling2D, AveragePooling2D
from tensorflow.keras.initializers import glorot_uniform

In [55]:
def identity_block(X, f, filters, stage, block):
    """Residual block whose shortcut branch is the untouched input tensor.

    Main branch: 1x1 conv -> BatchNorm -> ReLU -> (f x f) 'same' conv -> BatchNorm.
    The branch output is added to the input and passed through a final ReLU.

    Args:
        X: input tensor; BatchNormalization is applied on axis 3, so channels
            are expected on the last axis of a 4-D tensor.
        f: kernel size of the second convolution.
        filters: pair ``(F1, F2)`` with the filter counts of the first and
            second convolution. The input is added unchanged, so ``F2`` has to
            match the channel count of ``X`` for the addition to be valid.
        stage: stage number, used only to build layer names.
        block: block label (a string), used only to build layer names.

    Returns:
        The output tensor of the block.
    """
    first_filters, second_filters = filters

    # Layer names follow the 'res<stage><block>_branch...' / 'bn<stage><block>_branch...' scheme.
    tag = str(stage) + block
    conv_prefix = 'res' + tag + '_branch'
    bn_prefix = 'bn' + tag + '_branch'

    # Main branch, step 1: pointwise convolution.
    branch = Conv2D(
        filters=first_filters,
        kernel_size=(1, 1),
        strides=(1, 1),
        padding='valid',
        name=conv_prefix + '2a',
        kernel_initializer=glorot_uniform(seed=0),
    )(X)
    branch = BatchNormalization(axis=3, name=bn_prefix + '2a')(branch)
    branch = Activation('relu')(branch)

    # Main branch, step 2: spatial convolution; 'same' padding keeps the size.
    branch = Conv2D(
        filters=second_filters,
        kernel_size=(f, f),
        strides=(1, 1),
        padding='same',
        name=conv_prefix + '2b',
        kernel_initializer=glorot_uniform(seed=0),
    )(branch)
    branch = BatchNormalization(axis=3, name=bn_prefix + '2b')(branch)

    # Skip connection: merge with the original input, then activate.
    merged = Add()([branch, X])
    return Activation('relu')(merged)

In [56]:
def convolutional_block(X, f, filters, stage, block, s=2):
    """Residual block with a projection (1x1 conv + BatchNorm) on the shortcut.

    Main branch: strided 1x1 conv -> BatchNorm -> ReLU -> (f x f) 'same' conv
    -> BatchNorm. The shortcut branch applies its own 1x1 conv with the same
    stride and ``F2`` filters, followed by BatchNorm, so both branches agree in
    shape before they are added. A final ReLU follows the addition.

    Args:
        X: input tensor; BatchNormalization is applied on axis 3, so channels
            are expected on the last axis of a 4-D tensor.
        f: kernel size of the second main-branch convolution.
        filters: pair ``(F1, F2)`` with the filter counts of the first and
            second main-branch convolution; ``F2`` is also used for the
            shortcut projection.
        stage: stage number, used only to build layer names.
        block: block label (a string), used only to build layer names.
        s: stride of the first main-branch convolution and of the shortcut
            projection.

    Returns:
        The output tensor of the block.
    """
    first_filters, second_filters = filters

    # Layer names follow the 'res<stage><block>_branch...' / 'bn<stage><block>_branch...' scheme.
    tag = str(stage) + block
    conv_prefix = 'res' + tag + '_branch'
    bn_prefix = 'bn' + tag + '_branch'

    # Main branch, step 1: pointwise convolution carrying the stride.
    branch = Conv2D(
        filters=first_filters,
        kernel_size=(1, 1),
        strides=(s, s),
        padding='valid',
        name=conv_prefix + '2a',
        kernel_initializer=glorot_uniform(seed=0),
    )(X)
    branch = BatchNormalization(axis=3, name=bn_prefix + '2a')(branch)
    branch = Activation('relu')(branch)

    # Main branch, step 2: spatial convolution at stride 1.
    branch = Conv2D(
        filters=second_filters,
        kernel_size=(f, f),
        strides=(1, 1),
        padding='same',
        name=conv_prefix + '2b',
        kernel_initializer=glorot_uniform(seed=0),
    )(branch)
    branch = BatchNormalization(axis=3, name=bn_prefix + '2b')(branch)

    # Shortcut branch: project the input to the main branch's shape.
    shortcut = Conv2D(
        filters=second_filters,
        kernel_size=(1, 1),
        strides=(s, s),
        padding='valid',
        name=conv_prefix + '1',
        kernel_initializer=glorot_uniform(seed=0),
    )(X)
    shortcut = BatchNormalization(axis=3, name=bn_prefix + '1')(shortcut)

    merged = Add()([branch, shortcut])
    return Activation('relu')(merged)

In [57]:
def ResNet(input_shape=(32, 32, 3), b0=32,
           b1=32, r1=1,
           b2=64, r2=1,
           b3=128, r3=1):
    """Build a small three-stage residual feature extractor.

    Layout: zero-pad -> 3x3 conv stem (``b0`` filters) -> BatchNorm -> ReLU,
    then three stages. Each stage is one ``convolutional_block`` followed by
    ``r<k>`` ``identity_block`` repeats, all with ``b<k>`` filters. The first
    stage uses stride 1 and the other two use stride 2. The network ends in
    global average pooling; no classification layer is attached, so the output
    is a feature vector with ``b3`` channels.

    A fourth stage is intentionally absent (it is generally not used for
    CIFAR-sized inputs).

    Args:
        input_shape: shape of one input sample, without the batch axis.
        b0: number of filters in the stem convolution.
        b1, b2, b3: number of filters in stages 2, 3 and 4 (layer-name numbering).
        r1, r2, r3: number of identity blocks appended to each stage; 0 means
            the stage consists of its convolutional block only.

    Returns:
        A Keras ``Model`` named 'ResNet50' (the name is a fixed label; the
        architecture is the small one described above).
    """
    X_input = Input(input_shape)

    # Stem: pad by one pixel so the unpadded 3x3 convolution keeps the spatial size.
    stem = ZeroPadding2D((1, 1))(X_input)
    stem = Conv2D(b0, (3, 3), strides=(1, 1), name='conv1',
                  kernel_initializer=glorot_uniform(seed=0))(stem)
    stem = BatchNormalization(axis=3, name='bn_conv1')(stem)
    features = Activation('relu')(stem)

    # (stage number used in layer names, filter count, identity repeats, stride)
    stage_plan = (
        (2, b1, r1, 1),
        (3, b2, r2, 2),
        (4, b3, r3, 2),
    )
    for stage, width, repeats, stride in stage_plan:
        features = convolutional_block(features, f=3, filters=[width, width],
                                       stage=stage, block='a', s=stride)
        # Identity blocks are labelled 'b', 'c', ... within the stage.
        for offset in range(repeats):
            features = identity_block(features, 3, [width, width],
                                      stage=stage, block=chr(ord('b') + offset))

    pooled = GlobalAveragePooling2D()(features)

    return Model(inputs=X_input, outputs=pooled, name='ResNet50')

In [58]:
# model = ResNet((32, 32, 3))

In [59]:
model = ResNet((32, 32, 3), b0=8, b1=8, r1=0, b2=16, r2=0, b3=32, r3=0) #, b4=64, r4=0)

In [60]:
model.summary()

Model: "ResNet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D (None, 34, 34, 3)    0           input_3[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 32, 32, 8)    224         zero_padding2d_1[0][0]           
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 32, 32, 8)    32          conv1[0][0]                      
___________________________________________________________________________________________

In [61]:
model(np.random.randn(2, 32, 32, 3)).shape

TensorShape([2, 64])

In [62]:
# tf.keras.utils.plot_model(
#     model,
#     to_file="model.png",
#     show_shapes=False,
#     show_dtype=False,
#     show_layer_names=True,
#     rankdir="TB",
#     expand_nested=False,
#     dpi=96,
# )

In [63]:
class AutoResNet(ak.Block):
    """AutoKeras block that exposes the ``ResNet`` builder's sizes as hyperparameters.

    Tunable values (names as registered with the tuner):
        B0, B1: 8..64 in steps of 8   (stem / first-stage filters)
        B2:     16..64 in steps of 8  (second-stage filters)
        B3:     16..128 in steps of 8 (third-stage filters)
        R1..R3: 0..4                  (identity-block repeats per stage)
    """

    def build(self, hp, inputs=None):
        """Create the ResNet for the sampled hyperparameters and apply it.

        Args:
            hp: hyperparameter container supplied by the tuner.
            inputs: the incoming node(s); only the first flattened entry is used.

        Returns:
            The output tensor of the ResNet applied to the input node.
        """
        # Get the input_node from inputs (it may arrive wrapped in a list/nest).
        input_node = tf.nest.flatten(inputs)[0]

        # Registration order is kept stable so the search space layout does not change.
        b0 = hp.Int("B0", min_value=8, max_value=64, step=8, default=8)
        b1 = hp.Int("B1", min_value=8, max_value=64, step=8, default=8)
        r1 = hp.Int("R1", min_value=0, max_value=4, step=1, default=1)
        b2 = hp.Int("B2", min_value=16, max_value=64, step=8, default=16)
        r2 = hp.Int("R2", min_value=0, max_value=4, step=1, default=1)
        b3 = hp.Int("B3", min_value=16, max_value=128, step=8, default=16)
        r3 = hp.Int("R3", min_value=0, max_value=4, step=1, default=1)

        # Take the per-sample shape from the incoming tensor instead of assuming
        # CIFAR's (32, 32, 3), so the block also works for other image sizes.
        sample_shape = tuple(input_node.shape[1:])

        # Keyword arguments keep the mapping correct even if ResNet's
        # parameter list is extended or reordered later.
        model = ResNet(
            sample_shape,
            b0=b0,
            b1=b1, r1=r1,
            b2=b2, r2=r2,
            b3=b3, r3=r3,
        )
        return model(input_node)

In [64]:
# arn = AutoResNet()
# arn.build(hyp, np.random.randn(2, 32, 32, 3))

In [65]:
# Search pipeline: image input -> normalization -> augmentation -> tunable ResNet -> classification head.
input_node = ak.ImageInput()
output_node = ak.Normalization()(input_node)
# NOTE(review): the three options below are pinned off explicitly; the remaining
# augmentation settings are presumably left for the tuner to choose — confirm.
output_node = ak.ImageAugmentation(
    vertical_flip=False,
    rotation_factor=False,
    zoom_factor=False,
)(output_node)
output_node = AutoResNet()(output_node)
output_node = ak.ClassificationHead()(output_node)

# max_model_size bounds how large a candidate may be; the search aborts with
# "Too many consecutive oversized models" when candidates keep exceeding it.
clf = ak.AutoModel(
    inputs=input_node,
    outputs=output_node,
    overwrite=True,
    max_trials=40,
    max_model_size=1_000_000,
    tuner="bayesian",
)

In [66]:
clf

<autokeras.auto_model.AutoModel at 0x7f0a983b22e0>

In [67]:
# Feed the image classifier with training data.
# This starts the architecture search (up to the max_trials configured on `clf`).
# NOTE(review): epochs/batch_size presumably apply to each trial, and no validation data
# is passed, so the reported val_loss presumably comes from AutoKeras' own hold-out — confirm.
history = clf.fit(x_train, y_train, verbose=1, epochs=50, batch_size=128)

Trial 7 Complete [00h 06m 05s]
val_loss: 1.0474326610565186

Best val_loss So Far: 0.8317501544952393
Total elapsed time: 00h 40m 09s

Search: Running Trial #8

Hyperparameter    |Value             |Best Value So Far 
image_augmentat...|0                 |0.1               
image_augmentat...|True              |True              
image_augmentat...|0.1               |0.1               
auto_res_net_1/B0 |8                 |8                 
auto_res_net_1/B1 |8                 |8                 
auto_res_net_1/R1 |0                 |0                 
auto_res_net_1/B2 |16                |16                
auto_res_net_1/R2 |1                 |0                 
auto_res_net_1/B3 |128               |72                
auto_res_net_1/R3 |0                 |0                 
auto_res_net_1/B4 |128               |128               
auto_res_net_1/R4 |4                 |4                 
classification_...|0                 |0                 
optimizer         |adam              |ada

RuntimeError: Too many consecutive oversized models.

In [None]:
# Predict with the best model.
# NOTE(review): relies on `clf` from the search cell; if that cell stopped with an error,
# confirm that a best model is actually available before running this.
predicted_y = clf.predict(x_test)
print(predicted_y)

# Evaluate the best model with testing data.
print(clf.evaluate(x_test, y_test))

In [None]:
model = clf.export_model()

In [None]:
model.summary()

In [13]:
model.get_config()

{'name': 'model',
 'layers': [{'class_name': 'InputLayer',
   'config': {'batch_input_shape': (None, 32, 32, 3),
    'dtype': 'uint8',
    'sparse': False,
    'ragged': False,
    'name': 'input_1'},
   'name': 'input_1',
   'inbound_nodes': []},
  {'class_name': 'Custom>CastToFloat32',
   'config': {'name': 'cast_to_float32',
    'trainable': True,
    'dtype': 'float32'},
   'name': 'cast_to_float32',
   'inbound_nodes': [[['input_1', 0, 0, {}]]]},
  {'class_name': 'Normalization',
   'config': {'name': 'normalization',
    'trainable': True,
    'dtype': 'float32',
    'axis': (-1,)},
   'name': 'normalization',
   'inbound_nodes': [[['cast_to_float32', 0, 0, {}]]]},
  {'class_name': 'RandomFlip',
   'config': {'name': 'random_flip',
    'trainable': True,
    'dtype': 'float32',
    'mode': 'horizontal_and_vertical',
    'seed': None},
   'name': 'random_flip',
   'inbound_nodes': [[['normalization', 0, 0, {}]]]},
  {'class_name': 'Conv2D',
   'config': {'name': 'conv2d',
    'tra