Demo to verify the number of parameters of layers in Keras models.

In [12]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Embedding
from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPooling1D
from keras.layers import SimpleRNN, LSTM

### Multilayer Perceptron (MLP) for multi-class softmax classification

In [2]:
# Multi-class MLP: fully-connected layers of 7, 13 and 23 units with dropout
# between them. Only the first layer has to declare the expected input shape:
# here, 20-dimensional vectors.
model = Sequential([
    Dense(7, activation='relu', input_dim=20),
    Dropout(0.5),
    Dense(13, activation='relu'),
    Dropout(0.5),
    Dense(23, activation='softmax'),
])

In [3]:
print(model.summary())

# A Dense layer stores one weight per input plus one bias for every unit:
# params = units * (inputs + 1). Dropout layers add no parameters.
assert 7 * (20 + 1) == 147    # first Dense: 20 inputs -> 7 units
assert 13 * (7 + 1) == 104    # second Dense: 7 inputs -> 13 units
assert 23 * (13 + 1) == 322   # output Dense: 13 inputs -> 23 units

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 7)                 147       
_________________________________________________________________
dropout_1 (Dropout)          (None, 7)                 0         
_________________________________________________________________
dense_2 (Dense)              (None, 13)                104       
_________________________________________________________________
dropout_2 (Dropout)          (None, 13)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 23)                322       
Total params: 573.0
Trainable params: 573.0
Non-trainable params: 0.0
_________________________________________________________________
None


### MLP for binary classification

In [4]:
# Binary-classification MLP: same hidden stack as the softmax example
# (7 and 13 units on 20-dimensional inputs), ending in one sigmoid unit.
model = Sequential([
    Dense(7, input_dim=20, activation='relu'),
    Dropout(0.5),
    Dense(13, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

In [5]:
print(model.summary())

# Dense params = units * (inputs + 1); the "+ 1" is the bias of each unit.
assert 7 * (20 + 1) == 147   # first Dense: 20 inputs -> 7 units
assert 13 * (7 + 1) == 104   # second Dense: 7 inputs -> 13 units
assert 1 * (13 + 1) == 14    # sigmoid output: 13 inputs -> 1 unit

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 7)                 147       
_________________________________________________________________
dropout_3 (Dropout)          (None, 7)                 0         
_________________________________________________________________
dense_5 (Dense)              (None, 13)                104       
_________________________________________________________________
dropout_4 (Dropout)          (None, 13)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 14        
Total params: 265.0
Trainable params: 265.0
Non-trainable params: 0.0
_________________________________________________________________
None


### VGG-like convnet

In [6]:
# Input: 100x100 images with 3 channels -> (100, 100, 3) tensors.
# Two convolutional stages of paired 3x3 convolutions (7 and 13 filters, then
# 17 and 19 filters), each stage closed by 2x2 max pooling and dropout,
# followed by a flattened dense classifier (23 hidden units, 29 outputs).
model = Sequential([
    Conv2D(7, (3, 3), activation='relu', input_shape=(100, 100, 3)),
    Conv2D(13, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    Conv2D(17, (3, 3), activation='relu'),
    Conv2D(19, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    Flatten(),
    Dense(23, activation='relu'),
    Dropout(0.5),
    Dense(29, activation='softmax'),
])

In [7]:
print(model.summary())

# Conv2D params = filters * (input_channels * kernel_h * kernel_w + 1 bias).
assert 7 * (3 * (3 * 3) + 1) == 196      # 3 channels  -> 7 filters
assert 13 * (7 * (3 * 3) + 1) == 832     # 7 channels  -> 13 filters
assert 17 * (13 * (3 * 3) + 1) == 2006   # 13 channels -> 17 filters
assert 19 * (17 * (3 * 3) + 1) == 2926   # 17 channels -> 19 filters

# Flatten turns the final (22, 22, 19) feature map into a single vector.
assert 22 * 22 * 19 == 9196

# Dense params = units * (inputs + 1 bias).
assert 23 * (9196 + 1) == 211531
assert 29 * (23 + 1) == 696

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 98, 98, 7)         196       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 96, 96, 13)        832       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 48, 48, 13)        0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 48, 48, 13)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 46, 46, 17)        2006      
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 44, 44, 19)        2926      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 22, 22, 19)        0         
__________

### Simple RNN
https://github.com/yang-zhang/courses/blob/master/deeplearning1/nbs/lesson6.ipynb

In [81]:
# n_hidden: SimpleRNN units; n_fac: embedding width; cs: input sequence length
# (passed as input_length); vocab_size: number of embedding rows and of
# softmax outputs.
n_hidden, n_fac, cs, vocab_size = (256, 42, 8, 86)

model=Sequential([
        Embedding(input_dim=vocab_size, output_dim=n_fac, input_length=cs),
        # `recurrent_initializer` is the Keras 2 name of the Keras 1 argument
        # `inner_init`; using it avoids the legacy-API conversion warning.
        SimpleRNN(n_hidden, activation='relu', recurrent_initializer='identity'),
        Dense(vocab_size, activation='softmax')
    ])

print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_20 (Embedding)     (None, 8, 42)             3612      
_________________________________________________________________
simple_rnn_18 (SimpleRNN)    (None, 256)               76544     
_________________________________________________________________
dense_27 (Dense)             (None, 86)                22102     
Total params: 102,258.0
Trainable params: 102,258
Non-trainable params: 0.0
_________________________________________________________________
None


  '` call to the Keras 2 API: ' + signature)


In [91]:
# Embedding: one 42-dimensional vector for each of the 86 vocabulary entries.
assert 86 * 42 == 3612

# SimpleRNN: each of the 256 units has 42 input weights, 256 recurrent
# weights and 1 bias.
assert 256 * (42 + 256 + 1) == 76544

# Dense: each of the 86 outputs has 256 weights and 1 bias.
assert 86 * (256 + 1) == 22102

### Sequence classification with LSTM

In [8]:
# Sequence classifier: 13-dimensional embeddings over a vocabulary of
# max_features entries, a 17-unit LSTM, dropout, and one sigmoid output.
max_features = 7
model = Sequential([
    Embedding(max_features, output_dim=13),
    LSTM(17),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

In [9]:
print(model.summary())

# Embedding: one 13-dimensional vector per vocabulary entry (7 entries).
assert 7 * 13 == 91

# LSTM: four weight blocks (input, forget, cell and output), each holding a
# kernel over the 13 inputs, a recurrent kernel over the 17 units and a bias:
# params = 4 * units * (input_dim + units + 1).
print(2108/17, 'then?')  # 124.0 parameters per unit == 4 * (13 + 17 + 1)
assert 4 * 17 * (13 + 17 + 1) == 2108

# Dense: units * (inputs + 1 bias).
assert 1 * (17 + 1) == 18

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 13)          91        
_________________________________________________________________
lstm_1 (LSTM)                (None, 17)                2108      
_________________________________________________________________
dropout_8 (Dropout)          (None, 17)                0         
_________________________________________________________________
dense_9 (Dense)              (None, 1)                 18        
Total params: 2,217.0
Trainable params: 2,217.0
Non-trainable params: 0.0
_________________________________________________________________
None
124.0 then?


### Sequence classification with 1D convolutions

In [121]:
# Sequence classifier on inputs of shape (seq_length, 103): two Conv1D layers,
# max pooling by 3, two more Conv1D layers, global average pooling, dropout
# and a single sigmoid output.
seq_length = 70
model = Sequential([
    Conv1D(13, 5, activation='relu', input_shape=(seq_length, 103)),
    Conv1D(17, 4, activation='relu'),
    MaxPooling1D(3),
    Conv1D(19, 7, activation='relu'),
    Conv1D(23, 6, activation='relu'),
    GlobalAveragePooling1D(),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

In [136]:
print(model.summary())

# Conv1D params = filters * (kernel_size * input_channels + 1 bias).
assert 13 * (5 * 103 + 1) == 6708   # 103 channels -> 13 filters, kernel 5
assert 17 * (4 * 13 + 1) == 901     # 13 channels  -> 17 filters, kernel 4

# MaxPooling1D(3) has no parameters; it shortens 63 time steps to 21.
assert 63 / 3 == 21

assert 19 * (7 * 17 + 1) == 2280    # 17 channels  -> 19 filters, kernel 7
assert 23 * (6 * 19 + 1) == 2645    # 19 channels  -> 23 filters, kernel 6

# Dense: units * (inputs + 1 bias).
assert 1 * (23 + 1) == 24

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_32 (Conv1D)           (None, 66, 13)            6708      
_________________________________________________________________
conv1d_33 (Conv1D)           (None, 63, 17)            901       
_________________________________________________________________
max_pooling1d_9 (MaxPooling1 (None, 21, 17)            0         
_________________________________________________________________
conv1d_34 (Conv1D)           (None, 15, 19)            2280      
_________________________________________________________________
conv1d_35 (Conv1D)           (None, 10, 23)            2645      
_________________________________________________________________
global_average_pooling1d_8 ( (None, 23)                0         
_________________________________________________________________
dropout_40 (Dropout)         (None, 23)                0         
__________

### Stacked LSTM for sequence classification

In [140]:
data_dim = 11
timesteps = 7
# Width of the softmax output layer. Previously declared as 13 but never used
# while the layer hard-coded 17; the constant now drives the layer, so the
# resulting model (17 outputs) is unchanged.
num_classes = 17

# expected input data shape: (batch_size, timesteps, data_dim)
model = Sequential()
model.add(LSTM(31, return_sequences=True,
               input_shape=(timesteps, data_dim)))  # returns a sequence of vectors of dimension 31
model.add(LSTM(37, return_sequences=True))  # returns a sequence of vectors of dimension 37
model.add(LSTM(41))  # return a single vector of dimension 41
model.add(Dense(num_classes, activation='softmax'))

In [150]:
print(model.summary())

# LSTM params = 4 * units * (input_dim + units + 1): four weight blocks, each
# with an input kernel, a recurrent kernel and a bias.
assert 4 * 31 * (11 + 31 + 1) == 5332     # 11 features -> 31 units
assert 4 * 37 * (31 + 37 + 1) == 10212    # 31 features -> 37 units
assert 4 * 41 * (37 + 41 + 1) == 12956    # 37 features -> 41 units

# Dense: units * (inputs + 1 bias).
assert 17 * (41 + 1) == 714

# Parameters per unit and per weight block of the first LSTM: 11 + 31 + 1.
5332 / 31 / 4

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_9 (LSTM)                (None, 7, 31)             5332      
_________________________________________________________________
lstm_10 (LSTM)               (None, 7, 37)             10212     
_________________________________________________________________
lstm_11 (LSTM)               (None, 41)                12956     
_________________________________________________________________
dense_43 (Dense)             (None, 17)                714       
Total params: 29,214.0
Trainable params: 29,214
Non-trainable params: 0.0
_________________________________________________________________
None


43.0

### Same stacked LSTM model, rendered "stateful"

In [151]:
data_dim = 11
timesteps = 7
# Width of the softmax output layer. Previously declared as 13 but never used
# while the layer hard-coded 17; the constant now drives the layer, so the
# resulting model (17 outputs) is unchanged.
num_classes = 17
batch_size = 47

# expected input data shape: (batch_size, timesteps, data_dim)
# The first layer is given the full batch_input_shape, batch size included.
model = Sequential()
model.add(LSTM(31, return_sequences=True, stateful=True,
               batch_input_shape=(batch_size, timesteps, data_dim)))  # returns a sequence of vectors of dimension 31
model.add(LSTM(37, return_sequences=True, stateful=True))  # returns a sequence of vectors of dimension 37
model.add(LSTM(41, stateful=True))  # return a single vector of dimension 41
model.add(Dense(num_classes, activation='softmax'))

In [None]:
# NOTE(review): this cell was never executed (no execution count) and appears
# to be the unmodified stateful example from the guide listed under
# "Reference". It is bound to guide_* names so that running the notebook top
# to bottom does not replace `model` (or the constants of the previous cell)
# before the summary cell that follows.
guide_data_dim = 16
guide_timesteps = 8
guide_num_classes = 10
guide_batch_size = 32

# Expected input batch shape: (batch_size, timesteps, data_dim)
# Note that we have to provide the full batch_input_shape since the network is stateful.
# the sample of index i in batch k is the follow-up for the sample i in batch k-1.
guide_model = Sequential()
guide_model.add(LSTM(32, return_sequences=True, stateful=True,
                     batch_input_shape=(guide_batch_size, guide_timesteps, guide_data_dim)))
guide_model.add(LSTM(32, return_sequences=True, stateful=True))
guide_model.add(LSTM(32, stateful=True))
guide_model.add(Dense(guide_num_classes, activation='softmax'))

In [153]:
print(model.summary())

# Parameter counts for the stateful 31/37/41 LSTM stack on 11-dimensional
# inputs. They equal those of the non-stateful stack: the saved summaries of
# both models list the same numbers.
# LSTM params = 4 * units * (input_dim + units + 1).
assert 4 * 31 * (11 + 31 + 1) == 5332
assert 4 * 37 * (31 + 37 + 1) == 10212
assert 4 * 41 * (37 + 41 + 1) == 12956

# Dense: units * (inputs + 1 bias).
assert 17 * (41 + 1) == 714


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_12 (LSTM)               (47, 7, 31)               5332      
_________________________________________________________________
lstm_13 (LSTM)               (47, 7, 37)               10212     
_________________________________________________________________
lstm_14 (LSTM)               (47, 41)                  12956     
_________________________________________________________________
dense_44 (Dense)             (47, 17)                  714       
Total params: 29,214.0
Trainable params: 29,214
Non-trainable params: 0.0
_________________________________________________________________
None


Reference: 
- https://keras.io/getting-started/sequential-model-guide/