In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras

1.regularization

In [4]:
[name for name in dir(keras.regularizers) if not name.startswith('_')]

['L1',
 'L1L2',
 'L2',
 'OrthogonalRegularizer',
 'Regularizer',
 'deserialize',
 'get',
 'l1',
 'l1_l2',
 'l2',
 'orthogonal_regularizer',
 'serialize']

In [5]:
r_l1=keras.regularizers.l1(l1=0.01)
r_l2=keras.regularizers.l2(l2=0.01)

r_l1_l2=keras.regularizers.l1_l2(l1=0.01,l2=0.01)

ex1

In [6]:
#Load data
from tensorflow.keras.datasets import fashion_mnist
(x_train_set,y_train_set),(x_test,y_test)=fashion_mnist.load_data()
#Split data
from sklearn.model_selection import train_test_split
x_train,x_valid,y_train,y_valid=train_test_split(
    x_train_set,y_train_set,random_state=1)
#Preprocessing
pixel_means=x_train.mean(axis=0,keepdims=True)
pixel_stds=x_train.std(axis=0,keepdims=True)
x_train_scaled=(x_train-pixel_means)/pixel_stds
x_valid_scaled=(x_valid-pixel_means)/pixel_stds
x_test_scaled=(x_test-pixel_means)/pixel_stds

In [7]:
tf.keras.backend.clear_session()
np.random.seed(1)
tf.random.set_seed(1)

In [8]:
r_l2=keras.regularizers.l2(l2=0.01)
model=keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),
    keras.layers.Dense(300,activation='relu',kernel_initializer='he_normal',
                      kernel_regularizer=r_l2),
    keras.layers.Dense(100,activation='relu',kernel_initializer='he_normal',
                      kernel_regularizer=r_l2),
    keras.layers.Dense(10,activation='softmax',
                      kernel_regularizer=r_l2),
])

In [9]:
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='nadam',metrics=['accuracy'])

In [10]:
train=model.fit(x_train_scaled,y_train,
                epochs=2,
                validation_data=(x_valid_scaled,y_valid))

Epoch 1/2
Epoch 2/2


2.Dropout

In [11]:
model=keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(300,activation='relu',kernel_initializer='he_normal',
                      kernel_regularizer=r_l2),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(100,activation='relu',kernel_initializer='he_normal',
                      kernel_regularizer=r_l2),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(10,activation='softmax',
                      kernel_regularizer=r_l2),
])

In [12]:
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='nadam',metrics=['accuracy'])

In [13]:
train=model.fit(x_train_scaled,y_train,
                epochs=2,
                validation_data=(x_valid_scaled,y_valid))

Epoch 1/2
Epoch 2/2


In [14]:
model.evaluate(x_test,y_test)



[25.002761840820312, 0.6822999715805054]

In [15]:
y_proba=model.predict(x_test_scaled)
y_proba[:3].round(2)



array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.05, 0.  , 0.33, 0.  , 0.61],
       [0.  , 0.  , 0.81, 0.  , 0.06, 0.  , 0.13, 0.  , 0.  , 0.  ],
       [0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ]],
      dtype=float32)

In [16]:
y_pred=np.argmax(y_proba,axis=1)
y_pred[:3]

array([9, 2, 1])

In [17]:
y_test[:3]

array([9, 2, 1], dtype=uint8)

3.MC Dropout

In [18]:
y_probs=np.stack([model(x_test_scaled[:3],training=True) for _ in range(100)])
y_probs.shape

(100, 3, 10)

In [19]:
y_proba_mc=y_probs.mean(axis=0)
np.round(y_proba_mc,2)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.07, 0.  , 0.33, 0.  , 0.59],
       [0.01, 0.  , 0.76, 0.  , 0.08, 0.  , 0.15, 0.  , 0.  , 0.  ],
       [0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ]],
      dtype=float32)

In [20]:
y_pred=np.argmax(y_proba_mc,axis=1)
y_pred

array([9, 2, 1])

Alpha dropout

In [21]:
model=keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),
    keras.layers.AlphaDropout(rate=0.2),
    keras.layers.Dense(300,activation='selu',kernel_initializer='lecun_normal'),
    keras.layers.AlphaDropout(rate=0.2),
    keras.layers.Dense(100,activation='selu',kernel_initializer='lecun_normal'),
    keras.layers.AlphaDropout(rate=0.2),
    keras.layers.Dense(10,activation='softmax')
])

In [22]:
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='nadam',metrics=['accuracy'])

In [23]:
train=model.fit(x_train_scaled,y_train,
                epochs=2,
                validation_data=(x_valid_scaled,y_valid))

Epoch 1/2
Epoch 2/2


4.Max-Norm Regularization

In [24]:
model=keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),
    keras.layers.AlphaDropout(rate=0.2),
    keras.layers.Dense(300,activation='selu',kernel_initializer='lecun_normal',
                      kernel_constraint=keras.constraints.max_norm(1.)),
    keras.layers.AlphaDropout(rate=0.2),
    keras.layers.Dense(100,activation='selu',kernel_initializer='lecun_normal',
                      kernel_constraint=keras.constraints.max_norm(1.)),
    keras.layers.AlphaDropout(rate=0.2),
    keras.layers.Dense(10,activation='softmax')
])

In [25]:
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='nadam',metrics=['accuracy'])

In [26]:
train=model.fit(x_train_scaled,y_train,
                epochs=2,
                validation_data=(x_valid_scaled,y_valid))

Epoch 1/2
Epoch 2/2


Transfer learning

In [27]:
#load data
from tensorflow.keras.datasets import fashion_mnist 
(x_train_set,y_train_set),(x_test_set,y_tesr_set)=fashion_mnist.load_data()
#split data
from sklearn.model_selection import train_test_split
x_train,x_valid,y_train,y_valid=train_test_split(
x_train_set,y_train_set,random_state=1)
#preprocessing
x_train=x_train/255.0
x_valid=x_valid/255.0
x_test=x_test/255.0

把資料切成A跟B，模擬資料少的情況

In [104]:
def split_data(x,y):
    ind_B=(y==4)|(y==5)
    y_B=(y[ind_B]==5).astype(np.float32)
    y_A=y[~ind_B]
    y_A[y_A>5]-=2
    return (x[~ind_B],y_A),(x[ind_B],y_B)

(x_train_A,y_train_A),(x_train_B,y_train_B)=split_data(x_train,y_train)
(x_valid_A,y_valid_A),(x_valid_B,y_valid_B)=split_data(x_valid,y_valid)
(x_test_A,y_test_A),(x_test_B,y_test_B)=split_data(x_test,y_test)

x_train_B,y_train_B=x_train_B[:100],y_train_B[:100]

In [105]:
keras.backend.clear_session()
np.random.seed(1)
tf.random.set_seed(1)

In [106]:
model_A=keras.models.Sequential()
model_A.add(keras.layers.Flatten(input_shape=[28,28]))
for hidden_i in(200,150,100,50):
    model_A.add(keras.layers.Dense(hidden_i,activation='relu'))
model_A.add(keras.layers.Dense(8,activation='softmax'))

In [107]:
model_A.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 200)               157000    
                                                                 
 dense_1 (Dense)             (None, 150)               30150     
                                                                 
 dense_2 (Dense)             (None, 100)               15100     
                                                                 
 dense_3 (Dense)             (None, 50)                5050      
                                                                 
 dense_4 (Dense)             (None, 8)                 408       
                                                                 
Total params: 207708 (811.36 KB)
Trainable params: 20770

In [108]:
model_A.compile(loss='sparse_categorical_crossentropy',
              optimizer=tf.keras.optimizers.legacy.SGD(learning_rate=1e-3),metrics=['accuracy'])

In [109]:
train=model_A.fit(x_train_A,y_train_A,epochs=20,
                validation_data=(x_valid_A,y_valid_A))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [110]:
model_A.evaluate(x_test_A,y_test_A)



[0.3855954706668854, 0.8651250004768372]

In [111]:
model_A.save('model_A.h5')

In [112]:
model_B=keras.models.Sequential()
model_B.add(keras.layers.Flatten(input_shape=[28,28]))
for hidden_i in(300,150,100,80):
    model_B.add(keras.layers.Dense(hidden_i,activation='relu'))
model_B.add(keras.layers.Dense(1,activation='sigmoid'))

In [113]:
model_B.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 dense_5 (Dense)             (None, 300)               235500    
                                                                 
 dense_6 (Dense)             (None, 150)               45150     
                                                                 
 dense_7 (Dense)             (None, 100)               15100     
                                                                 
 dense_8 (Dense)             (None, 80)                8080      
                                                                 
 dense_9 (Dense)             (None, 1)                 81        
                                                                 
Total params: 303911 (1.16 MB)
Trainable params: 30391

In [114]:
model_B.compile(loss='binary_crossentropy',
                optimizer=tf.keras.optimizers.legacy.SGD(learning_rate=1e-3),metrics=['accuracy'])

In [115]:
train=model_B.fit(x_train_B,y_train_B,epochs=20,
                 validation_data=(x_valid_B,y_valid_B))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [116]:
model_B.evaluate(x_test_B,y_test_B)



[0.5927383899688721, 0.6309999823570251]

In [117]:
model_A=keras.models.load_model('model_A.h5')

In [118]:
model_tr=keras.models.Sequential(model_A.layers[:-2])

In [119]:
model_tr.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 200)               157000    
                                                                 
 dense_1 (Dense)             (None, 150)               30150     
                                                                 
 dense_2 (Dense)             (None, 100)               15100     
                                                                 
Total params: 202250 (790.04 KB)
Trainable params: 202250 (790.04 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [120]:
model_tr.add(keras.layers.Dense(80,activation='relu'))
model_tr.add(keras.layers.Dense(1,activation='sigmoid'))

In [121]:
model_tr.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 200)               157000    
                                                                 
 dense_1 (Dense)             (None, 150)               30150     
                                                                 
 dense_2 (Dense)             (None, 100)               15100     
                                                                 
 dense_10 (Dense)            (None, 80)                8080      
                                                                 
 dense_11 (Dense)            (None, 1)                 81        
                                                                 
Total params: 210411 (821.92 KB)
Trainable params: 210

In [122]:
for layer in model_tr.layers[:-2]:
    layer.trainable=False

In [123]:
model_tr.summary(show_trainable=True)

Model: "sequential_2"
____________________________________________________________________________
 Layer (type)                Output Shape              Param #   Trainable  
 flatten (Flatten)           (None, 784)               0         N          
                                                                            
 dense (Dense)               (None, 200)               157000    N          
                                                                            
 dense_1 (Dense)             (None, 150)               30150     N          
                                                                            
 dense_2 (Dense)             (None, 100)               15100     N          
                                                                            
 dense_10 (Dense)            (None, 80)                8080      Y          
                                                                            
 dense_11 (Dense)            (None, 1)                

In [124]:
model_tr.compile(loss='binary_crossentropy',
                 optimizer=tf.keras.optimizers.legacy.SGD(learning_rate=1e-3),metrics=['accuracy'])

In [125]:
train=model_tr.fit(x_train_B,y_train_B,epochs=4,
                  validation_data=(x_valid_B,y_valid_B))

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [127]:
for layer in model_tr.layers[:-2]:
    layer.trainable=True

In [128]:
model_tr.summary(show_trainable=True)

Model: "sequential_2"
____________________________________________________________________________
 Layer (type)                Output Shape              Param #   Trainable  
 flatten (Flatten)           (None, 784)               0         Y          
                                                                            
 dense (Dense)               (None, 200)               157000    Y          
                                                                            
 dense_1 (Dense)             (None, 150)               30150     Y          
                                                                            
 dense_2 (Dense)             (None, 100)               15100     Y          
                                                                            
 dense_10 (Dense)            (None, 80)                8080      Y          
                                                                            
 dense_11 (Dense)            (None, 1)                

In [129]:
model_tr.compile(loss='binary_crossentropy',
                 optimizer=tf.keras.optimizers.legacy.SGD(learning_rate=1e-3),metrics=['accuracy'])

In [130]:
train=model_tr.fit(x_train_B,y_train_B,epochs=16,
                  validation_data=(x_valid_B,y_valid_B))

Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


In [131]:
model_tr.evaluate(x_test_B,y_test_B)



[0.33377698063850403, 0.9700000286102295]