In [39]:
import numpy as np
import chainer
from chainer import cuda, Function, gradient_check, report, training, utils, Variable
from chainer import datasets, iterators, optimizers, serializers
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L
from chainer.training import extensions
from chainer.dataset import concat_examples
from chainer.backends.cuda import to_cpu


In [40]:
# Small fully connected layer (2 inputs -> 3 outputs) used to inspect its weights.
a = L.Linear(in_size=2, out_size=3)

In [107]:
# Show the layer's weight matrix and its shape.
print(a.W)
print(a.W.shape)

# 0/1 mask with the same shape as W: column 0 is blocked, column 1 passes.
mask = np.tile([0, 1], (3, 1))
print(mask)
print(mask.shape)

# Element-wise product: every weight in column 0 is zeroed out.
print(a.W * mask)

variable W([[-1.0434684  -0.27513197]
            [ 0.61474335 -0.2853942 ]
            [-0.5197403  -0.77510285]])
(3, 2)
[[0 1]
 [0 1]
 [0 1]]
(3, 2)
variable([[-0.         -0.27513197]
          [ 0.         -0.2853942 ]
          [-0.         -0.77510285]])


In [75]:
# Wrap a one-element float32 array in a Variable so operations on it are tracked.
x_data = np.full((1,), 5, dtype=np.float32)
x = chainer.Variable(x_data)
x

variable([5.])

In [78]:
# y = x^2 - 2x + 1, built from Variable arithmetic so it can be backpropagated
# through in the next cell. With x = 5 this evaluates to 16.
y = (x**2 - 2 * x + 1)
y

variable([16.])

In [79]:
# Backpropagate from y; the derivative dy/dx = 2x - 2 (8 at x = 5) ends up in x.grad.
y.backward()
x.grad

array([8.], dtype=float32)

In [41]:
# Mini-batch size; it also fixes the number of rows in the demo mask below.
batchsize = 128

# One row per example: column 0 is blocked (0.0), column 1 passes through (1.0).
mask = np.tile([0.0, 1.0], (batchsize, 1))

In [51]:
# Network definition
class MLP(chainer.Chain):
    """Three-layer perceptron whose second hidden layer is masked down to one unit.

    After the second ReLU, every hidden unit except unit 1 is multiplied by
    zero, so only unit 1 contributes to the output layer. The masked units
    therefore receive no gradient through this path.

    Args:
        n_units: Width of both hidden layers. Must be at least 2, because the
            mask keeps the unit at index 1.
        n_out: Number of output units (classes).
    """

    def __init__(self, n_units, n_out):
        super(MLP, self).__init__()
        with self.init_scope():
            # Input sizes are left as None so they are inferred on the first call.
            self.l1 = L.Linear(None, n_units)  # n_in -> n_units
            self.l2 = L.Linear(None, n_units)  # n_units -> n_units
            self.l3 = L.Linear(None, n_out)  # n_units -> n_out

    def __call__(self, x, val_flag=False):
        """Run the forward pass.

        Args:
            x: Input mini-batch (array or Variable) accepted by ``self.l1``.
            val_flag: When True, print the current weights and bias of ``l2``
                (debug aid for watching which parameters are being updated).

        Returns:
            The output of ``self.l3`` applied to the masked hidden activations.
        """
        h1 = F.relu(self.l1(x))
        h2 = F.relu(self.l2(h1))

        # Build the mask with the link's own array module (numpy on CPU, cupy
        # on GPU) and with h2's shape/dtype, so it works after to_gpu() and for
        # any hidden width instead of only n_units == 2.
        mask = self.xp.zeros(h2.shape, dtype=h2.dtype)
        mask[:, 1] = 1
        h2 = h2 * mask

        if val_flag:
            print("\n")
            print(self.l2.W)
            print(self.l2.b)
            print("\n")

        return self.l3(h2)

In [52]:
# Fetch the MNIST training and test splits.
train, test = datasets.get_mnist()

In [54]:
# ---- Configuration ----
batchsize = 128
epoch = 10
gpu = -1  # negative value means run on CPU
unit = 2
out = 'result'
plot = True

model = MLP(unit, 10)

if gpu >= 0:
    # Make a specified GPU current
    chainer.cuda.get_device_from_id(gpu).use()
    model.to_gpu()  # Copy the model to the GPU

# Setup an optimizer
optimizer = chainer.optimizers.Adam()
optimizer.setup(model)

train_iter = chainer.iterators.SerialIterator(train, batchsize)
# The test iterator makes exactly one ordered pass per evaluation.
test_iter = chainer.iterators.SerialIterator(test, batchsize,
                                             repeat=False, shuffle=False)

while train_iter.epoch < epoch:

    # ---------- One iteration of the training loop ----------
    train_batch = train_iter.next()
    image_train, target_train = concat_examples(train_batch, gpu)

    # Calculate the prediction of the network
    prediction_train = model(image_train)

    # Calculate the loss with softmax_cross_entropy
    loss = F.softmax_cross_entropy(prediction_train, target_train)

    # Calculate the gradients in the network
    model.cleargrads()
    loss.backward()

    # Update all the trainable parameters
    optimizer.update()
    # --------------------- until here ---------------------

    # Check the validation accuracy of prediction after every epoch
    if train_iter.is_new_epoch:  # If this iteration is the final iteration of the current epoch

        # Display the training loss (of the last mini-batch of the epoch)
        print('\n\n\nepoch:{:02d} train_loss:{:.04f} '.format(
            train_iter.epoch, float(to_cpu(loss.data))))

        test_losses = []
        test_accuracies = []

        # Evaluation needs no gradients: switch to test mode and skip building
        # the backward graph so the pass does not hold on to extra memory.
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            while True:
                test_batch = test_iter.next()
                image_test, target_test = concat_examples(test_batch, gpu)

                # Forward the test data
                prediction_test = model(image_test)

                # Calculate the loss
                loss_test = F.softmax_cross_entropy(prediction_test, target_test)
                test_losses.append(to_cpu(loss_test.data))

                # Calculate the accuracy
                accuracy = F.accuracy(prediction_test, target_test)
                accuracy.to_cpu()
                test_accuracies.append(accuracy.data)

                if test_iter.is_new_epoch:
                    # Rewind through the public API instead of writing the
                    # iterator's private attributes by hand.
                    test_iter.reset()
                    # Extra forward pass only to print l2's parameters.
                    prediction_test = model(image_test, val_flag=True)
                    break

        # Note: these are unweighted means over mini-batches.
        print('val_loss:{:.04f} val_accuracy:{:.04f}'.format(
            np.mean(test_losses), np.mean(test_accuracies)))




epoch:01 train_loss:1.9921 


variable W([[ 1.747008   0.5278429]
            [-1.1458106  0.3740262]])
variable b([0.         0.20723538])


val_loss:2.0196 val_accuracy:0.2144



epoch:02 train_loss:2.0277 


variable W([[ 1.747008    0.5278429 ]
            [-1.2352842   0.42442495]])
variable b([0.         0.30676216])


val_loss:1.9517 val_accuracy:0.2248



epoch:03 train_loss:1.8144 


variable W([[ 1.747008    0.5278429 ]
            [-1.284077    0.44787654]])
variable b([0.         0.38972387])


val_loss:1.9061 val_accuracy:0.2399



epoch:04 train_loss:1.8028 


variable W([[ 1.747008    0.5278429 ]
            [-1.3090727   0.46080175]])
variable b([0.         0.45676038])


val_loss:1.8653 val_accuracy:0.2552



epoch:05 train_loss:1.6732 


variable W([[ 1.747008    0.5278429 ]
            [-1.3210577   0.47087708]])
variable b([0.         0.51617163])


val_loss:1.8342 val_accuracy:0.2678



epoch:06 train_loss:1.7659 


variable W([[ 1.747008    0.5278429 ]
        

# Max Pooling and Upsampling (Unpooling)

In [4]:
# 5x5 ramp of the values 1..25 stored as a float32 array of shape (1, 1, 5, 5),
# wrapped in a Variable for the pooling demo.
x = chainer.Variable(
    np.arange(1, 26, dtype=np.float32).reshape(1, 1, 5, 5))
print(x.shape)
x.data

(1, 1, 5, 5)


array([[[[ 1.,  2.,  3.,  4.,  5.],
         [ 6.,  7.,  8.,  9., 10.],
         [11., 12., 13., 14., 15.],
         [16., 17., 18., 19., 20.],
         [21., 22., 23., 24., 25.]]]], dtype=float32)

In [5]:
# 2x2 max pooling over the 5x5 input. The result is 3x3, so the last
# (partial) window along each axis is still pooled.
p = F.max_pooling_2d(x, ksize=(2, 2))
print(p.data.shape)
p

(1, 1, 3, 3)


variable([[[[ 7.,  9., 10.],
            [17., 19., 20.],
            [22., 24., 25.]]]])

In [14]:
# Upsample the pooled map by repeating each value over a 2x2 patch.
# NOTE(review): the output recorded for this cell contains values (8, 10, 12, ...)
# that do not occur in the `p` displayed by the pooling cell, so it looks like
# it came from a different kernel state; re-run the cells in order to refresh it.
p_unpooled = F.unpooling_2d(p, ksize=(2, 2), cover_all=False)
print(p_unpooled.shape)
p_unpooled

(1, 1, 6, 6)


variable([[[[ 8.,  8., 10., 10., 12., 12.],
            [ 8.,  8., 10., 10., 12., 12.],
            [20., 20., 22., 22., 24., 24.],
            [20., 20., 22., 22., 24., 24.],
            [32., 32., 34., 34., 36., 36.],
            [32., 32., 34., 34., 36., 36.]]]])