In [15]:
!which python

/home/wei/anaconda3/envs/chainercv/bin/python


In [1]:
import numpy as np
import chainer
from chainer import cuda, Function, gradient_check, report, training, utils, Variable
from chainer import datasets, iterators, optimizers, serializers
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L
from chainer.training import extensions

In [2]:
?chainer.cuda.get_device_from_id

In [40]:
x_data = np.array([5], dtype=np.float32)

In [41]:
x = Variable(x_data)

In [42]:
x

variable([5.])

In [43]:
 y = x**2 - 2 * x + 1

In [24]:
y

variable([16.])

In [25]:
y.data

array([16.], dtype=float32)

In [26]:
x.data

array([5.], dtype=float32)

In [27]:
y.backward()

In [28]:
x.grad

array([8.], dtype=float32)

In [47]:
x

variable([5.])

In [13]:
# Same polynomial as before, but with the 2*x term split out as an intermediate
# variable z so that its gradient can be inspected.
z = 2*x
y = x**2 - z + 1
# retain_grad=True is passed so that z.grad is still available after backward();
# z enters y as "- z", hence the displayed gradient of -1.
# NOTE(review): x is neither re-created nor cleared in this cell. If x.grad already
# holds a value from an earlier backward() call, this call adds to it -- that would
# explain x.grad showing 16 (rather than 8) in the following cell. Confirm against
# the actual execution order and consider calling x.cleargrad() first.
y.backward(retain_grad=True)
z.grad

array([-1.], dtype=float32)

In [14]:
x.grad

array([16.], dtype=float32)

In [63]:
# Gradient of an element-wise polynomial for a 2x3 input.
# y has six elements, so its gradient is seeded explicitly with ones before
# calling backward(); the displayed x.grad then equals 2*x - 2 element-wise.
x = Variable(np.arange(1, 7, dtype=np.float32).reshape(2, 3))
y = x**2 - 2*x + 1
y.grad = np.ones(x.shape, dtype=np.float32)
y.backward()
x.grad

array([[ 0.,  2.,  4.],
       [ 6.,  8., 10.]], dtype=float32)

In [57]:
f = L.Linear(3, 2)

In [58]:
f.W.data

array([[-0.57954234,  0.68946636,  0.3200392 ],
       [ 0.794923  , -0.8238686 , -0.02581505]], dtype=float32)

In [59]:
f.b.data

array([0., 0.], dtype=float32)

In [67]:
x = Variable(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32))
y = f(x)
y.data

array([[ 1.759508 , -0.9302593],
       [ 3.049398 , -1.0945411]], dtype=float32)

In [68]:
f.cleargrads()

In [69]:
y.grad = np.ones((2, 2), dtype=np.float32)
y.backward()
f.W.grad

array([[5., 7., 9.],
       [5., 7., 9.]], dtype=float32)

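The gradient is accumulated (summed) over the input examples in the batch: with `y.grad` set to ones, each row of `f.W.grad` is the sum of the two input rows, [1, 2, 3] + [4, 5, 6] = [5, 7, 9].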

In [70]:
f.b.grad

array([2., 2.], dtype=float32)

In [71]:
y.grad

array([[1., 1.],
       [1., 1.]], dtype=float32)

In [72]:
# Two stand-alone fully connected links, chained by hand: 4 -> 3 -> 2.
l1 = L.Linear(4, 3)
l2 = L.Linear(3, 2)


def my_forward(x):
    """Feed ``x`` through the module-level links ``l1`` and then ``l2``.

    Returns whatever ``l2`` produces for the output of ``l1``.
    """
    return l2(l1(x))

In [73]:
x = Variable(np.array([[1, 2, 3, 4], [4, 5, 6, 6]], dtype=np.float32))

In [74]:
y = my_forward(x)
y

variable([[-0.37159705,  2.8304615 ],
          [-0.12051916,  6.6296334 ]])

In [75]:
class MyProc(object):
    """Two stacked Linear links (4 -> 3 -> 2) held by a plain Python object.

    This class derives from ``object``, not from a chainer ``Link``/``Chain``;
    the two links are stored as ordinary instance attributes.
    """

    def __init__(self):
        # Created in this order: first layer, then second layer.
        self.l1, self.l2 = L.Linear(4, 3), L.Linear(3, 2)

    def forward(self, x):
        """Return ``l2(l1(x))``."""
        return self.l2(self.l1(x))

In [76]:
class MyChain(Chain):
    """Two stacked Linear links (4 -> 3 -> 2) packaged as a chainer ``Chain``."""

    def __init__(self):
        super(MyChain, self).__init__()
        # The links are assigned inside init_scope(), which is how a Chain
        # picks up its child links.
        with self.init_scope():
            self.l1 = L.Linear(4, 3)
            self.l2 = L.Linear(3, 2)

    def __call__(self, x):
        """Apply ``l1`` and then ``l2`` to ``x`` and return the result."""
        return self.l2(self.l1(x))

In [90]:
model = MyChain()
optimizer = optimizers.SGD()
optimizer.setup(model)

In [91]:
optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005))

In [92]:
# One hand-written optimisation step on a random batch of shape (2, 4).
# NOTE(review): no random seed is set in the visible cells, so this input
# (and therefore the step taken) differs from run to run.
x = np.random.uniform(-1, 1, (2, 4)).astype('f')
# Clear the model's gradients before the new backward pass.
model.cleargrads()
# The "loss" here is simply the sum of all model outputs.
loss = F.sum(model(chainer.Variable(x)))
loss.backward()
# With no arguments, update() uses the gradients computed by backward() above.
optimizer.update()

In [94]:
def lossfun(arg1, arg2):
    """Return the sum of the model's outputs for the input ``arg1 - arg2``.

    Relies on the notebook-level ``model``.
    """
    return F.sum(model(arg1 - arg2))


# Two random (2, 4) float32 batches; arg1 is sampled before arg2.
arg1 = np.random.uniform(-1, 1, (2, 4)).astype('f')
arg2 = np.random.uniform(-1, 1, (2, 4)).astype('f')
# Here the loss function and its arguments are handed to update() directly,
# instead of calling cleargrads()/backward() by hand beforehand.
optimizer.update(
    lossfun,
    chainer.Variable(arg1),
    chainer.Variable(arg2),
)


In [95]:
serializers.save_npz('my.model', model)

In [96]:
serializers.load_npz('my.model', model)

# MNIST example

In [2]:
train, test = datasets.get_mnist()

In [3]:
train_iter = iterators.SerialIterator(train, batch_size=100, shuffle=True)
test_iter = iterators.SerialIterator(test, batch_size=100, repeat=False, shuffle=False)


In [4]:
class MLP(Chain):
    """Multi-layer perceptron: two ReLU hidden layers followed by a linear output.

    Args:
        n_units: Width of each of the two hidden layers.
        n_out: Size of the output layer.
    """

    def __init__(self, n_units, n_out):
        super(MLP, self).__init__()
        with self.init_scope():
            # Passing None as the input size lets each layer infer it.
            self.l1 = L.Linear(None, n_units)  # n_in -> n_units
            self.l2 = L.Linear(None, n_units)  # n_units -> n_units
            self.l3 = L.Linear(None, n_out)    # n_units -> n_out

    def __call__(self, x):
        """Run ``x`` through l1 -> ReLU -> l2 -> ReLU -> l3 and return the result."""
        hidden = F.relu(self.l1(x))
        hidden = F.relu(self.l2(hidden))
        # No activation is applied after the last layer.
        return self.l3(hidden)

In [5]:
class Classifier(Chain):
    """Wrap a predictor and turn its output into a classification loss.

    Args:
        predictor: Callable (registered as a child link) that maps ``x`` to
            the scores passed to the loss and accuracy functions.
    """

    def __init__(self, predictor):
        super(Classifier, self).__init__()
        with self.init_scope():
            self.predictor = predictor

    def __call__(self, x, t):
        """Compute the softmax cross-entropy loss of ``predictor(x)`` against ``t``.

        The loss and the accuracy are both reported under this link via
        ``report``; only the loss is returned.
        """
        scores = self.predictor(x)
        loss = F.softmax_cross_entropy(scores, t)
        report({'loss': loss, 'accuracy': F.accuracy(scores, t)}, self)
        return loss

In [6]:
chainer.cuda.get_device_from_id(0).use()

In [7]:
# NOTE(review): this wraps the MLP in chainer's built-in L.Classifier, not in the
# Classifier class defined in an earlier cell of this notebook.
model = L.Classifier(MLP(100, 10))  # the input size, 784, is inferred
# to_gpu() is called without a device id -- presumably it targets the device made
# current by get_device_from_id(0).use() in the previous cell; confirm.
model.to_gpu()
# Plain SGD with default settings, attached to the whole classifier.
optimizer = optimizers.SGD()
optimizer.setup(model)

In [8]:
updater = training.StandardUpdater(train_iter, optimizer, device=0)
trainer = training.Trainer(updater, (20, 'epoch'), out='result')

In [9]:
# Evaluate the model on test_iter using device 0.
trainer.extend(extensions.Evaluator(test_iter, model, device=0))
trainer.extend(extensions.LogReport())
# Columns printed in the report; the 'validation/...' entry is presumably the
# value reported by the Evaluator registered above -- confirm.
trainer.extend(extensions.PrintReport(['epoch', 'main/accuracy', 'validation/main/accuracy']))
# NOTE(review): ProgressBar redraws itself with terminal escape sequences. In the
# saved output of trainer.run() these show up as raw "[4A[J" text and many
# repeated progress lines, which bury the PrintReport table.
trainer.extend(extensions.ProgressBar())

In [10]:
trainer.run()

epoch       main/accuracy  validation/main/accuracy
[J     total [..................................................]  0.83%
this epoch [########..........................................] 16.67%
       100 iter, 0 epoch / 20 epochs
       inf iters/sec. Estimated time to finish: 0:00:00.
[4A[J     total [..................................................]  1.67%
this epoch [################..................................] 33.33%
       200 iter, 0 epoch / 20 epochs
    401.23 iters/sec. Estimated time to finish: 0:00:29.409495.
[4A[J     total [#.................................................]  2.50%
this epoch [#########################.........................] 50.00%
       300 iter, 0 epoch / 20 epochs
    398.15 iters/sec. Estimated time to finish: 0:00:29.385862.
[4A[J     total [#.................................................]  3.33%
this epoch [#################################.................] 66.67%
       400 iter, 0 epoch / 20 epochs
    398.33 iters/sec. E

[4A[J     total [#############.....................................] 27.50%
this epoch [#########################.........................] 50.00%
      3300 iter, 5 epoch / 20 epochs
    363.14 iters/sec. Estimated time to finish: 0:00:23.957525.
[4A[J     total [##############....................................] 28.33%
this epoch [#################################.................] 66.67%
      3400 iter, 5 epoch / 20 epochs
     363.4 iters/sec. Estimated time to finish: 0:00:23.665319.
[4A[J     total [##############....................................] 29.17%
this epoch [#########################################.........] 83.33%
      3500 iter, 5 epoch / 20 epochs
    363.18 iters/sec. Estimated time to finish: 0:00:23.404688.
[4A[J6           0.917167       0.9233                    
[J     total [###############...................................] 30.00%
this epoch [..................................................]  0.00%
      3600 iter, 6 epoch / 20 epochs
    359

[4A[J     total [###########################.......................] 54.17%
this epoch [#########################################.........] 83.33%
      6500 iter, 10 epoch / 20 epochs
    364.66 iters/sec. Estimated time to finish: 0:00:15.082336.
[4A[J11          0.934334       0.9385                    
[J     total [###########################.......................] 55.00%
this epoch [..................................................]  0.00%
      6600 iter, 11 epoch / 20 epochs
    362.48 iters/sec. Estimated time to finish: 0:00:14.897572.
[4A[J     total [###########################.......................] 55.83%
this epoch [########..........................................] 16.67%
      6700 iter, 11 epoch / 20 epochs
    363.05 iters/sec. Estimated time to finish: 0:00:14.598683.
[4A[J     total [############################......................] 56.67%
this epoch [################..................................] 33.33%
      6800 iter, 11 epoch / 20 epochs
   

[4A[J     total [########################################..........] 80.83%
this epoch [########..........................................] 16.67%
      9700 iter, 16 epoch / 20 epochs
     364.2 iters/sec. Estimated time to finish: 0:00:06.315246.
[4A[J     total [########################################..........] 81.67%
this epoch [################..................................] 33.33%
      9800 iter, 16 epoch / 20 epochs
    364.47 iters/sec. Estimated time to finish: 0:00:06.036180.
[4A[J     total [#########################################.........] 82.50%
this epoch [#########################.........................] 50.00%
      9900 iter, 16 epoch / 20 epochs
    364.79 iters/sec. Estimated time to finish: 0:00:05.756687.
[4A[J     total [#########################################.........] 83.33%
this epoch [#################################.................] 66.67%
     10000 iter, 16 epoch / 20 epochs
    365.12 iters/sec. Estimated time to finish: 0:00:05.4776

In [14]:
from chainercv.datasets.cub.cub_label_dataset import CUBLabelDataset

In [15]:
data = CUBLabelDataset(return_bb=True)

Downloading ...
From: http://www.vision.caltech.edu/visipedia-data/CUB-200-2011/CUB_200_2011.tgz
To: /home/wei/.chainer/dataset/_dl_cache/92ddffe2095c104aeae52e33f8eaf8a5
  %   Total    Recv       Speed  Time left
100 1097MiB 1097MiB   6788KiB/s    0:00:00Downloading ...
From: http://www.vision.caltech.edu/visipedia-data/CUB-200-2011/segmentations.tgz
To: /home/wei/.chainer/dataset/_dl_cache/c641dacd8933196befce49e4c9d05842
  %   Total    Recv       Speed  Time left
100   37MiB   37MiB   7004KiB/s    0:00:00