In [1]:
import mxnet as mx

def get_iterators(batch_size, data_shape=(3, 224, 224)):
    train = mx.io.ImageRecordIter(
#         path_imgrec         = '/data4/srip_face/img/jump_detector/old/jump_train_old.rec', 
        path_imgrec         = '/data4/srip_face/img/jump_detector/jump_train.rec', 
        data_name           = 'data',
        label_name          = 'softmax_label',
        batch_size          = batch_size,
        data_shape          = data_shape,
        shuffle             = True,
        rand_crop           = True,
        rand_mirror         = True)
    val = mx.io.ImageRecordIter(
#         path_imgrec         = '/data4/srip_face/img/jump_detector/old/jump_valid_old.rec',
        path_imgrec         = '/data4/srip_face/img/jump_detector/jump_valid.rec',
        data_name           = 'data',
        label_name          = 'softmax_label',
        batch_size          = batch_size,
        data_shape          = data_shape,
        rand_crop           = False,
        rand_mirror         = False)
    return (train, val)

In [2]:
import os
import numpy as np

class WeightedLogisticRegression(mx.operator.CustomOp):
    def __init__(self, neg_w, pos_w):
        self.neg_w = float(neg_w)
        self.pos_w = float(pos_w)
    def forward(self, is_train, req, in_data, out_data, aux):
        x = in_data[0].asnumpy()
        y = np.exp(x - x.max(axis=1).reshape((x.shape[0], 1)))
        y /= y.sum(axis=1).reshape((x.shape[0], 1))
        self.assign(out_data[0], req[0], mx.nd.array(y))
    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        l = in_data[1].asnumpy().ravel().astype(np.int)
        y = out_data[0].asnumpy()
        
        w0 = self.neg_w
        w1 = self.pos_w
        
        # fg wrong
        mask0 = np.ones(y.shape, dtype=float)
        mask0[:,0] = 0.0
        mask0[:,l] = 0.0
        
        # bg wrong
        mask1 = np.ones(y.shape, dtype=float)
        mask1[:,1] = 0.0
        mask1[:,l] = 0.0
        
        # fg right
        mask2 = np.zeros(y.shape, dtype=float)
        mask2[:,l] = 1.0
        mask2[:,0] = 0.0
        
        # bg right
        mask3 = np.zeros(y.shape, dtype=float)
        mask3[:,l] = 1.0
        mask3[:,1] = 0.0
        
        mask = mask0*w0 + mask1*w1 + mask2*w1 + mask3*w0
        
        y[np.arange(l.shape[0]), l] -= 1.0
        y *= mask
        self.assign(in_grad[0], req[0], mx.nd.array(y))

@mx.operator.register("weighted_logistic_regression")
class WeightedLogisticRegressionProp(mx.operator.CustomOpProp):
    def __init__(self, neg_w, pos_w):
        self.neg_w = float(neg_w)
        self.pos_w = float(pos_w)
        super(WeightedLogisticRegressionProp, self).__init__(False)
    def list_arguments(self):
        return ['data', 'label']
    def list_outputs(self):
        return ['output']
    def infer_type(self, in_type):
        dtype = in_type[0]
        return [dtype, dtype], [dtype], []
    def infer_shape(self, in_shape):
        data_shape = in_shape[0]
        label_shape = (in_shape[0][0],)
        output_shape = in_shape[0]
        return [data_shape, label_shape], [output_shape], []
    def create_operator(self, ctx, shapes, dtypes):
        return WeightedLogisticRegression(self.neg_w, self.pos_w)

In [3]:
sym, arg_params, aux_params = mx.model.load_checkpoint('caffenet', 0)

In [4]:
def get_fine_tune_model(symbol, arg_params, num_classes, layer_name='drop7'):
    """
    symbol: the pre-trained network symbol
    arg_params: the argument parameters of the pre-trained model
    num_classes: the number of classes for the fine-tune datasets
    layer_name: the layer name before the last fully-connected layer
    """
    all_layers = symbol.get_internals()
    net = all_layers[layer_name+'_output']
    net = mx.symbol.FullyConnected(data=net, num_hidden=num_classes, name='fc8')
    net = mx.sym.Custom(data=net, name = 'softmax', neg_w=0.5, pos_w=1.0,
                        op_type = 'weighted_logistic_regression')
#     net = mx.symbol.SoftmaxOutput(data=net, name='softmax')
    freeze = [k for k in arg_params if 'fc8' or 'softmax' not in k]
    new_args = dict({k:arg_params[k] for k in arg_params if 'fc8' not in k})
    return (net, new_args, freeze)

In [5]:
import logging
head = '%(asctime)-15s %(message)s'
logging.basicConfig(level=logging.DEBUG, format=head)

def fit(symbol, arg_params, aux_params, train, val, batch_size, num_gpus=1, num_epoch=1, fixed_param_names=[]):
    devs = [mx.gpu(i) for i in range(num_gpus)] # replace mx.gpu by mx.cpu for CPU training
    mod = mx.mod.Module(symbol=symbol, context=devs, fixed_param_names=fixed_param_names)
    mod.bind(data_shapes=train.provide_data, label_shapes=train.provide_label)
    mod.init_params(initializer=mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=1))
    mod.set_params(arg_params, aux_params, allow_missing=True)
    
    mod.fit(train, val, 
        num_epoch=num_epoch,
        batch_end_callback = mx.callback.log_train_metric(100,True),
        epoch_end_callback = mx.callback.do_checkpoint("caffenet-mxnet-jump-start-weighted", 1),
        kvstore='device',
        optimizer='sgd',
        optimizer_params={'learning_rate':0.0009},
        eval_metric='acc')
#        eval_metric=mx.metric.CrossEntropy())
    
    return mod

In [6]:
num_classes = 2 # This is binary classification
batch_per_gpu = 128
num_gpus = 1
epoch = 30
(new_sym, new_args, freeze) = get_fine_tune_model(sym, arg_params, num_classes)

batch_size = batch_per_gpu * num_gpus
(train, val) = get_iterators(batch_size)
mod = fit(new_sym, new_args, aux_params, train, val, batch_size, num_gpus, epoch)
metric = mx.metric.Accuracy()
mod_score = mod.score(val, metric)
print mod_score

2017-09-18 12:45:10,900 Already bound, ignoring bind()
  allow_missing=allow_missing, force_init=force_init)
2017-09-18 12:45:21,547 Iter[0] Batch[0] Train-accuracy=0.273438
2017-09-18 12:45:43,663 Iter[0] Batch[100] Train-accuracy=0.794766
2017-09-18 12:46:05,702 Iter[0] Batch[200] Train-accuracy=0.822812
2017-09-18 12:46:11,426 Epoch[0] Train-accuracy=0.824820
2017-09-18 12:46:11,431 Epoch[0] Time cost=57.420
2017-09-18 12:46:14,336 Saved checkpoint to "caffenet-mxnet-jump-start-weighted-0001.params"
2017-09-18 12:46:17,880 Epoch[0] Validation-accuracy=0.790956
2017-09-18 12:46:17,984 Iter[1] Batch[0] Train-accuracy=0.804688
2017-09-18 12:46:40,018 Iter[1] Batch[100] Train-accuracy=0.832578
2017-09-18 12:47:02,039 Iter[1] Batch[200] Train-accuracy=0.842187
2017-09-18 12:47:07,758 Epoch[1] Train-accuracy=0.836238
2017-09-18 12:47:07,761 Epoch[1] Time cost=49.878
2017-09-18 12:47:10,562 Saved checkpoint to "caffenet-mxnet-jump-start-weighted-0002.params"
2017-09-18 12:47:13,492 Epoch[1

2017-09-18 13:01:32,562 Iter[17] Batch[100] Train-accuracy=0.890781
2017-09-18 13:01:54,598 Iter[17] Batch[200] Train-accuracy=0.890234
2017-09-18 13:02:00,096 Epoch[17] Train-accuracy=0.890312
2017-09-18 13:02:00,098 Epoch[17] Time cost=49.693
2017-09-18 13:02:02,732 Saved checkpoint to "caffenet-mxnet-jump-start-weighted-0018.params"
2017-09-18 13:02:05,655 Epoch[17] Validation-accuracy=0.855824
2017-09-18 13:02:05,758 Iter[18] Batch[0] Train-accuracy=0.921875
2017-09-18 13:02:27,786 Iter[18] Batch[100] Train-accuracy=0.892578
2017-09-18 13:02:49,813 Iter[18] Batch[200] Train-accuracy=0.896016
2017-09-18 13:02:55,534 Epoch[18] Train-accuracy=0.893930
2017-09-18 13:02:55,536 Epoch[18] Time cost=49.879
2017-09-18 13:02:58,173 Saved checkpoint to "caffenet-mxnet-jump-start-weighted-0019.params"
2017-09-18 13:03:01,001 Epoch[18] Validation-accuracy=0.847900
2017-09-18 13:03:01,102 Iter[19] Batch[0] Train-accuracy=0.914062
2017-09-18 13:03:23,124 Iter[19] Batch[100] Train-accuracy=0.89585

[('accuracy', 0.8603219696969697)]


In [7]:
# prefix = 'caffenet-mxnet-jump-start-128-bootstrap'
# mc = mod.save_checkpoint(prefix, epoch)