# train (Mindspore implementation)

In [2]:
import sys
import os.path
import mindspore
from mindspore import Tensor, nn, Model, context
from mindspore import load_checkpoint, load_param_into_net
from mindspore import ops as P
from mindspore.ops import functional as F
from mindspore.ops import composite as C
from mindspore.common.parameter import ParameterTuple
from mindspore.train.callback import LossMonitor, CheckpointConfig, ModelCheckpoint, TimeMonitor
from mindspore.nn.loss.loss import _Loss
import numpy as np
from tqdm import tqdm
import config
import data
import model
import utils
import mindspore.context as context

# context.set_context(mode=context.PYNATIVE_MODE, device_target='Ascend')
context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU')

In [1]:
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

## Option 1

* Not recommended! Please use option 2 :((((

In [3]:
class NLLLoss(_Loss):
    '''
       NLLLoss function
    '''
    def __init__(self, reduction='mean'):
        super(NLLLoss, self).__init__(reduction)
        self.reduce_sum = P.ReduceSum()
        self.log_softmax = P.LogSoftmax(axis=0)

    def construct(self, logits, label):
        nll = -self.log_softmax(logits)
        loss = self.reduce_sum(nll * label / 10, axis=1).mean()
        return self.get_loss(loss)

class WithLossCell(nn.Cell):
    """
    The cell wrapped with NLL loss, for train only
    """
    def __init__(self, backbone):
        super(WithLossCell, self).__init__(auto_prefix=False)
        self._loss_fn = NLLLoss()
        self._backbone = backbone
        self.reduce_sum = P.ReduceSum()

    def construct(self, v, q, a, item, q_len):
        out = self._backbone(v, q, q_len)
        loss = self._loss_fn(out, a)
        return loss

test:

In [36]:
train_loader = data.get_loader(train=True)
net = model.Net(train_loader.source.num_tokens)
net = WithLossCell(net)
for v, q, a, idx, q_len in train_loader:
    print(v.shape, q.shape, a.shape)
    print(a.shape)
    out = net(v, q, a, idx, q_len)
    print(out)
    break

4.067258


Train now!

In [4]:
from datetime import datetime
name = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
target_name = os.path.join('logs', '{}.ckpt'.format(name))
print('will save to {}'.format(target_name))
config_as_dict = {k: v for k, v in vars(config).items() if not k.startswith('__')}

train_loader = data.get_loader(train=True)
# val_loader = data.get_loader(val=True)

net = model.Net(train_loader.source.num_tokens)
if config.pretrained:
    param_dict = load_checkpoint(config.pretrained_model_path)
    if param_dict is not None: print("Successfully loaded pretrained model from {}.".format(config.pretrained_model_path))
    load_param_into_net(net, param_dict)

loss_net = WithLossCell(net) # self defined WithLossCell
optimizer = nn.Adam(params=net.trainable_params(), learning_rate=config.initial_lr)
train_net = Model(network=loss_net, optimizer=optimizer) # wrap in model

loss_cb = LossMonitor()
config_ck = CheckpointConfig(save_checkpoint_steps=150, keep_checkpoint_max=1)
ckpoint_cb = ModelCheckpoint(prefix="ms", directory='logs', config=config_ck)
time_cb = TimeMonitor()
callbacks = [time_cb, ckpoint_cb, loss_cb]

train_net.train(epoch=config.epochs, train_dataset=train_loader, callbacks=callbacks, dataset_sink_mode=True)

will save to logs/2021-07-15_11:45:01.ckpt
epoch: 1 step: 1, loss is 2.7623253
epoch: 1 step: 2, loss is 2.8053799
epoch: 1 step: 3, loss is 2.881455
epoch: 1 step: 4, loss is 3.04573
epoch: 1 step: 5, loss is 2.6881871
epoch: 1 step: 6, loss is 2.8143573
epoch: 1 step: 7, loss is 2.986301
epoch: 1 step: 8, loss is 3.119298
epoch: 1 step: 9, loss is 2.9587572
epoch: 1 step: 10, loss is 2.702518
epoch: 1 step: 11, loss is 2.7756617
epoch: 1 step: 12, loss is 2.8608723
epoch: 1 step: 13, loss is 2.9280546
epoch: 1 step: 14, loss is 2.6857605
epoch: 1 step: 15, loss is 2.8104558
epoch: 1 step: 16, loss is 2.4992619
epoch: 1 step: 17, loss is 2.7337642
epoch: 1 step: 18, loss is 3.0796766
epoch: 1 step: 19, loss is 3.15968
epoch: 1 step: 20, loss is 2.9457896
epoch: 1 step: 21, loss is 3.0661964
epoch: 1 step: 22, loss is 2.6680708
epoch: 1 step: 23, loss is 2.9891412
epoch: 1 step: 24, loss is 2.6883287
epoch: 1 step: 25, loss is 3.0324857
epoch: 1 step: 26, loss is 3.0038111
epoch: 1 ste

## Option 2

In [3]:
class TrainOneStepCell(nn.Cell):
    """
    Network training package class.

    Wraps the network with an optimizer. The resulting Cell be trained without inputs.
    Backward graph will be created in the construct function to do parameter updating. Different
    parallel modes are available to run the training.

    Args:
        network (Cell): The training network.
        optimizer (Cell): Optimizer for updating the weights.
        sens (Number): The scaling number to be filled as the input of backpropagation. Default value is 1.0.

    Outputs:
        Tensor, a scalar Tensor with shape :math:`()`.

    Examples:
        >>> net = Net()
        >>> loss_fn = nn.SoftmaxCrossEntropyWithLogits()
        >>> optim = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
        >>> loss_net = nn.WithLossCell(net, loss_fn)
        >>> train_net = nn.TrainOneStepCell(loss_net, optim)
    """
    def __init__(self, network, optimizer, sens=1.0):
        super(TrainOneStepCell, self).__init__(auto_prefix=False)
        self.network = network
        self.network.add_flags(defer_inline=True)
        self.weights = ParameterTuple(network.trainable_params())
        self.optimizer = optimizer
        self.grad = C.GradOperation(get_by_list=True)
        self.sens = sens

    def construct(self, v, q, a, item, q_len):
        weights = self.weights
        loss = self.network(v, q, a, item, q_len)
        sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
        grads = self.grad(self.network, weights)(v, q, a, item, q_len)
        return F.depend(loss, self.optimizer(grads))

class NLLLoss(_Loss):
    '''
       NLLLoss function
    '''
    def __init__(self, reduction='mean'):
        super(NLLLoss, self).__init__(reduction)
        self.reduce_sum = P.ReduceSum()
        self.log_softmax = P.LogSoftmax(axis=0)

    def construct(self, logits, label):
        nll = -self.log_softmax(logits)
        loss = self.reduce_sum(nll * label / 10, axis=1).mean()
        return self.get_loss(loss)

class WithLossCell(nn.Cell):
    """
    The cell wrapped with NLL loss, for train only
    """
    def __init__(self, backbone):
        super(WithLossCell, self).__init__(auto_prefix=False)
        self._loss_fn = NLLLoss()
        self._backbone = backbone
        self.reduce_sum = P.ReduceSum()

    def construct(self, v, q, a, item, q_len):
        out = self._backbone(v, q, q_len)
        loss = self._loss_fn(out, a)
        return loss

class TrainNetWrapper(nn.Cell):
    def __init__(self, backbone):
        super(TrainNetWrapper, self).__init__(auto_prefix=False)
        self.net = backbone
        
        loss_net = WithLossCell(backbone)
        optimizer = nn.Adam(params=net.trainable_params(), learning_rate=config.initial_lr)
        
        self.loss_train_net = TrainOneStepCell(loss_net, optimizer)

    def construct(self, v, q, a, item, q_len):
        loss = self.loss_train_net(v, q, a, item, q_len)
        accuracy = Tensor(0.35)
        return loss, accuracy

Train now!

In [11]:
from datetime import datetime
name = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
target_name = os.path.join('logs', '{}.ckpt'.format(name))
print('will save to {}'.format(target_name))
config_as_dict = {k: v for k, v in vars(config).items() if not k.startswith('__')}

train_loader = data.get_loader(train=True)
# val_loader = data.get_loader(val=True)

net = model.Net(train_loader.source.num_tokens)
if config.pretrained:
    param_dict = load_checkpoint(config.pretrained_model_path)
    if param_dict is not None: print("Successfully loaded pretrained model from {}.".format(config.pretrained_model_path))
    load_param_into_net(net, param_dict)

tracker = utils.Tracker()
train_net = TrainNetWrapper(net)
step = 0

for epoch in range(config.epochs):
    # train_loader = data.get_loader(train=True) # not sure if it matters?

    """
    Hand-crafted train wiht `for` loop
    """
    # train_net.set_train()
    # for v, q, a, idx, q_len in train_loader:
    #     train_result = train_net(v, q, a, idx, q_len)
    #     train_loss = train_result[0]
    #     train_acc = train_result[1]
    #     print("T{} step {}: loss = {}, acc = {}".format(epoch, step, train_loss, train_acc))
    #     step += 1
    
    """
    Wrapped train with `tqdm`
    """
    run(train_net, train_loader, tracker, train=True, prefix='train', epoch=epoch)

    # train_loader.reset() # not sure if it matters??
    # break


will save to logs/2021-07-15_13:10:21.ckpt


train E000: 164it [01:20,  2.05it/s, acc=0.3500, loss=3.6668]
train E001: 164it [01:18,  2.08it/s, acc=0.3500, loss=3.0501]
train E002: 138it [01:06,  2.07it/s, acc=0.3500, loss=2.9741]


KeyboardInterrupt: 

In [4]:
def run(net, loader, tracker, train=False, prefix='', epoch=0):
    """ Run an epoch over the given loader """
    if train:
        net.set_train()
        tracker_class, tracker_params = tracker.MovingMeanMonitor, {'momentum': 0.99}
    else:
        net.set_train(False)
        tracker_class, tracker_params = tracker.MeanMonitor, {}

    tq = tqdm(loader, desc='{} E{:03d}'.format(prefix, epoch), ncols=0)
    loss_tracker = tracker.track('{}_loss'.format(prefix), tracker_class(**tracker_params))
    acc_tracker = tracker.track('{}_acc'.format(prefix), tracker_class(**tracker_params))
    for v, q, a, idx, q_len in tq:
        if train:
            loss, acc = net(v, q, a, idx, q_len)
        else:
            print("Evaluating...")
        
        loss_tracker.append(loss.asnumpy())
        acc_tracker.append(acc.asnumpy())
        # acc_tracker.append(acc.mean())
        # for a in acc:
        #     acc_tracker.append(a.item())
        fmt = '{:.4f}'.format
        tq.set_postfix(loss=fmt(loss_tracker.mean.value), acc=fmt(acc_tracker.mean.value))