# train (Mindspore implementation)

In [30]:
import sys
import os.path
import mindspore
from mindspore import Tensor, nn, Model, context
from mindspore import load_checkpoint, load_param_into_net
from mindspore import ops as P
from mindspore.train.callback import LossMonitor, CheckpointConfig, ModelCheckpoint, TimeMonitor
from tqdm import tqdm
import config
import data
import model
import utils
import mindspore.context as context

context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU')

In [7]:
!CUDA_VISIBLE_DEVICES=2

More definitions for training:

In [34]:
def update_learning_rate(optimizer, iteration):
    lr = config.initial_lr * 0.5**(float(iteration) / config.lr_halflife)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


total_iterations = 0

class WithLossCell(nn.Cell):
    """
    The cell wrapped with NLL loss, for train only
    """
    def __init__(self, backbone):
        super(WithLossCell, self).__init__(auto_prefix=False)
        self._backbone = backbone
        self.log_softmax = P.LogSoftmax(axis=0)
        self.reduce_sum = P.ReduceSum()

    def construct(self, v, q, a, item, q_len):
        out = self._backbone(v, q, q_len)
        nll = -self.log_softmax(out)
        loss = self.reduce_sum(nll * a / 10, axis=1).mean()
        return loss

test:

In [36]:
train_loader = data.get_loader(train=True)
net = model.Net(train_loader.source.num_tokens)
net = WithLossCell(net)
for v, q, a, idx, q_len in train_loader:
    out = net(v, q, a, idx, q_len)
    print(out)
    break

4.067258


### Train Now!

In [39]:
from datetime import datetime
name = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
target_name = os.path.join('logs', '{}.ckpt'.format(name))
print('will save to {}'.format(target_name))
config_as_dict = {k: v for k, v in vars(config).items() if not k.startswith('__')}

train_loader = data.get_loader(train=True)
val_loader = data.get_loader(val=True)

net = model.Net(train_loader.source.num_tokens)
if config.pretrained:
    param_dict = load_checkpoint(config.pretrained_model_path)
    if param_dict is not None: print("Successfully loaded pretrained model from {}.".format(config.pretrained_model_path))
    load_param_into_net(net, param_dict)

net = WithLossCell(net) # self defined WithLossCell

# loss = P.NLLLoss(reduction="mean")
# net = nn.WithLossCell(net, loss) # nn.WithLossCell

optimizer = nn.Adam(params=net.trainable_params(), learning_rate=config.initial_lr)
net = Model(net, optimizer=optimizer) # wrap in model

loss_cb = LossMonitor()
config_ck = CheckpointConfig(save_checkpoint_steps=1, keep_checkpoint_max=1)
ckpoint_cb = ModelCheckpoint(prefix="test", directory='logs', config=config_ck)
time_cb = TimeMonitor()
callbacks = [time_cb, ckpoint_cb, loss_cb]

net.train(epoch=config.epochs, train_dataset=train_loader)

# for i in range(config.epochs):
    # _ = run(net, train_loader, optimizer, tracker, train=True, prefix='train', epoch=i)
    # r = run(net, val_loader, optimizer, tracker, train=False, prefix='val', epoch=i)
    # mindspore.save_checkpoint(net, target_name)

#     results = {
#         'name': name,
#         'tracker': tracker.to_dict(),
#         'config': config_as_dict,
#         'weights': net.state_dict(),
#         'eval': {
#             'answers': r[0],
#             'accuracies': r[1],
#             'idx': r[2],
#         },
#         'vocab': train_loader.dataset.vocab,
#     }
    


will save to logs/2021-07-14_23:28:39.ckpt


RuntimeError: mindspore/ccsrc/backend/session/kernel_build_client.h:97 Request] Try to send request before Open()

# 