# Visualization and debugging for MindSpore
## Target
- Become familiar with the basic functions of MindInsight.
- Using an implementation of the LeNet model, learn how to use MindInsight to debug training problems.

## Setup
- MindSpore version: 0.5.0
- MindInsight version: 0.7.0
- System: Linux Ubuntu 18.04
- Hardware: Ascend, GPU or CPU
- For more information, please refer to:
https://www.mindspore.cn/install/en

## Experiment
- Model: Lenet5 (Please refer to https://gitee.com/mindspore/mindspore/tree/r0.5/model_zoo/lenet)
- Dataset: MNIST (handwritten digits; download: http://yann.lecun.com/exdb/mnist/)

### 1. Dataset preprocessing

In [None]:
import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as CV
import mindspore.dataset.transforms.c_transforms as C
from mindspore.dataset.transforms.vision import Inter
from mindspore.common import dtype as mstype

In [None]:
def create_dataset(data_path, batch_size=32, repeat_size=1,
                   num_parallel_workers=1):
    """
    Build the MNIST input pipeline used for both training and evaluation.

    Args:
        data_path (str): Directory handed to ``ds.MnistDataset``.
        batch_size (int): Number of samples per batch. Default: 32.
        repeat_size (int): Value passed to ``repeat`` after batching. Default: 1.
        num_parallel_workers (int): Worker count given to every ``map`` call. Default: 1.

    Returns:
        The dataset object after the map, shuffle, batch and repeat steps.
    """
    # define dataset
    mnist_ds = ds.MnistDataset(data_path)

    resize_height, resize_width = 32, 32
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR)  # Bilinear mode
    rescale_op = CV.Rescale(rescale, shift)
    hwc2chw_op = CV.HWC2CHW()
    rotation_op = CV.RandomRotation(degrees=200)  # changing degrees to 10 can recover training
    type_cast_op = C.TypeCast(mstype.int32)

    # cast the "label" column to int32
    mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)

    # transform the "image" column, in order: resize -> rescale -> random rotation -> HWC to CHW
    mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image", operations=rotation_op, num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)

    # apply DatasetOps: shuffle first, then batch, then repeat
    buffer_size = 10000
    mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size)  # 10000 as in LeNet train script
    mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
    mnist_ds = mnist_ds.repeat(repeat_size)

    return mnist_ds

### 2. Model Structure

In [None]:
"""LeNet."""
import mindspore.nn as nn
from mindspore.common.initializer import TruncatedNormal

def conv(in_channels, out_channels, kernel_size, stride=1, padding=0):
    """
    Build a bias-free ``nn.Conv2d`` layer that uses the "valid" pad mode.

    The kernel weights are initialized with the initializer returned by
    ``weight_variable()``; ``stride`` and ``padding`` are forwarded unchanged.
    """
    return nn.Conv2d(
        in_channels,
        out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        pad_mode="valid",
        has_bias=False,
        weight_init=weight_variable(),
    )


def fc_with_initialize(input_channels, out_channels):
    """
    Build an ``nn.Dense`` layer whose weight and bias each get their own
    initializer from ``weight_variable()``.
    """
    weight_init = weight_variable()
    bias_init = weight_variable()
    return nn.Dense(input_channels, out_channels,
                    weight_init=weight_init, bias_init=bias_init)


def weight_variable():
    """Return a new ``TruncatedNormal`` initializer constructed with 0.02."""
    sigma = 0.02
    return TruncatedNormal(sigma)


"""

In the following model structure, you can add the line below right after the first
conv layer in the construct method to recover the training process.
>>> x = self.relu(x)

"""

class LeNet5(nn.Cell):
    """
    LeNet-5 style network: two convolutions followed by three dense layers.

    Args:
        num_class (int): Output size of the last dense layer. Default: 10.
        channel (int): Input channels of the first convolution. Default: 1.

    Returns:
        Tensor, the output of the last dense layer.

    Examples:
        >>> LeNet5(num_class=10, channel=1)

    """
    def __init__(self, num_class=10, channel=1):
        super().__init__()
        self.num_class = num_class

        # convolutional layers (5x5 kernels)
        self.conv1 = conv(channel, 6, 5)
        self.conv2 = conv(6, 16, 5)

        # dense layers; fc1 takes the flattened 16 * 5 * 5 features
        self.fc1 = fc_with_initialize(16 * 5 * 5, 120)
        self.fc2 = fc_with_initialize(120, 84)
        self.fc3 = fc_with_initialize(84, self.num_class)

        # parameter-free layers shared across stages
        self.relu = nn.ReLU()
        self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()

    def construct(self, x):
        """Run the forward pass and return the output of ``fc3``."""
        x = self.conv1(x)
        # No activation is applied after conv1 in this version of the network;
        # inserting `x = self.relu(x)` at this point is the suggested change
        # for recovering the training process.
        x = self.max_pool2d(x)

        # second convolution stage: conv -> relu -> pool
        x = self.max_pool2d(self.relu(self.conv2(x)))

        # classifier head
        x = self.flatten(x)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        return self.fc3(x)

### 3. Training hyperparameter configuration

In [None]:
from easydict import EasyDict as edict

# Hyperparameters for this experiment, wrapped in an EasyDict. The later cells
# read them both as attributes (cfg.lr) and as keys (cfg['epoch_size']).
cfg = edict({
    'num_classes': 10,  # passed to LeNet5 when the network is created
    'lr': 0.001, # learning rate given to nn.Momentum; modify it to 0.01 to recover the training process
    'momentum': 0.9,  # momentum given to nn.Momentum
    'epoch_size': 3,  # used as the dataset repeat_size and as the epoch count for model.train
    'batch_size': 32,  # batch size passed to create_dataset
    'buffer_size': 1000,  # not read by the cells shown here (create_dataset hard-codes 10000)
    'image_height': 32,  # not read by the cells shown here (create_dataset hard-codes 32)
    'image_width': 32,  # not read by the cells shown here (create_dataset hard-codes 32)
    'save_checkpoint_steps': 1875,  # not read by the cells shown here
    'keep_checkpoint_max': 10,  # not read by the cells shown here
})

### 4. Training module initialization

In [None]:
import os
from mindspore import context
from mindspore.train.callback import LossMonitor, TimeMonitor, SummaryCollector
# Run in graph mode. Modify device_target according to your own device
# ("Ascend", "GPU" or "CPU").
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")

# modify according to the path where the MNIST dataset is saved
data_path = "MNIST"

# NOTE(review): cfg.epoch_size is used here as repeat_size and is also passed
# to model.train as the epoch count -- confirm this repetition is intended.
ds_train = create_dataset(
    os.path.join(data_path, "train"),
    batch_size=cfg.batch_size,
    repeat_size=cfg.epoch_size,
)

# network, loss function and optimizer
network = LeNet5(cfg.num_classes)
net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
net_opt = nn.Momentum(
    network.trainable_params(),
    learning_rate=cfg.lr,
    momentum=cfg.momentum,
)

# callbacks: timing, plus the main object collecting logs for visualization
time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
summary_collector = SummaryCollector(summary_dir='./summary_dir', collect_freq=1)

### 5. Launch training

In [None]:
from mindspore.train import Model
from mindspore.nn.metrics import Accuracy

# Wrap network, loss and optimizer in a Model that also reports accuracy.
model = Model(network, loss_fn=net_loss, optimizer=net_opt,
              metrics={"Accuracy": Accuracy()})

# Train without dataset sinking. LossMonitor's per_print_times is set to the
# training dataset size; SummaryCollector writes the logs for visualization.
model.train(
    cfg.epoch_size,
    ds_train,
    callbacks=[
        time_cb,
        LossMonitor(per_print_times=ds_train.get_dataset_size()),
        summary_collector,
    ],
    dataset_sink_mode=False,
)

### 6. Model evaluation

In [None]:
# Build the test pipeline with the same create_dataset helper (so the same map
# operations and shuffle apply), with repeat_size set to 1.
ds_eval = create_dataset(
    os.path.join(data_path, "test"),
    batch_size=cfg.batch_size,
    repeat_size=1,
)

# Evaluate with the metrics configured on the model and print the result.
acc = model.eval(ds_eval, dataset_sink_mode=False)
print('Metrics: ', acc)