# **CutMix**

In [None]:
def rand_bbox(W, H, lam):
    """Sample a random CutMix box inside a ``W`` x ``H`` grid.

    Each side of the box is the matching grid extent scaled by
    ``sqrt(1 - lam)``, so an unclipped box covers about ``1 - lam`` of the
    grid.  The centre is drawn uniformly and the corners are clamped to the
    grid, which means a box near a border comes out smaller than requested.

    Args:
        W: Extent of the first box axis (upper bound for ``x1`` / ``x2``).
        H: Extent of the second box axis (upper bound for ``y1`` / ``y2``).
        lam: Mixing ratio in ``[0, 1]``, as a tensor or a plain number.

    Returns:
        ``(x1, y1, x2, y2)`` as one-element ``torch.long`` tensors living on
        the device of ``lam`` (CPU when ``lam`` is a plain number).
    """
    # Accept plain numbers too, and take the device from ``lam`` itself
    # instead of relying on a module-level ``device`` global.
    lam = torch.as_tensor(lam)
    target_device = lam.device

    cut_rat = torch.sqrt(1.0 - lam)
    cut_w = (W * cut_rat).to(torch.long)
    cut_h = (H * cut_rat).to(torch.long)

    # Uniformly sampled box centre.  Sampling stays on the default (CPU)
    # generator and is moved afterwards so seeded runs remain reproducible.
    cx = torch.randint(W, (1,)).to(target_device)
    cy = torch.randint(H, (1,)).to(target_device)

    # Clamp so the box never leaves the grid.
    x1 = torch.clamp(cx - cut_w // 2, 0, W)
    y1 = torch.clamp(cy - cut_h // 2, 0, H)
    x2 = torch.clamp(cx + cut_w // 2, 0, W)
    y2 = torch.clamp(cy + cut_h // 2, 0, H)
    return x1, y1, x2, y2


def cutmix_data(x, y, alpha=1.0, p=0.5):
    """Apply CutMix to a batch with probability ``p``.

    A rectangular patch of every sample is replaced by the same patch taken
    from another sample of the batch, chosen by a random permutation.

    Args:
        x: 4-D batch tensor, indexed as ``[sample, channel, dim2, dim3]``.
        y: Labels, indexed along the batch dimension.
        alpha: Both concentration parameters of the Beta distribution the
            initial mixing ratio is drawn from.
        p: Probability of applying CutMix to this batch.

    Returns:
        ``(mixed_x, y_a, y_b, lam)``.  ``y_a`` are the original labels,
        ``y_b`` the labels of the samples the patches came from, and ``lam``
        (a Python float) the fraction of each sample that was left
        untouched.  When CutMix is skipped the inputs are returned as-is
        with ``y_b`` all zeros and ``lam == 1.0``.
    """
    if np.random.random() > p:
        # No mixing: lam == 1.0 gives the dummy y_b a weight of zero.
        return x, y, torch.zeros_like(y), 1.0

    # NOTE: for an NCHW batch dim 2 is the height and dim 3 the width, so
    # these names are swapped relative to convention.  They are used
    # consistently below, so the result is unaffected.
    W, H = x.size(2), x.size(3)
    shuffle = torch.randperm(x.size(0)).to(x.device)

    lam = torch.distributions.beta.Beta(alpha, alpha).sample().to(x.device)
    x1, y1, x2, y2 = rand_bbox(W, H, lam)

    # Work on a copy so the caller's batch is not overwritten in place.
    cutmix_x = x.clone()
    cutmix_x[:, :, x1:x2, y1:y2] = x[shuffle, :, x1:x2, y1:y2]

    # The box may have been clipped at the border, so recompute lam from the
    # area that was actually replaced.
    lam = 1 - ((x2 - x1) * (y2 - y1) / float(W * H)).item()
    y_a, y_b = y, y[shuffle.to(y.device)]
    return cutmix_x, y_a, y_b, lam

# **Label Smoothing Loss**

In [None]:
def loss_fn(outputs, targets):
    """Cross-entropy for either class-index or per-class-weight targets.

    Args:
        outputs: Raw logits with the class scores along dim 1.
        targets: Either a 1-D tensor of class indices, or a tensor shaped
            like ``outputs`` holding one weight per class (for example
            smoothed one-hot labels).

    Returns:
        The loss averaged over the batch, as a scalar tensor.
    """
    # Hard labels: defer to the built-in cross-entropy.
    if targets.ndim == 1:
        return F.cross_entropy(outputs, targets)

    # Soft labels: negative weighted log-likelihood per sample, then the mean.
    log_probs = F.log_softmax(outputs, dim=1)
    per_sample = (-targets * log_probs).sum(dim=1)
    return per_sample.mean()

def label_smooth_loss_fn(outputs, targets, epsilon=0.1, num_classes=None):
    """Cross-entropy against label-smoothed one-hot targets.

    Each target class keeps ``1 - epsilon`` of the probability mass and the
    remaining ``epsilon`` is spread evenly over all classes.

    Args:
        outputs: Raw logits with the class scores along dim 1.
        targets: Integer class indices (as accepted by ``F.one_hot``).
        epsilon: Smoothing strength; ``0`` gives plain one-hot targets.
        num_classes: Number of classes.  Defaults to ``outputs.size(1)``, so
            the function is no longer tied to a 1000-class model.

    Returns:
        The loss averaged over the batch, as a scalar tensor.
    """
    if num_classes is None:
        num_classes = outputs.size(1)

    # Build the targets on the same device as the logits rather than on a
    # module-level ``device`` global.
    onehot = F.one_hot(targets, num_classes).to(
        device=outputs.device, dtype=torch.float
    )
    # Adding the scalar broadcasts; no extra all-ones tensor is needed.
    smoothed = (1 - epsilon) * onehot + epsilon / num_classes
    return loss_fn(outputs, smoothed)

# **Train**

In [None]:
# Training setup: Adam, a gradient scaler, and a scheduler that halves the
# learning rate when the epoch-average training loss stops decreasing.
# NOTE(review): GradScaler is used without torch.cuda.amp.autocast(), so the
# forward pass still runs in full precision and the scaling buys nothing.
# Wrap the forward pass and loss in autocast if mixed precision is intended.
scaler = torch.cuda.amp.GradScaler()
optimizer = optim.Adam(Model.parameters(), lr=LEARNING_RATE)
scheduler = ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=2,
    threshold_mode='abs',
    min_lr=1e-8,
    verbose=True,
)
loss = label_smooth_loss_fn

Model.train()

Model.to(device)

for epoch in range(EPOCHS):
    # Plain Python floats: accumulating the loss tensors themselves would
    # keep a reference to every batch's autograd graph for the whole epoch.
    loss_val_sum = 0.0
    visual_loss_sum = 0.0
    for imgs, labels in train_loader:
        # CutMix is applied with its default probability p=0.5.
        imgs, labels = imgs.to(device), labels.to(device)
        imgs, labels_a, labels_b, lam = cutmix_data(imgs, labels)

        # Same effect as optimizer.zero_grad(), but drops the gradient
        # tensors instead of filling them with zeros.
        for param in Model.parameters():
            param.grad = None
        model_pred = Model(imgs)

        # Label smoothing + CutMix: weight the loss against both label sets
        # by the share of each sample that came from the matching image.
        loss_out = (lam * loss(model_pred, labels_a)
                    + (1 - lam) * loss(model_pred, labels_b))

        scaler.scale(loss_out).backward()
        scaler.step(optimizer)
        scaler.update()

        # NOTE(review): the original added an undefined ``normal_loss_out``
        # to an uninitialised ``visual_loss_sum`` (NameError on the first
        # batch).  Reconstructed here as the un-smoothed CutMix
        # cross-entropy, for monitoring only -- confirm this is the
        # intended quantity.
        with torch.no_grad():
            normal_loss_out = (lam * loss_fn(model_pred, labels_a)
                               + (1 - lam) * loss_fn(model_pred, labels_b))

        loss_val_sum += loss_out.item()
        visual_loss_sum += normal_loss_out.item()

    loss_val_avg = loss_val_sum / len(train_loader)

    # The scheduler watches the epoch-average (smoothed) training loss.
    scheduler.step(loss_val_avg)