In [1]:
# Fetch the FMix and Adai sources next to this notebook; each clone is skipped
# when its target directory already exists.
# NOTE(review): neither clone pins a commit or tag, so a fresh checkout may pull
# different code than the run recorded here — consider pinning.
![ ! -d "fmix" ] && git clone https://github.com/ecs-vlc/fmix
![ ! -d "adai" ] && git clone https://github.com/zeke-xie/adaptive-inertia-adai adai

In [2]:
import os
import random
from multiprocessing import cpu_count

import matplotlib.pyplot as plt
import numpy as np
import PIL
import torch
import torch.nn.functional as F
import wandb
from adai.adai_optim import *
from dataset.affectnet import AffectNetDataset
from dataset.facialexpressions import FacialExpressionsDataset
from dataset.ferplus import FERPlusDataset
from dataset.rafdb import RAFDataset
from fmix_weight import FMix
from imgaug import augmenters as iaa
from mish_cuda import MishCuda
from model.ab import AccuracyBoosterPlusBlock
from model.resnet import custom_resnet18, custom_resnet50, custom_resnet101
from model.se import SqueezeExcitationBlock
from optim.lookahead import Lookahead
from optim.lr_scheduler.FlatCosineAnnealing import FlatCosineAnnealing
from optim.radam import RAdam
from sklearn.metrics import accuracy_score
from torch import nn, optim
from torch.utils.data import (
    ConcatDataset,
    DataLoader,
    Dataset,
    WeightedRandomSampler,
    random_split,
)
from torchvision import models, transforms
from tqdm.auto import tqdm, trange
from trainer import Trainer

In [3]:
# Reproducibility setup. Run this before any CUDA work or random-number use.
SEED = 42

# Fixed cuBLAS workspace configuration; PyTorch's reproducibility notes require
# it for deterministic cuBLAS behaviour on recent CUDA versions.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

torch.multiprocessing.set_sharing_strategy("file_system")

# `torch.set_deterministic` is the PyTorch 1.7 spelling; later releases replaced
# it with `torch.use_deterministic_algorithms`. Use whichever this install has.
if hasattr(torch, "use_deterministic_algorithms"):
    torch.use_deterministic_algorithms(True)
else:
    torch.set_deterministic(True)
# torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Seed every RNG the notebook relies on with the same value.
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

In [4]:
# Display the installed PyTorch version so the run records which release it used.
torch.__version__

'1.7.1'

In [5]:
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

In [6]:
# Experiment-wide settings.
PROJECT_NAME = "facial-expressions-essay-v4"

# One DataLoader worker per CPU reported by `multiprocessing.cpu_count`.
WORKERS = cpu_count()

# Per-step batch size and the larger batch it should add up to; their ratio is
# the gradient-accumulation factor passed to the trainer.
BATCH_SIZE = 64
BATCH_TARGET = 256
GRAD_ACC = BATCH_TARGET // BATCH_SIZE

# Evaluation loaders use a quarter of the training batch size.
INFERENCE_BATCH_SIZE = BATCH_SIZE // 4

# Total number of training steps given to the scheduler and the trainer.
MAX_STEP = 60_000

In [7]:
# Steps shared by the training and inference pipelines.
resize_transform = transforms.Resize(256)
centercrop_transform = transforms.CenterCrop(224)
tensor_transform = transforms.ToTensor()
# Per-channel statistics (these values match the commonly used ImageNet ones).
normalize_transform = transforms.Normalize(
    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
)

# Training-only augmentation steps.
random_flip = transforms.RandomHorizontalFlip()
color_jitter = transforms.ColorJitter(
    brightness=0.25, contrast=0.25, hue=0.05, saturation=0.05
)
# imgaug affine step: rotation and shear each drawn from (-15, 15), border mode
# "symmetric". It is fed a NumPy array, hence the `np.asarray` step before it.
affine_augmenter = iaa.Sequential(
    [iaa.Affine(rotate=(-15, 15), shear=(-15, 15), mode="symmetric")]
)

train_steps = [
    resize_transform,
    centercrop_transform,
    random_flip,
    color_jitter,
    np.asarray,
    affine_augmenter.augment_image,
    tensor_transform,
    normalize_transform,
]
train_transform = transforms.Compose(train_steps)

# Inference pipeline: the same resize/crop/tensor/normalise steps, no augmentation.
predict_steps = [
    resize_transform,
    centercrop_transform,
    tensor_transform,
    normalize_transform,
]
predict_transform = transforms.Compose(predict_steps)

In [8]:
# Training data: four corpora, all read through the augmenting training
# pipeline and concatenated into one dataset.
train_parts = [
    AffectNetDataset("../dataset/AffectNet", "train", transform=train_transform),
    FacialExpressionsDataset("../dataset/facial_expressions", transform=train_transform),
    FERPlusDataset("../dataset/FERPlus", "train", transform=train_transform),
    RAFDataset("../dataset/RAF-DB", "train", transform=train_transform),
]
trainset = ConcatDataset(train_parts)
len(trainset)

342497

In [9]:
# Expression class names.
# NOTE(review): presumably the list position matches the integer label emitted
# by the datasets — confirm against the dataset classes.
classes = ["neutral", "happy", "surprise", "sad"]
classes += ["anger", "disgust", "fear", "contempt"]
classes

['neutral', 'happy', 'surprise', 'sad', 'anger', 'disgust', 'fear', 'contempt']

In [10]:
# Gather the labels of every constituent dataset, in ConcatDataset order.
labels_per_dataset = [innerset.get_labels() for innerset in trainset.datasets]
target = [label for labels in labels_per_dataset for label in labels]

# np.unique returns (sorted unique labels, occurrences); only the occurrence
# counts are used afterwards.
unique_labels, class_sample_count = np.unique(target, return_counts=True)
class_sample_count

array([ 94645, 152606,  19305,  31289,  28417,   4919,   7389,   3927])

In [11]:
# Weight each class by one minus its share of the training samples.
# NOTE(review): the following cell reassigns `weight = 1.0 / class_sample_count`,
# so when the notebook is run top to bottom this value is never used.
weight = [1 - (x / sum(class_sample_count)) for x in class_sample_count]

In [12]:
# Inverse-frequency class weights: the fewer samples a class has, the larger
# its weight.
weight = np.true_divide(1.0, class_sample_count)
# Disabled alternative: per-sample weights feeding a WeightedRandomSampler.
# samples_weight = weight[target]
# samples_weight = torch.from_numpy(samples_weight)
# sampler = WeightedRandomSampler(samples_weight, len(samples_weight))

In [13]:
# Class weights as a float32 tensor on the training device.
tensor_weight = torch.as_tensor(weight).to(device=device, dtype=torch.float)
tensor_weight

tensor([1.0566e-05, 6.5528e-06, 5.1800e-05, 3.1960e-05, 3.5190e-05, 2.0329e-04,
        1.3534e-04, 2.5465e-04], device='cuda:0')

In [14]:
def _seed_worker(worker_id):
    """Seed NumPy, ``random`` and imgaug inside one DataLoader worker.

    Each worker process starts from a copy of the parent's NumPy/imgaug RNG
    state. Without reseeding, every worker (and every epoch) would replay the
    same random affine augmentations. PyTorch gives each worker a distinct
    torch seed, so the other RNGs are derived from it here.

    Args:
        worker_id: Index of the worker, supplied by the DataLoader (unused;
            the per-worker torch seed already encodes it).
    """
    import imgaug  # local import: only `imgaug.augmenters` is imported at file level

    worker_seed = torch.initial_seed() % 2**32  # NumPy accepts 32-bit seeds only
    np.random.seed(worker_seed)
    random.seed(worker_seed)
    imgaug.seed(worker_seed)


# Shuffled training loader; incomplete final batches are dropped.
trainloader = DataLoader(
    trainset,
    batch_size=BATCH_SIZE,
    num_workers=WORKERS,
    drop_last=True,
    pin_memory=True,
    shuffle=True,
    worker_init_fn=_seed_worker,
)

In [15]:
# def show_dataset(dataset, n=5):
#     pil_transform = transforms.ToPILImage()
#     img = np.vstack(
#         [
#             np.hstack([pil_transform(dataset[i][0]) for _ in range(5)])
#             for i in [random.randint(0, len(dataset)) for _ in range(n)]
#         ]
#     )
#     plt.imshow(img)


# show_dataset(trainset)

In [16]:
# Held-out splits, all read through the augmentation-free inference pipeline.
eval_kwargs = {"transform": predict_transform}

affectnet_valset = AffectNetDataset("../dataset/AffectNet", "val", **eval_kwargs)
ferplus_valset = FERPlusDataset("../dataset/FERPlus", "val", **eval_kwargs)
ferplus_testset = FERPlusDataset("../dataset/FERPlus", "test", **eval_kwargs)
raf_testset = RAFDataset("../dataset/RAF-DB", "test", **eval_kwargs)

In [17]:
# Pool every held-out split, then cut the pool in half into a "mixed"
# validation set and a "mixed" test set.
# NOTE(review): the pool contains `affectnet_valset` and `ferplus_valset`, which
# are also given in full to the Trainer as validation loaders, so part of the
# mixed test set is data the model was validated on — confirm this is intended.
valtestset = ConcatDataset(
    [
        affectnet_valset,
        ferplus_valset,
        ferplus_testset,
        raf_testset,
    ]
)
valsize = len(valtestset) // 2
# A dedicated seeded generator keeps the split identical no matter how much of
# the global RNG earlier (or re-run) cells have consumed.
split_generator = torch.Generator().manual_seed(42)
valset, testset = random_split(
    valtestset,
    [valsize, len(valtestset) - valsize],
    generator=split_generator,
)

# Both loaders are unshuffled so that labels and predictions collected in
# separate passes stay aligned.
valloader = DataLoader(
    valset,
    batch_size=INFERENCE_BATCH_SIZE,
    shuffle=False,
    num_workers=WORKERS,
    pin_memory=True,
)
testloader = DataLoader(
    testset,
    batch_size=INFERENCE_BATCH_SIZE,
    shuffle=False,
    num_workers=WORKERS,
    pin_memory=True,
)
len(valset), len(testset)

(7084, 7084)

In [18]:
def _inference_loader(dataset):
    """Wrap ``dataset`` in an unshuffled DataLoader with the shared inference settings."""
    return DataLoader(
        dataset,
        batch_size=INFERENCE_BATCH_SIZE,
        shuffle=False,
        num_workers=WORKERS,
        pin_memory=True,
    )


# One loader per individual held-out split, for per-dataset metrics.
affectnet_valloader = _inference_loader(affectnet_valset)
ferplus_valloader = _inference_loader(ferplus_valset)
ferplus_testloader = _inference_loader(ferplus_testset)
raf_testloader = _inference_loader(raf_testset)

In [19]:
# model = models.resnet18(pretrained=True)
# model.fc = nn.Linear(model.fc.in_features, len(classes))

In [20]:
# torch.hub.list('zhanghang1989/ResNeSt', force_reload=False)
# model = torch.hub.load('zhanghang1989/ResNeSt', 'resnest50', pretrained=True)
# model.fc = nn.Linear(model.fc.in_features, len(classes))

In [21]:
# Custom ResNet-50 with a Mish activation, an AccuracyBoosterPlus output block
# (no extra parameters) and one output per expression class.
activation = MishCuda()
output_block_spec = {
    "class": AccuracyBoosterPlusBlock,
    "params": {},
}
# Options tried earlier and currently disabled:
#     network_type="pyramid",
#     zero_init_residual=True,
#     dropblock={"drop_prob": 0.1, "max_steps": MAX_STEP},
model = custom_resnet50(
    activation_layer=activation,
    output_block=output_block_spec,
    num_classes=len(classes),
    without_skip=True,
)

In [22]:
# Unweighted variants, kept for reference:
# criterion = nn.CrossEntropyLoss()
# fmix = FMix(size=(224, 224))
# Class-weighted cross-entropy.
# NOTE(review): the Trainer below is given `fmix.loss` as its criterion, so this
# `criterion` object does not appear to be used by the visible cells.
criterion = nn.CrossEntropyLoss(weight=tensor_weight)
# FMix helper for 224x224 inputs (the centre-crop size), given the same class
# weights; it is called on input batches and its `.loss` is the training criterion.
fmix = FMix(size=(224, 224), weight=tensor_weight)


def transform_func(X, y):
    """Batch hook handed to the Trainer: run ``X`` through FMix, pass ``y`` through.

    Args:
        X: Input batch; it is given to the module-level ``fmix`` callable.
        y: Targets for the batch; returned unchanged.

    Returns:
        A ``(fmix(X), y)`` tuple.
    """
    mixed_inputs = fmix(X)
    return mixed_inputs, y

In [23]:
# Preview the 10k-step milestones up to MAX_STEP (the values used by the
# commented-out MultiStepLR schedule).
list(range(10_000, MAX_STEP + 1, 10_000))

[10000, 20000, 30000, 40000, 50000, 60000]

In [24]:
# Optimiser stack: RAdam wrapped in Lookahead, scheduled by FlatCosineAnnealing
# over MAX_STEP steps.
base_optimizer = RAdam(model.parameters(), lr=1e-3, diffgrad=False)
optimizer = Lookahead(base_optimizer)
scheduler = FlatCosineAnnealing(optimizer, MAX_STEP, step_size=0.5)

# Alternatives tried earlier and currently disabled:
# optimizer = Adai(model.parameters(), lr=1e-2)
# scheduler = FlatCosineAnnealing(optimizer, MAX_STEP, step_size=0.5)

# optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# scheduler = optim.lr_scheduler.MultiStepLR(
#     optimizer,
#     milestones=[i * 10_000 for i in range(1, (MAX_STEP // 10_000) + 1)],
#     gamma=0.75,
# )

# Named validation loaders evaluated by the trainer.
validation_loaders = [
    ("mixed", valloader),
    ("affectnet", affectnet_valloader),
    ("ferplus", ferplus_valloader),
]

# Training uses the FMix loss on FMix-transformed batches; validation uses
# plain, unweighted cross-entropy.
trainer = Trainer(
    PROJECT_NAME,
    model,
    device,
    trainloader,
    classes,
    criterion=fmix.loss,
    valloaders=validation_loaders,
    optimizer=optimizer,
    scheduler=scheduler,
    gradient_accumulation=GRAD_ACC,
    lr_find=False,
    max_step=MAX_STEP,
    transform_func=transform_func,
    val_criterion=nn.CrossEntropyLoss(),
)

RAdam optimizer loaded. 
Gradient Centralization usage = True 
Diffgrad usage = False
Adabelief usage = False
GC applied to both conv and fc layers


[34m[1mwandb[0m: Currently logged in as: [33myusufrahadika[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.10.14 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Output path: output/glamorous-star-139


In [None]:
# Start the training run configured above.
# NOTE(review): the captured output of this cell shows repeated object-count
# dumps in which the `list`, `int` and `str` counts grow from one dump to the
# next — worth checking the Trainer for accumulating Python objects.
trainer.train()

Step:   0%|          | 0/5351 [00:00<?, ?it/s]



before
                       types |   # objects |   total size
                         str |      161485 |     28.86 MB
                        dict |       54466 |     22.00 MB
                         int |      371403 |      9.96 MB
                        code |       52649 |      7.27 MB
                        list |       11916 |      7.20 MB
                        type |        8119 |      7.17 MB
                       tuple |       50857 |      3.85 MB
               numpy.ndarray |         167 |      2.74 MB
                         set |        2508 |      1.18 MB
                     weakref |       10988 |    944.28 KB
     collections.OrderedDict |        2305 |    915.52 KB
                 numpy.int64 |       28385 |    887.03 KB
                 abc.ABCMeta |         636 |    656.74 KB
           getset_descriptor |        8367 |    653.67 KB
  builtin_function_or_method |        7781 |    607.89 KB


Step:   0%|          | 0/5351 [00:00<?, ?it/s]



before
                       types |   # objects |   total size
                         str |      193048 |     31.14 MB
                        dict |       54810 |     22.23 MB
                        list |       41845 |     20.18 MB
                         int |      720510 |     19.28 MB
                        code |       52649 |      7.27 MB
                        type |        8133 |      7.17 MB
                       tuple |       50948 |      3.86 MB
               numpy.ndarray |         167 |      2.74 MB
                         set |        2508 |      1.18 MB
                     weakref |       11047 |    949.35 KB
     collections.OrderedDict |        2305 |    915.52 KB
                 numpy.int64 |       28385 |    887.03 KB
                 abc.ABCMeta |         636 |    656.74 KB
           getset_descriptor |        8367 |    653.67 KB
  builtin_function_or_method |        7877 |    615.39 KB


Step:   0%|          | 0/5351 [00:00<?, ?it/s]



before
                       types |   # objects |   total size
                        list |       71773 |     36.28 MB
                         str |      222923 |     33.27 MB
                         int |     1069700 |     28.61 MB
                        dict |       55234 |     22.34 MB
                        code |       52649 |      7.27 MB
                        type |        8133 |      7.17 MB
                       tuple |       51035 |      3.86 MB
               numpy.ndarray |         167 |      2.74 MB
                         set |        2508 |      1.18 MB
                     weakref |       11091 |    953.13 KB
     collections.OrderedDict |        2305 |    915.52 KB
                 numpy.int64 |       28385 |    887.03 KB
                 abc.ABCMeta |         636 |    656.74 KB
           getset_descriptor |        8367 |    653.67 KB
  builtin_function_or_method |        7971 |    622.73 KB


Step:   0%|          | 0/5351 [00:00<?, ?it/s]



before
                       types |   # objects |   total size
                        list |      101703 |     55.16 MB
                         int |     1418819 |     37.93 MB
                         str |      252798 |     35.41 MB
                        dict |       55587 |     22.43 MB
                        code |       52649 |      7.27 MB
                        type |        8133 |      7.17 MB
                       tuple |       51121 |      3.87 MB
               numpy.ndarray |         167 |      2.74 MB
                         set |        2508 |      1.18 MB
                     weakref |       11135 |    956.91 KB
     collections.OrderedDict |        2305 |    915.52 KB
                 numpy.int64 |       28385 |    887.03 KB
                 abc.ABCMeta |         636 |    656.74 KB
           getset_descriptor |        8367 |    653.67 KB
  builtin_function_or_method |        8065 |    630.08 KB


Step:   0%|          | 0/5351 [00:00<?, ?it/s]



before
                       types |   # objects |   total size
                        list |      131635 |     77.58 MB
                         int |     1767907 |     47.25 MB
                         str |      282673 |     37.54 MB
                        dict |       55909 |     22.51 MB
                        code |       52649 |      7.27 MB
                        type |        8133 |      7.17 MB
                       tuple |       51208 |      3.88 MB
               numpy.ndarray |         167 |      2.74 MB
                         set |        2508 |      1.18 MB
                     weakref |       11179 |    960.70 KB
     collections.OrderedDict |        2305 |    915.52 KB
                 numpy.int64 |       28385 |    887.03 KB
                 abc.ABCMeta |         636 |    656.74 KB
           getset_descriptor |        8367 |    653.67 KB
  builtin_function_or_method |        8159 |    637.42 KB


Step:   0%|          | 0/5351 [00:00<?, ?it/s]



before
                       types |   # objects |   total size
                        list |      161569 |    104.46 MB
                         int |     2117095 |     56.58 MB
                         str |      312548 |     39.67 MB
                        dict |       56328 |     22.60 MB
                        code |       52649 |      7.27 MB
                        type |        8133 |      7.17 MB
                       tuple |       51295 |      3.88 MB
               numpy.ndarray |         167 |      2.74 MB
                         set |        2508 |      1.18 MB
                     weakref |       11223 |    964.48 KB
     collections.OrderedDict |        2305 |    915.52 KB
                 numpy.int64 |       28385 |    887.03 KB
                 abc.ABCMeta |         636 |    656.74 KB
           getset_descriptor |        8367 |    653.67 KB
  builtin_function_or_method |        8253 |    644.77 KB


Step:   0%|          | 0/5351 [00:00<?, ?it/s]



before
                       types |   # objects |   total size
                        list |      191505 |    134.00 MB
                         int |     2466211 |     65.90 MB
                         str |      342423 |     41.81 MB
                        dict |       56676 |     22.69 MB
                        code |       52649 |      7.27 MB
                        type |        8133 |      7.17 MB
                       tuple |       51381 |      3.89 MB
               numpy.ndarray |         167 |      2.74 MB
                         set |        2508 |      1.18 MB
                     weakref |       11267 |    968.26 KB
     collections.OrderedDict |        2305 |    915.52 KB
                 numpy.int64 |       28385 |    887.03 KB
                 abc.ABCMeta |         636 |    656.74 KB
           getset_descriptor |        8367 |    653.67 KB
  builtin_function_or_method |        8347 |    652.11 KB


Step:   0%|          | 0/5351 [00:00<?, ?it/s]

In [None]:
def predict(model, dataloader, device):
    """Return the predicted class index for every sample served by ``dataloader``.

    The model is switched to eval mode and the forward passes run under
    ``torch.no_grad()``, so no autograd graph is built or kept during inference.

    Args:
        model: Callable module mapping an input batch to per-class scores of
            shape ``(batch, num_classes)``.
        dataloader: Iterable of ``(inputs, targets)`` pairs; targets are ignored.
        device: Device the input batches are moved to before the forward pass.

    Returns:
        1-D NumPy array of predicted class indices in loader order; an empty
        int64 array when the loader yields no batches.
    """
    model.eval()
    y_pred = []
    with torch.no_grad():
        for inputs, _ in dataloader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            # Index of the highest score along the class dimension.
            _, predicted = torch.max(outputs, 1)
            y_pred.append(predicted)

    if not y_pred:
        # torch.cat rejects an empty list; return an empty prediction vector instead.
        return np.empty(0, dtype=np.int64)
    return torch.cat(y_pred).cpu().numpy()

In [None]:
# Re-initialise W&B under a separate "<project>_test" project, reusing the name
# of the run that is currently active.
RUN_NAME = wandb.run.name
test_project = f"{PROJECT_NAME}_test"
wandb.init(project=test_project, name=RUN_NAME, reinit=True)

In [None]:
# Ground-truth labels in loader order. The loader is unshuffled, so they line up
# with the predictions produced by the second pass below.
# Whole label batches are concatenated instead of being unpacked into
# per-sample 0-d tensors (assumes each batch's targets arrive as a tensor —
# TODO confirm against the dataset classes).
y_test_actual = torch.cat([targets for _, targets in testloader]).cpu().numpy()
y_test_pred = predict(model, testloader, device)
test_acc = accuracy_score(y_test_actual, y_test_pred)

In [None]:
"Test accuracy:", test_acc

In [None]:
# FERPlus test split: labels in loader order (unshuffled), then predictions
# from a second pass, then accuracy.
# Whole label batches are concatenated instead of being unpacked into
# per-sample 0-d tensors (assumes each batch's targets arrive as a tensor —
# TODO confirm against the dataset classes).
ferplus_y_test_actual = (
    torch.cat([targets for _, targets in ferplus_testloader]).cpu().numpy()
)
ferplus_y_test_pred = predict(model, ferplus_testloader, device)
ferplus_test_acc = accuracy_score(ferplus_y_test_actual, ferplus_y_test_pred)

In [None]:
"FERPlus Test accuracy:", ferplus_test_acc

In [None]:
# RAF-DB test split: labels in loader order (unshuffled), then predictions from
# a second pass, then accuracy.
# Whole label batches are concatenated instead of being unpacked into
# per-sample 0-d tensors (assumes each batch's targets arrive as a tensor —
# TODO confirm against the dataset classes).
raf_y_test_actual = (
    torch.cat([targets for _, targets in raf_testloader]).cpu().numpy()
)
raf_y_test_pred = predict(model, raf_testloader, device)
raf_test_acc = accuracy_score(raf_y_test_actual, raf_y_test_pred)

In [None]:
"RAF-DB Test accuracy:", raf_test_acc

In [None]:
# Send the three test accuracies to the active W&B run in a single log call.
test_metrics = {
    "mixed_test_acc": test_acc,
    "ferplus_test_acc": ferplus_test_acc,
    "raf_test_acc": raf_test_acc,
}
wandb.log(test_metrics)