In [25]:
%reset

In [26]:
# define training settings
NUM_EPOCHS = 400
BATCH_SIZE = 128

In [27]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [28]:
from doren_bnn.mobilenet import MobileNet, NetType
from torchinfo import summary

NETTYPE = NetType.XNORPP_SCA
model = MobileNet(3, num_classes=10, nettype=NETTYPE).to(device)

summary(model, input_size=(BATCH_SIZE, 3, 32, 32))

Layer (type:depth-idx)                             Output Shape              Param #
MobileNet                                          [128, 10]                 --
├─Sequential: 1-1                                  [128, 1024, 1, 1]         --
│    └─MobileNet_ConvBlock: 2-1                    [128, 32, 16, 16]         --
│    │    └─Sequential: 3-1                        [128, 32, 16, 16]         902
│    └─MobileNet_ConvDsBlock: 2-2                  [128, 64, 16, 16]         --
│    │    └─Sequential: 3-2                        [128, 64, 16, 16]         2,560
│    └─MobileNet_ConvDsBlock: 2-3                  [128, 128, 8, 8]          --
│    │    └─Sequential: 3-3                        [128, 128, 8, 8]          9,216
│    └─MobileNet_ConvDsBlock: 2-4                  [128, 128, 8, 8]          --
│    │    └─Sequential: 3-4                        [128, 128, 8, 8]          18,304
│    └─MobileNet_ConvDsBlock: 2-5                  [128, 256, 4, 4]          --
│    │    └─Sequential: 

In [29]:
from torch.nn import CrossEntropyLoss
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts

criterion = CrossEntropyLoss().to(device)
optimizer = AdamW(model.parameters(), lr=1e-3, weight_decay=5e-6)
scheduler = CosineAnnealingWarmRestarts(optimizer, 25, eta_min=1e-3)

In [30]:
# hyperparameters
ALPHA = 1e-2
LAMBDA = 1e-3

In [31]:
from doren_bnn.utils import Dataset, Experiment

EXPERIMENT_ID = f"mobilenet-xnorpp-sca-{ALPHA}-{LAMBDA}"
experiment = Experiment(EXPERIMENT_ID, Dataset.CIFAR10, BATCH_SIZE)

Files already downloaded and verified
Files already downloaded and verified


In [32]:
experiment.train(
    device,
    model,
    criterion,
    optimizer,
    scheduler,
    NUM_EPOCHS,
    resume=False,
    alpha=ALPHA,
    lamb=LAMBDA,
)

  0%|          | 0/400 [00:00<?, ?it/s]

In [33]:
experiment.test(device, model)

tensor([[-0.0346,  0.0354, -0.0795,  0.1035, -0.1381, -0.1777, -0.2592, -0.0901,
         -0.0826,  0.0023],
        [-0.0346,  0.0354, -0.0795,  0.1035, -0.1381, -0.1777, -0.2592, -0.0901,
         -0.0826,  0.0023]], device='cuda:0', grad_fn=<SliceBackward0>)
