In [1]:
%load_ext autoreload
%autoreload 2

%reset

In [2]:
# Training settings used by later cells: NUM_EPOCHS is handed to
# experiment.train, BATCH_SIZE to Experiment and to the model summary.
NUM_EPOCHS, BATCH_SIZE = 200, 16

In [3]:
import torch

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [4]:
from torchinfo import summary

from doren_bnn.mobilenet import MobileNet, NetType

# Network variant to build; the same constant is passed to the constructor below.
NETTYPE = NetType.XNORPP

# Build MobileNet with first positional argument 3 and 10 output classes,
# then move it to the selected device.
# NOTE(review): the leading 3 is presumably the input channel count -- confirm
# against MobileNet's signature.
model = MobileNet(3, num_classes=10, nettype=NETTYPE)
model = model.to(device)

# Show a per-layer summary for one batch of 3x32x32 inputs.
summary(model, input_size=(BATCH_SIZE, 3, 32, 32))

Layer (type:depth-idx)                        Output Shape              Param #
MobileNet                                     [16, 10]                  --
├─Upsample: 1-1                               [16, 3, 224, 224]         --
├─Sequential: 1-2                             [16, 1024, 1, 1]          --
│    └─MobileNet_ConvBlock: 2-1               [16, 32, 112, 112]        --
│    │    └─Sequential: 3-1                   [16, 32, 112, 112]        902
│    └─MobileNet_ConvDsBlock: 2-2             [16, 64, 112, 112]        --
│    │    └─Sequential: 3-2                   [16, 64, 112, 112]        2,560
│    └─MobileNet_ConvDsBlock: 2-3             [16, 128, 56, 56]         --
│    │    └─Sequential: 3-3                   [16, 128, 56, 56]         9,216
│    └─MobileNet_ConvDsBlock: 2-4             [16, 128, 56, 56]         --
│    │    └─Sequential: 3-4                   [16, 128, 56, 56]         18,304
│    └─MobileNet_ConvDsBlock: 2-5             [16, 256, 28, 28]         --
│    │   

In [5]:
from torch.nn import CrossEntropyLoss
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts

# Cross-entropy loss, moved to the selected device.
criterion = CrossEntropyLoss()
criterion = criterion.to(device)

# AdamW over every model parameter, with a small weight decay.
optimizer = AdamW(model.parameters(), weight_decay=5e-6, lr=1e-3)

# Cosine annealing with warm restarts: the first restart comes after 25
# scheduler steps and the learning rate never drops below 1e-4.
scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=25, eta_min=1e-4)

In [6]:
# Hyperparameters forwarded to experiment.train as `alpha` and `lamb`;
# both are set to 0 for this run.
ALPHA = LAMBDA = 0

In [7]:
from doren_bnn.utils import Dataset, Experiment

# Identifier for this run. A plain string literal is enough: the previous
# f-string had no placeholders.
# NOTE(review): presumably used by Experiment to name checkpoints/logs -- confirm.
EXPERIMENT_ID = "mobilenet-xnorpp"

# Experiment on CIFAR-10 using the batch size configured earlier in the notebook.
experiment = Experiment(EXPERIMENT_ID, Dataset.CIFAR10, BATCH_SIZE)

Files already downloaded and verified
Files already downloaded and verified


In [8]:
# Run training for NUM_EPOCHS with the model, loss, optimizer and scheduler
# built in the earlier cells.
# NOTE(review): resume=True presumably continues from a previously saved
# state -- confirm against Experiment.train.
experiment.train(
    device, model, criterion, optimizer, scheduler, NUM_EPOCHS,
    resume=True, alpha=ALPHA, lamb=LAMBDA,
)

  2%|2         | 5/200 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Evaluation is currently disabled; uncomment the line below to run it.
# experiment.test(device, model)