In [78]:
# install PyTorch Lightning and Weights & Biases
!pip install pytorch-lightning --quiet
!pip install wandb --quiet -Uq

In [79]:
import pytorch_lightning as pl
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader,random_split
from torchmetrics import Accuracy
from torchvision import transforms
from torchvision.datasets import CIFAR10
from pytorch_lightning.loggers import WandbLogger
import wandb


In [80]:
# create one class to deal with data
class CifarDataModule(pl.LightningDataModule):
  """LightningDataModule that serves CIFAR-10 train/val/test loaders.

  Args:
    batch_size: batch size used by every dataloader.
    data_dir: directory the dataset is downloaded to / read from.
    num_workers: worker processes per dataloader (default 2).
    val_size: number of training images held out for validation
      (default 5000; the remaining training images are used for fitting).
    seed: optional integer seed for the train/val split. With the default
      ``None`` the split uses the global RNG and changes on every
      ``setup()`` call; pass a value to get the same split each time.
  """

  def __init__(self, batch_size, data_dir="./", num_workers=2, val_size=5000, seed=None):
    super().__init__()
    self.data_dir = data_dir
    self.batch_size = batch_size
    self.num_workers = num_workers
    self.val_size = val_size
    self.seed = seed
    # Convert to tensor, then map every channel from [0, 1] to [-1, 1].
    self.transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    self.num_classes = 10

  def prepare_data(self):
    """Download both CIFAR-10 splits into ``data_dir`` (no state is assigned)."""
    CIFAR10(self.data_dir, train=True, download=True)
    CIFAR10(self.data_dir, train=False, download=True)

  def setup(self, stage=None):
    """Create the datasets needed for ``stage`` (all of them when ``stage`` is None)."""
    # 'validate' also needs the held-out split, not only 'fit'.
    if stage in ('fit', 'validate', None):
      cifar_full = CIFAR10(self.data_dir, train=True, transform=self.transform)
      train_size = len(cifar_full) - self.val_size
      split_kwargs = {}
      if self.seed is not None:
        # A dedicated generator keeps the split reproducible without
        # touching the global RNG state.
        split_kwargs['generator'] = torch.Generator().manual_seed(self.seed)
      self.cifar_train, self.cifar_val = random_split(
          cifar_full, [train_size, self.val_size], **split_kwargs)

    if stage in ('test', None):
      self.cifar_test = CIFAR10(self.data_dir, train=False, transform=self.transform)

  def _make_loader(self, dataset, shuffle):
    """Build a DataLoader with the module-wide batch size and worker count."""
    return DataLoader(dataset, batch_size=self.batch_size, shuffle=shuffle,
                      num_workers=self.num_workers)

  def train_dataloader(self):
    """Shuffled loader over the training split."""
    return self._make_loader(self.cifar_train, shuffle=True)

  def val_dataloader(self):
    """Unshuffled loader over the validation split."""
    return self._make_loader(self.cifar_val, shuffle=False)

  def test_dataloader(self):
    """Unshuffled loader over the CIFAR-10 test set."""
    return self._make_loader(self.cifar_test, shuffle=False)




In [81]:
### HYPEROPT / optional task for the student
# extend WandB.ai integration in the code with sweeps
# (e.g. add variables like learning rate, optimizer, neurons_FC1, neurons_FC2)

class CIFAR10LitModel(pl.LightningModule):
    """Small CNN classifier whose head size and optimizer come from a sweep config.

    Architecture: conv1-relu, conv2-relu, maxpool, conv3-relu, conv4-relu,
    maxpool, followed by three fully connected layers; ``forward`` returns
    log-probabilities.

    Args:
        config: object exposing ``learning_rate``, ``optimizer``
            (``"sgd"``, ``"adam"`` or ``"adamw"``), ``neurons_FC1`` and
            ``neurons_FC2`` as attributes.
        input_shape: shape of one input sample without the batch dimension,
            used to size the first fully connected layer.
        num_classes: number of output classes.
    """

    def __init__(self, config, input_shape, num_classes):
        super().__init__()

        self.learning_rate = config.learning_rate
        self.optimizer = config.optimizer
        self.FC1_num = config.neurons_FC1
        self.FC2_num = config.neurons_FC2

        # model architecture
        self.conv1 = nn.Conv2d(3, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 32, 3, 1)
        self.conv3 = nn.Conv2d(32, 64, 3, 1)
        self.conv4 = nn.Conv2d(64, 64, 3, 1)
        self.pool1 = nn.MaxPool2d(2)
        self.pool2 = nn.MaxPool2d(2)

        n_sizes = self._get_output_shape(input_shape)
        self.fc1 = nn.Linear(n_sizes, self.FC1_num)
        self.fc2 = nn.Linear(self.FC1_num, self.FC2_num)
        self.fc3 = nn.Linear(self.FC2_num, num_classes)

        # Metrics follow the requested class count instead of a literal 10.
        self.train_acc = Accuracy(task='multiclass', num_classes=num_classes)
        self.val_acc = Accuracy(task='multiclass', num_classes=num_classes)
        self.test_acc = Accuracy(task='multiclass', num_classes=num_classes)

    def _get_output_shape(self, shape):
        '''Return the flattened feature count produced by the conv layers for one sample of ``shape``.'''
        batch_size = 1
        # Shape probe only: no gradients are needed for this dummy pass.
        with torch.no_grad():
            dummy = torch.rand(batch_size, *shape)
            output_feat = self._feature_extractor(dummy)
        return output_feat.view(batch_size, -1).size(1)

    # conv1,relu, conv2,relu, maxpool,conv3,relu,conv4,relu,maxpool
    def _feature_extractor(self, x):
        """Run the convolutional part of the network."""
        x = F.relu(self.conv1(x))
        x = self.pool1(F.relu(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = self.pool2(F.relu(self.conv4(x)))
        return x

    def forward(self, x):
        """Return per-class log-probabilities for a batch of inputs."""
        x = self._feature_extractor(x)
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.log_softmax(self.fc3(x), dim=1)
        return x

    def _shared_step(self, batch, metric):
        """Compute NLL loss and batch accuracy (via ``metric``) for one batch."""
        x, y = batch
        log_probs = self(x)
        # forward() already applies log_softmax, so nll_loss is the matching loss.
        loss = F.nll_loss(log_probs, y)
        preds = torch.argmax(log_probs, dim=1)
        acc = metric(preds, y)
        return loss, acc

    def training_step(self, batch, batch_idx):
        """Training loop body: logs ``train_loss`` / ``train_acc`` per step and per epoch."""
        loss, acc = self._shared_step(batch, self.train_acc)
        self.log('train_loss', loss, on_step=True, on_epoch=True, logger=True)
        self.log('train_acc', acc, on_step=True, on_epoch=True, logger=True)
        return loss

    # validation loop
    def validation_step(self, batch, batch_idx):
        """Validation loop body: logs ``val_loss`` / ``val_acc`` (also on the progress bar)."""
        loss, acc = self._shared_step(batch, self.val_acc)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)
        return loss

    # test loop
    def test_step(self, batch, batch_idx):
        """Test loop body: logs ``test_loss`` / ``test_acc`` per epoch."""
        loss, acc = self._shared_step(batch, self.test_acc)
        self.log('test_loss', loss, on_epoch=True)
        self.log('test_acc', acc, on_epoch=True)
        return loss

    def configure_optimizers(self):
        """Build the optimizer named by ``self.optimizer``.

        Raises:
            ValueError: if the name is not ``"sgd"``, ``"adam"`` or ``"adamw"``.
        """
        if self.optimizer == "sgd":
            return torch.optim.SGD(self.parameters(), self.learning_rate, momentum=0.9)
        if self.optimizer == "adam":
            return torch.optim.Adam(self.parameters(), self.learning_rate)
        if self.optimizer == "adamw":
            return torch.optim.AdamW(self.parameters(), self.learning_rate)
        # The attribute is `optimizer`; the old message read a missing
        # `optimizer_choice` and failed with AttributeError instead.
        raise ValueError(f"Unsupported optimizer choice: {self.optimizer}")



In [82]:
# Stand-alone data module used only to pull a fixed batch for inspection.
cifar = CifarDataModule(batch_size=32)
cifar.prepare_data()
cifar.setup()

# Take the first validation batch as the samples to log predictions on.
val_loader = cifar.val_dataloader()
samples = next(iter(val_loader))

Files already downloaded and verified
Files already downloaded and verified


In [83]:
### W&B: you need a wandb.ai account (create one if you don't have one).

# SECURITY: never hard-code an API key in a notebook. Without an explicit key,
# wandb.login() uses the WANDB_API_KEY environment variable or the credentials
# already stored in ~/.netrc, and otherwise prompts interactively.
# The key that was previously committed in this cell must be treated as leaked
# and revoked in the W&B account settings.
wandb.login()

# Random search that maximizes the validation accuracy logged by the model.
sweep_config = {
    'method': 'random',
    'name': 'sweep1',
    'metric': {
        'goal': 'maximize',
        'name': 'val_acc',
    },
}

# Search space: one entry per hyperparameter read from the run config.
parameters_dict = {
    'optimizer': {'values': ['adam', 'sgd', 'adamw']},
    'learning_rate': {'values': [0.1, 0.01, 0.05, 0.001, 0.005]},
    'neurons_FC1': {'values': [128, 256, 512]},
    'neurons_FC2': {'values': [64, 128, 256, 512]},
}

sweep_config['parameters'] = parameters_dict
print(sweep_config)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


{'method': 'random', 'name': 'sweep1', 'metric': {'goal': 'maximize', 'name': 'val_acc'}, 'parameters': {'optimizer': {'values': ['adam', 'sgd', 'adamw']}, 'learning_rate': {'values': [0.1, 0.01, 0.05, 0.001, 0.005]}, 'neurons_FC1': {'values': [128, 256, 512]}, 'neurons_FC2': {'values': [64, 128, 256, 512]}}}


In [84]:
### W&B training function executed by the sweep agent (requires a wandb.ai account)
def train_model(config=None):
    """Train one model for a W&B run (used as the sweep agent's function).

    Args:
        config: optional default hyperparameters forwarded to ``wandb.init``.
            When the function is launched by a sweep agent this is ``None``
            and the values are read from ``wandb.config``.

    The run is always closed: it is finished with exit code 0 after a
    successful fit and with exit code 1 if anything raises.
    """
    wandb.init(project='04a', config=config)
    exit_code = 1  # assume failure until training has completed
    try:
        config = wandb.config
        wandb_logger = WandbLogger(project='04a', job_type='train', log_model="all")

        # instantiate classes
        # prepare_data()/setup() are not called here: trainer.fit() invokes
        # both hooks on the data module, so calling them manually only
        # repeated the download check and the train/val split.
        dm = CifarDataModule(32)
        model = CIFAR10LitModel(config, (3, 32, 32), dm.num_classes)

        wandb_logger.watch(model)

        # Train for 22 epochs
        trainer = pl.Trainer(max_epochs=22, logger=wandb_logger)
        trainer.fit(model, dm)
        exit_code = 0
    finally:
        # Close the run even on failure so the next sweep run starts cleanly.
        wandb.finish(exit_code=exit_code)



In [None]:
# Register the sweep in project "04a", then let a local agent execute
# six runs, each one calling train_model with a sampled configuration.
sweep_id = wandb.sweep(
    sweep_config,
    project="04a",
)
wandb.agent(
    sweep_id=sweep_id,
    function=train_model,
    count=6,
)

Create sweep with ID: 6byh26o9
Sweep URL: https://wandb.ai/xuy0727/04a/sweeps/6byh26o9


[34m[1mwandb[0m: Agent Starting Run: 1i5s0pso with config:
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	neurons_FC1: 128
[34m[1mwandb[0m: 	neurons_FC2: 64
[34m[1mwandb[0m: 	optimizer: sgd


Files already downloaded and verified
Files already downloaded and verified


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params
--------------------------------------------------
0  | conv1     | Conv2d             | 896   
1  | conv2     | Conv2d             | 9.2 K 
2  | conv3     | Conv2d             | 18.5 K
3  | conv4     | Conv2d             | 36.9 K
4  | pool1     | MaxPool2d          | 0     
5  | pool2     | MaxPool2d          | 0     
6  | fc1       | Linear             | 204 K 
7  | fc2       | Linear             | 8.3 K 
8  | fc3       | Linear             | 650   
9  | train_acc | MulticlassAccuracy | 0     
10 | val_acc   | MulticlassAccuracy | 0     
11 | test_acc  | MulticlassAccuracy | 0     
--------------------------------------------------
279 K     Trainable params
0         Non-trainable params
279 K     Total params
1.118     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]