## 원본 - https://github.com/wandb/examples/blob/master/colabs/pytorch/Organizing_Hyperparameter_Sweeps_in_PyTorch_with_W%26B.ipynb

In [18]:
%%capture
# Upgrade the wandb client; %%capture silences the output of this whole cell.
!pip install wandb --upgrade

# workaround to fetch MNIST data: download the archive and unpack it into the
# working directory, which is where build_dataset points datasets.MNIST (".").
!wget www.di.ens.fr/~lelarge/MNIST.tar.gz
!tar -zxvf MNIST.tar.gz

In [19]:
import wandb 

# Log in to Weights & Biases before any sweep or run is created.
wandb.login()

True

In [20]:
# Root of the sweep configuration; it starts with only the search method
# and is extended with the metric and parameters in the following cells.
sweep_config = dict(method='random')

In [21]:
# Objective of the sweep: the value logged under 'loss', to be minimized.
metric = dict(name='loss', goal='minimize')

# Attach the objective to the sweep configuration.
sweep_config.update(metric=metric)

In [22]:
# Discrete part of the search space: each entry lists the candidate values
# for one hyperparameter.
parameters_dict = dict(
    optimizer=dict(values=['adam', 'sgd']),
    fc_layer_size=dict(values=[128, 256, 512]),
    dropout=dict(values=[0.3, 0.4, 0.5]),
)

# The same dict object is stored in sweep_config, so later additions to
# parameters_dict are visible through sweep_config['parameters'] as well.
sweep_config.update(parameters=parameters_dict)

In [23]:
# Epoch count is pinned to a single value ('value', not 'values').
parameters_dict['epochs'] = dict(value=1)

In [24]:
import math

# Learning rate: uniform distribution over [0, 0.1].
parameters_dict['learning_rate'] = dict(
    distribution='uniform',
    min=0,
    max=0.1,
)

# Batch size: quantized log-uniform distribution, i.e. round(X / q) * q.
# min and max are given as the natural logs of 32 and 256.
parameters_dict['batch_size'] = dict(
    distribution='q_log_uniform',
    q=1,
    min=math.log(32),
    max=math.log(256),
)

In [25]:
import pprint

# Pretty-print the assembled sweep configuration so it can be inspected
# before the sweep is created.
pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'minimize', 'name': 'loss'},
 'parameters': {'batch_size': {'distribution': 'q_log_uniform',
                               'max': 5.545177444479562,
                               'min': 3.4657359027997265,
                               'q': 1},
                'dropout': {'values': [0.3, 0.4, 0.5]},
                'epochs': {'value': 1},
                'fc_layer_size': {'values': [128, 256, 512]},
                'learning_rate': {'distribution': 'uniform',
                                  'max': 0.1,
                                  'min': 0},
                'optimizer': {'values': ['adam', 'sgd']}}}


### initialize sweep

In [26]:
# Create the sweep in the 'pytorch-sweeps-demo' project; the returned ID is
# what wandb.agent is given later to launch runs for this sweep.
sweep_id = wandb.sweep(sweep_config, project='pytorch-sweeps-demo')

Create sweep with ID: luoesn35
Sweep URL: https://wandb.ai/tjems6498/pytorch-sweeps-demo/sweeps/luoesn35


### run the sweep agent

In [27]:
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [28]:
def train(config=None):
    """Run one training job inside a wandb run and log the loss per epoch.

    Args:
        config: Optional configuration forwarded to ``wandb.init``. The
            hyperparameters actually used are read back from ``wandb.config``
            once the run has started.
    """
    with wandb.init(config=config):
        # Always read hyperparameters from wandb.config rather than from the
        # argument, so values supplied by the run itself are honored.
        hparams = wandb.config

        batches = build_dataset(hparams.batch_size)
        model = build_network(hparams.fc_layer_size, hparams.dropout)
        opt = build_optimizer(model, hparams.optimizer, hparams.learning_rate)

        for epoch_idx in range(hparams.epochs):
            epoch_loss = train_epoch(model, batches, opt)
            wandb.log({'loss': epoch_loss, 'epoch': epoch_idx})

def train_epoch(network, loader, optimizer):
    """Train ``network`` for one pass over ``loader`` and return the mean batch loss.

    Each batch's loss is also logged to wandb under 'batch loss'.

    Args:
        network: Model called on each batch; its output is fed to ``F.nll_loss``.
        loader: Iterable yielding ``(data, target)`` pairs; must support ``len``.
        optimizer: Optimizer stepped once per batch.

    Returns:
        Sum of the per-batch losses divided by ``len(loader)``.
    """
    running_total = 0
    for inputs, labels in loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()

        batch_loss = F.nll_loss(network(inputs), labels)
        # Read the scalar once; it is used for both the running sum and the log.
        batch_value = batch_loss.item()
        running_total += batch_value

        batch_loss.backward()
        optimizer.step()

        wandb.log({'batch loss': batch_value})
    return running_total / len(loader)

In [29]:
def build_dataset(batch_size):
    """Build a DataLoader over a one-in-five subset of the MNIST training split.

    Args:
        batch_size: Batch size passed to the DataLoader.

    Returns:
        A ``torch.utils.data.DataLoader`` over the subsampled dataset.
    """
    # Normalization constants are presumably the MNIST mean/std -- TODO confirm.
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    mnist_train = datasets.MNIST(".", train=True, download=True,
                                 transform=preprocess)
    # Keep only indices 0, 5, 10, ... of the dataset.
    every_fifth = range(0, len(mnist_train), 5)
    subset = torch.utils.data.Subset(mnist_train, indices=every_fifth)
    return torch.utils.data.DataLoader(subset, batch_size=batch_size)

def build_network(fc_layer_size, dropout):
    """Build a one-hidden-layer classifier and move it to ``device``.

    Args:
        fc_layer_size: Width of the hidden fully connected layer.
        dropout: Dropout probability applied after the hidden activation.

    Returns:
        An ``nn.Sequential`` that flattens its input, maps 784 features to
        ``fc_layer_size``, then to 10 outputs with log-softmax over dim 1
        (train_epoch pairs this with ``F.nll_loss``).
    """
    layers = [
        nn.Flatten(),
        nn.Linear(784, fc_layer_size),
        nn.ReLU(),
        nn.Dropout(dropout),
        nn.Linear(fc_layer_size, 10),
        nn.LogSoftmax(dim=1),
    ]
    model = nn.Sequential(*layers)
    return model.to(device)

def build_optimizer(network, optimizer, learning_rate):
    """Create the optimizer named by ``optimizer`` for ``network``'s parameters.

    Args:
        network: Module whose ``parameters()`` are optimized.
        optimizer: Optimizer name, either 'sgd' or 'adam'.
        learning_rate: Learning rate passed as ``lr``.

    Returns:
        ``optim.SGD`` (with momentum 0.9) for 'sgd', ``optim.Adam`` for 'adam'.

    Raises:
        ValueError: If ``optimizer`` is not a supported name. Without this
            check the name string itself would be returned and the failure
            would only surface later, when the caller tries to use it.
    """
    params = network.parameters()
    if optimizer == 'sgd':
        return optim.SGD(params, lr=learning_rate, momentum=0.9)
    if optimizer == 'adam':
        return optim.Adam(params, lr=learning_rate)
    raise ValueError(
        f"Unsupported optimizer {optimizer!r}; expected 'sgd' or 'adam'"
    )

In [30]:
# Launch the sweep agent, calling train once per run.
wandb.agent(sweep_id, train, count=5)  # run 5 trials

[34m[1mwandb[0m: Agent Starting Run: e0io0wlm with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	fc_layer_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.06441295546831664
[34m[1mwandb[0m: 	optimizer: sgd


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
batch loss,0.31236
_runtime,11.0
_timestamp,1621958130.0
_step,94.0
loss,0.57186
epoch,0.0


0,1
batch loss,█▇▅▃▂▃▂▂▃▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▁▁▁▂▂▂▁▂▁▂▁▁▁
_runtime,▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅████████
_timestamp,▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅████████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss,▁
epoch,▁


[34m[1mwandb[0m: Agent Starting Run: 2ny12msn with config:
[34m[1mwandb[0m: 	batch_size: 243
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	fc_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.008477230954651261
[34m[1mwandb[0m: 	optimizer: sgd


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
batch loss,0.48167
_runtime,3.0
_timestamp,1621958139.0
_step,50.0
loss,1.24917
epoch,0.0


0,1
batch loss,████▇▇▇▇▆▆▆▆▅▅▅▄▄▄▃▃▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▂▁▂▁
_runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█████████████████████████
_timestamp,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█████████████████████████
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
loss,▁
epoch,▁


[34m[1mwandb[0m: Agent Starting Run: 0ukenish with config:
[34m[1mwandb[0m: 	batch_size: 70
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	fc_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.0065760486786174965
[34m[1mwandb[0m: 	optimizer: adam


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
batch loss,0.59926
_runtime,4.0
_timestamp,1621958149.0
_step,172.0
loss,0.50844
epoch,0.0


0,1
batch loss,█▄▃▄▂▃▂▂▃▂▂▂▂▂▁▁▂▂▂▂▃▃▁▂▁▂▁▂▁▁▂▂▂▁▁▁▂▁▂▁
_runtime,▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅████████████
_timestamp,▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅████████████
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss,▁
epoch,▁


[34m[1mwandb[0m: Agent Starting Run: 00h8jht7 with config:
[34m[1mwandb[0m: 	batch_size: 50
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	fc_layer_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.0482230846746827
[34m[1mwandb[0m: 	optimizer: adam


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
batch loss,1.67633
_runtime,5.0
_timestamp,1621958159.0
_step,240.0
loss,3.21465
epoch,0.0


0,1
batch loss,█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅███
_timestamp,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅███
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss,▁
epoch,▁


[34m[1mwandb[0m: Agent Starting Run: bwx7gl1j with config:
[34m[1mwandb[0m: 	batch_size: 184
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	fc_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.08272844726212165
[34m[1mwandb[0m: 	optimizer: adam


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
batch loss,1.60186
_runtime,4.0
_timestamp,1621958167.0
_step,66.0
loss,4.64691
epoch,0.0


0,1
batch loss,▁▆█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁████████████████
_timestamp,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁████████████████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss,▁
epoch,▁
