##### Example 1

In [1]:
def this_is_the_next_line():
    """No-op placeholder standing in for whatever code runs next."""
    return None

In [2]:
# NOTE(review): this cell was executed before the `import torch` cell below, which
# is why its recorded output is a NameError — the import has to run first.
# Two 1000 x 1000 random matrices used as matmul operands further down.
x = torch.randn(1000, 1000)
y = torch.randn(1000, 1000)

NameError: name 'torch' is not defined

In [None]:
import torch

In [None]:
x.shape, y.shape

In [None]:
# Matrix product of x and y.
# NOTE(review): x and y are created above without a `device=` argument, so this
# presumably runs on the default (CPU) device rather than on CUDA — confirm.
z = torch.matmul(x, y)

Write a one-liner that ensures the matrix multiplication of tensors `x` and `y` on a CUDA device has completed before execution proceeds to the next line

In [None]:
# Block the host until the work queued on the current CUDA device has finished,
# so the following cell only runs once the matmul result is ready.
torch.cuda.synchronize()

In [None]:
this_is_the_next_line()

##### Example 1

In [4]:
from torch import nn

In [5]:
# Toy network: four 5 -> 5 linear layers with a ReLU between consecutive ones.
# Every layer keeps the feature size at 5.
model = nn.Sequential(
    nn.Linear(5, 5),
    nn.ReLU(),
    nn.Linear(5, 5),
    nn.ReLU(),
    nn.Linear(5, 5),
    nn.ReLU(),
    nn.Linear(5, 5),
)

In [6]:
model

Sequential(
  (0): Linear(in_features=5, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=5, bias=True)
  (3): ReLU()
  (4): Linear(in_features=5, out_features=5, bias=True)
  (5): ReLU()
  (6): Linear(in_features=5, out_features=5, bias=True)
)

In [None]:
import time
from typing import Union, List

from torch import nn

In [None]:
model

Sequential(
  (0): Linear(in_features=5, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=5, bias=True)
  (3): ReLU()
  (4): Linear(in_features=5, out_features=5, bias=True)
  (5): ReLU()
  (6): Linear(in_features=5, out_features=5, bias=True)
)

`torchpipe.balance.profile.profile_times`

Write a function that profiles the time it takes to execute the forward pass and the backward pass of each layer

**Hint**: Ignore type annotations

In [None]:
def profile_times(
    model: nn.Module,
    sample: Union[torch.Tensor, List[torch.Tensor]],
) -> List[List[float]]:
    """Profile the elapsed forward + backward time of every layer.

    The sample is pushed through the layers in order: each layer receives the
    output of the previous one, so layers may change the feature size.

    Args:
        model: An iterable container of layers (e.g. ``nn.Sequential``).
        sample: A single input tensor, or a list of input tensors. A lone
            tensor is treated as one input, not as a sequence of rows.

    Returns:
        One list per layer, each holding the elapsed time in seconds of that
        layer's forward pass plus (when its outputs require grad) its
        backward pass.

    Note:
        Running the backward pass accumulates gradients into the parameters
        of ``model``.
    """
    # ignored: check whether the model has already performed a backward pass
    inputs = [sample] if isinstance(sample, torch.Tensor) else list(sample)
    records = [[] for _ in model]

    for i, layer in enumerate(model):
        # Cut the autograd graph between layers so the backward pass below
        # only covers the current layer (and never re-enters a freed graph).
        inputs = [x.detach().requires_grad_(x.requires_grad) for x in inputs]

        # perf_counter is monotonic and has a higher resolution than time.time.
        start_time = time.perf_counter()

        outputs = [layer(x) for x in inputs]
        backward_outputs = tuple(y for y in outputs if y.requires_grad)

        # The outputs double as their own incoming gradients: only the cost of
        # the backward pass matters here, not the gradient values.
        if backward_outputs:
            torch.autograd.backward(backward_outputs, backward_outputs)

        end_time = time.perf_counter()
        records[i].append(end_time - start_time)

        # Feed this layer's outputs to the next layer.
        inputs = outputs

    return records

In [None]:
batch.shape

torch.Size([10, 5, 5])

In [None]:
# NOTE(review): `batch` is not defined in any visible cell; presumably it is the
# [10, 5, 5] tensor whose shape is displayed just above — confirm where it is created.
outputs = profile_times(model, batch)

In [None]:
outputs

[[0.002125263214111328],
 [0.00022482872009277344],
 [0.0007300376892089844],
 [0.00010609626770019531],
 [0.0004680156707763672],
 [8.20159912109375e-05],
 [0.00047588348388671875]]

##### Example 2

In [11]:
model

Sequential(
  (0): Linear(in_features=5, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=5, bias=True)
  (3): ReLU()
  (4): Linear(in_features=5, out_features=5, bias=True)
  (5): ReLU()
  (6): Linear(in_features=5, out_features=5, bias=True)
)

Compute the total number of bytes that are occupied by the parameters in the model

In [12]:
def compute_total_memory(model):
    """Return the number of bytes occupied by all parameters of ``model``.

    Each parameter contributes its element count (``numel()``) multiplied by
    the byte size of one element (``element_size()``).
    """
    return sum(
        weights.numel() * weights.element_size()
        for weights in model.parameters()
    )

In [13]:
total_memory = compute_total_memory(model)

In [14]:
total_memory

480

### GPipe

In [None]:
from collections import OrderedDict
from typing import Iterable, List, Tuple, cast

import torch
from torch import nn

##### Example 1

In [None]:
# Six linear layers that are chained into one sequential model below.
a = nn.Linear(420, 69)
b = nn.Linear(69, 69)
c = nn.Linear(69, 69)
# NOTE(review): d emits 169 features but e expects 69, so a forward pass through
# the full chain would fail at e — confirm whether 69 was intended here.
d = nn.Linear(69, 169)
e = nn.Linear(69, 69)
f = nn.Linear(69, 420)

model = nn.Sequential(a, b, c, d, e, f)

In [None]:
model

Sequential(
  (0): Linear(in_features=420, out_features=69, bias=True)
  (1): Linear(in_features=69, out_features=69, bias=True)
  (2): Linear(in_features=69, out_features=69, bias=True)
  (3): Linear(in_features=69, out_features=169, bias=True)
  (4): Linear(in_features=69, out_features=69, bias=True)
  (5): Linear(in_features=69, out_features=420, bias=True)
)

In [None]:
# Number of layers per partition; 3 + 2 + 1 covers the six layers of `model`.
balances = [3, 2, 1]

In [None]:
# One target device per partition; all three are the CPU here.
devices = [torch.device("cpu"), torch.device("cpu"), torch.device("cpu")]

In [None]:
from typing import OrderedDict

In [None]:
model

Sequential(
  (0): Linear(in_features=420, out_features=69, bias=True)
  (1): Linear(in_features=69, out_features=69, bias=True)
  (2): Linear(in_features=69, out_features=69, bias=True)
  (3): Linear(in_features=69, out_features=169, bias=True)
  (4): Linear(in_features=69, out_features=69, bias=True)
  (5): Linear(in_features=69, out_features=420, bias=True)
)

`balances` indicates the number of consecutive layers assigned to each partition

`devices` specifies the target device for each partition

In [None]:
balances, devices

([3, 2, 1], [device(type='cpu'), device(type='cpu'), device(type='cpu')])

Write a function that splits a model into partitions and sends each partition to its target device

**Hint**: Ignore the type annotations

In [None]:
def split_model(
    model: nn.Sequential,
    balance: Iterable[int],
    devices: List[torch.device]
) -> List[nn.Sequential]:
    """Split ``model`` into consecutive partitions and move each to its device.

    Args:
        model: The sequential model to split. Its direct children are
            distributed in order and keep their original names.
        balance: Number of layers in each partition, in order. Any iterable
            of positive integers whose sum equals the number of layers.
        devices: Target device for each partition; ``devices[i]`` receives
            partition ``i``. Extra trailing devices are ignored.

    Returns:
        The partitions, wrapped in an ``nn.ModuleList`` (one ``nn.Sequential``
        per entry of ``balance``).

    Raises:
        ValueError: If ``balance`` contains a non-positive entry or does not
            sum to the number of layers in ``model``.
        IndexError: If there are fewer devices than partitions.
    """
    # Materialise once: ``balance`` may be a one-shot iterator.
    sizes = list(balance)
    children = list(model.named_children())

    if any(size <= 0 for size in sizes):
        raise ValueError(f"all balance numbers must be positive integers (balance: {sizes})")
    if sum(sizes) != len(children):
        raise ValueError(
            f"model has {len(children)} layers but balance sums to {sum(sizes)} "
            f"(balance: {sizes})"
        )
    if len(sizes) > len(devices):
        raise IndexError(
            f"too few devices to hold the given partitions "
            f"(devices: {len(devices)}, partitions: {len(sizes)})"
        )

    partitions = []
    start = 0
    for size, device in zip(sizes, devices):
        # nn.Sequential only keeps the children's names when given an OrderedDict.
        partition = nn.Sequential(OrderedDict(children[start:start + size]))
        partition.to(device)
        partitions.append(partition)
        start += size

    # can be ignored
    partitions = cast(List[nn.Sequential], nn.ModuleList(partitions))
    return partitions

In [None]:
partitions = split_model(model, balances, devices)

In [None]:
partitions

ModuleList(
  (0): Sequential(
    (0): Linear(in_features=420, out_features=69, bias=True)
    (1): Linear(in_features=69, out_features=69, bias=True)
    (2): Linear(in_features=69, out_features=69, bias=True)
  )
  (1): Sequential(
    (3): Linear(in_features=69, out_features=169, bias=True)
    (4): Linear(in_features=69, out_features=69, bias=True)
  )
  (2): Sequential(
    (5): Linear(in_features=69, out_features=420, bias=True)
  )
)