In [None]:
# todo: check the model cfg: augmentations, policy

In [1]:
from torch import nn
from utils import parse_cfg
from darknet import Darknet

In [2]:
layers_info = parse_cfg('cfg/yolov3.cfg')

In [3]:
darknet = Darknet(layers_info)
darknet.create_layers() #TODO

make_layers returns net_info as well. check whether it"s necessary


({'angle': '0',
  'batch': '64',
  'burn_in': '1000',
  'channels': '3',
  'decay': '0.0005',
  'exposure': '1.5',
  'height': '608',
  'hue': '.1',
  'learning_rate': '0.001',
  'max_batches': '500200',
  'momentum': '0.9',
  'name': 'net',
  'policy': 'steps',
  'saturation': '1.5',
  'scales': '.1,.1',
  'steps': '400000,450000',
  'subdivisions': '16',
  'width': '608'},
 ModuleList(
   (0): Sequential(
     (conv_0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
     (bn_0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (leaky_0): LeakyReLU(negative_slope=0.1)
   )
 ))

In [18]:
# the first element is not a layer but the network info (lr, batchsize,  ...)
net_info = layers_info[0]
# init. the modulelist instead of a list to add all parameters to nn.Module
layer_list = nn.ModuleList()

print("WARNING: sudivisions of a batch aren't used in contrast to the original cfg" )

for i, layer_info in layers_info[1:]:
    # we initialize sequential as a layer may have conv, bn, and activation
    layer = nn.Sequential()
    # cache the # of filters as we will need them in Conv2d
    # it starts with the number of channels specified in net info, often = to 3 (RGB)
    filters_cache = [int(net_info['channels'])]
    name = layer_info['name'] # conv, upsample, route, shortcut, yolo
    
    if name == 'convolutional':
        # extract arguments for the layer
        in_filters = filters_cache[-1]
        out_filters = int(layer_info['filters'])
        kernel_size = int(layer_info['size'])
        pad = (kernel_size - 1) // 2 if int(module_def['pad']) else 0
        stride = int(layer_info['stride'])
        
        # make conv module and add it to the sequential
        conv = nn.Conv2d(in_filters, out_filters, kernel_size, stride, pad)
        layer.add_module('conv_{}'.format(i), conv)
        
        # some layers doesn't have BN
        try:
            layer_info['batch_normalize']
            layer.add_module('bn_' + i, nn.BatchNorm2d(out_filters))
            
        except KeyError:
            print('to del, this message should be printed 3ish times')
            pass
        
        # activation. if 'linear': no activation
        if layer_info['activation'] == 'leaky':
            layer.add_module('leaky_' + i, nn.LeakyReLU(0.1))
        
        # add the number of filters to filters_cache
        filters_cache.append(out_filters)
    
    elif name == 'upsample':
        # extract arguments for the layer
        stride = int(layer_info['stride'])
        layer.add_module('upsample_' + i, nn.Upsample(scale_factor=stride, mode='bilinear'))
        
    # here we need to deal only with the number of filters 
    elif name == 'route':
        # route can have one, two, or more sources
        # first, let's make them to be ints
        routes = [int(route) for route in layer_info['layers'].split(',')]
        # then, sum the number of filters from at each mentioned layer
        out_filters = sum([filters_cache[route] for route in routes])
#         # add the dummy layer to the list
#         layer.add_module('route_' + i, EmptyLayer())
        # add the route layer to the modulelist
        layer.add_module('route_' + i, RouteLayer(routes))
        
        # add the number of filters to filters_cache
        filters_cache.append(out_filters)
    
    # in forward() we will need to add the output of a previous layer, nothing to do here
    elif name == 'shortcut':
        # from which layer to use the shortcut
        frm = int(layer_info['from'])
        # add the shortcut layer to the modulelist
        layer.add_module('shortcut_' + i, ShortcutLayer(frm))
        
    # detection layer
    elif name == 'yolo':
        # extract arguments for the layer
        classes = int(layer_info['classes'])
        num = int(layer_info['num'])
        jitter = float(layer_info['jitter'])
        ignore_thresh = float(layer_info['ignore_thresh'])
        truth_thresh = float(layer_info['truth_thresh']) 
        random = float(layer_info['random']) # float??
        in_width = int(net_info['width'])
        
        # masks tells the dector which anchor to use (form: '6,7,8')
        masks = [int(mask) for mask in layer_info['mask'].split(',')]
        # select anchors (form: 10,13,16,30,33,23,30,61,62,45 -- 5 pairs)
        # first extract the coordinates
        coords = [int(coord) for coord in layer_info['anchors']]
        # make anchors (tuples)
        anchors = list(zip(coords[::2], coords[1::2]))
        # select anchors that belong to mask
        anchors = [anchors[mask] for mask in masks]
        
        # add the detector layer to the list
        detection = DetectionLayer(anchors, classes, num, jitter, ignore_thresh, truth_thresh, random, in_width)
        layer.add_module('detector_' + i, detection)
        
    
    # append the layer to the modulelist
    layer_list.append(layer)
    
    print('make_layers returns net_info as well. check whether it"s necessary')
    return net_info, layer_list

IndentationError: expected an indented block (<ipython-input-18-58f685282415>, line 69)

In [4]:
if 0.0:
    print(1.0)

In [28]:
layer_info = {'anchors': '10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326'}
coords = [int(coord) for coord in layer_info['anchors'].replace(' ', '').split(',')]
print(coords)
list(zip(coords[::2], coords[1::2]))

[10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]


[(10, 13),
 (16, 30),
 (33, 23),
 (30, 61),
 (62, 45),
 (59, 119),
 (116, 90),
 (156, 198),
 (373, 326)]

In [13]:
layers_info[1:]

[{'activation': 'leaky',
  'batch_normalize': '1',
  'filters': '32',
  'name': 'convolutional',
  'pad': '1',
  'size': '3',
  'stride': '1'},
 {'activation': 'leaky',
  'batch_normalize': '1',
  'filters': '64',
  'name': 'convolutional',
  'pad': '1',
  'size': '3',
  'stride': '2'},
 {'activation': 'leaky',
  'batch_normalize': '1',
  'filters': '32',
  'name': 'convolutional',
  'pad': '1',
  'size': '1',
  'stride': '1'},
 {'activation': 'leaky',
  'batch_normalize': '1',
  'filters': '64',
  'name': 'convolutional',
  'pad': '1',
  'size': '3',
  'stride': '1'},
 {'activation': 'linear', 'from': '-3', 'name': 'shortcut'},
 {'activation': 'leaky',
  'batch_normalize': '1',
  'filters': '128',
  'name': 'convolutional',
  'pad': '1',
  'size': '3',
  'stride': '2'},
 {'activation': 'leaky',
  'batch_normalize': '1',
  'filters': '64',
  'name': 'convolutional',
  'pad': '1',
  'size': '1',
  'stride': '1'},
 {'activation': 'leaky',
  'batch_normalize': '1',
  'filters': '128',
  '