In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

# import

In [2]:
from ..exp import resnet_ssd

In [3]:
from ..exp import databunch

In [4]:
from fastai.basic_train import Learner,DatasetType

In [5]:
# export
from fastai.callbacks.hooks import Hook,Hooks

In [6]:
# export
from torch import nn

In [7]:
# export
import torch

In [8]:
from IPython.core import debugger as idb

In [9]:
# export
from fastprogress.fastprogress import progress_bar

In [10]:
# export
import re

# functions

## LSUV

### get_convs

In [11]:
# export
def get_convs(model):
    """Collect every `nn.Conv2d` found in `model`.

    The modules are returned as a list, in the order in which
    `model.modules()` yields them. No filter on kernel size is applied, so
    1x1 convolutions are included as well.
    """
    return [layer for layer in model.modules() if isinstance(layer, nn.Conv2d)]

### show_layer_stats

In [12]:
# export
def hook_mean_std(m, i, o):
    "Summarise output `o` of module `m`: (kernel size, shape without dim 0, mean, std)."
    kernel = m.kernel_size[0]      # first entry of the module's kernel size
    trailing_shape = o.shape[1:]   # shape of `o` with its first dimension dropped
    mean_value = o.mean().item()
    std_value = o.std().item()
    return kernel, trailing_shape, mean_value, std_value

In [13]:
# export
# helper function
def show_layer_stats(model,x_batch):
    """Print output statistics of every conv layer of `model` for one batch.

    A `hook_mean_std` forward hook is attached to each module returned by
    `get_convs(model)`, the model is run once on `x_batch`, and the tuple
    stored by each hook is printed, one line per layer.

    Parameters:
    -- model: the model to inspect.
    -- x_batch: a batch of input data passed to `model`.

    Returns:
    -- None; the statistics are only printed.
    """
    convs = get_convs(model)
    # This pass is purely diagnostic, so run it under `no_grad`: no autograd
    # graph is recorded and no activations are kept alive for a backward pass.
    with torch.no_grad(), Hooks(convs, hook_mean_std) as hooks_hd:
        _ = model(x_batch)

    for s in hooks_hd.stored:
        print(s)

### runtime_init_linear

In [14]:
# export
def hook_init(m,i,o):
    """Forward hook that rescales `m.weight` by the std of the layer output `o`.

    The weights are divided in place by `o.std()`. When that standard
    deviation is zero or not finite (for example a constant output, or an
    output with a single element), the division would fill the weights with
    inf/NaN, so the layer is left untouched and a warning is emitted instead.

    Parameters:
    -- m: the hooked module; it must have a `weight` attribute.
    -- i: the module input (unused).
    -- o: the module output.

    Returns:
    -- None.
    """
    std = o.std()
    if torch.isfinite(std) and std > 0:
        m.weight.data /= std
    else:
        import warnings
        warnings.warn(
            f"hook_init: output std of {type(m).__name__} is {std.item()}; "
            "weights left unchanged."
        )

In [15]:
# export
def runtime_init_linear(model, x_batch, hook_init=hook_init):
    '''
    Data-dependent, layer-by-layer initialisation of the conv layers of `model`.
    The idea comes from LSUV (https://arxiv.org/pdf/1511.06422.pdf).

    Every module returned by `get_convs(model)` (i.e. every `nn.Conv2d`) first
    receives a rough initialisation (bias -> 0, weights -> N(0,1)) and is then
    refined at runtime by a forward hook.

    The refinement proceeds one layer at a time, in order:
    (1) run the model once and initialise the first layer; its input is
        x_batch (which is well-behaved), so after the hook has run the output
        of that layer is well-behaved too;
    (2) run the model again; the output of the first layer (i.e. the input of
        the second layer) is now well-behaved, so initialise the second layer;
    (3) run the model again; the outputs of the first and second layers are
        now well-behaved, so initialise the third layer;
    (4) and so on.
    One full forward pass is executed per layer, so this takes a while.
    -----------------------
    Parameters:
    -- model: the model that will get initialized (modified in place).
    -- x_batch: a batch of data to run the model on; you should make sure that
       x_batch is normalized.
    -- hook_init: a forward-hook function `(module, input, output)` used to
       initialize each layer.
    -----------------------
    Returns:
    -- the same `model` object, with its conv weights and biases initialized.
       The `requires_grad` flag of every parameter is restored to the value it
       had on entry, also when a forward pass raises.
    '''
    # Remember which parameters were trainable, so that layers the caller had
    # frozen are not silently unfrozen when this function finishes.
    params = list(model.parameters())
    grad_flags = [p.requires_grad for p in params]

    # Switch gradients off, otherwise the layer weights cannot be modified
    # in place at runtime.
    model.requires_grad_(False)
    try:
        # find the modules to initialise
        ms = get_convs(model)

        # rough initialisation: bias -> 0, weights -> N(0,1)
        for m in ms:
            if m.bias is not None:
                m.bias.zero_()
            m.weight.normal_(0,1)

        # module-by-module initialisation: one forward pass per hooked module
        pb_ms = progress_bar(ms)
        pb_ms.comment = 'runing init'
        for m in pb_ms:
            with Hook(m, hook_init):
                _ = model(x_batch)
    finally:
        # put every parameter back into the trainable/frozen state it had
        for p, flag in zip(params, grad_flags):
            p.requires_grad_(flag)

    return model

## init to pretrained

In [16]:
# export
def init_to_pretrained(model, pretrained_dict, link_names):
    '''
    Initialise selected parameters of `model` from a pretrained state dict.

    `model` does not have to be identical to the pretrained network, and this
    function does not load the whole state dict: only the parameters whose
    name matches one of the patterns in `link_names` are loaded, all other
    parameters are left untouched.

    For each parameter the first matching `(pattern, replacement)` pair wins:
    the pretrained name is `re.sub(pattern, replacement, name)`. The pretrained
    values are copied (never aliased) onto the device and dtype the parameter
    already has, so `pretrained_dict` can be reused safely afterwards. Shapes
    are not validated.

    NOTE(review): only `model.named_parameters()` is visited, so buffers (for
    example BatchNorm running statistics) are not loaded -- confirm that this
    is intended.
    -----------------------
    Parameters:
    -- model: the model whose parameters are to be loaded (modified in place).
    -- pretrained_dict: parameter dict of the pretrained model (name -> tensor).
    -- link_names: sequence of `(pattern, replacement)` pairs mapping parameter
       names of `model` to parameter names of the pretrained model.
    -----------------------
    Raises:
    -- KeyError: if a translated name is missing from `pretrained_dict`.
    '''
    for name, param in model.named_parameters():
        for ns in link_names:
            pattern, replacement = ns[0], ns[1]
            if re.search(pattern, name) is None:
                continue
            source = pretrained_dict[re.sub(pattern, replacement, name)]
            # clone() so the parameter does not share storage with the dict entry
            param.data = source.detach().clone().to(device=param.device, dtype=param.dtype)
            break

## test

In [17]:
# Prepare the data: build the databunch and draw one batch from it.
# `x` is the input batch passed to the models in the test cells below.
data = databunch.get_databunch(data_root='./data/ds_20200428/')
x,y = data.one_batch()

### resnet18_ssd

In [18]:
# model = resnet_ssd.get_resnet18_ssd()

In [19]:
# check layer output stats before runtime init.
# show_layer_stats(model,x)

In [20]:
# runtime init
# runtime_init_linear(model, x, hook_init);

In [21]:
# check layer output stats after runtime init
# show_layer_stats(model,x)

In [22]:
# 因为runtime_init耗时较长，保存初始化后的模型，方便快速调用
# torch.save(model.state_dict(), './models/resnet18_ssd_init.pth') # 只在要保存初始化模型时运行该行代码

### resnet18_1ssd

In [23]:
# model = resnet_ssd.get_resnet18_1ssd()

#### LSUV

In [24]:
# check layer output stats before runtime init.
# show_layer_stats(model,x)

In [25]:
# runtime init
# runtime_init_linear(model, x, hook_init);

In [26]:
# check layer output stats after runtime init
# show_layer_stats(model,x)

In [27]:
# 因为runtime_init耗时较长，保存初始化后的模型，方便快速调用
# torch.save(model.state_dict(), './models/resnet18_1ssd_init.pth') # 只在要保存初始化模型时运行该行代码

#### init_to_pretrained

In [28]:
# pdict = torch.load('./models/pytorch_pretrained/resnet18-5c106cde.pth')

In [29]:
# 将模型中与 resnet18 对应的部分的参数从预训练模型加载
# link_names = [(r'^conv1',        'conv1'),
#               (r'^bn1',          'bn1'),
#               (r'^res_blocks.0', 'layer1'),
#               (r'^res_blocks.1', 'layer2'),
#               (r'^res_blocks.2', 'layer3'),
#               (r'^res_blocks.3', 'layer4')]
# init_to_pretrained(model, pdict, link_names)

In [30]:
# check layer output stats before runtime init.
# show_layer_stats(model,x)

In [31]:
# torch.save(model.state_dict(), './models/pretrained_res18_1ssd.pth') # 只在要保存初始化模型时运行该行代码

### resnet18_1ssd_2

In [38]:
model = resnet_ssd.get_resnet18_1ssd_2()

#### LSUV

In [40]:
# check layer output stats before runtime init.
show_layer_stats(model,x)

(7, torch.Size([64, 392, 392]), 0.04044461250305176, 0.2338630110025406)
(3, torch.Size([64, 196, 196]), -0.040899619460105896, 1.4524664878845215)
(3, torch.Size([64, 196, 196]), -0.023861113935709, 1.0377568006515503)
(3, torch.Size([64, 196, 196]), -0.0027381922118365765, 1.8267364501953125)
(3, torch.Size([64, 196, 196]), -0.041567351669073105, 0.979866623878479)
(3, torch.Size([128, 98, 98]), 0.03197834640741348, 1.5994309186935425)
(3, torch.Size([128, 98, 98]), 0.026550687849521637, 1.0436439514160156)
(1, torch.Size([128, 98, 98]), 0.0032223782036453485, 1.5773289203643799)
(3, torch.Size([128, 98, 98]), -0.052147310227155685, 1.3461328744888306)
(3, torch.Size([128, 98, 98]), 0.011380032636225224, 0.9789604544639587)
(3, torch.Size([256, 49, 49]), -0.05214457958936691, 1.3046921491622925)
(3, torch.Size([256, 49, 49]), 0.047852545976638794, 0.9583020210266113)
(1, torch.Size([256, 49, 49]), 0.020024040713906288, 1.288756012916565)
(3, torch.Size([256, 49, 49]), 0.0178394000977

In [41]:
# runtime init
runtime_init_linear(model, x, hook_init);

In [42]:
# check layer output stats after runtime init
show_layer_stats(model,x)

(7, torch.Size([64, 392, 392]), -0.08844493329524994, 1.0)
(3, torch.Size([64, 196, 196]), -0.026781173422932625, 1.0)
(3, torch.Size([64, 196, 196]), 0.01705138199031353, 1.0)
(3, torch.Size([64, 196, 196]), 0.008169354870915413, 0.9999999403953552)
(3, torch.Size([64, 196, 196]), -0.01741747558116913, 1.0)
(3, torch.Size([128, 98, 98]), 0.019459621980786324, 1.0)
(3, torch.Size([128, 98, 98]), -0.005722504109144211, 1.0)
(1, torch.Size([128, 98, 98]), -0.0583064891397953, 1.0)
(3, torch.Size([128, 98, 98]), -0.018746407702565193, 1.0)
(3, torch.Size([128, 98, 98]), 0.01924307830631733, 0.9999999403953552)
(3, torch.Size([256, 49, 49]), 0.007273969706147909, 1.0)
(3, torch.Size([256, 49, 49]), -0.04466003179550171, 1.0)
(1, torch.Size([256, 49, 49]), 0.043883103877305984, 1.0)
(3, torch.Size([256, 49, 49]), 0.014819080010056496, 1.0)
(3, torch.Size([256, 49, 49]), 0.005339494440704584, 1.0)
(3, torch.Size([512, 25, 25]), -0.03673718124628067, 1.0)
(3, torch.Size([512, 25, 25]), -0.024

In [44]:
# runtime_init takes a long time, so save the initialised model for quick reuse.
torch.save(model.state_dict(), './models/resnet18_1ssd_2_init.pth') # run this line only when the initialised model should be saved

#### init_to_pretrained

In [45]:
pdict = torch.load('./models/pytorch_pretrained/resnet18-5c106cde.pth')

In [46]:
# Load the parameters of the part of the model that corresponds to resnet18
# from the pretrained model. Each pair is (regex on the model's parameter
# name, replacement giving the name used in the pretrained dict).
link_names = [(r'^conv1',        'conv1'),
              (r'^bn1',          'bn1'),
              (r'^res_blocks.0', 'layer1'),
              (r'^res_blocks.1', 'layer2'),
              (r'^res_blocks.2', 'layer3'),
              (r'^res_blocks.3', 'layer4')]
init_to_pretrained(model, pdict, link_names)

In [47]:
# check layer output stats after loading the pretrained weights.
show_layer_stats(model,x)

(7, torch.Size([64, 392, 392]), 0.004280636552721262, 1.0100752115249634)
(3, torch.Size([64, 196, 196]), -0.7636838555335999, 1.212753176689148)
(3, torch.Size([64, 196, 196]), -0.06247009336948395, 0.365950345993042)
(3, torch.Size([64, 196, 196]), -0.5044122934341431, 1.0826414823532104)
(3, torch.Size([64, 196, 196]), -0.05583054944872856, 0.2739122211933136)
(3, torch.Size([128, 98, 98]), -0.23261772096157074, 0.7655931711196899)
(3, torch.Size([128, 98, 98]), -0.17523027956485748, 0.35710206627845764)
(1, torch.Size([128, 98, 98]), -0.057857248932123184, 0.3755256235599518)
(3, torch.Size([128, 98, 98]), -0.24035312235355377, 0.511385977268219)
(3, torch.Size([128, 98, 98]), -0.033098671585321426, 0.2020649015903473)
(3, torch.Size([256, 49, 49]), -0.29096776247024536, 0.5045275688171387)
(3, torch.Size([256, 49, 49]), -0.10739132016897202, 0.3606847822666168)
(1, torch.Size([256, 49, 49]), -0.035742126405239105, 0.1319136619567871)
(3, torch.Size([256, 49, 49]), -0.3483544588088

In [48]:
torch.save(model.state_dict(), './models/pretrained_res18_1ssd_2.pth') # run this line only when the initialised model should be saved

### resnet34_1ssd

In [19]:
model = resnet_ssd.get_resnet34_1ssd()

#### LSUV

In [40]:
# check layer output stats before runtime init.
# NOTE(review): the output recorded for this cell is identical to the one in
# the resnet18_1ssd_2 section, and its spatial sizes (392, 196, ...) differ
# from those printed after runtime init below (388, 194, ...). It looks stale;
# re-run this cell to confirm.
show_layer_stats(model,x)

(7, torch.Size([64, 392, 392]), 0.04044461250305176, 0.2338630110025406)
(3, torch.Size([64, 196, 196]), -0.040899619460105896, 1.4524664878845215)
(3, torch.Size([64, 196, 196]), -0.023861113935709, 1.0377568006515503)
(3, torch.Size([64, 196, 196]), -0.0027381922118365765, 1.8267364501953125)
(3, torch.Size([64, 196, 196]), -0.041567351669073105, 0.979866623878479)
(3, torch.Size([128, 98, 98]), 0.03197834640741348, 1.5994309186935425)
(3, torch.Size([128, 98, 98]), 0.026550687849521637, 1.0436439514160156)
(1, torch.Size([128, 98, 98]), 0.0032223782036453485, 1.5773289203643799)
(3, torch.Size([128, 98, 98]), -0.052147310227155685, 1.3461328744888306)
(3, torch.Size([128, 98, 98]), 0.011380032636225224, 0.9789604544639587)
(3, torch.Size([256, 49, 49]), -0.05214457958936691, 1.3046921491622925)
(3, torch.Size([256, 49, 49]), 0.047852545976638794, 0.9583020210266113)
(1, torch.Size([256, 49, 49]), 0.020024040713906288, 1.288756012916565)
(3, torch.Size([256, 49, 49]), 0.0178394000977

In [20]:
# runtime init
runtime_init_linear(model, x, hook_init);

In [21]:
# check layer output stats after runtime init
show_layer_stats(model,x)

(7, torch.Size([64, 388, 388]), -0.22475743293762207, 0.9999999403953552)
(3, torch.Size([64, 194, 194]), 0.06133989244699478, 1.0)
(3, torch.Size([64, 194, 194]), 0.00992268230766058, 1.0)
(3, torch.Size([64, 194, 194]), 0.0021607386879622936, 1.0)
(3, torch.Size([64, 194, 194]), -0.07307573407888412, 0.9999999403953552)
(3, torch.Size([64, 194, 194]), 0.04332830384373665, 1.0)
(3, torch.Size([64, 194, 194]), 0.03824002295732498, 0.9999999403953552)
(3, torch.Size([128, 97, 97]), 0.05183012783527374, 1.0)
(3, torch.Size([128, 97, 97]), -0.041025932878255844, 1.0)
(1, torch.Size([128, 97, 97]), 0.04694987088441849, 1.0)
(3, torch.Size([128, 97, 97]), -0.01028610672801733, 0.9999999403953552)
(3, torch.Size([128, 97, 97]), -0.013548360206186771, 0.9999999403953552)
(3, torch.Size([128, 97, 97]), 0.08813803642988205, 1.0)
(3, torch.Size([128, 97, 97]), -0.013529094867408276, 1.0)
(3, torch.Size([128, 97, 97]), 0.025644350796937943, 1.0)
(3, torch.Size([128, 97, 97]), 0.07706756889820099,

In [22]:
# runtime_init takes a long time, so save the initialised model for quick reuse.
torch.save(model.state_dict(), './models/resnet34_1ssd_init.pth') # run this line only when the initialised model should be saved

#### init_to_pretrained

In [23]:
pdict = torch.load('./models/pytorch_pretrained/resnet34-333f7ec4.pth')

In [24]:
# Load the parameters of the part of the model that corresponds to resnet34
# from the pretrained model. Each pair is (regex on the model's parameter
# name, replacement giving the name used in the pretrained dict).
link_names = [(r'^conv1',        'conv1'),
              (r'^bn1',          'bn1'),
              (r'^res_blocks.0', 'layer1'),
              (r'^res_blocks.1', 'layer2'),
              (r'^res_blocks.2', 'layer3'),
              (r'^res_blocks.3', 'layer4')]
init_to_pretrained(model, pdict, link_names)

In [25]:
# check layer output stats after loading the pretrained weights.
show_layer_stats(model,x)

(7, torch.Size([64, 388, 388]), -0.020484235137701035, 1.012465000152588)
(3, torch.Size([64, 194, 194]), -0.6655901074409485, 1.1187868118286133)
(3, torch.Size([64, 194, 194]), 0.01914152316749096, 0.23187008500099182)
(3, torch.Size([64, 194, 194]), -0.6282629370689392, 1.0450106859207153)
(3, torch.Size([64, 194, 194]), -0.019603963941335678, 0.22291164100170135)
(3, torch.Size([64, 194, 194]), -0.2621537744998932, 0.98326575756073)
(3, torch.Size([64, 194, 194]), -0.0099524250254035, 0.15180009603500366)
(3, torch.Size([128, 97, 97]), -0.12007935345172882, 0.8421780467033386)
(3, torch.Size([128, 97, 97]), -0.07687575370073318, 0.2658337354660034)
(1, torch.Size([128, 97, 97]), 0.006520788185298443, 0.4429892301559448)
(3, torch.Size([128, 97, 97]), -0.12312431633472443, 0.68498694896698)
(3, torch.Size([128, 97, 97]), -0.04973595589399338, 0.14783209562301636)
(3, torch.Size([128, 97, 97]), -0.08999964594841003, 0.505359947681427)
(3, torch.Size([128, 97, 97]), -0.020226739346981

In [26]:
torch.save(model.state_dict(), './models/pretrained_res34_1ssd.pth') # run this line only when the initialised model should be saved

# export

In [1]:
!python ../../notebook2script.py --fname 'init_model.ipynb' --outputDir '../exp/'

Converted init_model.ipynb to ../exp/init_model.py
