In [None]:
""" 本代码仅作为DeepSpeech模型的实现参考
class BNGRU(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(BNGRU, self).__init__()

        self.hidden_size = hidden_size
        self.bn = nn.BatchNorm1d(input_size)
        self.gru = nn.GRU(input_size, hidden_size, bidirectional=True)

    def forward(self, x, xlen):
        maxlen = x.size(2)
        x = self.bn(x)
        # N×C×T -> T×N×C
        x = x.permute(2, 0, 1)
        x = nn.utils.rnn.pack_padded_sequence(x, xlen) 
        x, _ = self.gru(x)
        x, _ = nn.utils.rnn.pad_packed_sequence(x, total_length=maxlen)
        x = x[..., :self.hidden_size] + x[..., self.hidden_size:]
        # T×N×C -> N×C×T
        x = x.permute(1, 2, 0)
        return x

class DeepSpeech(nn.Module):

    def __init__(self, mel_channel, channels, kernel_dims, strides, 
        num_layers, hidden_size, char_size):

        super(DeepSpeech, self).__init__()
        self.kernel_dims = kernel_dims
        self.strides = strides
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.char_size = char_size

        self.cnns = nn.ModuleList()
        in_channel = mel_channel
        for c, k, s in zip(channels, kernel_dims, strides):
            self.cnns.append(nn.Conv1d(in_channel, c, k, 
                stride=s, padding=c//2))
            self.cnns.append(nn.BatchNorm1d(c))
            self.cnns.append(nn.ReLU(inplace=True))
            in_channel = c
        self.cnns = nn.Sequential(*self.cnns)        

        self.rnns = nn.ModuleList()
        for _ in range(num_layers):
            self.rnns.append(BNGRU(in_channel, hidden_size))
            in_channel = hidden_size

        self.norm = nn.BatchNorm1d(hidden_size)
        self.proj = nn.Sequential(
            nn.Linear(hidden_size, char_size)
        ) 

    def forward(self, x, xlen):
        # T×N×C -> N×C×T
        x = x.permute(1, 2, 0)
        x = self.cnns(x)

        for rnn in self.rnns:
            x = rnn(x, xlen)
        x = self.norm(x)

        # N×C×T -> T×N×C
        x = x.permute(2, 0, 1)
        x = self.proj(x)

        return F.log_softmax(x, -1)
"""

import torch
import torch.nn as nn

class BNGRU(nn.Module):
    """Batch-normalised bidirectional GRU layer working on N×C×T tensors.

    The input is batch-normalised over the channel axis, run through a
    bidirectional GRU as a packed sequence, and the forward and backward
    direction outputs are summed so the result has ``hidden_size`` channels.

    Args:
        input_size: number of input channels C.
        hidden_size: number of GRU hidden units (and output channels).
    """

    def __init__(self, input_size, hidden_size):
        super(BNGRU, self).__init__()

        self.hidden_size = hidden_size
        self.bn = nn.BatchNorm1d(input_size)
        self.gru = nn.GRU(input_size, hidden_size, bidirectional=True)

    def forward(self, x, xlen):
        """Apply the layer.

        Args:
            x: tensor of shape N×C×T.
            xlen: valid length of every sequence in the batch (list or CPU
                tensor of N integers). The batch does not need to be sorted
                by length.

        Returns:
            Tensor of shape N×hidden_size×T; positions beyond each
            sequence's length are zero.
        """
        maxlen = x.size(2)
        x = self.bn(x)
        # N×C×T -> T×N×C
        x = x.permute(2, 0, 1)
        # enforce_sorted=False lets PyTorch sort/unsort internally, so
        # callers are not forced to order the batch by decreasing length.
        x = nn.utils.rnn.pack_padded_sequence(x, xlen, enforce_sorted=False)
        x, _ = self.gru(x)
        # total_length keeps the time axis identical to the input's.
        x, _ = nn.utils.rnn.pad_packed_sequence(x, total_length=maxlen)
        # Sum the forward and backward direction outputs.
        x = x[..., :self.hidden_size] + x[..., self.hidden_size:]
        # T×N×C -> N×C×T
        x = x.permute(1, 2, 0)
        return x

class DeepSpeech(nn.Module):
    """DeepSpeech-style acoustic model: Conv1d stack -> BNGRU stack -> linear.

    Args:
        mel_channel: number of input feature channels.
        channels: output channels of each convolution layer.
        kernel_dims: kernel size of each convolution layer.
        strides: stride of each convolution layer.
        num_layers: number of BNGRU layers.
        hidden_size: hidden size of every BNGRU layer.
        char_size: size of the output vocabulary.
    """

    def __init__(self, mel_channel, channels, kernel_dims, strides,
                 num_layers, hidden_size, char_size):

        super(DeepSpeech, self).__init__()
        self.kernel_dims = kernel_dims
        self.strides = strides
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.char_size = char_size

        cnns = []
        # (kernel, stride, padding) of every conv layer, used to map input
        # lengths to post-convolution lengths in forward().
        self._conv_geometry = []
        in_channel = mel_channel
        for c, k, s in zip(channels, kernel_dims, strides):
            # Padding must follow the kernel size (not the channel count)
            # so that a stride-1 convolution with an odd kernel keeps T.
            padding = k // 2
            cnns.append(nn.Conv1d(in_channel, c, k,
                                  stride=s, padding=padding))
            cnns.append(nn.BatchNorm1d(c))
            cnns.append(nn.ReLU(inplace=True))
            self._conv_geometry.append((k, s, padding))
            in_channel = c
        self.cnns = nn.Sequential(*cnns)

        self.rnns = nn.ModuleList()
        for _ in range(num_layers):
            self.rnns.append(BNGRU(in_channel, hidden_size))
            in_channel = hidden_size

        self.norm = nn.BatchNorm1d(hidden_size)
        self.proj = nn.Sequential(
            nn.Linear(hidden_size, char_size)
        )

    def _output_lengths(self, xlen):
        """Map input lengths to the lengths left after the conv stack."""
        lengths = torch.as_tensor(xlen).tolist()
        for k, s, p in self._conv_geometry:
            # Standard Conv1d output-length formula with dilation 1.
            lengths = [(n + 2 * p - k) // s + 1 for n in lengths]
        return torch.tensor(lengths, dtype=torch.long)

    def forward(self, x, xlen):
        """Compute per-frame log-probabilities.

        Args:
            x: input features of shape T×N×C.
            xlen: valid length (in input frames) of every sequence.

        Returns:
            Log-probabilities of shape T'×N×char_size, where T' is the
            time length after the convolution stack.
        """
        # T×N×C -> N×C×T
        x = x.permute(1, 2, 0)
        x = self.cnns(x)
        # Strided convolutions shorten the time axis, so the lengths used
        # for sequence packing must shrink accordingly.
        xlen = self._output_lengths(xlen)

        for rnn in self.rnns:
            x = rnn(x, xlen)
        x = self.norm(x)

        # N×C×T -> T×N×C
        x = x.permute(2, 0, 1)
        x = self.proj(x)

        # `F` is not imported in this file; go through nn.functional.
        return nn.functional.log_softmax(x, -1)


In [None]:
""" 本代码仅作为DQN模型的参考实现
"""

import copy
import random

import gym
import torch
import torch.nn as nn

class DQN(nn.Module):
    """Q-network that scores a (state, action) pair with a 3-layer MLP.

    The discrete action is one-hot encoded and concatenated to the state
    vector before being fed to the network.

    Args:
        naction: number of discrete actions.
        nstate: dimensionality of the state vector.
        nhidden: width of the two hidden layers.
    """

    def __init__(self, naction, nstate, nhidden):
        super(DQN, self).__init__()
        self.naction = naction
        self.nstate = nstate
        self.linear1 = nn.Linear(naction + nstate, nhidden)
        self.linear2 = nn.Linear(nhidden, nhidden)
        self.linear3 = nn.Linear(nhidden, 1)

    def forward(self, state, action):
        """Return Q(state, action) for every row of the batch.

        Args:
            state: float tensor of shape (N, nstate).
            action: integer (long) tensor of shape (N,) with action indices.

        Returns:
            Tensor of shape (N,) holding the Q-values.
        """
        # Allocate the one-hot buffer with the same dtype/device as `state`
        # so the concatenation also works on GPU or in other precisions.
        action_enc = state.new_zeros(action.size(0), self.naction)
        action_enc.scatter_(1, action.unsqueeze(-1), 1)
        output = torch.cat((state, action_enc), dim=-1)
        output = torch.relu(self.linear1(output))
        output = torch.relu(self.linear2(output))
        output = self.linear3(output)
        return output.squeeze(-1)

class Memory(object):
    """Bounded replay buffer of (state, action, next_state, reward, done).

    When the buffer is full, a uniformly chosen old transition is evicted
    before the new one is stored.

    Args:
        capacity: maximum number of transitions kept.
    """

    def __init__(self, capacity=1000):

        self.capacity = capacity
        self.size = 0
        self.data = []

    def __len__(self):
        return self.size

    def push(self, state, action, state_next, reward, is_ended):
        """Store one transition, evicting a random one if the buffer is full."""
        # `>=` (not `>`) keeps the buffer at most `capacity` items long.
        if self.data and len(self.data) >= self.capacity:
            self.data.pop(random.randrange(len(self.data)))

        self.data.append((state, action, state_next, reward, is_ended))
        # Keep the public counter in sync with the stored data.
        self.size = len(self.data)

    def sample(self, bs):
        """Draw `bs` transitions with replacement and batch them as tensors.

        Returns:
            Tuple ``(states, actions, states_next, rewards, is_ended)``;
            all are float32 tensors except ``actions``, whose dtype is
            inferred from the stored action values.

        Raises:
            IndexError: if the buffer is empty.
        """
        data = random.choices(self.data, k=bs)
        states, actions, states_next, rewards, is_ended = zip(*data)

        states = torch.tensor(states, dtype=torch.float32)
        actions = torch.tensor(actions)
        states_next = torch.tensor(states_next, dtype=torch.float32)
        rewards = torch.tensor(rewards, dtype=torch.float32)
        is_ended = torch.tensor(is_ended, dtype=torch.float32)

        return states, actions, states_next, rewards, is_ended

# Two networks are used to stabilise/accelerate convergence: `dqn` is
# trained, `dqn_t` is a target network synchronised periodically.
dqn = DQN(2, 4, 8)
dqn_t = DQN(2, 4, 8)
dqn_t.load_state_dict(copy.deepcopy(dqn.state_dict()))
# Exploration probability of the epsilon-greedy policy
eps = 0.1
# Discount factor
gamma = 0.999

optim = torch.optim.Adam(dqn.parameters(), lr=1e-3)
# Huber loss between prediction and target. The original cell called an
# undefined one-argument `HuberLoss()`; SmoothL1Loss (beta=1) is the
# built-in equivalent taking (input, target).
criterion = nn.SmoothL1Loss()

step_cnt = 0
mem = Memory()
# NOTE(review): `env` was never created in the original cell. CartPole
# matches the 2 actions / 4-dim state hard-coded above -- confirm.
env = gym.make("CartPole-v0")

for episode in range(300):
    state = env.reset()
    while True:
        action_t = torch.tensor([0, 1])
        state_t = torch.tensor([state, state], dtype=torch.float32)

        # Evaluate both actions and pick the greedy one
        with torch.no_grad():
            q_t = dqn(state_t, action_t)
            max_t = q_t.argmax()

        # Exploration / exploitation trade-off
        if random.random() < eps:
            max_t = random.choice([0, 1])
        else:
            max_t = max_t.item()

        state_next, reward, done, info = env.step(max_t)

        mem.push(state, max_t, state_next, reward, done)
        state = state_next

        if done:
            break

        # Experience replay
        for _ in range(10):
            state_t, action_t, state_next_t, reward_t, is_ended_t = \
                mem.sample(32)

            q1 = dqn(state_t, action_t)

            # The target is computed without gradients
            with torch.no_grad():
                q2_0 = dqn_t(state_next_t,
                             torch.zeros(state_t.size(0), dtype=torch.long))
                q2_1 = dqn_t(state_next_t,
                             torch.ones(state_t.size(0), dtype=torch.long))
                # Bellman backup; terminal transitions contribute no
                # bootstrapped value.
                q2_max = reward_t + gamma * (1 - is_ended_t) * \
                    torch.stack((q2_0, q2_1), dim=1).max(1)[0]

            # Optimise the Huber loss between Q(s, a) and the target
            loss = criterion(q1, q2_max)
            optim.zero_grad()
            loss.backward()
            # Clamp every gradient entry to [-1, 1]
            nn.utils.clip_grad_value_(dqn.parameters(), 1)
            optim.step()
            step_cnt += 1

            # Synchronise the target network
            if step_cnt % 1000 == 0:
                dqn_t.load_state_dict(copy.deepcopy(dqn.state_dict()))
env.close()


In [None]:
""" 以下代码仅为函数签名，不能实际运行
"""

# CTC损失函数
class torch.nn.CTCLoss(blank=0, reduction='mean', zero_infinity=False)

# 对应forward方法的定义
def forward(self, log_probs, targets, input_lengths, target_lengths)


In [None]:
""" 本代码仅供半精度模型训练的参考
"""

from apex.fp16_utils import *
from apex import amp, optimizers

# Illustrative mixed-precision training fragment using NVIDIA apex.
# `Model`, `args`, `criterion`, `output` and `target` are not defined in
# this cell and must be supplied by the surrounding training script.
model = Model()
model = model.cuda()
optimizer = torch.optim.SGD(model.parameters(), args.lr,
                            momentum=args.momentum,
                            weight_decay=args.weight_decay)
# Let amp wrap the model and optimizer; amp.initialize returns both.
model, optimizer = amp.initialize(model, optimizer,
                               opt_level=args.opt_level,
                               keep_batchnorm_fp32=args.keep_batchnorm_fp32,
                               loss_scale=args.loss_scale)
# ... (forward pass elided in the original)
loss = criterion(output, target)
optimizer.zero_grad()

# Back-propagate through the scaled loss yielded by amp.scale_loss
# instead of calling loss.backward() directly.
with amp.scale_loss(loss, optimizer) as scaled_loss:
     scaled_loss.backward()
optimizer.step()

In [None]:
""" 以下代码仅为Tacotron模型的一个参考实现
"""

import torch
import torch.nn as nn

# Tacotron编码器
class Encoder(nn.Module):
    """Tacotron encoder: a Conv1d+BatchNorm stack followed by a BiLSTM.

    Args:
        encoder_n_convolutions: number of convolution layers.
        encoder_embedding_dim: channel count of the input embedding; also
            the output size (two LSTM directions of half that size each).
        encoder_kernel_size: kernel size of every convolution.
    """

    def __init__(self, encoder_n_convolutions,
                 encoder_embedding_dim, encoder_kernel_size):
        super(Encoder, self).__init__()

        convolutions = []
        for _ in range(encoder_n_convolutions):
            conv_layer = nn.Sequential(
                nn.Conv1d(
                    encoder_embedding_dim,
                    encoder_embedding_dim,
                    kernel_size=encoder_kernel_size,
                    stride=1,
                    padding=encoder_kernel_size // 2,
                    dilation=1),
                nn.BatchNorm1d(encoder_embedding_dim))
            convolutions.append(conv_layer)
        self.convolutions = nn.ModuleList(convolutions)

        self.lstm = nn.LSTM(encoder_embedding_dim,
                            encoder_embedding_dim // 2, 1,
                            batch_first=True, bidirectional=True)

    def forward(self, x, input_lengths):
        """Encode a batch of embedded sequences.

        Args:
            x: tensor of shape N×C×T.
            input_lengths: tensor with the valid length of every sequence,
                sorted in decreasing order (required by
                ``pack_padded_sequence`` with its default settings).

        Returns:
            Tensor of shape N×T_max×C, where T_max is the largest length.
        """
        # `F` is not imported in this cell; use nn.functional directly.
        for conv in self.convolutions:
            x = nn.functional.dropout(
                nn.functional.relu(conv(x)), 0.5, self.training)

        # N×C×T -> N×T×C for the batch_first LSTM
        x = x.transpose(1, 2)

        # pack_padded_sequence needs the lengths on the CPU
        input_lengths = input_lengths.cpu()
        x = nn.utils.rnn.pack_padded_sequence(
            x, input_lengths, batch_first=True)

        self.lstm.flatten_parameters()
        outputs, _ = self.lstm(x)

        outputs, _ = nn.utils.rnn.pad_packed_sequence(
            outputs, batch_first=True)
        return outputs

# Tacotron前处理/后处理代码
class Prenet(nn.Module):
    """Stack of bias-free linear layers, each followed by ReLU and dropout.

    Args:
        in_dim: size of the input features.
        sizes: output size of every linear layer, in order.
    """

    def __init__(self, in_dim, sizes):
        super(Prenet, self).__init__()
        in_sizes = [in_dim] + sizes[:-1]
        self.layers = nn.ModuleList(
            [nn.Linear(in_size, out_size, bias=False)
             for (in_size, out_size) in zip(in_sizes, sizes)])

    def forward(self, x):
        """Apply the layers; dropout (p=0.5) stays active even in eval mode
        because ``training=True`` is passed explicitly."""
        # `F` is not imported in this cell; use nn.functional directly.
        for linear in self.layers:
            x = nn.functional.dropout(
                nn.functional.relu(linear(x)), p=0.5, training=True)
        return x

class Postnet(nn.Module):
    """Tacotron post-net: a stack of Conv1d+BatchNorm layers refining mels.

    The first layer maps ``n_mel_channels`` to ``postnet_embedding_dim``,
    the middle layers keep that width, and the last layer maps back to
    ``n_mel_channels``.

    Args:
        n_mel_channels: number of mel channels (input and output).
        postnet_embedding_dim: channel width of the hidden layers.
        postnet_kernel_size: kernel size of every convolution.
        postnet_n_convolutions: total number of convolution layers.
    """

    def __init__(self, n_mel_channels, postnet_embedding_dim,
                 postnet_kernel_size, postnet_n_convolutions):

        super(Postnet, self).__init__()
        self.convolutions = nn.ModuleList()
        padding = postnet_kernel_size // 2

        self.convolutions.append(
            nn.Sequential(
                nn.Conv1d(n_mel_channels, postnet_embedding_dim,
                          kernel_size=postnet_kernel_size, stride=1,
                          padding=padding, dilation=1),
                nn.BatchNorm1d(postnet_embedding_dim))
        )

        for i in range(1, postnet_n_convolutions - 1):
            self.convolutions.append(
                nn.Sequential(
                    nn.Conv1d(postnet_embedding_dim,
                              postnet_embedding_dim,
                              kernel_size=postnet_kernel_size, stride=1,
                              padding=padding, dilation=1),
                    nn.BatchNorm1d(postnet_embedding_dim))
            )

        # The original referenced an undefined `ConvNorm` helper here; a
        # plain nn.Conv1d (as used by the other layers) replaces it.
        self.convolutions.append(
            nn.Sequential(
                nn.Conv1d(postnet_embedding_dim, n_mel_channels,
                          kernel_size=postnet_kernel_size, stride=1,
                          padding=padding, dilation=1),
                nn.BatchNorm1d(n_mel_channels))
        )

    def forward(self, x):
        """Apply the post-net to an N×n_mel_channels×T tensor.

        All layers but the last are followed by tanh; every layer output
        goes through dropout (p=0.5) while in training mode.
        """
        # `F` is not imported in this cell; use nn.functional directly.
        for conv in self.convolutions[:-1]:
            x = nn.functional.dropout(torch.tanh(conv(x)),
                                      0.5, self.training)
        x = nn.functional.dropout(self.convolutions[-1](x),
                                  0.5, self.training)

        return x

# Tacotron注意力机制
class LocationLayer(nn.Module):
    """Location features for attention: Conv1d over the stacked previous
    and cumulative attention weights, followed by a linear projection.

    Args:
        attention_n_filters: number of convolution filters.
        attention_kernel_size: convolution kernel size.
        attention_dim: size of the projected location features.
    """

    def __init__(self, attention_n_filters, attention_kernel_size,
                 attention_dim):
        super(LocationLayer, self).__init__()
        padding = attention_kernel_size // 2
        # The input is a 3-D (N, 2, T) tensor, so this must be a 1-D
        # convolution; Conv2d cannot process it as a batch.
        self.location_conv = nn.Conv1d(2, attention_n_filters,
                                       kernel_size=attention_kernel_size,
                                       padding=padding, bias=False, stride=1,
                                       dilation=1)
        self.location_dense = nn.Linear(attention_n_filters, attention_dim,
                                        bias=False)

    def forward(self, attention_weights_cat):
        """Map (N, 2, T) attention weights to (N, T, attention_dim)."""
        processed_attention = self.location_conv(attention_weights_cat)
        # (N, filters, T) -> (N, T, filters) so the linear layer acts on
        # the filter axis.
        processed_attention = processed_attention.transpose(1, 2)
        processed_attention = self.location_dense(processed_attention)
        return processed_attention

class Attention(nn.Module):
    """Location-sensitive additive attention.

    Args:
        attention_rnn_dim: size of the attention RNN hidden state (query).
        embedding_dim: size of the encoder outputs (memory).
        attention_dim: size of the shared attention space.
        attention_location_n_filters: filters of the location convolution.
        attention_location_kernel_size: kernel size of that convolution.
    """

    def __init__(self, attention_rnn_dim, embedding_dim, attention_dim,
                 attention_location_n_filters,
                 attention_location_kernel_size):

        super(Attention, self).__init__()
        self.query_layer = nn.Linear(attention_rnn_dim,
                                     attention_dim, bias=False)
        self.memory_layer = nn.Linear(embedding_dim,
                                      attention_dim, bias=False)

        self.v = nn.Linear(attention_dim, 1, bias=False)
        self.location_layer = LocationLayer(attention_location_n_filters,
                                            attention_location_kernel_size,
                                            attention_dim)
        self.score_mask_value = -float("inf")

    def get_alignment_energies(self, query, processed_memory,
                               attention_weights_cat):
        """Return unnormalised attention energies, one per memory step.

        The query is unsqueezed to (N, 1, dim) and broadcast-added to the
        location features and ``processed_memory`` before the tanh/``v``
        projection; the trailing singleton axis is squeezed away.
        """
        processed_query = self.query_layer(query.unsqueeze(1))
        processed_attention_weights = self.location_layer(
            attention_weights_cat)
        energies = self.v(torch.tanh(
            processed_query + processed_attention_weights +
            processed_memory))

        energies = energies.squeeze(-1)
        return energies

    def forward(self, attention_hidden_state, memory, processed_memory,
                attention_weights_cat, mask):
        """Compute the attention context and weights.

        Args:
            attention_hidden_state: current attention RNN hidden state.
            memory: encoder outputs that are averaged into the context.
            processed_memory: memory already projected to the attention
                space.
            attention_weights_cat: previous and cumulative attention
                weights stacked on dim 1.
            mask: boolean mask of positions to exclude, or None.

        Returns:
            ``(attention_context, attention_weights)``.
        """
        alignment = self.get_alignment_energies(
            attention_hidden_state, processed_memory,
            attention_weights_cat)

        if mask is not None:
            # Out-of-place masking keeps autograd aware of the operation
            # (the original wrote through `.data`).
            alignment = alignment.masked_fill(mask, self.score_mask_value)

        # `F` is not imported in this cell; torch.softmax is equivalent.
        attention_weights = torch.softmax(alignment, dim=1)
        attention_context = torch.bmm(attention_weights.unsqueeze(1),
                                      memory)
        attention_context = attention_context.squeeze(1)

        return attention_context, attention_weights

# Tacotron解码器
class Decoder(nn.Module):
    """Tacotron decoder (single-step ``decode`` only).

    The recurrent state read and written by ``decode`` (``attention_hidden``,
    ``attention_cell``, ``decoder_hidden``, ``decoder_cell``,
    ``attention_weights``, ``attention_weights_cum``, ``attention_context``,
    ``memory``, ``processed_memory``, ``mask``) is not initialised in this
    class as shown; it must be set up by the caller before ``decode`` runs.
    """

    def __init__(self, n_mel_channels, n_frames_per_step,
                 encoder_embedding_dim, attention_rnn_dim,
                 decoder_rnn_dim, prenet_dim, max_decoder_steps,
                 gate_threshold, p_attention_dropout,
                 attention_dim, attention_location_n_filters,
                 attention_location_kernel_size, p_decoder_dropout):

        super(Decoder, self).__init__()

        # Keep the constructor arguments as attributes; decode() reads
        # the two dropout probabilities.
        self.n_mel_channels = n_mel_channels
        self.n_frames_per_step = n_frames_per_step
        self.encoder_embedding_dim = encoder_embedding_dim
        self.attention_rnn_dim = attention_rnn_dim
        self.decoder_rnn_dim = decoder_rnn_dim
        self.prenet_dim = prenet_dim
        self.max_decoder_steps = max_decoder_steps
        self.gate_threshold = gate_threshold
        self.p_attention_dropout = p_attention_dropout
        self.p_decoder_dropout = p_decoder_dropout

        self.prenet = Prenet(
            n_mel_channels * n_frames_per_step,
            [prenet_dim, prenet_dim])

        self.attention_rnn = nn.LSTMCell(
            prenet_dim + encoder_embedding_dim,
            attention_rnn_dim)

        self.attention_layer = Attention(
            attention_rnn_dim, encoder_embedding_dim,
            attention_dim, attention_location_n_filters,
            attention_location_kernel_size)

        # The original passed a stray positional `1`, which LSTMCell
        # interprets as `bias` (truthy); the default bias=True is the same.
        self.decoder_rnn = nn.LSTMCell(
            attention_rnn_dim + encoder_embedding_dim,
            decoder_rnn_dim)

        self.linear_projection = nn.Linear(
            decoder_rnn_dim + encoder_embedding_dim,
            n_mel_channels * n_frames_per_step)

        self.gate_layer = nn.Linear(
            decoder_rnn_dim + encoder_embedding_dim, 1,
            bias=True)

    def decode(self, decoder_input):
        """Run one decoder step.

        Takes the decoder's mel-feature input for this step, updates the
        attention RNN, the attention context/weights and the decoder RNN
        (all stored on ``self``), and returns
        ``(decoder_output, gate_prediction, attention_weights)`` where
        ``gate_prediction`` is the stop-token logit.
        """
        # `F` is not imported in this cell; use nn.functional directly.
        cell_input = torch.cat((decoder_input, self.attention_context), -1)
        self.attention_hidden, self.attention_cell = self.attention_rnn(
            cell_input, (self.attention_hidden, self.attention_cell))
        self.attention_hidden = nn.functional.dropout(
            self.attention_hidden, self.p_attention_dropout, self.training)

        # Stack previous and cumulative weights as the two input channels
        # of the location layer.
        attention_weights_cat = torch.cat(
            (self.attention_weights.unsqueeze(1),
             self.attention_weights_cum.unsqueeze(1)), dim=1)
        self.attention_context, self.attention_weights = \
            self.attention_layer(self.attention_hidden,
                                 self.memory, self.processed_memory,
                                 attention_weights_cat, self.mask)

        self.attention_weights_cum += self.attention_weights
        decoder_input = torch.cat(
            (self.attention_hidden, self.attention_context), -1)
        self.decoder_hidden, self.decoder_cell = self.decoder_rnn(
            decoder_input, (self.decoder_hidden, self.decoder_cell))
        self.decoder_hidden = nn.functional.dropout(
            self.decoder_hidden, self.p_decoder_dropout, self.training)

        decoder_hidden_attention_context = torch.cat(
            (self.decoder_hidden, self.attention_context), dim=1)
        decoder_output = self.linear_projection(
            decoder_hidden_attention_context)

        gate_prediction = self.gate_layer(decoder_hidden_attention_context)
        return decoder_output, gate_prediction, self.attention_weights


In [None]:
""" 以下代码仅作为WaveNet的实现参考
"""

import torch
import torch.nn as nn

# 因果卷积模块
class CausalConv(nn.Module):
    """Gated causal dilated convolution block with local conditioning.

    Args:
        residual_channels: channels of the block input and residual output.
        gate_channels: channels produced by the dilated convolution; split
            in half into the tanh and sigmoid branches (must be even).
        kernel_size: kernel size of the dilated convolution.
        local_channels: channels of the local conditioning features.
        dropout: dropout probability applied to the block input.
        dilation: dilation of the causal convolution.
        bias: whether the convolutions (except the conditioning
            projection) use a bias.

    Raises:
        ValueError: if ``gate_channels`` is odd.
    """

    def __init__(self, residual_channels, gate_channels, kernel_size,
                 local_channels, dropout=0.05, dilation=1, bias=True):

        super(CausalConv, self).__init__()
        if gate_channels % 2 != 0:
            raise ValueError("gate_channels must be even")
        self.dropout = dropout

        # Pad by the full receptive field; forward() drops the trailing
        # outputs so each step only sees current and past samples.
        padding = (kernel_size - 1) * dilation
        self.conv = nn.Conv1d(residual_channels, gate_channels,
                              kernel_size, padding=padding,
                              dilation=dilation, bias=bias)

        # 1x1 convolutions (the original used an undefined `Conv1d1x1`).
        self.conv1x1_local = nn.Conv1d(local_channels, gate_channels,
                                       kernel_size=1, bias=False)
        gate_out_channels = gate_channels // 2
        self.conv1x1_out = nn.Conv1d(gate_out_channels, residual_channels,
                                     kernel_size=1, bias=bias)
        self.conv1x1_skip = nn.Conv1d(gate_out_channels, residual_channels,
                                      kernel_size=1, bias=bias)

    def forward(self, x, x_local):
        """Apply the block.

        Args:
            x: audio features of shape N×residual_channels×T.
            x_local: conditioning features of shape N×local_channels×T
                (mel features upsampled to the audio length).

        Returns:
            ``(x, s)``: the residual output and the skip output, both of
            shape N×residual_channels×T.
        """
        residual = x
        x = nn.functional.dropout(x, p=self.dropout, training=self.training)

        # Causal convolution: keep only the first T outputs
        x = self.conv(x)
        x = x[:, :, :residual.size(-1)]

        # Split the gate channels (dim 1) into the two branches; the
        # original split along the time axis, which mismatches the
        # channel counts of the following 1x1 convolutions.
        a, b = x.split(x.size(1) // 2, dim=1)
        # Modulate both branches with the local conditioning features
        c = self.conv1x1_local(x_local)
        ca, cb = c.split(c.size(1) // 2, dim=1)
        a, b = a + ca, b + cb

        x = torch.tanh(a) * torch.sigmoid(b)

        s = self.conv1x1_skip(x)
        x = self.conv1x1_out(x)

        # Scale by sqrt(0.5) (computed without `math`, which this cell
        # does not import).
        x = (x + residual) * (0.5 ** 0.5)
        return x, s

# WaveNet模型代码
class WaveNet(nn.Module):
    """WaveNet vocoder: stacked gated causal convolutions conditioned on
    upsampled mel features, producing a distribution over sample values.

    Args:
        out_channels: number of quantised amplitude classes (also the
            channel count of the input audio encoding).
        layers: total number of causal convolution blocks.
        layers_per_stack: blocks per dilation cycle (dilation is
            ``2 ** (layer % layers_per_stack)``).
        residual_channels: channels of the residual path.
        gate_channels: channels of the gated convolutions.
        mel_channels: channels of the mel conditioning features.
        mel_kernel: kernel size of the transposed-conv upsampler.
        mel_stride: stride (upsampling factor) of the upsampler.
        skip_out_channels: channels of the output head. The blocks emit
            ``residual_channels`` skip channels, so the two must match.
        kernel_size: kernel size of the causal convolutions.
        dropout: dropout probability inside each block.
        local_channels: channels of the upsampled conditioning features.
    """

    def __init__(self, out_channels=256, layers=20,
                 layers_per_stack=2,
                 residual_channels=512,
                 gate_channels=512,
                 mel_channels=80,
                 mel_kernel=1024,
                 mel_stride=256,
                 skip_out_channels=512,
                 kernel_size=3, dropout=0.05,
                 local_channels=512):

        super(WaveNet, self).__init__()

        self.out_channels = out_channels
        self.local_channels = local_channels
        self.first_conv = nn.Conv1d(out_channels, residual_channels, 1)

        self.conv_layers = nn.ModuleList()
        for layer in range(layers):
            dilation = 2**(layer % layers_per_stack)
            conv = CausalConv(residual_channels, gate_channels, kernel_size,
                              local_channels, dropout, dilation, True)
            self.conv_layers.append(conv)
        self.last_conv_layers = nn.ModuleList([
            nn.ReLU(inplace=True),
            nn.Conv1d(skip_out_channels, skip_out_channels, 1),
            nn.ReLU(inplace=True),
            nn.Conv1d(skip_out_channels, out_channels, 1),
        ])

        # The blocks consume `local_channels` conditioning channels, so
        # the upsampler must emit that many (the original emitted
        # `gate_channels`, which only matches by default).
        self.upsample_net = nn.ConvTranspose1d(mel_channels, local_channels,
                                               mel_kernel, mel_stride)

    def forward(self, x, x_local):
        """Predict per-sample class probabilities.

        Args:
            x: audio input of shape N×out_channels×T.
            x_local: mel features of shape N×mel_channels×T_mel.

        Returns:
            Tensor of shape N×out_channels×T with probabilities over the
            channel axis.

        Raises:
            ValueError: if the upsampled conditioning is shorter than T.
        """
        B, _, T = x.size()
        # Upsample the mel features to (at least) the audio length
        c = self.upsample_net(x_local)
        if c.size(-1) < T:
            raise ValueError(
                "upsampled conditioning is shorter than the audio input")
        # The transposed convolution generally yields more than T steps;
        # crop so the conditioning lines up with the audio.
        # NOTE(review): cropping from the start is assumed -- confirm the
        # intended alignment.
        c = c[:, :, :T]

        x = self.first_conv(x)
        skips = 0
        for f in self.conv_layers:
            # CausalConv.forward takes (x, x_local); the original passed
            # an extra undefined `g_bct`.
            x, h = f(x, c)
            skips = skips + h
        # Scale by sqrt(1 / n_layers) (without `math`, not imported here)
        skips = skips * ((1.0 / len(self.conv_layers)) ** 0.5)

        x = skips
        for f in self.last_conv_layers:
            x = f(x)

        # Probability of every amplitude class
        x = torch.softmax(x, dim=1)
        return x


In [None]:
""" 本代码仅作为Wide&Deep模型的实现参考
"""

import torch
import torch.nn as nn

class WideDeep(nn.Module):
    """Wide & Deep model: embedded categorical features go through an MLP
    and are concatenated with the wide features before a sigmoid output.

    Args:
        num_wide_feat: number of wide (dense) features W.
        deep_feat_sizes: vocabulary size of every categorical feature.
        deep_feat_dims: embedding dimension of every categorical feature.
        nhiddens: output size of every hidden linear layer of the deep part.
    """

    def __init__(self, num_wide_feat, deep_feat_sizes,
                 deep_feat_dims, nhiddens):

        super(WideDeep, self).__init__()

        self.num_wide_feat = num_wide_feat
        self.deep_feat_sizes = deep_feat_sizes
        self.deep_feat_dims = deep_feat_dims
        self.nhiddens = nhiddens

        # Embedding tables of the deep part
        self.embeds = nn.ModuleList(
            nn.Embedding(deep_feat_size, deep_feat_dim)
            for deep_feat_size, deep_feat_dim
            in zip(deep_feat_sizes, deep_feat_dims))

        self.deep_input_size = sum(deep_feat_dims)

        # Linear layers of the deep part
        self.linears = nn.ModuleList()
        in_size = self.deep_input_size
        for out_size in nhiddens:
            self.linears.append(nn.Linear(in_size, out_size))
            in_size = out_size

        # Joint projection over wide and deep features
        self.proj = nn.Linear(in_size + num_wide_feat, 1)

    def forward(self, wide_input, deep_input):
        """Return the predicted probability for every sample.

        Args:
            wide_input: float tensor of shape N×W.
            deep_input: integer tensor of shape N×D with one categorical
                index per deep feature.

        Returns:
            Tensor of shape (N,) with values in (0, 1).
        """
        embed_feats = [
            embed(column)
            for embed, column in zip(self.embeds, deep_input.unbind(1))
        ]
        deep_feats = torch.cat(embed_feats, 1)

        # Deep feature transformation
        for layer in self.linears:
            deep_feats = torch.relu(layer(deep_feats))

        # Concatenate wide and deep features
        wide_deep_feats = torch.cat([wide_input, deep_feats], -1)
        # squeeze(-1) only removes the output axis, so a batch of one
        # still yields shape (1,) instead of a 0-d tensor.
        return torch.sigmoid(self.proj(wide_deep_feats)).squeeze(-1)


In [None]:
import torch
# Saving and loading a static (TorchScript) model
from torchvision.models import resnet18
m = resnet18(pretrained=True)
# Trace in inference mode so BatchNorm uses its running statistics
# instead of recording (and updating) training-mode behaviour.
m.eval()
# Convert the dynamic-graph model into a static graph by tracing
static_model = torch.jit.trace(m, torch.randn(1, 3, 224, 224))
# Save the model
torch.jit.save(static_model, "resnet18.pt")
# Load the model: archives written by torch.jit.save must be read with
# torch.jit.load, not torch.load.
static_model = torch.jit.load("resnet18.pt")

# Export to ONNX
from torchvision.models import resnet18
# The onnx Python package is required: pip install onnx
import onnx
m = resnet18(pretrained=True)
torch.onnx.export(m, torch.randn(1, 3, 224, 224), 
                  "resnet18.onnx", verbose=True)
# Read the exported model back with onnx
m = onnx.load("resnet18.onnx")
# Check that the model is well-formed
onnx.checker.check_model(m)
# Build a printable representation of the computation graph
# (shown as the cell output in a notebook)
onnx.helper.printable_graph(m.graph)


C++代码

In [None]:
#include <iostream>
#include <torch/torch.h>
#include <torch/script.h>

int main() {
    auto mod = torch::jit::load("resnet18.pt");
    std::vector<torch::jit::IValue> inputs;
    inputs.push_back(torch::randn({1, 3, 224, 224}));
    std::cout<<mod.forward(inputs).toTensor().argmax(1)<<std::endl;
    return 0;
}


In [None]:
# 静态加载
import torch
import gelu

# 同样可以通过 gelu_fn = GELU.apply 使用这个激活函数（不要绑定到名字 gelu，否则会覆盖上面导入的扩展模块）
class GELU(torch.autograd.Function):
    """Autograd function wrapping the ``gelu`` extension module.

    Use it through ``GELU.apply(x)``. Do not bind the result of
    ``GELU.apply`` to the name ``gelu``: that would shadow the extension
    module that ``forward``/``backward`` call into.
    """

    @staticmethod
    def forward(ctx, input):
        # save_for_backward is the supported way to keep tensors for the
        # backward pass (assigning them directly on ctx bypasses
        # autograd's bookkeeping).
        ctx.save_for_backward(input)
        return gelu.forward(input)

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        return gelu.backward(grad_output, input)

# 动态加载
import torch
from torch.utils.cpp_extension import load

# PyTorch会进行自动编译，生成对应的模块
gelu = load(name="gelu", sources=["gelu/gelu.cc"])


class GELU(torch.autograd.Function):
    """Autograd function wrapping the JIT-compiled ``gelu`` extension.

    Use it through ``GELU.apply(x)``. Do not bind the result of
    ``GELU.apply`` to the name ``gelu``: that would shadow the extension
    module that ``forward``/``backward`` call into.
    """

    @staticmethod
    def forward(ctx, input):
        # save_for_backward is the supported way to keep tensors for the
        # backward pass (assigning them directly on ctx bypasses
        # autograd's bookkeeping).
        ctx.save_for_backward(input)
        return gelu.forward(input)

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        return gelu.backward(grad_output, input)


In [None]:
""" 本代码仅作为钩子函数的演示代码
"""

# 模块执行之前的前向计算钩子的定义
# 定义nn.Module的一个实例模块
module = ...
def hook(module, input):
    """Template for a forward pre-hook (registered with
    ``register_forward_pre_hook``); this version returns ``input`` unchanged.
    """
    # Code that operates on the module's weights or on the input goes here.
    # The hook may return the modified tensor(s) or None.
    return input
handle = module.register_forward_pre_hook(hook)

# 模块执行之后的前向计算钩子的定义
# 定义nn.Module的一个实例模块
module = ...
def hook(module, input, output):
    """Template for a forward hook (registered with
    ``register_forward_hook``); this version returns ``output`` unchanged.
    """
    # Code that operates on the module's weights or on the input/output
    # goes here. The hook may return the modified tensor(s) or None.
    return output
handle = module.register_forward_hook(hook)

# 模块执行之后的反向传播钩子的定义
# 定义nn.Module的一个实例模块
module = ...
def hook(module, grad_input, grad_output):
    """Template for a backward hook (registered with
    ``register_backward_hook``); this version returns ``grad_input``
    unchanged.
    """
    # Code that operates on the module's weights or on the input/output
    # gradients goes here. The hook may return the modified gradient(s)
    # or None. (The original returned the undefined name `output`.)
    return grad_input
handle = module.register_backward_hook(hook)

# 钩子的使用方法示例
import torch
import torch.nn as nn
def print_pre_shape(module, input):
    """Forward pre-hook that prints the weight shape of ``module`` and the
    shape of its first positional input."""
    print("模块前钩子")
    for shape in (module.weight.shape, input[0].shape):
        print(shape)
def print_post_shape(module, input, output):
    """Forward hook that prints the weight shape of ``module``, the shape
    of its first positional input and the shape of its output."""
    print("模块后钩子")
    print(module.weight.shape)
    # `input` is the tuple of positional arguments passed to the module.
    print(input[0].shape)
    # `output` is the module's output tensor itself, so indexing it with
    # [0] (as the original did) would print the shape of the first sample
    # rather than of the whole output.
    print(output.shape)
def print_grad_shape(module, grad_input, grad_output):
    """Backward hook that prints the shapes of the weight gradient, the
    first input gradient and the first output gradient.

    ``None`` is printed for any gradient that is not available (e.g. the
    weight gradient has not been accumulated yet, or the input does not
    require gradients) instead of raising AttributeError.
    """
    def _shape(tensor):
        return None if tensor is None else tensor.shape

    print("梯度钩子")
    print(_shape(module.weight.grad))
    print(_shape(grad_input[0]))
    print(_shape(grad_output[0]))
# Register the three hooks on a convolution layer and run one forward pass.
conv = nn.Conv2d(16, 32, kernel_size=(3,3))
handle1 = conv.register_forward_pre_hook(print_pre_shape)
handle2 = conv.register_forward_hook(print_post_shape)
handle3 = conv.register_backward_hook(print_grad_shape)
input = torch.randn(4, 16, 128, 128, requires_grad=True)
# Only the two forward hooks fire here; no backward pass is run in this
# cell, so the gradient hook is registered but not triggered.
ret = conv(input)

In [None]:
""" 本代码仅供参考
"""

# jit.trace函数的签名
torch.jit.trace(func, example_inputs, optimize=None, 
                check_trace=True, check_inputs=None, check_tolerance=1e-5)

def func(a):
    """Return ``a`` squared plus one (element-wise)."""
    squared = a.pow(2)
    return squared + 1

class Mod(nn.Module):
    """Parameter-free module computing ``a ** 2 + 1`` element-wise."""

    def __init__(self):
        super().__init__()

    def forward(self, a):
        squared = a.pow(2)
        return squared + 1

# Trace the plain function and print the recorded graph.
ret = torch.jit.trace(func, torch.randn(3,3))
print(ret.graph)
# Printed output:
# graph(%a : Float(3, 3)):
#   %1 : int = prim::Constant[value=2]()
#   %2 : Float(3, 3) = aten::pow(%a, %1)
#   %3 : Long() = prim::Constant[value={1}]()
#   %4 : int = prim::Constant[value=1]()
#   %5 : Float(3, 3) = aten::add(%2, %3, %4)
#  return (%5)
# Trace the module version and print its graph.
m = Mod()
ret = torch.jit.trace(m, torch.randn(3,3))
print(ret.graph)
# Printed output:
# graph(%self : ClassType<Mod>,
#       %a : Float(3, 3)):
#   %2 : int = prim::Constant[value=2](), scope: Mod
#   %3 : Float(3, 3) = aten::pow(%a, %2), scope: Mod
#   %4 : Long() = prim::Constant[value={1}](), scope: Mod
#   %5 : int = prim::Constant[value=1](), scope: Mod
#   %6 : Float(3, 3) = aten::add(%3, %4, %5), scope: Mod
#   return (%6)

# jit.trace_module函数的签名
torch.jit.trace_module(mod, inputs, optimize=None, check_trace=True,
                       check_inputs=None, check_tolerance=1e-5)

class Mod(nn.Module):
    """Parameter-free module exposing two traceable methods:
    ``forward`` (``a ** 2 + 1``) and ``square`` (``a ** 2``)."""

    def __init__(self):
        super().__init__()

    def square(self, a):
        return a.pow(2)

    def forward(self, a):
        squared = a.pow(2)
        return squared + 1

# One example input per method to be traced, keyed by method name.
trace_input = {"forward": torch.randn(3,3), "square": torch.randn(3,3)}
m = Mod()
ret = torch.jit.trace_module(m, trace_input)
print(ret.forward.graph) # same output as the torch.jit.trace call earlier
print(ret.square.graph)
# Printed output:
# graph(%self : ClassType<Mod>,
#       %a : Float(3, 3)):
#  %2 : int = prim::Constant[value=2]()
#  %3 : Float(3, 3) = aten::pow(%a, %2)
#  return (%3)

# 使用torch.jit.script方法进行修饰
# 也可以使用 @torch.jit.script 对函数进行装饰
def func(a):
    """Return ``|a|`` when the norm of ``a`` exceeds 1.0, else ``a ** 2``."""
    if a.norm() > 1.0:
        return a.abs()
    return a.pow(2)

# Compile the function with torch.jit.script and print its graph; the
# data-dependent branch appears as a prim::If node.
ret = torch.jit.script(func)
print(ret.graph)
# Printed output:
# graph(%a.1 : Tensor):
#   %4 : float = prim::Constant[value=1]()
#   %10 : int = prim::Constant[value=2]()
#   %3 : Tensor = aten::norm(%a.1, %10)
#   %5 : Tensor = aten::gt(%3, %4)
#   %6 : bool = aten::Bool(%5)
#   %18 : Tensor = prim::If(%6)
#     block0():
#       %8 : Tensor = aten::abs(%a.1)
#       -> (%8)
#     block1():
#       %11 : Tensor = aten::pow(%a.1, %10)
#       -> (%11)
#   return (%18)

class Mod(nn.Module):
    """Module used to demonstrate ``torch.jit.script`` method decorators."""

    def __init__(self):
        super().__init__()

    # forward needs no decorator: it is treated as if it were marked
    # with torch.jit.export.
    def forward(self, a):
        if a.norm() > 1.0:
            return a.abs()
        return a.pow(2)

    # Explicitly exported method
    @torch.jit.export
    def square(self, a):
        squared = a.pow(2)
        return squared

    # Method marked with torch.jit.ignore (not exported)
    @torch.jit.ignore
    def abs(self, a):
        magnitude = a.abs()
        return magnitude

mod = Mod()
ret = torch.jit.script(mod)

