# Torch to ONNX

### Tacotron2

In [1]:
# Convert Tacotron2 from a PyTorch checkpoint (.pt) to ONNX.
# `mkdir -p` creates outputs/ when missing and is a no-op when it already exists,
# so this cell can be re-run safely.
!mkdir -p outputs
# The fp16 checkpoint is exported with --fp16; results are written under outputs/.
# NOTE(review): the TensorRT step further down reads outputs/encoder.onnx,
# outputs/decoder_iter.onnx and outputs/postnet.onnx — presumably all produced
# by this script; confirm.
!python ./exports/convert_tacotron2_to_onnx.py --tacotron2 checkpoints/nvidia_tacotron2pyt_fp16.pt --output outputs/ --fp16

  torch._C._jit_pass_onnx_node_shape_type_inference(
  torch._C._jit_pass_onnx_node_shape_type_inference(
  torch._C._jit_pass_onnx_node_shape_type_inference(
  _C._jit_pass_onnx_graph_shape_type_inference(
  _C._jit_pass_onnx_graph_shape_type_inference(


### WaveGlow

In [2]:
# Convert WaveGlow from a PyTorch checkpoint (.pt) to ONNX, passing the model
# configuration from waveglow/waveglow_config.json; results are written under
# outputs/ with --fp16 enabled.
# NOTE(review): --wn-channels 256 presumably has to match the "waveglow256"
# checkpoint variant — confirm before swapping checkpoints.
!python ./exports/convert_waveglow_to_onnx.py --waveglow checkpoints/nvidia_waveglow256pyt_fp16.pt --config-file waveglow/waveglow_config.json --wn-channels 256 --output outputs/ --fp16

  n_half = int(audio.size(1) // 2)


In [3]:
# Sanity-check the exported WaveGlow ONNX model: Polygraphy prints the opset,
# the graph inputs/outputs (names, dtypes, shapes) and the initializer/node counts.
!polygraphy inspect model outputs/waveglow.onnx

[I] Loading model: /workspace/tensorRT/pwavegan/outputs/waveglow.onnx
[I] ==== ONNX Model ====
    Name: torch_jit | Opset: 12
    
    ---- 2 Graph Input(s) ----
    {mel [dtype=float16, shape=('batch_size', 80, 'mel_seq')],
     z [dtype=float16, shape=('batch_size', 8, 'z_seq')]}
    
    ---- 1 Graph Output(s) ----
    {audio [dtype=float16, shape=('batch_size', 'audio_seq')]}
    
    ---- 629 Initializer(s) ----
    
    ---- 3038 Node(s) ----


### ParallelWaveGan

In [4]:
# Get Generator
# Builds the ParallelWaveGAN generator from its training config, loads the
# trained weights, and prepares dummy inputs for the ONNX export cell below.
import torch
import yaml
from parallel_wavegan.models import ParallelWaveGANGenerator

# Deserialize the checkpoint onto the CPU. Without map_location the stored
# tensors are restored to whatever device they were saved from, which fails
# when that device is unavailable and puts entries that are never used here on
# the GPU. The generator is moved to the GPU explicitly further down.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
pwg_ckpt=torch.load("checkpoints/ljspeech_parallel_wavegan.v1.long/checkpoint-1000000steps.pkl",
                    map_location="cpu")
state_dict = pwg_ckpt['model']['generator']
with open("checkpoints/ljspeech_parallel_wavegan.v1.long/config.yml") as f:
    cfg = yaml.safe_load(f)
generator_cfg = cfg['generator_params']

generator = ParallelWaveGANGenerator(**generator_cfg)
generator.load_state_dict(state_dict)
generator.cuda().eval()

# Dummy inputs used only for tracing the model during export.
# The conditioning channel count and the context padding are taken from the same
# config that parameterizes the generator, so they cannot drift from the model;
# the fallbacks are the values this cell previously hard-coded.
aux_channels = generator_cfg.get("aux_channels", 80)
aux_context_window = generator_cfg.get("aux_context_window", 2)

# NOTE(review): 256 is presumably the product of the generator's upsample
# scales (samples per conditioning frame) — confirm against config.yml.
upsample_factor=256
sample_c = torch.randn(1, aux_channels, 380)
# One noise sample per output audio sample: frames * upsample_factor.
sample_x = torch.randn(1, 1, sample_c.shape[2] * upsample_factor).cuda()
# Replicate-pad the conditioning features by the context window on both sides.
sample_c = torch.nn.ReplicationPad1d(aux_context_window)(sample_c).cuda()

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Export to ONNX
# Traces the generator in half precision with the dummy inputs prepared above
# and writes the resulting graph to ./outputs/parallelwavegan.onnx.
import os

opset_version = 12
output_path = "./outputs/parallelwavegan.onnx"

# Only the last axis (index 2) of each tensor is marked dynamic; the leading
# axes keep the sizes of the sample inputs.
dynamic_axes = {
    "x": {2: "x_seq"},
    "c": {2: "c_seq"},
    "audio": {2: "audio_seq"},
}

# Gradients are not needed for tracing.
with torch.no_grad():
    torch.onnx.export(
        generator.half(),  # note: Module.half() also converts `generator` itself
        (sample_x.half(), sample_c.half()),
        output_path,
        opset_version=opset_version,
        do_constant_folding=True,
        input_names=["x", "c"],
        output_names=["audio"],
        dynamic_axes=dynamic_axes,
    )

  assert c.size(-1) == x.size(-1)


In [6]:
# Sanity-check the exported ParallelWaveGAN ONNX model: Polygraphy prints the
# opset, the graph inputs/outputs (names, dtypes, shapes) and the
# initializer/node counts.
!polygraphy inspect model outputs/parallelwavegan.onnx

[I] Loading model: /workspace/tensorRT/pwavegan/outputs/parallelwavegan.onnx
[I] ==== ONNX Model ====
    Name: torch_jit | Opset: 12
    
    ---- 2 Graph Input(s) ----
    {x [dtype=float16, shape=(1, 1, 'x_seq')],
     c [dtype=float16, shape=(1, 80, 'c_seq')]}
    
    ---- 1 Graph Output(s) ----
    {audio [dtype=float16, shape=(1, 1, 'audio_seq')]}
    
    ---- 191 Initializer(s) ----
    
    ---- 501 Node(s) ----


In [7]:
# Fold constants in the exported ParallelWaveGAN graph with Polygraphy.
# The -o path is the same as the input path, so the model file is overwritten
# in place; re-run the export cell above to get the unfolded graph back.
!polygraphy surgeon sanitize outputs/parallelwavegan.onnx --fold-constants -o outputs/parallelwavegan.onnx

[I] Loading model: /tmp/tmp_polygraphy_a0a80f8714047f4b38701f2b0e68bc9ea659a026286e28bc.onnx
[I] Original Model:
    Name: torch_jit | Opset: 12
    
    ---- 2 Graph Input(s) ----
    {x [dtype=float16, shape=(1, 1, 'x_seq')],
     c [dtype=float16, shape=(1, 80, 'c_seq')]}
    
    ---- 1 Graph Output(s) ----
    {audio [dtype=float16, shape=(1, 1, 'audio_seq')]}
    
    ---- 191 Initializer(s) ----
    
    ---- 501 Node(s) ----
    
[38;5;14m[I] Folding Constants | Pass 1[0m
[38;5;10m[I]     Total Nodes | Original:   501, After Folding:   434 |    67 Nodes Folded[0m
[38;5;14m[I] Folding Constants | Pass 2[0m
[38;5;10m[I]     Total Nodes | Original:   434, After Folding:   434 |     0 Nodes Folded[0m
[I] Saving ONNX model to: outputs/parallelwavegan.onnx
[I] New Model:
    Name: torch_jit | Opset: 12
    
    ---- 2 Graph Input(s) ----
    {x [dtype=float16, shape=(1, 1, 'x_seq')],
     c [dtype=float16, shape=(1, 80, 'c_seq')]}
    
    ---- 1 Graph Output(s) ----
    {aud

# ONNX to TRT

### Tacotron2

In [8]:
# Convert the Tacotron2 ONNX models to TensorRT.
# The three ONNX files (encoder.onnx, decoder_iter.onnx, postnet.onnx) are read
# from outputs/ and the results are written back under outputs/ (-o), with
# --fp16 enabled.
!python ./exports/convert_onnx_to_trt.py --encoder outputs/encoder.onnx --decoder outputs/decoder_iter.onnx --postnet outputs/postnet.onnx -o outputs/ --fp16

Building Encoder ...
[10/18/2022-00:14:35] [TRT] [W] onnx2trt_utils.cpp:369: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
[10/18/2022-00:14:36] [TRT] [W] TensorRT was linked against cuDNN 8.4.1 but loaded cuDNN 8.4.0
[10/18/2022-00:14:39] [TRT] [W] Weights [name=tacotron2.embedding.weight] had the following issues when converted to FP16:
[10/18/2022-00:14:39] [TRT] [W]  - Subnormal FP16 values detected. 
[10/18/2022-00:14:39] [TRT] [W] If this is not the desired behavior, please modify the weights or retrain with regularization to reduce the magnitude of the weights.
[10/18/2022-00:14:52] [TRT] [W] Weights [name=onnx::LSTM_300] had the following issues when converted to FP16:
[10/18/2022-00:14:52] [TRT] [W]  - Subnormal FP16 values detected. 
[10/18/2022-00:14:52] [TRT] [W] If this is not the desired behavior, please modify the weights or retrain with regularization to reduce the magnitude of th

### WaveGlow

Note: the sigma value is fixed at 0.6 in the `infer_onnx` function used for the ONNX conversion.

In [9]:
# Convert the WaveGlow ONNX model (outputs/waveglow.onnx) to TensorRT;
# results are written under outputs/ (-o), with --fp16 enabled.
!python ./exports/convert_onnx_to_trt.py --waveglow outputs/waveglow.onnx -o outputs/ --fp16

Building WaveGlow ...
[10/18/2022-00:16:07] [TRT] [W] onnx2trt_utils.cpp:369: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
[10/18/2022-00:16:07] [TRT] [W] onnx2trt_utils.cpp:395: One or more weights outside the range of INT32 was clamped
[10/18/2022-00:16:07] [TRT] [W] onnx2trt_utils.cpp:395: One or more weights outside the range of INT32 was clamped
[10/18/2022-00:16:07] [TRT] [W] onnx2trt_utils.cpp:395: One or more weights outside the range of INT32 was clamped
[10/18/2022-00:16:07] [TRT] [W] onnx2trt_utils.cpp:395: One or more weights outside the range of INT32 was clamped
[10/18/2022-00:16:07] [TRT] [W] onnx2trt_utils.cpp:395: One or more weights outside the range of INT32 was clamped
[10/18/2022-00:16:07] [TRT] [W] onnx2trt_utils.cpp:395: One or more weights outside the range of INT32 was clamped
[10/18/2022-00:16:07] [TRT] [W] onnx2trt_utils.cpp:395: One or more weights outside the range o

### ParallelWaveGan

In [11]:
# Convert the ParallelWaveGAN ONNX model (outputs/parallelwavegan.onnx, as
# rewritten by the constant-folding cell above) to TensorRT; results are written
# under outputs/ (-o), with --fp16 enabled.
!python ./exports/convert_onnx_to_trt.py  --parallelwavegan outputs/parallelwavegan.onnx -o outputs/ --fp16

Building ParallelWaveGan ...
[10/18/2022-01:52:23] [TRT] [W] onnx2trt_utils.cpp:369: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
[10/18/2022-01:52:39] [TRT] [W] TensorRT was linked against cuDNN 8.4.1 but loaded cuDNN 8.4.0
[10/18/2022-01:55:35] [TRT] [W] Min value of this profile is not valid
[10/18/2022-01:55:37] [TRT] [W] TensorRT was linked against cuDNN 8.4.1 but loaded cuDNN 8.4.0
[10/18/2022-01:55:37] [TRT] [W] The getMaxBatchSize() function should not be used with an engine built from a network created with NetworkDefinitionCreationFlag::kEXPLICIT_BATCH flag. This function will always return 1.
[10/18/2022-01:55:37] [TRT] [W] The getMaxBatchSize() function should not be used with an engine built from a network created with NetworkDefinitionCreationFlag::kEXPLICIT_BATCH flag. This function will always return 1.
