## Sample Text

In [None]:
# Print the input text file; the inference cells below pass this same path
# to the inference scripts through their `-i` option.
!cat phrases/phrase.txt

# Tacotron2 + Waveglow

### PyTorch

In [None]:
# Delete the local Python bytecode cache before launching the script
# (presumably so that no stale compiled modules are reused -- TODO confirm).
!rm -rf __pycache__
# Run the PyTorch inference script on the phrase file using the fp16 Tacotron2
# and WaveGlow checkpoints from checkpoints/. `-o outputs/` is the directory the
# playback cell below reads outputs/waveglow_audio_0.wav from.
!python inference.py -i phrases/phrase.txt --tacotron2 "checkpoints/nvidia_tacotron2pyt_fp16.pt" --waveglow "checkpoints/nvidia_waveglow256pyt_fp16.pt" --wn-channels 256 --sigma-infer 0.6 -o outputs/ --include-warmup --fp16

In [None]:
# Play the audio file expected from the PyTorch Tacotron2 + WaveGlow run above.
#
# `display` is imported explicitly so the cell does not depend on IPython
# injecting it as a builtin. The path is passed as `filename=` so that a missing
# file raises FileNotFoundError instead of being interpreted as raw audio data
# (which fails with an unrelated "rate must be specified" error).
from IPython.display import Audio, display

display(Audio(filename="outputs/waveglow_audio_0.wav"))

### TensorRT

In [None]:
# Delete the local Python bytecode cache before launching the script
# (presumably so that no stale compiled modules are reused -- TODO confirm).
!rm -rf __pycache__
# Run the TensorRT inference script on the phrase file with the fp16 encoder,
# decoder, postnet and WaveGlow engine files under outputs/.
# NOTE(review): no cell in this notebook creates the *.engine files, so they
# presumably have to be built beforehand -- confirm against the project README.
!python inference_trt.py -i phrases/phrase.txt --encoder outputs/encoder_fp16.engine --decoder outputs/decoder_iter_fp16.engine --postnet outputs/postnet_fp16.engine --waveglow outputs/waveglow_fp16.engine -o outputs/ --include-warmup --fp16

In [None]:
# Play the audio file expected from the TensorRT Tacotron2 + WaveGlow run above.
#
# `display` is imported explicitly so the cell does not depend on IPython
# injecting it as a builtin. The path is passed as `filename=` so that a missing
# file raises FileNotFoundError instead of being interpreted as raw audio data
# (which fails with an unrelated "rate must be specified" error).
from IPython.display import Audio, display

display(Audio(filename="outputs/waveglow_audio_0_trt.wav"))

### Latency Comparison

Run each inference case ten times, then average the latencies.

In [None]:
torch_latency_all = []
trt_latency_all = []
for i in range(10):
    !rm -rf __pycache__
    !python inference.py -i phrases/phrase.txt --tacotron2 "checkpoints/nvidia_tacotron2pyt_fp16.pt" --waveglow "checkpoints/nvidia_waveglow256pyt_fp16.pt" --wn-channels 256 --sigma-infer 0.6 -o outputs/ --include-warmup --fp16
    latency = !cat logs/nvlog.json | tail -1 | awk '{print $(NF)}' | sed 's/}//g'
    torch_latency_all = torch_latency_all + latency    
    
    !rm -rf __pycache__
    !python inference_trt.py -i phrases/phrase.txt --encoder outputs/encoder_fp16.engine --decoder outputs/decoder_iter_fp16.engine --postnet outputs/postnet_fp16.engine --waveglow outputs/waveglow_fp16.engine -o outputs/ --include-warmup --fp16
    latency = !cat logs/nvlog.json | tail -1 | awk '{print $(NF)}' | sed 's/}//g'
    trt_latency_all = trt_latency_all + latency
     

In [None]:
import numpy as np

# Summarise the WaveGlow latency readings gathered by the benchmark loop.
# The readings arrive as strings, so each list is converted to floats first.
# np.std is called with NumPy's defaults.

# PyTorch
torch_latency_all = list(map(float, torch_latency_all))
torch_latency_mean = np.mean(torch_latency_all)
torch_latency_std = np.std(torch_latency_all)
print("PyTorch")
print("avg latency:", torch_latency_mean)
print("latency std:", torch_latency_std)
print("")

# TensorRT
trt_latency_all = list(map(float, trt_latency_all))
trt_latency_mean = np.mean(trt_latency_all)
trt_latency_std = np.std(trt_latency_all)
print("TensorRT")
print("avg latency:", trt_latency_mean)
print("latency std:", trt_latency_std)

# Tacotron2 + ParallelWaveGan

The synthesized output contains some noise because the pretrained Tacotron2 and ParallelWaveGAN models use different mel bases.

### PyTorch

In [None]:
# Delete the local Python bytecode cache before launching the script
# (presumably so that no stale compiled modules are reused -- TODO confirm).
!rm -rf __pycache__
# Run the PyTorch inference script on the phrase file using the fp16 Tacotron2
# checkpoint and the ParallelWaveGAN checkpoint from checkpoints/. `-o outputs/`
# is the directory the playback cell below reads
# outputs/parallelwavegan_audio_0.wav from.
!python inference.py -i phrases/phrase.txt --tacotron2 "checkpoints/nvidia_tacotron2pyt_fp16.pt" --parallelwavegan "checkpoints/ljspeech_parallel_wavegan.v1.long/checkpoint-1000000steps.pkl" -o outputs/ --include-warmup --fp16

In [None]:
# Play the audio file expected from the PyTorch Tacotron2 + ParallelWaveGAN
# run above.
#
# `display` is imported explicitly so the cell does not depend on IPython
# injecting it as a builtin. The path is passed as `filename=` so that a missing
# file raises FileNotFoundError instead of being interpreted as raw audio data
# (which fails with an unrelated "rate must be specified" error).
from IPython.display import Audio, display

display(Audio(filename="outputs/parallelwavegan_audio_0.wav"))

### TensorRT

In [None]:
# Delete the local Python bytecode cache before launching the script
# (presumably so that no stale compiled modules are reused -- TODO confirm).
!rm -rf __pycache__
# Run the TensorRT inference script on the phrase file with the fp16 encoder,
# decoder, postnet and ParallelWaveGAN engine files under outputs/.
# NOTE(review): no cell in this notebook creates the *.engine files, so they
# presumably have to be built beforehand -- confirm against the project README.
!python inference_trt.py -i phrases/phrase.txt --encoder outputs/encoder_fp16.engine --decoder outputs/decoder_iter_fp16.engine --postnet outputs/postnet_fp16.engine --parallelwavegan outputs/parallelwavegan_fp16.engine -o outputs/ --include-warmup --fp16

In [None]:
# Play the audio file expected from the TensorRT Tacotron2 + ParallelWaveGAN
# run above.
#
# `display` is imported explicitly so the cell does not depend on IPython
# injecting it as a builtin. The path is passed as `filename=` so that a missing
# file raises FileNotFoundError instead of being interpreted as raw audio data
# (which fails with an unrelated "rate must be specified" error).
from IPython.display import Audio, display

display(Audio(filename="outputs/parallelwavegan_audio_0_trt.wav"))

### Latency Comparison

Run each inference case ten times, then average the latencies.

In [None]:
pwg_torch_latency_all = []
pwg_trt_latency_all = []
for i in range(10):
    !rm -rf __pycache__
    !python inference.py -i phrases/phrase.txt --tacotron2 "checkpoints/nvidia_tacotron2pyt_fp16.pt" --parallelwavegan "checkpoints/ljspeech_parallel_wavegan.v1.long/checkpoint-1000000steps.pkl" -o outputs/ --include-warmup --fp16
    latency = !cat logs/nvlog.json | tail -1 | awk '{print $(NF)}' | sed 's/}//g'
    pwg_torch_latency_all = pwg_torch_latency_all + latency    
    
    !rm -rf __pycache__
    !python inference_trt.py -i phrases/phrase.txt --encoder outputs/encoder_fp16.engine --decoder outputs/decoder_iter_fp16.engine --postnet outputs/postnet_fp16.engine --parallelwavegan outputs/parallelwavegan_fp16.engine -o outputs/ --include-warmup --fp16
    latency = !cat logs/nvlog.json | tail -1 | awk '{print $(NF)}' | sed 's/}//g'
    pwg_trt_latency_all = pwg_trt_latency_all + latency
     

In [None]:
import numpy as np

# Summarise the ParallelWaveGAN latency readings gathered by the benchmark loop.
# The readings arrive as strings, so each list is converted to floats first.
# np.std is called with NumPy's defaults.

# PyTorch
pwg_torch_latency_all = list(map(float, pwg_torch_latency_all))
pwg_torch_latency_mean = np.mean(pwg_torch_latency_all)
pwg_torch_latency_std = np.std(pwg_torch_latency_all)
print("PyTorch")
print("avg latency:", pwg_torch_latency_mean)
print("latency std:", pwg_torch_latency_std)
print("")

# TensorRT
pwg_trt_latency_all = list(map(float, pwg_trt_latency_all))
pwg_trt_latency_mean = np.mean(pwg_trt_latency_all)
pwg_trt_latency_std = np.std(pwg_trt_latency_all)
print("TensorRT")
print("avg latency:", pwg_trt_latency_mean)
print("latency std:", pwg_trt_latency_std)