docs/_notebooks/Hugging-Face-BERT.html



<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  <meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />

  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <title>Masked Language Modeling (MLM) with Hugging Face BERT Transformer &mdash; Torch-TensorRT v1.3.0 documentation</title>


  <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
  <!-- <link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> -->
  <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
  <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../_static/collapsible-lists/css/tree_view.css" type="text/css" />
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
  <!-- Google Analytics -->

  <!-- End Google Analytics -->


  <script src="../_static/js/modernizr.min.js"></script>

  <!-- Preload the theme fonts -->

<link rel="preload" href="../_static/fonts/FreightSans/freight-sans-book.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/FreightSans/freight-sans-medium.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/IBMPlexMono/IBMPlexMono-Medium.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/FreightSans/freight-sans-bold.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/FreightSans/freight-sans-medium-italic.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/IBMPlexMono/IBMPlexMono-SemiBold.woff2" as="font" type="font/woff2" crossorigin="anonymous">

<!-- Preload the katex fonts -->

<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Math-Italic.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Main-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Main-Bold.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size1-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size4-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size2-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size3-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Caligraphic-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
  <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.15.2/css/all.css" integrity="sha384-vSIIfh2YWi9wW0r9iZe7RJPrKwp6bG+s9QZMoITbCckVJqGCCRhc+ccxNcdpHuYu" crossorigin="anonymous">
</head>

<div class="container-fluid header-holder tutorials-header" id="header-holder">
  <div class="container">
    <div class="header-container">
      <a class="header-logo" href="https://pytorch.org/" aria-label="PyTorch"></a>

      <div class="main-menu">
        <ul>
          <li>
            <a href="https://pytorch.org/get-started">Get Started</a>
          </li>

          <li>
            <a href="https://pytorch.org/ecosystem">Ecosystem</a>
          </li>

          <li>
            <a href="https://pytorch.org/mobile">Mobile</a>
          </li>

          <li>
            <a href="https://pytorch.org/blog/">Blog</a>
          </li>

          <li>
            <a href="https://pytorch.org/tutorials">Tutorials</a>
          </li>

          <li>
            <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="resource-option with-down-orange-arrow">
                Docs
              </a>
              <div class="resources-dropdown-menu">
                <a class="doc-dropdown-option nav-dropdown-item" href="https://pytorch.org/docs/stable/index.html">
                  <span class="dropdown-title">PyTorch</span>
                  <p></p>
                </a>
                <a class="doc-dropdown-option nav-dropdown-item" href="https://pytorch.org/audio/stable/index.html">
                  <span class="dropdown-title">torchaudio</span>
                  <p></p>
                </a>
                <a class="doc-dropdown-option nav-dropdown-item" href="https://pytorch.org/text/stable/index.html">
                  <span class="dropdown-title">torchtext</span>
                  <p></p>
                </a>
                <a class="doc-dropdown-option nav-dropdown-item" href="https://pytorch.org/vision/stable/index.html">
                  <span class="dropdown-title">torchvision</span>
                  <p></p>
                </a>
                <a class="doc-dropdown-option nav-dropdown-item" href="https://pytorch.org/torcharrow">
                  <span class="dropdown-title">torcharrow</span>
                  <p></p>
                </a>
                <a class="doc-dropdown-option nav-dropdown-item" href="https://pytorch.org/data">
                  <span class="dropdown-title">TorchData</span>
                  <p></p>
                </a>
                <a class="doc-dropdown-option nav-dropdown-item" href="https://pytorch.org/torchrec">
                  <span class="dropdown-title">TorchRec</span>
                  <p></p>
                </a>
                <a class="doc-dropdown-option nav-dropdown-item" href="https://pytorch.org/serve/">
                  <span class="dropdown-title">TorchServe</span>
                  <p></p>
                </a>
                <a class="doc-dropdown-option nav-dropdown-item" href="https://pytorch.org/torchx/">
                  <span class="dropdown-title">TorchX</span>
                  <p></p>
                </a>
                <a class="doc-dropdown-option nav-dropdown-item" href="https://pytorch.org/xla">
                  <span class="dropdown-title">PyTorch on XLA Devices</span>
                  <p></p>
                </a>
            </div>
          </li>

          <li>
            <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="resource-option with-down-arrow">
                Resources
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/features">
                  <span class="dropdown-title">About</span>
                  <p>Learn about PyTorch’s features and capabilities</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/foundation">
                  <span class="dropdown-title">PyTorch Foundation</span>
                  <p>Learn about the PyTorch foundation</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/#community-module">
                  <span class="dropdown-title">Community</span>
                  <p>Join the PyTorch developer community to contribute, learn, and get your questions answered.</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/community-stories">
                  <span class="dropdown-title">Community Stories</span>
                  <p>Learn how our community solves real, everyday machine learning problems with PyTorch.</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/resources">
                  <span class="dropdown-title">Developer Resources</span>
                  <p>Find resources and get questions answered</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/events">
                  <span class="dropdown-title">Events</span>
                  <p>Find events, webinars, and podcasts</p>
                </a>
                <a class="nav-dropdown-item" href="https://discuss.pytorch.org/" target="_blank">
                  <span class="dropdown-title">Forums</span>
                  <p>A place to discuss PyTorch code, issues, install, research</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/hub">
                  <span class="dropdown-title">Models (Beta)</span>
                  <p>Discover, publish, and reuse pre-trained models</p>
                </a>
              </div>
            </div>
          </li>

          <li>
            <a href="https://github.com/pytorch/pytorch">GitHub</a>
          </li>
        </ul>
      </div>

      <a class="main-menu-open-button" href="#" data-behavior="open-mobile-menu"></a>
    </div>
  </div>
</div>

<body class="pytorch-body">


    <div class="table-of-contents-link-wrapper">
      <span>Table of Contents</span>
      <a href="#" class="toggle-table-of-contents" data-behavior="toggle-table-of-contents"></a>
    </div>

    <nav data-toggle="wy-nav-shift" class="pytorch-left-menu" id="pytorch-left-menu">
      <div class="pytorch-side-scroll">
        <div class="pytorch-menu pytorch-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          <div class="pytorch-left-menu-search">


                <div class="version">
                  v1.3.0
                </div>


<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
    <input type="text" name="q" placeholder="Search Docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>


          </div>


              <p class="caption" role="heading"><span class="caption-text">Getting Started</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../getting_started/installation.html">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../getting_started/getting_started_with_python_api.html">Using Torch-TensorRT in Python</a></li>
<li class="toctree-l1"><a class="reference internal" href="../getting_started/getting_started_with_cpp_api.html">Using Torch-TensorRT in  C++</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Tutorials</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/creating_torchscript_module_in_python.html">Creating a TorchScript Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/creating_torchscript_module_in_python.html#working-with-torchscript-in-python">Working with TorchScript in Python</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/creating_torchscript_module_in_python.html#saving-torchscript-module-to-disk">Saving TorchScript Module to Disk</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/getting_started_with_fx_path.html">Torch-TensorRT (FX Frontend) User Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/ptq.html">Post Training Quantization (PTQ)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/runtime.html">Deploying Torch-TensorRT Programs</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/serving_torch_tensorrt_with_triton.html">Serving a Torch-TensorRT model with Triton</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/use_from_pytorch.html">Using Torch-TensorRT Directly From PyTorch</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/using_dla.html">DLA</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/notebooks.html">Example notebooks</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Python API Documenation</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../py_api/torch_tensorrt.html">torch_tensorrt</a></li>
<li class="toctree-l1"><a class="reference internal" href="../py_api/logging.html">torch_tensorrt.logging</a></li>
<li class="toctree-l1"><a class="reference internal" href="../py_api/ptq.html">torch_tensorrt.ptq</a></li>
<li class="toctree-l1"><a class="reference internal" href="../py_api/ts.html">torch_tensorrt.ts</a></li>
<li class="toctree-l1"><a class="reference internal" href="../py_api/fx.html">torch_tensorrt.fx</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">C++ API Documenation</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../_cpp_api/torch_tensort_cpp.html">Torch-TensorRT C++ API</a></li>
<li class="toctree-l1"><a class="reference internal" href="../_cpp_api/namespace_torch_tensorrt.html">Namespace torch_tensorrt</a></li>
<li class="toctree-l1"><a class="reference internal" href="../_cpp_api/namespace_torch_tensorrt__logging.html">Namespace torch_tensorrt::logging</a></li>
<li class="toctree-l1"><a class="reference internal" href="../_cpp_api/namespace_torch_tensorrt__torchscript.html">Namespace torch_tensorrt::torchscript</a></li>
<li class="toctree-l1"><a class="reference internal" href="../_cpp_api/namespace_torch_tensorrt__ptq.html">Namespace torch_tensorrt::ptq</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">CLI Documenation</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../cli/torchtrtc.html">torchtrtc</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Contributor Documentation</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../contributors/system_overview.html">System Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="../contributors/writing_converters.html">Writing Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../contributors/useful_links.html">Useful Links for Torch-TensorRT Development</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Indices</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../indices/supported_ops.html">Operators Supported</a></li>
</ul>


        </div>
      </div>
    </nav>

    <div class="pytorch-container">
      <div class="pytorch-page-level-bar" id="pytorch-page-level-bar">
        <div class="pytorch-breadcrumbs-wrapper">


<div role="navigation" aria-label="breadcrumbs navigation">

  <ul class="pytorch-breadcrumbs">

      <li>
        <a href="../index.html">

            Docs

        </a> &gt;
      </li>


      <li>Masked Language Modeling (MLM) with Hugging Face BERT Transformer</li>


      <li class="pytorch-breadcrumbs-aside">


            <a href="../_sources/_notebooks/Hugging-Face-BERT.ipynb.txt" rel="nofollow"><img src="../_static/images/view-page-source-icon.svg"></a>


      </li>

  </ul>


</div>
        </div>

        <div class="pytorch-shortcuts-wrapper" id="pytorch-shortcuts-wrapper">
          Shortcuts
        </div>
      </div>

      <section data-toggle="wy-nav-shift" id="pytorch-content-wrap" class="pytorch-content-wrap">
        <div class="pytorch-content-left">


          <div class="rst-content">

            <div role="main" class="main-content" itemscope="itemscope" itemtype="http://schema.org/Article">
             <article itemprop="articleBody" id="pytorch-article" class="pytorch-article">


<style>
/* CSS for nbsphinx extension */

/* remove conflicting styling from Sphinx themes */
div.nbinput.container div.prompt *,
div.nboutput.container div.prompt *,
div.nbinput.container div.input_area pre,
div.nboutput.container div.output_area pre,
div.nbinput.container div.input_area .highlight,
div.nboutput.container div.output_area .highlight {
    border: none;
    padding: 0;
    margin: 0;
    box-shadow: none;
}

div.nbinput.container > div[class*=highlight],
div.nboutput.container > div[class*=highlight] {
    margin: 0;
}

div.nbinput.container div.prompt *,
div.nboutput.container div.prompt * {
    background: none;
}

div.nboutput.container div.output_area .highlight,
div.nboutput.container div.output_area pre {
    background: unset;
}

div.nboutput.container div.output_area div.highlight {
    color: unset;  /* override Pygments text color */
}

/* avoid gaps between output lines */
div.nboutput.container div[class*=highlight] pre {
    line-height: normal;
}

/* input/output containers */
div.nbinput.container,
div.nboutput.container {
    display: -webkit-flex;
    display: flex;
    align-items: flex-start;
    margin: 0;
    width: 100%;
}
@media (max-width: 540px) {
    div.nbinput.container,
    div.nboutput.container {
        flex-direction: column;
    }
}

/* input container */
div.nbinput.container {
    padding-top: 5px;
}

/* last container */
div.nblast.container {
    padding-bottom: 5px;
}

/* input prompt */
div.nbinput.container div.prompt pre {
    color: #307FC1;
}

/* output prompt */
div.nboutput.container div.prompt pre {
    color: #BF5B3D;
}

/* all prompts */
div.nbinput.container div.prompt,
div.nboutput.container div.prompt {
    width: 4.5ex;
    padding-top: 5px;
    position: relative;
    user-select: none;
}

div.nbinput.container div.prompt > div,
div.nboutput.container div.prompt > div {
    position: absolute;
    right: 0;
    margin-right: 0.3ex;
}

@media (max-width: 540px) {
    div.nbinput.container div.prompt,
    div.nboutput.container div.prompt {
        width: unset;
        text-align: left;
        padding: 0.4em;
    }
    div.nboutput.container div.prompt.empty {
        padding: 0;
    }

    div.nbinput.container div.prompt > div,
    div.nboutput.container div.prompt > div {
        position: unset;
    }
}

/* disable scrollbars on prompts */
div.nbinput.container div.prompt pre,
div.nboutput.container div.prompt pre {
    overflow: hidden;
}

/* input/output area */
div.nbinput.container div.input_area,
div.nboutput.container div.output_area {
    -webkit-flex: 1;
    flex: 1;
    overflow: auto;
}
@media (max-width: 540px) {
    div.nbinput.container div.input_area,
    div.nboutput.container div.output_area {
        width: 100%;
    }
}

/* input area */
div.nbinput.container div.input_area {
    border: 1px solid #e0e0e0;
    border-radius: 2px;
    /*background: #f5f5f5;*/
}

/* override MathJax center alignment in output cells */
div.nboutput.container div[class*=MathJax] {
    text-align: left !important;
}

/* override sphinx.ext.imgmath center alignment in output cells */
div.nboutput.container div.math p {
    text-align: left;
}

/* standard error */
div.nboutput.container div.output_area.stderr {
    background: #fdd;
}

/* ANSI colors */
.ansi-black-fg { color: #3E424D; }
.ansi-black-bg { background-color: #3E424D; }
.ansi-black-intense-fg { color: #282C36; }
.ansi-black-intense-bg { background-color: #282C36; }
.ansi-red-fg { color: #E75C58; }
.ansi-red-bg { background-color: #E75C58; }
.ansi-red-intense-fg { color: #B22B31; }
.ansi-red-intense-bg { background-color: #B22B31; }
.ansi-green-fg { color: #00A250; }
.ansi-green-bg { background-color: #00A250; }
.ansi-green-intense-fg { color: #007427; }
.ansi-green-intense-bg { background-color: #007427; }
.ansi-yellow-fg { color: #DDB62B; }
.ansi-yellow-bg { background-color: #DDB62B; }
.ansi-yellow-intense-fg { color: #B27D12; }
.ansi-yellow-intense-bg { background-color: #B27D12; }
.ansi-blue-fg { color: #208FFB; }
.ansi-blue-bg { background-color: #208FFB; }
.ansi-blue-intense-fg { color: #0065CA; }
.ansi-blue-intense-bg { background-color: #0065CA; }
.ansi-magenta-fg { color: #D160C4; }
.ansi-magenta-bg { background-color: #D160C4; }
.ansi-magenta-intense-fg { color: #A03196; }
.ansi-magenta-intense-bg { background-color: #A03196; }
.ansi-cyan-fg { color: #60C6C8; }
.ansi-cyan-bg { background-color: #60C6C8; }
.ansi-cyan-intense-fg { color: #258F8F; }
.ansi-cyan-intense-bg { background-color: #258F8F; }
.ansi-white-fg { color: #C5C1B4; }
.ansi-white-bg { background-color: #C5C1B4; }
.ansi-white-intense-fg { color: #A1A6B2; }
.ansi-white-intense-bg { background-color: #A1A6B2; }

.ansi-default-inverse-fg { color: #FFFFFF; }
.ansi-default-inverse-bg { background-color: #000000; }

.ansi-bold { font-weight: bold; }
.ansi-underline { text-decoration: underline; }


div.nbinput.container div.input_area div[class*=highlight] > pre,
div.nboutput.container div.output_area div[class*=highlight] > pre,
div.nboutput.container div.output_area div[class*=highlight].math,
div.nboutput.container div.output_area.rendered_html,
div.nboutput.container div.output_area > div.output_javascript,
div.nboutput.container div.output_area:not(.rendered_html) > img{
    padding: 5px;
    margin: 0;
}

/* fix copybtn overflow problem in chromium (needed for 'sphinx_copybutton') */
div.nbinput.container div.input_area > div[class^='highlight'],
div.nboutput.container div.output_area > div[class^='highlight']{
    overflow-y: hidden;
}

/* hide copybtn icon on prompts (needed for 'sphinx_copybutton') */
.prompt .copybtn {
    display: none;
}

/* Some additional styling taken form the Jupyter notebook CSS */
div.rendered_html table {
  border: none;
  border-collapse: collapse;
  border-spacing: 0;
  color: black;
  font-size: 12px;
  table-layout: fixed;
}
div.rendered_html thead {
  border-bottom: 1px solid black;
  vertical-align: bottom;
}
div.rendered_html tr,
div.rendered_html th,
div.rendered_html td {
  text-align: right;
  vertical-align: middle;
  padding: 0.5em 0.5em;
  line-height: normal;
  white-space: normal;
  max-width: none;
  border: none;
}
div.rendered_html th {
  font-weight: bold;
}
div.rendered_html tbody tr:nth-child(odd) {
  background: #f5f5f5;
}
div.rendered_html tbody tr:hover {
  background: rgba(66, 165, 245, 0.2);
}
</style>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[1]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># Copyright 2022 NVIDIA Corporation. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the &quot;License&quot;);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an &quot;AS IS&quot; BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
</pre></div>
</div>
</div>
<p><img alt="8895119d8f634fe5b8ca2b679a440289" src="https://developer.download.nvidia.com/tesla/notebook_assets/nv_logo_torch_trt_resnet_notebook.png" /></p>
<section id="Masked-Language-Modeling-(MLM)-with-Hugging-Face-BERT-Transformer">
<h1>Masked Language Modeling (MLM) with Hugging Face BERT Transformer<a class="headerlink" href="#Masked-Language-Modeling-(MLM)-with-Hugging-Face-BERT-Transformer" title="Permalink to this headline">¶</a></h1>
<section id="Learning-objectives">
<h2>Learning objectives<a class="headerlink" href="#Learning-objectives" title="Permalink to this headline">¶</a></h2>
<p>This notebook demonstrates the steps for compiling a TorchScript module with Torch-TensorRT on a pretrained BERT transformer from Hugging Face, and running it to test the speedup obtained.</p>
</section>
<section id="Contents">
<h2>Contents<a class="headerlink" href="#Contents" title="Permalink to this headline">¶</a></h2>
<ol class="arabic simple">
<li><p><a class="reference external" href="#1">Requirements</a></p></li>
<li><p><a class="reference external" href="#2">BERT Overview</a></p></li>
<li><p><a class="reference external" href="#3">Creating TorchScript modules</a></p></li>
<li><p><a class="reference external" href="#4">Compiling with Torch-TensorRT</a></p></li>
<li><p><a class="reference external" href="#5">Benchmarking</a></p></li>
<li><p><a class="reference external" href="#6">Conclusion</a></p></li>
</ol>
<p>## 1. Requirements</p>
<p>NVIDIA’s NGC provides a PyTorch Docker Container which contains PyTorch and Torch-TensorRT. Starting with version <code class="docutils literal notranslate"><span class="pre">22.05-py3</span></code>, we can make use of <a class="reference external" href="https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch">latest pytorch</a> container to run this notebook.</p>
<p>Otherwise, you can follow the steps in <code class="docutils literal notranslate"><span class="pre">notebooks/README</span></code> to prepare a Docker container yourself, within which you can run this demo notebook.</p>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[2]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>!pip install transformers
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<div class="highlight"><pre>
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Requirement already satisfied: transformers in /opt/conda/lib/python3.8/site-packages (4.18.0)
Requirement already satisfied: tqdm&gt;=4.27 in /opt/conda/lib/python3.8/site-packages (from transformers) (4.63.0)
Requirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.8/site-packages (from transformers) (2022.3.15)
Requirement already satisfied: huggingface-hub&lt;1.0,&gt;=0.1.0 in /opt/conda/lib/python3.8/site-packages (from transformers) (0.5.1)
Requirement already satisfied: tokenizers!=0.11.3,&lt;0.13,&gt;=0.11.1 in /opt/conda/lib/python3.8/site-packages (from transformers) (0.12.1)
Requirement already satisfied: numpy&gt;=1.17 in /opt/conda/lib/python3.8/site-packages (from transformers) (1.22.3)
Requirement already satisfied: sacremoses in /opt/conda/lib/python3.8/site-packages (from transformers) (0.0.49)
Requirement already satisfied: requests in /opt/conda/lib/python3.8/site-packages (from transformers) (2.27.1)
Requirement already satisfied: pyyaml&gt;=5.1 in /opt/conda/lib/python3.8/site-packages (from transformers) (6.0)
Requirement already satisfied: filelock in /opt/conda/lib/python3.8/site-packages (from transformers) (3.6.0)
Requirement already satisfied: packaging&gt;=20.0 in /opt/conda/lib/python3.8/site-packages (from transformers) (21.3)
Requirement already satisfied: typing-extensions&gt;=3.7.4.3 in /opt/conda/lib/python3.8/site-packages (from huggingface-hub&lt;1.0,&gt;=0.1.0-&gt;transformers) (4.1.1)
Requirement already satisfied: pyparsing!=3.0.5,&gt;=2.0.2 in /opt/conda/lib/python3.8/site-packages (from packaging&gt;=20.0-&gt;transformers) (3.0.7)
Requirement already satisfied: urllib3&lt;1.27,&gt;=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests-&gt;transformers) (1.26.8)
Requirement already satisfied: charset-normalizer~=2.0.0 in /opt/conda/lib/python3.8/site-packages (from requests-&gt;transformers) (2.0.12)
Requirement already satisfied: certifi&gt;=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests-&gt;transformers) (2021.10.8)
Requirement already satisfied: idna&lt;4,&gt;=2.5 in /opt/conda/lib/python3.8/site-packages (from requests-&gt;transformers) (3.3)
Requirement already satisfied: six in /opt/conda/lib/python3.8/site-packages (from sacremoses-&gt;transformers) (1.16.0)
Requirement already satisfied: click in /opt/conda/lib/python3.8/site-packages (from sacremoses-&gt;transformers) (8.0.4)
Requirement already satisfied: joblib in /opt/conda/lib/python3.8/site-packages (from sacremoses-&gt;transformers) (1.1.0)
<span class="ansi-yellow-fg">WARNING: Running pip as the &#39;root&#39; user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv</span>
</pre></div></div>
</div>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[3]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>from transformers import BertTokenizer, BertForMaskedLM
import torch
import timeit
import numpy as np
import torch_tensorrt
import torch.backends.cudnn as cudnn
</pre></div>
</div>
</div>
<p>## 2. BERT Overview</p>
<p>Transformers comprise a class of deep learning algorithms employing self-attention; broadly speaking, the models learn large matrices of numbers, each element of which denotes how important one component of input data is to another. Since their introduction in 2017, transformers have enjoyed widespread adoption, particularly in natural language processing, but also in computer vision problems. This is largely because they are easier to parallelize than the sequence models which attention
mechanisms were originally designed to augment.</p>
<p>Hugging Face is a company that maintains a huge respository of pre-trained transformer models. The company also provides tools for integrating those models into PyTorch code and running inference with them.</p>
<p>One of the most popular transformer models is BERT (Bidirectional Encoder Representations from Transformers). First developed at Google and released in 2018, it has become the backbone of Google’s search engine and a standard benchmark for NLP experiments. BERT was originally trained for next sentence prediction and masked language modeling (MLM), which aims to predict hidden words in sentences. In this notebook, we will use Hugging Face’s <code class="docutils literal notranslate"><span class="pre">bert-base-uncased</span></code> model (BERT’s smallest and
simplest form, which does not employ text capitalization) for MLM.</p>
<p>## 3. Creating TorchScript modules</p>
<p>First, create a pretrained BERT tokenizer from the <code class="docutils literal notranslate"><span class="pre">bert-base-uncased</span></code> model</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[4]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>enc = BertTokenizer.from_pretrained(&#39;bert-base-uncased&#39;)
</pre></div>
</div>
</div>
<p>Create dummy inputs to generate a traced TorchScript model later</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[5]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>batch_size = 4

batched_indexed_tokens = [[101, 64]*64]*batch_size
batched_segment_ids = [[0, 1]*64]*batch_size
batched_attention_masks = [[1, 1]*64]*batch_size

tokens_tensor = torch.tensor(batched_indexed_tokens)
segments_tensor = torch.tensor(batched_segment_ids)
attention_masks_tensor = torch.tensor(batched_attention_masks)
</pre></div>
</div>
</div>
<p>Obtain a BERT masked language model from Hugging Face in the (scripted) TorchScript, then use the dummy inputs to trace it</p>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[6]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>mlm_model_ts = BertForMaskedLM.from_pretrained(&#39;bert-base-uncased&#39;, torchscript=True)
traced_mlm_model = torch.jit.trace(mlm_model_ts, [tokens_tensor, segments_tensor, attention_masks_tensor])
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area stderr docutils container">
<div class="highlight"><pre>
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: [&#39;cls.seq_relationship.bias&#39;, &#39;cls.seq_relationship.weight&#39;]
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
</pre></div></div>
</div>
<p>Define 4 masked sentences, with 1 word in each sentence hidden from the model. Fluent English speakers will probably be able to guess the masked words, but just in case, they are <code class="docutils literal notranslate"><span class="pre">'capital'</span></code>, <code class="docutils literal notranslate"><span class="pre">'language'</span></code>, <code class="docutils literal notranslate"><span class="pre">'innings'</span></code>, and <code class="docutils literal notranslate"><span class="pre">'mathematics'</span></code>.</p>
<p>Also create a list containing the position of the masked word within each sentence. Given Python’s 0-based indexing convention, the numbers are each higher by 1 than might be expected. This is because the token at index 0 in each sentence is a beginning-of-sentence token, denoted <code class="docutils literal notranslate"><span class="pre">[CLS]</span></code> when entered explicitly.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[7]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>masked_sentences = [&#39;Paris is the [MASK] of France.&#39;,
                    &#39;The primary [MASK] of the United States is English.&#39;,
                    &#39;A baseball game consists of at least nine [MASK].&#39;,
                    &#39;Topology is a branch of [MASK] concerned with the properties of geometric objects that remain unchanged under continuous transformations.&#39;]
pos_masks = [4, 3, 9, 6]
</pre></div>
</div>
</div>
<p>Pass the masked sentences into the (scripted) TorchScript MLM model and verify that the unmasked sentences yield the expected results.</p>
<p>Because the sentences are of different lengths, we must specify the <code class="docutils literal notranslate"><span class="pre">padding</span></code> argument in calling our encoder/tokenizer. There are several possible padding strategies, but we’ll use <code class="docutils literal notranslate"><span class="pre">'max_length'</span></code> padding with <code class="docutils literal notranslate"><span class="pre">max_length=128</span></code>. Later, when we compile an optimized version of the model with Torch-TensorRT, the optimized model will expect inputs of length 128, hence our choice of padding strategy and length here.</p>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[8]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>encoded_inputs = enc(masked_sentences, return_tensors=&#39;pt&#39;, padding=&#39;max_length&#39;, max_length=128)
outputs = mlm_model_ts(**encoded_inputs)
most_likely_token_ids = [torch.argmax(outputs[0][i, pos, :]) for i, pos in enumerate(pos_masks)]
unmasked_tokens = enc.decode(most_likely_token_ids).split(&#39; &#39;)
unmasked_sentences = [masked_sentences[i].replace(&#39;[MASK]&#39;, token) for i, token in enumerate(unmasked_tokens)]
for sentence in unmasked_sentences:
    print(sentence)
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<div class="highlight"><pre>
Paris is the capital of France.
The primary language of the United States is English.
A baseball game consists of at least nine innings.
Topology is a branch of mathematics concerned with the properties of geometric objects that remain unchanged under continuous transformations.
</pre></div></div>
</div>
<p>Pass the masked sentences into the traced MLM model and verify that the unmasked sentences yield the expected results.</p>
<p>Note the difference in how the <code class="docutils literal notranslate"><span class="pre">encoded_inputs</span></code> are passed into the model in the following cell compared to the previous one. If you examine <code class="docutils literal notranslate"><span class="pre">encoded_inputs</span></code>, you’ll find that it’s a dictionary with 3 keys, <code class="docutils literal notranslate"><span class="pre">'input_ids'</span></code>, <code class="docutils literal notranslate"><span class="pre">'token_type_ids'</span></code>, and <code class="docutils literal notranslate"><span class="pre">'attention_mask'</span></code>, each with a PyTorch tensor as an associated value. The traced model will accept <code class="docutils literal notranslate"><span class="pre">**encoded_inputs</span></code> as an input, but the Torch-TensorRT-optimized model (to be defined later) will not.</p>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[9]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>encoded_inputs = enc(masked_sentences, return_tensors=&#39;pt&#39;, padding=&#39;max_length&#39;, max_length=128)
outputs = traced_mlm_model(encoded_inputs[&#39;input_ids&#39;], encoded_inputs[&#39;token_type_ids&#39;], encoded_inputs[&#39;attention_mask&#39;])
most_likely_token_ids = [torch.argmax(outputs[0][i, pos, :]) for i, pos in enumerate(pos_masks)]
unmasked_tokens = enc.decode(most_likely_token_ids).split(&#39; &#39;)
unmasked_sentences = [masked_sentences[i].replace(&#39;[MASK]&#39;, token) for i, token in enumerate(unmasked_tokens)]
for sentence in unmasked_sentences:
    print(sentence)
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<div class="highlight"><pre>
Paris is the capital of France.
The primary language of the United States is English.
A baseball game consists of at least nine innings.
Topology is a branch of mathematics concerned with the properties of geometric objects that remain unchanged under continuous transformations.
</pre></div></div>
</div>
<p>## 4. Compiling with Torch-TensorRT</p>
<p>Change the logging level to avoid long printouts</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[10]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>new_level = torch_tensorrt.logging.Level.Error
torch_tensorrt.logging.set_reportable_log_level(new_level)
</pre></div>
</div>
</div>
<p>Compile the model</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[11]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>trt_model = torch_tensorrt.compile(traced_mlm_model,
    inputs= [torch_tensorrt.Input(shape=[batch_size, 128], dtype=torch.int32),  # input_ids
             torch_tensorrt.Input(shape=[batch_size, 128], dtype=torch.int32),  # token_type_ids
             torch_tensorrt.Input(shape=[batch_size, 128], dtype=torch.int32)], # attention_mask
    enabled_precisions= {torch.float32}, # Run with 32-bit precision
    workspace_size=2000000000,
    truncate_long_and_double=True
)
</pre></div>
</div>
</div>
<p>Pass the masked sentences into the compiled model and verify that the unmasked sentences yield the expected results.</p>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[12]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>enc_inputs = enc(masked_sentences, return_tensors=&#39;pt&#39;, padding=&#39;max_length&#39;, max_length=128)
enc_inputs = {k: v.type(torch.int32).cuda() for k, v in enc_inputs.items()}
output_trt = trt_model(enc_inputs[&#39;input_ids&#39;], enc_inputs[&#39;token_type_ids&#39;], enc_inputs[&#39;attention_mask&#39;])
most_likely_token_ids_trt = [torch.argmax(output_trt[i, pos, :]) for i, pos in enumerate(pos_masks)]
unmasked_tokens_trt = enc.decode(most_likely_token_ids_trt).split(&#39; &#39;)
unmasked_sentences_trt = [masked_sentences[i].replace(&#39;[MASK]&#39;, token) for i, token in enumerate(unmasked_tokens_trt)]
for sentence in unmasked_sentences_trt:
    print(sentence)
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<div class="highlight"><pre>
Paris is the capital of France.
The primary language of the United States is English.
A baseball game consists of at least nine innings.
Topology is a branch of mathematics concerned with the properties of geometric objects that remain unchanged under continuous transformations.
</pre></div></div>
</div>
<p>Compile the model again, this time with 16-bit precision</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[13]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>trt_model_fp16 = torch_tensorrt.compile(traced_mlm_model,
    inputs= [torch_tensorrt.Input(shape=[batch_size, 128], dtype=torch.int32),  # input_ids
             torch_tensorrt.Input(shape=[batch_size, 128], dtype=torch.int32),  # token_type_ids
             torch_tensorrt.Input(shape=[batch_size, 128], dtype=torch.int32)], # attention_mask
    enabled_precisions= {torch.half}, # Run with 16-bit precision
    workspace_size=2000000000,
    truncate_long_and_double=True
)
</pre></div>
</div>
</div>
<p>## 5. Benchmarking</p>
<p>In developing this notebook, we conducted our benchmarking on a single NVIDIA A100 GPU. Your results may differ from those shown, particularly on a different GPU.</p>
<p>This function passes the inputs into the model and runs inference <code class="docutils literal notranslate"><span class="pre">num_loops</span></code> times, then returns a list of length containing the amount of time in seconds that each instance of inference took.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[14]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>def timeGraph(model, input_tensor1, input_tensor2, input_tensor3, num_loops=50):
    print(&quot;Warm up ...&quot;)
    with torch.no_grad():
        for _ in range(20):
            features = model(input_tensor1, input_tensor2, input_tensor3)

    torch.cuda.synchronize()

    print(&quot;Start timing ...&quot;)
    timings = []
    with torch.no_grad():
        for i in range(num_loops):
            start_time = timeit.default_timer()
            features = model(input_tensor1, input_tensor2, input_tensor3)
            torch.cuda.synchronize()
            end_time = timeit.default_timer()
            timings.append(end_time - start_time)
            # print(&quot;Iteration {}: {:.6f} s&quot;.format(i, end_time - start_time))

    return timings
</pre></div>
</div>
</div>
<p>This function prints the number of input batches the model is able to process each second and summary statistics of the model’s latency.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[15]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>def printStats(graphName, timings, batch_size):
    times = np.array(timings)
    steps = len(times)
    speeds = batch_size / times
    time_mean = np.mean(times)
    time_med = np.median(times)
    time_99th = np.percentile(times, 99)
    time_std = np.std(times, ddof=0)
    speed_mean = np.mean(speeds)
    speed_med = np.median(speeds)

    msg = (&quot;\n%s =================================\n&quot;
            &quot;batch size=%d, num iterations=%d\n&quot;
            &quot;  Median text batches/second: %.1f, mean: %.1f\n&quot;
            &quot;  Median latency: %.6f, mean: %.6f, 99th_p: %.6f, std_dev: %.6f\n&quot;
            ) % (graphName,
                batch_size, steps,
                speed_med, speed_mean,
                time_med, time_mean, time_99th, time_std)
    print(msg)
</pre></div>
</div>
</div>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[16]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>cudnn.benchmark = True
</pre></div>
</div>
</div>
<p>Benchmark the (scripted) TorchScript model on GPU</p>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[17]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>timings = timeGraph(mlm_model_ts.cuda(), enc_inputs[&#39;input_ids&#39;], enc_inputs[&#39;token_type_ids&#39;], enc_inputs[&#39;attention_mask&#39;])

printStats(&quot;BERT&quot;, timings, batch_size)
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<div class="highlight"><pre>
Warm up ...
Start timing ...

BERT =================================
batch size=4, num iterations=50
  Median text batches/second: 599.1, mean: 597.6
  Median latency: 0.006677, mean: 0.006693, 99th_p: 0.006943, std_dev: 0.000059

</pre></div></div>
</div>
<p>Benchmark the traced model on GPU</p>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[18]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>timings = timeGraph(traced_mlm_model.cuda(), enc_inputs[&#39;input_ids&#39;], enc_inputs[&#39;token_type_ids&#39;], enc_inputs[&#39;attention_mask&#39;])

printStats(&quot;BERT&quot;, timings, batch_size)
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<div class="highlight"><pre>
Warm up ...
Start timing ...

BERT =================================
batch size=4, num iterations=50
  Median text batches/second: 951.2, mean: 951.0
  Median latency: 0.004205, mean: 0.004206, 99th_p: 0.004256, std_dev: 0.000015

</pre></div></div>
</div>
<p>Benchmark the compiled FP32 model on GPU</p>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[19]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>timings = timeGraph(trt_model, enc_inputs[&#39;input_ids&#39;], enc_inputs[&#39;token_type_ids&#39;], enc_inputs[&#39;attention_mask&#39;])

printStats(&quot;BERT&quot;, timings, batch_size)
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<div class="highlight"><pre>
Warm up ...
Start timing ...

BERT =================================
batch size=4, num iterations=50
  Median text batches/second: 1216.9, mean: 1216.4
  Median latency: 0.003287, mean: 0.003289, 99th_p: 0.003317, std_dev: 0.000007

</pre></div></div>
</div>
<p>Benchmark the compiled FP16 model on GPU</p>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[20]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>timings = timeGraph(trt_model_fp16, enc_inputs[&#39;input_ids&#39;], enc_inputs[&#39;token_type_ids&#39;], enc_inputs[&#39;attention_mask&#39;])

printStats(&quot;BERT&quot;, timings, batch_size)
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<div class="highlight"><pre>
Warm up ...
Start timing ...

BERT =================================
batch size=4, num iterations=50
  Median text batches/second: 1776.7, mean: 1771.1
  Median latency: 0.002251, mean: 0.002259, 99th_p: 0.002305, std_dev: 0.000015

</pre></div></div>
</div>
<p>## 6. Conclusion</p>
<p>In this notebook, we have walked through the complete process of compiling TorchScript models with Torch-TensorRT for Masked Language Modeling with Hugging Face’s <code class="docutils literal notranslate"><span class="pre">bert-base-uncased</span></code> transformer and testing the performance impact of the optimization. With Torch-TensorRT on an NVIDIA A100 GPU, we observe the speedups indicated below. These acceleration numbers will vary from GPU to GPU (as well as implementation to implementation based on the ops used) and we encorage you to try out latest
generation of Data center compute cards for maximum acceleration.</p>
<p>Scripted (GPU): 1.0x Traced (GPU): 1.62x Torch-TensorRT (FP32): 2.14x Torch-TensorRT (FP16): 3.15x</p>
<section id="What’s-next">
<h3>What’s next<a class="headerlink" href="#What’s-next" title="Permalink to this headline">¶</a></h3>
<p>Now it’s time to try Torch-TensorRT on your own model. If you run into any issues, you can fill them at <a class="reference external" href="https://github.com/NVIDIA/Torch-TensorRT">https://github.com/NVIDIA/Torch-TensorRT</a>. Your involvement will help future development of Torch-TensorRT.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>
</pre></div>
</div>
</div>
</section>
</section>
</section>


             </article>

            </div>
            <footer>


    <hr>


  <div role="contentinfo">
    <p>
        &copy; Copyright 2022, NVIDIA Corporation.

    </p>
  </div>

      <div>
        Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
      </div>


</footer>

          </div>
        </div>

        <div class="pytorch-content-right" id="pytorch-content-right">
          <div class="pytorch-right-menu" id="pytorch-right-menu">
            <div class="pytorch-side-scroll" id="pytorch-side-scroll-right">
              <ul>
<li><a class="reference internal" href="#">Masked Language Modeling (MLM) with Hugging Face BERT Transformer</a><ul>
<li><a class="reference internal" href="#Learning-objectives">Learning objectives</a></li>
<li><a class="reference internal" href="#Contents">Contents</a><ul>
<li><a class="reference internal" href="#What’s-next">What’s next</a></li>
</ul>
</li>
</ul>
</li>
</ul>

            </div>
          </div>
        </div>
      </section>
    </div>


       <script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
         <script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
         <script src="../_static/jquery.js"></script>
         <script src="../_static/underscore.js"></script>
         <script src="../_static/doctools.js"></script>
         <script src="../_static/collapsible-lists/js/CollapsibleLists.compressed.js"></script>
         <script src="../_static/collapsible-lists/js/apply-collapsible-lists.js"></script>
         <script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
         <script>window.MathJax = {"tex": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true}, "options": {"ignoreHtmlClass": "tex2jax_ignore|mathjax_ignore|document", "processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
         <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>


  <script type="text/javascript" src="../_static/js/vendor/popper.min.js"></script>
  <script type="text/javascript" src="../_static/js/vendor/bootstrap.min.js"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/list.js/1.5.0/list.min.js"></script>
  <script type="text/javascript" src="../_static/js/theme.js"></script>

  <script type="text/javascript">
      jQuery(function () {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script>

  <!-- Begin Footer -->

  <div class="container-fluid docs-tutorials-resources" id="docs-tutorials-resources">
    <div class="container">
      <div class="row">
        <div class="col-md-4 text-center">
          <h2>Docs</h2>
          <p>Access comprehensive developer documentation for PyTorch</p>
          <a class="with-right-arrow" href="https://pytorch.org/docs/stable/index.html">View Docs</a>
        </div>

        <div class="col-md-4 text-center">
          <h2>Tutorials</h2>
          <p>Get in-depth tutorials for beginners and advanced developers</p>
          <a class="with-right-arrow" href="https://pytorch.org/tutorials">View Tutorials</a>
        </div>

        <div class="col-md-4 text-center">
          <h2>Resources</h2>
          <p>Find development resources and get your questions answered</p>
          <a class="with-right-arrow" href="https://pytorch.org/resources">View Resources</a>
        </div>
      </div>
    </div>
  </div>

  <footer class="site-footer">
    <div class="container footer-container">
      <div class="footer-logo-wrapper">
        <a href="https://pytorch.org/" class="footer-logo"></a>
      </div>

      <div class="footer-links-wrapper">
        <div class="footer-links-col">
          <ul>
            <li class="list-title"><a href="https://pytorch.org/">PyTorch</a></li>
            <li><a href="https://pytorch.org/get-started">Get Started</a></li>
            <li><a href="https://pytorch.org/features">Features</a></li>
            <li><a href="https://pytorch.org/ecosystem">Ecosystem</a></li>
            <li><a href="https://pytorch.org/blog/">Blog</a></li>
            <li><a href="https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md">Contributing</a></li>
          </ul>
        </div>

        <div class="footer-links-col">
          <ul>
            <li class="list-title"><a href="https://pytorch.org/resources">Resources</a></li>
            <li><a href="https://pytorch.org/tutorials">Tutorials</a></li>
            <li><a href="https://pytorch.org/docs/stable/index.html">Docs</a></li>
            <li><a href="https://discuss.pytorch.org" target="_blank">Discuss</a></li>
            <li><a href="https://github.com/pytorch/pytorch/issues" target="_blank">Github Issues</a></li>
            <li><a href="https://pytorch.org/assets/brand-guidelines/PyTorch-Brand-Guidelines.pdf" target="_blank">Brand Guidelines</a></li>
          </ul>
        </div>

        <div class="footer-links-col">
          <ul>
            <li class="list-title">Stay up to date</li>
            <li><a href="https://www.facebook.com/pytorch" target="_blank">Facebook</a></li>
            <li><a href="https://twitter.com/pytorch" target="_blank">Twitter</a></li>
            <li><a href="https://www.youtube.com/pytorch" target="_blank">YouTube</a></li>
            <li><a href="https://www.linkedin.com/company/pytorch" target="_blank">LinkedIn</a></li>
          </ul>
          </div>

        <div class="footer-links-col">
          <ul>
            <li class="list-title">PyTorch Podcasts</li>
            <li><a href="https://open.spotify.com/show/6UzHKeiy368jKfQMKKvJY5" target="_blank">Spotify</a></li>
            <li><a href="https://podcasts.apple.com/us/podcast/pytorch-developer-podcast/id1566080008" target="_blank">Apple</a></li>
            <li><a href="https://www.google.com/podcasts?feed=aHR0cHM6Ly9mZWVkcy5zaW1wbGVjYXN0LmNvbS9PQjVGa0lsOA%3D%3D" target="_blank">Google</a></li>
            <li><a href="https://music.amazon.com/podcasts/7a4e6f0e-26c2-49e9-a478-41bd244197d0/PyTorch-Developer-Podcast?" target="_blank">Amazon</a></li>
          </ul>
         </div>
        </div>

        <div class="privacy-policy">
          <ul>
            <li class="privacy-policy-links"><a href="https://www.linuxfoundation.org/terms/" target="_blank">Terms</a></li>
            <li class="privacy-policy-links">|</li>
            <li class="privacy-policy-links"><a href="https://www.linuxfoundation.org/privacy-policy/" target="_blank">Privacy</a></li>
          </ul>
        </div>
        <div class="copyright">
        <p>© Copyright The Linux Foundation. The PyTorch Foundation is a project of The Linux Foundation.
          For web site terms of use, trademark policy and other policies applicable to The PyTorch Foundation please see
          <a href="www.linuxfoundation.org/policies/">www.linuxfoundation.org/policies/</a>. The PyTorch Foundation supports the PyTorch open source
          project, which has been established as PyTorch Project a Series of LF Projects, LLC. For policies applicable to the PyTorch Project a Series of LF Projects, LLC,
          please see <a href="www.lfprojects.org/policies/">www.lfprojects.org/policies/</a>.</p>
      </div>
     </div>

  </footer>

  <div class="cookie-banner-wrapper">
  <div class="container">
    <p class="gdpr-notice">To analyze traffic and optimize your experience, we serve cookies on this site. By clicking or navigating, you agree to allow our usage of cookies. As the current maintainers of this site, Facebook’s Cookies Policy applies. Learn more, including about available controls: <a href="https://www.facebook.com/policies/cookies/">Cookies Policy</a>.</p>
    <img class="close-button" src="../_static/images/pytorch-x.svg">
  </div>
</div>

  <!-- End Footer -->

  <!-- Begin Mobile Menu -->

  <div class="mobile-main-menu">
    <div class="container-fluid">
      <div class="container">
        <div class="mobile-main-menu-header-container">
          <a class="header-logo" href="https://pytorch.org/" aria-label="PyTorch"></a>
          <a class="main-menu-close-button" href="#" data-behavior="close-mobile-menu"></a>
        </div>
      </div>
    </div>

    <div class="mobile-main-menu-links-container">
      <div class="main-menu">
        <ul>
          <li>
            <a href="https://pytorch.org/get-started">Get Started</a>
          </li>

          <li>
            <a href="https://pytorch.org/ecosystem">Ecosystem</a>
          </li>

          <li>
            <a href="https://pytorch.org/mobile">Mobile</a>
          </li>

          <li>
            <a href="https://pytorch.org/blog/">Blog</a>
          </li>

          <li>
            <a href="https://pytorch.org/tutorials">Tutorials</a>
          </li>

          <li class="resources-mobile-menu-title">
            Docs
          </li>

          <ul class="resources-mobile-menu-items">
            <li>
              <a href="https://pytorch.org/docs/stable/index.html">PyTorch</a>
            </li>

            <li>
              <a href="https://pytorch.org/audio/stable/index.html">torchaudio</a>
            </li>

            <li>
              <a href="https://pytorch.org/text/stable/index.html">torchtext</a>
            </li>

            <li>
              <a href="https://pytorch.org/vision/stable/index.html">torchvision</a>
            </li>

            <li>
              <a href="https://pytorch.org/torcharrow">torcharrow</a>
            </li>

            <li>
              <a href="https://pytorch.org/data">TorchData</a>
            </li>

            <li>
              <a href="https://pytorch.org/torchrec">TorchRec</a>
            </li>

            <li>
              <a href="https://pytorch.org/serve/">TorchServe</a>
            </li>

            <li>
              <a href="https://pytorch.org/torchx/">TorchX</a>
            </li>

            <li>
              <a href="https://pytorch.org/xla">PyTorch on XLA Devices</a>
            </li>
          </ul>

          <li class="resources-mobile-menu-title">
            Resources
          </li>

           <ul class="resources-mobile-menu-items">

            <li>
              <a href="https://pytorch.org/features">About</a>
            </li>

            <li>
              <a href="https://pytorch.org/foundation">PyTorch Foundation</a>
            </li>

            <li>
              <a href="https://pytorch.org/#community-module">Community</a>
            </li>

            <li>
              <a href="https://pytorch.org/community-stories">Community Stories</a>
            </li>

            <li>
              <a href="https://pytorch.org/resources">Developer Resources</a>
            </li>

            <li>
              <a href="https://pytorch.org/events">Events</a>
            </li>

            <li>
              <a href="https://discuss.pytorch.org/">Forums</a>
            </li>

            <li>
              <a href="https://pytorch.org/hub">Models (Beta)</a>
            </li>
          </ul>

          <li>
            <a href="https://github.com/pytorch/pytorch">Github</a>
          </li>
        </ul>
      </div>
    </div>
  </div>

  <!-- End Mobile Menu -->

  <script type="text/javascript" src="../_static/js/vendor/anchor.min.js"></script>

  <script type="text/javascript">
    $(document).ready(function() {
      mobileMenu.bind();
      mobileTOC.bind();
      pytorchAnchors.bind();
      sideMenus.bind();
      scrollToAnchor.bind();
      highlightNavigation.bind();
      mainMenuDropdown.bind();
      filterTags.bind();

      // Add class to links that have code blocks, since we cannot create links in code blocks
      $("article.pytorch-article a span.pre").each(function(e) {
        $(this).closest("a").addClass("has-code");
      });
    })
  </script>
</body>
</html>