<!-- embark.html -->

<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="utf-8">
  <!-- The document encoding is declared once, by the charset meta above. -->
  <meta http-equiv="X-UA-Compatible" content="IE=edge">


  <title>How to Engineer Your Way Out of Slow Models</title>


  <meta name="HandheldFriendly" content="True" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <meta name="referrer" content="origin" />
  <meta name="generator" content="Pelican" />
<link href="https://anotherdatum.com/embark.html" rel="canonical" />
  <!-- Feed -->
        <link href="https://anotherdatum.com/feeds/all.atom.xml" type="application/atom+xml" rel="alternate" title="Another Datum Full Atom Feed" />

  <link href="https://anotherdatum.com/theme/css/style.css" type="text/css" rel="stylesheet" />

  <!-- Code highlight color scheme -->
      <link href="https://anotherdatum.com/theme/css/code_blocks/tomorrow.css" rel="stylesheet">

    <!-- CSS specified by the user -->


    <link href="https://anotherdatum.com/css/overrides.css" type="text/css" rel="stylesheet" />

  <!-- Custom fonts -->
  <link href='https://fonts.googleapis.com/css?family=Montserrat:400,300' rel='stylesheet' type='text/css' />
  <link href="https://fonts.googleapis.com/css?family=Lato" rel="stylesheet" type="text/css" />

  <link href="https://maxcdn.bootstrapcdn.com/font-awesome/4.5.0/css/font-awesome.min.css" rel="stylesheet" type="text/css">
  <link href='https://fonts.googleapis.com/css?family=Lora:400,700,400italic,700italic' rel='stylesheet' type='text/css'>
  <link href='https://fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800' rel='stylesheet' type='text/css'>

  <!-- HTML5 Shim and Respond.js IE8 support of HTML5 elements and media queries -->
  <!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
  <!--[if lt IE 9]>
    <script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
    <script src="https://oss.maxcdn.com/libs/respond.js/1.4.2/respond.min.js"></script>
  <![endif]-->


    <meta name="description" content="So you just finished designing that great neural network architecture. But how do you handle the fact it is slow?">

    <meta name="author" content="Yoel Zeldes">

    <meta name="tags" content="deep learning">
    <meta name="tags" content="architecture">


<!-- Open Graph: metadata describing this post for link previews -->
<meta property="og:site_name" content="Another Datum"/>
<meta property="og:title" content="How to Engineer Your Way Out of Slow Models"/>
<meta property="og:description" content="So you just finished designing that great neural network architecture. But how do you handle the fact it is slow?"/>
<meta property="og:locale" content="en_US"/>
<meta property="og:url" content="https://anotherdatum.com/embark.html"/>
<meta property="og:type" content="article"/>
<!-- Timestamps are ISO 8601; article:modified_time is left out rather than emitted with an empty value -->
<meta property="article:published_time" content="2018-10-28T23:00:00+02:00"/>
<meta property="article:author" content="https://anotherdatum.com/author/yoel-zeldes.html"/>
<meta property="article:publisher" content="https://www.facebook.com/yoel.zeldes"/>
<meta property="article:section" content="embark"/>
<meta property="article:tag" content="deep learning"/>
<meta property="article:tag" content="architecture"/>
<meta property="og:image" content="https://anotherdatum.com/images/embark/cover.jpg"/>

<!-- Twitter Card -->
<meta name="twitter:card" content="summary_large_image"/>
<meta name="twitter:site" content="@YZeldes"/>
<meta name="twitter:title" content="How to Engineer Your Way Out of Slow Models"/>
<meta name="twitter:url" content="https://anotherdatum.com/embark.html"/>
<meta name="twitter:image" content="https://anotherdatum.com/images/embark/cover.jpg"/>
<meta name="twitter:description" content="So you just finished designing that great neural network architecture. But how do you handle the fact it is slow?"/>

<!-- JSON-LD structured data (schema.org Article) for this post. Dates are ISO 8601;
     dateModified is left out rather than emitted as an empty string. -->
<script type="application/ld+json">
{
  "@context": "https://schema.org",
  "@type": "Article",
  "name": "How to Engineer Your Way Out of Slow Models",
  "headline": "How to Engineer Your Way Out of Slow Models",
  "datePublished": "2018-10-28T23:00:00+02:00",
  "author": {
    "@type": "Person",
    "name": "Yoel Zeldes",
    "url": "https://anotherdatum.com/author/yoel-zeldes.html"
  },
  "image": "https://anotherdatum.com/images/embark/cover.jpg",
  "url": "https://anotherdatum.com/embark.html",
  "description": "So you just finished designing that great neural network architecture. But how do you handle the fact it is slow?"
}
</script>
</head>
<!-- TODO : Body class -->
<body class="home-template">

<!-- Site menu: links to the post index and the static pages.
     The list items keep their native list semantics (no role overrides). -->
<nav id="menu" aria-label="Site menu">
  <a class="close-button">Close</a>
  <div class="nav-wrapper">
    <p class="nav-label">Menu</p>
    <ul>
      <li><a href="https://anotherdatum.com">Posts</a></li>
      <li><a href="https://anotherdatum.com/pages/about.html">about me</a></li>
      <li><a href="https://anotherdatum.com/pages/resources.html">Resources</a></li>
    </ul>
  </div>
</nav>
    <!-- Progressbar -->
    <div class="progress-container">
        <span class="progress-bar"></span>
    </div>

    <!-- Page Header -->
    <!-- Set your background image for this header on the line below. -->
    <header id="post-header" class="has-cover">
      <div class="inner">
        <nav id="navigation" aria-label="Page navigation">
          <span id="home-button" class="nav-button">
            <a class="home-button" href="https://anotherdatum.com/" title="Home"><i class="ic ic-arrow-left"></i> Home</a>
          </span>
          <span id="menu-button" class="nav-button">
            <a class="menu-button"><i class="ic ic-menu"></i> Menu</a>
          </span>
        </nav>
        <h1 class="post-title">How to Engineer Your Way Out of Slow Models</h1>
        <!-- TODO : Proper class for headline -->
        <span class="post-meta">
          <!-- datetime carries the machine-readable date; the element text is the human-readable form -->
          <time datetime="2018-10-28">28 October 2018</time>
        </span>
        <!-- TODO : Modified check -->
        <!-- Empty element that only displays the cover image as a CSS background -->
        <div class="post-cover cover" style="background-image: url('https://anotherdatum.com/images/embark/cover.jpg')"></div>
      </div>
    </header>

  <section id="wrapper">
    <a class="hidden-close"></a>

    <!-- Post content -->
    <main class="content" role="main">
        <article class="post">
        <div class="inner">
            <section class="post-content">
                <p>So you just finished designing that great neural network architecture of yours.
It has a blazing number of 300 fully connected layers interleaved with 200
<a href="https://en.wikipedia.org/wiki/Convolutional_neural_network#Convolutional">convolutional
layers</a>
with 20 channels each, where the result is fed as the seed of a glorious
<a href="https://en.wikipedia.org/wiki/Bidirectional_recurrent_neural_networks">bidirectional</a>
<a href="https://machinelearningmastery.com/stacked-long-short-term-memory-networks/">stacked</a>
<a href="http://colah.github.io/posts/2015-08-Understanding-LSTMs/">LSTM</a> with a pinch
of
<a href="http://www.wildml.com/2016/01/attention-and-memory-in-deep-learning-and-nlp/">attention</a>.
After training you get an accuracy of 99.99%, and you’re ready to ship it to
production.</p>
<p>But then you realize the production constraints won’t allow you to run inference
using this beast. You need the inference to be done in under 200 milliseconds.</p>
<p>In other words, you need to chop off half of the layers, give up on using
convolutions, and let’s not get started on the costly LSTM...</p>
<p>If only you could make that amazing model faster!</p>
<p><img alt="" src="images/embark/sad.jpg"></p>
<h1>Sometimes you can</h1>
<p>Here at Taboola we did it. Well, not exactly... Let me explain.</p>
<p>One of our models has to predict CTR (Click Through Rate) of an item, or in
other words — the probability the user will like an article recommendation and
click on it.</p>
<p>The model has multiple modalities as input, each goes through a different
transformation. Some of them are:</p>
<ul>
<li>categorical features: these are
<a href="https://engineering.taboola.com/using-word2vec-better-embeddings-categorical-features/">embedded</a>
into a dense representation</li>
<li>image: the pixels are passed through convolutional and fully connected layers</li>
<li>text: after being tokenized, the text is passed through an LSTM which is followed
by <a href="https://arxiv.org/abs/1703.03130">self attention</a></li>
</ul>
<p>These processed modalities are then passed through fully connected layers in
order to learn the interactions between the modalities, and finally, they are
passed through a
<a href="https://engineering.taboola.com/uncertainty-ctr-prediction-one-model-clarify">MDN</a>
layer.</p>
<p>As you can imagine, this model is slow.</p>
<p>We decided to insist on the predictive power of the model, instead of trimming
components, and came up with an engineering solution.</p>
<h1>Cache me if you can</h1>
<p>Let’s focus on the image component. The output of this component is a learned
representation of the image. In other words, given an image, the image component
outputs an embedding.</p>
<p>The model is deterministic, so the same image will always result in the same
embedding. Computing that embedding is costly, so we can cache it. Let me
elaborate on how we implemented it.</p>
<h1>The architecture (of the cache, not the model)</h1>
<p><img alt="Diagram of the EmbArk caching architecture" src="images/embark/architecture.png"></p>
<ul>
<li>We used a <a href="http://cassandra.apache.org/">Cassandra</a> database as the cache which
maps an image URL to its embedding.</li>
<li>The service which queries Cassandra is called EmbArk (Embedding Archive,
<a href="https://techcrunch.com/2017/05/20/the-bizarre-naming-trends-that-modern-startups-follow/">misspelled of
course</a>).
It’s a <a href="https://grpc.io/">gRPC</a> server which gets an image URL from a client and
retrieves the embedding from Cassandra. On cache miss EmbArk sends an async
request to embed that image. Why async? Because we need EmbArk to respond with
the result as fast as it can. Given it can’t wait for the image to be embedded,
it returns a special OOV (Out Of Vocabulary) embedding.</li>
<li>The async mechanism we chose to use is <a href="https://kafka.apache.org/">Kafka</a> — a
streaming platform used as a message queue.</li>
<li>The next link is KFC (Kafka Frontend Client) — a Kafka consumer we implemented
to pass messages synchronously to the embedding service, and save the resulting
embeddings in Cassandra.</li>
<li>The embedding service is called Retina. It gets an image URL from KFC, downloads
it, preprocesses it, and evaluates the convolutional layers to get the final
embedding.</li>
<li>The load balancing of all the components is done using
<a href="https://linkerd.io/">Linkerd</a>.</li>
<li>EmbArk, KFC, Retina and Linkerd run inside <a href="https://www.docker.com/">Docker</a>,
and they are orchestrated by <a href="https://www.nomadproject.io/">Nomad</a>. This allows
us to easily scale each component as we see fit.</li>
</ul>
<p>This architecture was initially used for images. After proving its worth, we
decided to use it for other components as well, such as text.</p>
<p>EmbArk proved to be a nice solution for <a href="https://arxiv.org/abs/1403.6382">transfer
learning</a> too. Let’s say we believe the content
of the image has a good signal for predicting CTR. Thus, a model trained for
classifying the object in an image such as
<a href="https://ai.googleblog.com/2016/03/train-your-own-image-classifier-with.html">Inception</a>
would be valuable for our needs. We can load Inception into Retina, tell the
model we intend to train that we want to use Inception embedding, and that’s it.</p>
<p>Not only was the inference time improved, but so was the training process.
This is possible only when we don’t want to train end to end, since gradients
can’t backpropagate through EmbArk.</p>
<p>So whenever you use a model in production you should use EmbArk, right? Well,
not always...</p>
<p><img alt="" src="images/embark/nope.jpg"></p>
<h1>Caveats</h1>
<p>There are three pretty strict assumptions here.</p>
<h3>1. OOV embedding for new inputs is not a big deal</h3>
<p>It doesn’t hurt us that the first time we see an image we won’t have its
embedding.</p>
<p>In our production system it’s ok, since CTR is evaluated multiple times for the
same item during a short period of time. We create lists of items we want to
recommend every few minutes, so even if an item won’t make it into the list
because of non optimal CTR prediction, it will in the next cycle.</p>
<h3>2. The rate of new inputs is low</h3>
<p>It’s true that in Taboola we get lots of new items all the time. But relative to
the number of inferences we need to perform for already known items, there are not
that many of them.</p>
<h3>3. Embeddings don’t change frequently</h3>
<p>Since the embeddings are cached, we count on the fact they don’t change over
time. If they do, we’ll need to perform cache invalidation, and recalculate the
embeddings using Retina. If this happened a lot, we would lose the advantage
of the architecture. For cases such as Inception or language modeling, this
assumption holds, since semantics don’t change significantly over time.</p>
<h1>Some final thoughts</h1>
<p>Sometimes using state of the art models can be problematic due to their
computational demands. By caching intermediate results (embeddings) we were able
to overcome this challenge, and still enjoy state of the art results.</p>
<p>This solution isn’t right for everyone, but if the three aforementioned
assumptions hold for your application, you could consider using a similar
architecture.</p>
<p>By using a microservices paradigm, other teams in the company were able to use
EmbArk for needs other than CTR prediction. One team for instance used EmbArk to
get image and text embeddings for detecting duplicates across different items.
But I’ll leave that story for another post...</p>
<hr>
<p><em>Originally published by me at
<a href="https://engineering.taboola.com/engineer-way-slow-models">engineering.taboola.com</a>.</em></p>
            </section>

            <section class="post-info">
                <div class="post-share">
                    <!-- Share links: each href is a complete share URL (spaces percent-encoded);
                         the onclick handler opens that same URL in a small popup and cancels the normal navigation. -->
                    <a class="twitter" href="https://twitter.com/share?text=How%20to%20Engineer%20Your%20Way%20Out%20of%20Slow%20Models&amp;url=https://anotherdatum.com/embark.html" onclick="window.open(this.href, 'twitter-share', 'width=550,height=235');return false;" aria-label="Share on Twitter">
                    <i class="ic ic-twitter" aria-hidden="true"></i><span class="hidden">Twitter</span>
                    </a>
                    <a class="facebook" href="https://www.facebook.com/sharer/sharer.php?u=https://anotherdatum.com/embark.html" onclick="window.open(this.href, 'facebook-share','width=580,height=296');return false;" aria-label="Share on Facebook">
                    <i class="ic ic-facebook" aria-hidden="true"></i><span class="hidden">Facebook</span>
                    </a>
                    <div class="clear"></div>
                </div>

                <aside class="post-tags">
<a href="https://anotherdatum.com/tag/deep-learning.html">deep learning</a><a href="https://anotherdatum.com/tag/architecture.html">architecture</a>                </aside>

                <div class="clear"></div>


                </section>

<!-- Begin MailChimp Signup Form -->
<!-- Newsletter signup: posts the email address to the MailChimp list endpoint in a new tab.
     External assets are referenced with explicit https URLs. -->
<link href="https://cdn-images.mailchimp.com/embedcode/classic-10_7.css" rel="stylesheet" type="text/css">
<style type="text/css">
	#mc_embed_signup{background:#fff; clear:left; font:14px Helvetica,Arial,sans-serif;  width:300px;}
	#mc_embed_signup form{padding: 0;}
	/* Add your own MailChimp form style overrides in your site stylesheet or in this style block.
	   We recommend moving this block and the preceding CSS link to the HEAD of your HTML file. */
</style>
<div id="mc_embed_signup">
<!-- rel="noopener" keeps the tab opened by target="_blank" from getting a reference back to this page -->
<form action="https://anotherdatum.us14.list-manage.com/subscribe/post?u=6894d7badcfb253606fa3fb54&amp;id=c6f34ad6b7" method="post" id="mc-embedded-subscribe-form" name="mc-embedded-subscribe-form" class="validate" target="_blank" rel="noopener" novalidate>
    <div id="mc_embed_signup_scroll">
	<h2>Get notified of new posts</h2>
<div class="mc-field-group">
	<label for="mce-EMAIL">Email Address </label>
	<input type="email" value="" name="EMAIL" class="required email" id="mce-EMAIL" autocomplete="email">
</div>
	<div id="mce-responses" class="clear">
		<div class="response" id="mce-error-response" style="display:none"></div>
		<div class="response" id="mce-success-response" style="display:none"></div>
	</div>    <!-- real people should not fill this in and expect good things - do not remove this or risk form bot signups-->
    <div style="position: absolute; left: -5000px;" aria-hidden="true"><input type="text" name="b_6894d7badcfb253606fa3fb54_c6f34ad6b7" tabindex="-1" value=""></div>
    <div class="clear"><input type="submit" value="Subscribe" name="subscribe" id="mc-embedded-subscribe" class="button"></div>
    </div>
</form>
</div>
<script type='text/javascript' src='https://s3.amazonaws.com/downloads.mailchimp.com/js/mc-validate.js'></script>
<script type='text/javascript'>
// Registers the field names and types on window, then releases the global jQuery names
// via noConflict(true), keeping the instance in $mcj.
(function($) {window.fnames = new Array(); window.ftypes = new Array();fnames[0]='EMAIL';ftypes[0]='email';fnames[1]='FNAME';ftypes[1]='text';fnames[2]='LNAME';ftypes[2]='text';}(jQuery));var $mcj = jQuery.noConflict(true);
</script>
<!--End mc_embed_signup-->
<hr />
                <aside class="post-nav">
                    <a class="post-nav-next" href="https://anotherdatum.com/vae2.html">
                        <section class="post-nav-teaser">
                            <i class="ic ic-arrow-left"></i>
                                <h2 class="post-nav-title">Variational Autoencoders Explained in Detail</h2>
                            <p class="post-nav-excerpt">Learn all the details needed to implement a variational autoencoder, code included.</p>
                        </section>
                    </a>
                    <a class="post-nav-prev" href="https://anotherdatum.com/taboola-hackathon-2018.html">
                        <section class="post-nav-teaser">
                            <i class="ic ic-arrow-right"></i>
                                <h2 class="post-nav-title">Zooming Past the Competition</h2>
                            <p class="post-nav-excerpt">How to create an Augmented Reality app that allows a user to get content recommendations.</p>
                        </section>
                    </a>
                    <div class="clear"></div>
                </aside>

                <div class="comments">
                    <h2>Comments !</h2>
                    <!-- Empty container with the id "disqus_thread"; presumably where the embed script renders the thread -->
                    <div id="disqus_thread"></div>
                    <script type="text/javascript">
                        // Globals naming the Disqus forum and this page's thread.
                        // NOTE(review): the identifier and URL presumably key the existing comment thread; keep them stable.
                        var disqus_shortname = 'anotherdatum';
                        var disqus_identifier = 'embark.html';
                        var disqus_url = 'https://anotherdatum.com/embark.html';
                        // Inject the Disqus embed script asynchronously, always over https.
                        (function() {
                            var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
                            dsq.src = 'https://anotherdatum.disqus.com/embed.js';
                            (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
                        })();
                    </script>
                    <noscript>Please enable JavaScript to view the comments.</noscript>
                </div>
            </div>
        </article>
    </main>
      <!-- TODO : Body class -->
    <div id="body-class" style="display: none;" class=""></div>

    <footer id="footer">
            <!-- Social profile links. Each link contains only icons, so it carries an aria-label
                 as its accessible name and the icon stack is hidden from assistive technology. -->
            <div class="social">
                <a href="https://il.linkedin.com/in/yoelzeldes" aria-label="LinkedIn">
                    <span class="fa-stack fa-lg" aria-hidden="true">
                        <i class="fa fa-circle fa-stack-2x"></i>
                        <i class="fa fa-linkedin fa-stack-1x fa-inverse"></i>
                    </span>
                </a>
                <a href="https://github.com/yoel-zeldes" aria-label="GitHub">
                    <span class="fa-stack fa-lg" aria-hidden="true">
                        <i class="fa fa-circle fa-stack-2x"></i>
                        <i class="fa fa-github fa-stack-1x fa-inverse"></i>
                    </span>
                </a>
                <a href="https://www.facebook.com/yoel.zeldes" aria-label="Facebook">
                    <span class="fa-stack fa-lg" aria-hidden="true">
                        <i class="fa fa-circle fa-stack-2x"></i>
                        <i class="fa fa-facebook fa-stack-1x fa-inverse"></i>
                    </span>
                </a>
                <a href="https://twitter.com/YZeldes" aria-label="Twitter">
                    <span class="fa-stack fa-lg" aria-hidden="true">
                        <i class="fa fa-circle fa-stack-2x"></i>
                        <i class="fa fa-twitter fa-stack-1x fa-inverse"></i>
                    </span>
                </a>
            </div>

      <div class="inner">
        <section class="credits">
          <span class="credits-theme">Have a look at <a href="https://github.com/yoel-zeldes/yoel-zeldes.github.io/tree/source">the source code</a> of this blog.</span>
        </section>
      </div>
    </footer>
  </section>

  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js"></script>
  <script type="text/javascript" src="https://anotherdatum.com/theme/js/script.js"></script>

    <!-- Global Site Tag (gtag.js) - Google Analytics -->
    <script async src="https://www.googletagmanager.com/gtag/js?id=UA-83684090-1"></script>
    <script>
        // Google Analytics (gtag.js) bootstrap for property UA-83684090-1.
        // Reuse window.dataLayer if it already exists; otherwise start with an empty array.
        window.dataLayer = window.dataLayer || [];
        // gtag() pushes its arguments object onto dataLayer.
        function gtag(){dataLayer.push(arguments);}
        // Queue a 'js' entry carrying the current time.
        gtag('js', new Date());
        // Queue the property configuration, with the anonymize_ip option enabled.
        gtag('config', 'UA-83684090-1', { 'anonymize_ip': true });
    </script>
<script>
    // Load the Disqus count.js script for this forum asynchronously.
    // The URL is built with an explicit https scheme so it does not depend on how the page itself was loaded.
    var disqus_shortname = 'anotherdatum';
    (function () {
        var s = document.createElement('script');
        s.async = true;
        s.src = 'https://' + disqus_shortname + '.disqus.com/count.js';
        // Append to <head> when available, otherwise fall back to <body>.
        (document.getElementsByTagName('HEAD')[0] || document.getElementsByTagName('BODY')[0]).appendChild(s);
    }());
</script>
</body>
</html>