-
Notifications
You must be signed in to change notification settings - Fork 15
/
embark.html
422 lines (379 loc) · 21.8 KB
/
embark.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<!-- The line above is the document's only encoding declaration: HTML allows either a charset meta or an http-equiv Content-Type meta, not both, and a charset attribute is not valid on an http-equiv meta. -->
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />
<title>How to Engineer Your Way Out of Slow Models</title>
<meta name="HandheldFriendly" content="True" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="referrer" content="origin" />
<meta name="generator" content="Pelican" />
<link href="https://anotherdatum.com/embark.html" rel="canonical" />
<!-- Feed -->
<link href="https://anotherdatum.com/feeds/all.atom.xml" type="application/atom+xml" rel="alternate" title="Another Datum Full Atom Feed" />
<link href="https://anotherdatum.com/theme/css/style.css" type="text/css" rel="stylesheet" />
<!-- Code highlight color scheme -->
<link href="https://anotherdatum.com/theme/css/code_blocks/tomorrow.css" rel="stylesheet">
<!-- CSS specified by the user -->
<link href="https://anotherdatum.com/css/overrides.css" type="text/css" rel="stylesheet" />
<!-- Custom fonts -->
<link href='https://fonts.googleapis.com/css?family=Montserrat:400,300' rel='stylesheet' type='text/css' />
<link href="https://fonts.googleapis.com/css?family=Lato" rel="stylesheet" type="text/css" />
<link href="https://maxcdn.bootstrapcdn.com/font-awesome/4.5.0/css/font-awesome.min.css" rel="stylesheet" type="text/css">
<link href='https://fonts.googleapis.com/css?family=Lora:400,700,400italic,700italic' rel='stylesheet' type='text/css'>
<link href='https://fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800' rel='stylesheet' type='text/css'>
<!-- HTML5 Shim and Respond.js IE8 support of HTML5 elements and media queries -->
<!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
<script src="https://oss.maxcdn.com/libs/respond.js/1.4.2/respond.min.js"></script>
<![endif]-->
<meta name="description" content="So you just finished designing that great neural network architecture. But how do you handle the fact it is slow?">
<meta name="author" content="Yoel Zeldes">
<meta name="tags" content="deep learning">
<meta name="tags" content="architecture">
<!-- Open Graph -->
<meta property="og:site_name" content="Another Datum"/>
<meta property="og:title" content="How to Engineer Your Way Out of Slow Models"/>
<meta property="og:description" content="So you just finished designing that great neural network architecture. But how do you handle the fact it is slow?"/>
<meta property="og:locale" content="en_US"/>
<meta property="og:url" content="https://anotherdatum.com/embark.html"/>
<meta property="og:type" content="article"/>
<!-- Open Graph datetimes are ISO 8601, so date and time are joined with "T". No article:modified_time is emitted because this page has no modification date to report. -->
<meta property="article:published_time" content="2018-10-28T23:00:00+02:00"/>
<meta property="article:author" content="https://anotherdatum.com/author/yoel-zeldes.html">
<meta property="article:publisher" content="https://www.facebook.com/yoel.zeldes" />
<meta property="article:section" content="embark"/>
<meta property="article:tag" content="deep learning"/>
<meta property="article:tag" content="architecture"/>
<meta property="og:image" content="https://anotherdatum.com/images/embark/cover.jpg">
<!-- Twitter Card -->
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:site" content="@YZeldes">
<meta name="twitter:title" content="How to Engineer Your Way Out of Slow Models">
<meta name="twitter:url" content="https://anotherdatum.com/embark.html">
<meta name="twitter:image:src" content="https://anotherdatum.com/images/embark/cover.jpg">
<meta name="twitter:description" content="So you just finished designing that great neural network architecture. But how do you handle the fact it is slow?">
<!-- schema.org Article structured data. datePublished is an ISO 8601 datetime ("T" separator); dateModified is left out because this page has no modification date to report. -->
<script type="application/ld+json">
{
"@context": "http://schema.org",
"@type": "Article",
"name": "How to Engineer Your Way Out of Slow Models",
"headline": "How to Engineer Your Way Out of Slow Models",
"datePublished": "2018-10-28T23:00:00+02:00",
"author": {
"@type": "Person",
"name": "Yoel Zeldes",
"url": "https://anotherdatum.com/author/yoel-zeldes.html"
},
"image": "https://anotherdatum.com/images/embark/cover.jpg",
"url": "https://anotherdatum.com/embark.html",
"description": "So you just finished designing that great neural network architecture. But how do you handle the fact it is slow?"
}
</script>
</head>
<!-- TODO : Body class -->
<body class="home-template">
<!-- Site menu: a close control followed by the list of top-level pages. -->
<nav id="menu">
<a class="close-button">Close</a>
<div class="nav-wrapper">
<p class="nav-label">Menu</p>
<ul>
<!-- role="presentation" is placed on the <li> for every item, so the links themselves keep their native link role. -->
<li role="presentation"><a href="https://anotherdatum.com">Posts</a></li>
<li role="presentation"><a href="https://anotherdatum.com/pages/about.html">about me</a></li>
<li role="presentation"><a href="https://anotherdatum.com/pages/resources.html">Resources</a></li>
</ul>
</div>
</nav>
<!-- Progressbar -->
<div class="progress-container">
<span class="progress-bar"></span>
</div>
<!-- Page Header -->
<!-- Set your background image for this header on the line below. -->
<header id="post-header" class="has-cover">
<div class="inner">
<nav id="navigation">
<span id="home-button" class="nav-button">
<a class="home-button" href="https://anotherdatum.com/" title="Home"><i class="ic ic-arrow-left"></i> Home</a>
</span>
<span id="menu-button" class="nav-button">
<a class="menu-button"><i class="ic ic-menu"></i> Menu</a>
</span>
</nav>
<h1 class="post-title">How to Engineer Your Way Out of Slow Models</h1>
<!-- TODO : Proper class for headline -->
<span class="post-meta">
<!-- datetime carries the machine-readable date (YYYY-MM-DD); the element text keeps the human-readable form. -->
<time datetime="2018-10-28">28 October 2018</time>
</span>
<!-- TODO : Modified check -->
<div class="post-cover cover" style="background-image: url('https://anotherdatum.com/images/embark/cover.jpg')">
</div>
</header>
<section id="wrapper">
<a class="hidden-close"></a>
<!-- Post content -->
<main class="content" role="main">
<article class="post">
<div class="inner">
<section class="post-content">
<p>So you just finished designing that great neural network architecture of yours.
It has a blazing number of 300 fully connected layers interleaved with 200
<a href="https://en.wikipedia.org/wiki/Convolutional_neural_network#Convolutional">convolutional
layers</a>
with 20 channels each, where the result is fed as the seed of a glorious
<a href="https://en.wikipedia.org/wiki/Bidirectional_recurrent_neural_networks">bidirectional</a>
<a href="https://machinelearningmastery.com/stacked-long-short-term-memory-networks/">stacked</a>
<a href="http://colah.github.io/posts/2015-08-Understanding-LSTMs/">LSTM</a> with a pinch
of
<a href="http://www.wildml.com/2016/01/attention-and-memory-in-deep-learning-and-nlp/">attention</a>.
After training you get an accuracy of 99.99%, and you’re ready to ship it to
production.</p>
<p>But then you realize the production constraints won’t allow you to run inference
using this beast. You need the inference to be done in under 200 milliseconds.</p>
<p>In other words, you need to chop off half of the layers, give up on using
convolutions, and let’s not even get started on the costly LSTM...</p>
<p>If only you could make that amazing model faster!</p>
<p><img alt="" src="images/embark/sad.jpg"></p>
<h1>Sometimes you can</h1>
<p>Here at Taboola we did it. Well, not exactly... Let me explain.</p>
<p>One of our models has to predict CTR (Click Through Rate) of an item, or in
other words — the probability the user will like an article recommendation and
click on it.</p>
<p>The model has multiple modalities as input, each goes through a different
transformation. Some of them are:</p>
<ul>
<li>categorical features: these are
<a href="https://engineering.taboola.com/using-word2vec-better-embeddings-categorical-features/">embedded</a>
into a dense representation</li>
<li>image: the pixels are passed through convolutional and fully connected layers</li>
<li>text: after being tokenized, the text is passed through an LSTM which is followed
by <a href="https://arxiv.org/abs/1703.03130">self attention</a></li>
</ul>
<p>These processed modalities are then passed through fully connected layers in
order to learn the interactions between the modalities, and finally, they are
passed through a
<a href="https://engineering.taboola.com/uncertainty-ctr-prediction-one-model-clarify">MDN</a>
layer.</p>
<p>As you can imagine, this model is slow.</p>
<p>We decided to insist on the predictive power of the model, instead of trimming
components, and came up with an engineering solution.</p>
<h1>Cache me if you can</h1>
<p>Let’s focus on the image component. The output of this component is a learned
representation of the image. In other words, given an image, the image component
outputs an embedding.</p>
<p>The model is deterministic, so the same image will always result in the same
embedding. Computing the embedding is costly, so we can cache it. Let me elaborate on how we
implemented it.</p>
<h1>The architecture (of the cache, not the model)</h1>
<p><img alt="Diagram of the embedding cache architecture described in the list below" src="images/embark/architecture.png"></p>
<ul>
<li>We used a <a href="http://cassandra.apache.org/">Cassandra</a> database as the cache which
maps an image URL to its embedding.</li>
<li>The service which queries Cassandra is called EmbArk (Embedding Archive,
<a href="https://techcrunch.com/2017/05/20/the-bizarre-naming-trends-that-modern-startups-follow/">misspelled of
course</a>).
It’s a <a href="https://grpc.io/">gRPC</a> server which gets an image URL from a client and
retrieves the embedding from Cassandra. On cache miss EmbArk sends an async
request to embed that image. Why async? Because we need EmbArk to respond with
the result as fast as it can. Given it can’t wait for the image to be embedded,
it returns a special OOV (Out Of Vocabulary) embedding.</li>
<li>The async mechanism we chose to use is <a href="https://kafka.apache.org/">Kafka</a> — a
streaming platform used as a message queue.</li>
<li>The next link is KFC (Kafka Frontend Client) — a Kafka consumer we implemented
to pass messages synchronously to the embedding service, and save the resulting
embeddings in Cassandra.</li>
<li>The embedding service is called Retina. It gets an image URL from KFC, downloads
it, preprocesses it, and evaluates the convolutional layers to get the final
embedding.</li>
<li>The load balancing of all the components is done using
<a href="https://linkerd.io/">Linkerd</a>.</li>
<li>EmbArk, KFC, Retina and Linkerd run inside <a href="https://www.docker.com/">Docker</a>,
and they are orchestrated by <a href="https://www.nomadproject.io/">Nomad</a>. This allows
us to easily scale each component as we see fit.</li>
</ul>
<p>This architecture was initially used for images. After proving its worth, we
decided to use it for other components as well, such as text.</p>
<p>EmbArk proved to be a nice solution for <a href="https://arxiv.org/abs/1403.6382">transfer
learning</a> too. Let’s say we believe the content
of the image has a good signal for predicting CTR. Thus, a model trained for
classifying the object in an image such as
<a href="https://ai.googleblog.com/2016/03/train-your-own-image-classifier-with.html">Inception</a>
would be valuable for our needs. We can load Inception into Retina, tell the
model we intend to train that we want to use Inception embedding, and that’s it.</p>
<p>Not only was the inference time improved, but so was the training process.
This is possible only when we don’t want to train end to end, since gradients
can’t backpropagate through EmbArk.</p>
<p>So whenever you use a model in production you should use EmbArk, right? Well,
not always...</p>
<p><img alt="" src="images/embark/nope.jpg"></p>
<h1>Caveats</h1>
<p>There are three pretty strict assumptions here.</p>
<h3>1. OOV embedding for new inputs is not a big deal</h3>
<p>It doesn’t hurt us that the first time we see an image we won’t have its
embedding.</p>
<p>In our production system it’s ok, since CTR is evaluated multiple times for the
same item during a short period of time. We create lists of items we want to
recommend every few minutes, so even if an item doesn’t make it into the list
because of a non-optimal CTR prediction, it will in the next cycle.</p>
<h3>2. The rate of new inputs is low</h3>
<p>It’s true that in Taboola we get lots of new items all the time. But relative to
the number of inferences we need to perform for already known items, that is not
much.</p>
<h3>3. Embeddings don’t change frequently</h3>
<p>Since the embeddings are cached, we count on the fact they don’t change over
time. If they do, we’ll need to perform cache invalidation, and recalculate the
embeddings using Retina. If this happened a lot, we would lose the advantage
of the architecture. For cases such as Inception or language modeling, this
assumption holds, since semantics don’t change significantly over time.</p>
<h1>Some final thoughts</h1>
<p>Sometimes using state of the art models can be problematic due to their
computational demands. By caching intermediate results (embeddings) we were able
to overcome this challenge, and still enjoy state of the art results.</p>
<p>This solution isn’t right for everyone, but if the three aforementioned
assumptions hold for your application, you could consider using a similar
architecture.</p>
<p>By using a microservices paradigm, other teams in the company were able to use
EmbArk for needs other than CTR prediction. One team for instance used EmbArk to
get image and text embeddings for detecting duplicates across different items.
But I’ll leave that story for another post...</p>
<hr>
<p><em>Originally published by me at
<a href="https://engineering.taboola.com/engineer-way-slow-models">engineering.taboola.com</a>.</em></p>
</section>
<!-- Share links and tag list for the post. -->
<section class="post-info">
<div class="post-share">
<!-- Spaces in the text parameter are percent-encoded and the parameter separator is written as &amp;, so the href is a valid URL in valid HTML. -->
<a class="twitter" href="https://twitter.com/share?text=How%20to%20Engineer%20Your%20Way%20Out%20of%20Slow%20Models&amp;url=https://anotherdatum.com/embark.html" onclick="window.open(this.href, 'twitter-share', 'width=550,height=235');return false;">
<i class="ic ic-twitter"></i><span class="hidden">Twitter</span>
</a>
<a class="facebook" href="https://www.facebook.com/sharer/sharer.php?u=https://anotherdatum.com/embark.html" onclick="window.open(this.href, 'facebook-share','width=580,height=296');return false;">
<i class="ic ic-facebook"></i><span class="hidden">Facebook</span>
</a>
<div class="clear"></div>
</div>
<aside class="post-tags">
<a href="https://anotherdatum.com/tag/deep-learning.html">deep learning</a><a href="https://anotherdatum.com/tag/architecture.html">architecture</a> </aside>
<div class="clear"></div>
</section>
<!-- Begin MailChimp Signup Form -->
<!-- The stylesheet and validation script use explicit https URLs rather than protocol-relative ones, so they also load when the page is opened from file://. -->
<link href="https://cdn-images.mailchimp.com/embedcode/classic-10_7.css" rel="stylesheet" type="text/css">
<style type="text/css">
#mc_embed_signup{background:#fff; clear:left; font:14px Helvetica,Arial,sans-serif; width:300px;}
#mc_embed_signup form{padding: 0;}
/* Add your own MailChimp form style overrides in your site stylesheet or in this style block.
We recommend moving this block and the preceding CSS link to the HEAD of your HTML file. */
</style>
<div id="mc_embed_signup">
<!-- The query-string separator in the action URL is written as &amp; so the attribute is valid HTML. -->
<form action="https://anotherdatum.us14.list-manage.com/subscribe/post?u=6894d7badcfb253606fa3fb54&amp;id=c6f34ad6b7" method="post" id="mc-embedded-subscribe-form" name="mc-embedded-subscribe-form" class="validate" target="_blank" novalidate>
<div id="mc_embed_signup_scroll">
<h2>Get updated of new posts</h2>
<div class="mc-field-group">
<label for="mce-EMAIL">Email Address </label>
<input type="email" value="" name="EMAIL" class="required email" id="mce-EMAIL">
</div>
<div id="mce-responses" class="clear">
<div class="response" id="mce-error-response" style="display:none"></div>
<div class="response" id="mce-success-response" style="display:none"></div>
</div> <!-- real people should not fill this in and expect good things - do not remove this or risk form bot signups-->
<div style="position: absolute; left: -5000px;" aria-hidden="true"><input type="text" name="b_6894d7badcfb253606fa3fb54_c6f34ad6b7" tabindex="-1" value=""></div>
<div class="clear"><input type="submit" value="Subscribe" name="subscribe" id="mc-embedded-subscribe" class="button"></div>
</div>
</form>
</div>
<script type='text/javascript' src='https://s3.amazonaws.com/downloads.mailchimp.com/js/mc-validate.js'></script>
<script type='text/javascript'>(function($) {window.fnames = new Array(); window.ftypes = new Array();fnames[0]='EMAIL';ftypes[0]='email';fnames[1]='FNAME';ftypes[1]='text';fnames[2]='LNAME';ftypes[2]='text';}(jQuery));var $mcj = jQuery.noConflict(true);</script>
<!--End mc_embed_signup-->
<hr />
<aside class="post-nav">
<a class="post-nav-next" href="https://anotherdatum.com/vae2.html">
<section class="post-nav-teaser">
<i class="ic ic-arrow-left"></i>
<h2 class="post-nav-title">Variational Autoencoders Explained in Detail</h2>
<p class="post-nav-excerpt">Learn all the details needed to implement a variational autoencoder, code included.</p>
</section>
</a>
<a class="post-nav-prev" href="https://anotherdatum.com/taboola-hackathon-2018.html">
<section class="post-nav-teaser">
<i class="ic ic-arrow-right"></i>
<h2 class="post-nav-title">Zooming Past the Competition</h2>
<p class="post-nav-excerpt">How to create an Augmented Reality app that allows a user to get content recommendations.</p>
</section>
</a>
<div class="clear"></div>
</aside>
<!-- Comment section: an empty #disqus_thread container plus the script that injects the Disqus embed. -->
<div class="comments">
<h2>Comments !</h2>
<div id="disqus_thread"></div>
<script type="text/javascript">
// Globals set before embed.js is injected below.
var disqus_shortname = 'anotherdatum';
var disqus_identifier = 'embark.html';
var disqus_url = 'https://anotherdatum.com/embark.html';
(function() {
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
// Explicit https instead of a protocol-relative URL, so the embed also loads when the page is opened from file://.
dsq.src = 'https://anotherdatum.disqus.com/embed.js';
// Append to <head> when present, otherwise to <body>.
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
})();
</script>
<noscript>Please enable JavaScript to view the comments.</noscript>
</div>
</div>
</article>
</main>
<!-- TODO : Body class -->
<div id="body-class" style="display: none;" class=""></div>
<footer id="footer">
<div class="social">
<a href="https://il.linkedin.com/in/yoelzeldes">
<span class="fa-stack fa-lg">
<i class="fa fa-circle fa-stack-2x"></i>
<i class="fa fa-linkedin fa-stack-1x fa-inverse"></i>
</span>
</a>
<a href="https://github.com/yoel-zeldes">
<span class="fa-stack fa-lg">
<i class="fa fa-circle fa-stack-2x"></i>
<i class="fa fa-github fa-stack-1x fa-inverse"></i>
</span>
</a>
<a href="https://www.facebook.com/yoel.zeldes">
<span class="fa-stack fa-lg">
<i class="fa fa-circle fa-stack-2x"></i>
<i class="fa fa-facebook fa-stack-1x fa-inverse"></i>
</span>
</a>
<a href="https://twitter.com/YZeldes">
<span class="fa-stack fa-lg">
<i class="fa fa-circle fa-stack-2x"></i>
<i class="fa fa-twitter fa-stack-1x fa-inverse"></i>
</span>
</a>
</div>
<div class="inner">
<section class="credits">
<span class="credits-theme">Have a look at <a href="https://github.com/yoel-zeldes/yoel-zeldes.github.io/tree/source">the source code</a> of this blog.</span>
</section>
</div>
</footer>
</section>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js"></script>
<script type="text/javascript" src="https://anotherdatum.com/theme/js/script.js"></script>
<!-- Global Site Tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-83684090-1"></script>
<script>
// Reuse window.dataLayer if it already exists; otherwise start from an empty array.
window.dataLayer = window.dataLayer || [];
// Pushes this call's `arguments` object onto dataLayer as a single entry.
function gtag(){dataLayer.push(arguments);}
// Queue a 'js' entry carrying the current Date.
gtag('js', new Date());
// Queue a 'config' entry for ID UA-83684090-1 with anonymize_ip set to true.
gtag('config', 'UA-83684090-1', { 'anonymize_ip': true });
</script>
<script type="text/javascript">
// Injects the count.js script from the Disqus subdomain named by disqus_shortname.
var disqus_shortname = 'anotherdatum';
(function () {
var s = document.createElement('script'); s.async = true;
s.type = 'text/javascript';
// Explicit https instead of a protocol-relative URL, so the script also loads when the page is opened from file://.
s.src = 'https://' + disqus_shortname + '.disqus.com/count.js';
// Append to <head> when present, otherwise to <body>.
(document.getElementsByTagName('HEAD')[0] || document.getElementsByTagName('BODY')[0]).appendChild(s);
}());
</script>
</body>
</html>