-
Notifications
You must be signed in to change notification settings - Fork 54
/
Copy pathBroadcasting.html
892 lines (825 loc) · 111 KB
/
Broadcasting.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8" /><meta content="Topic: Numpy array broadcasting, Difficulty: Medium, Category: Section" name="description" />
<meta content="broadcasting, vectorization, rules, mismatched shapes, distances" name="keywords" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Array Broadcasting — Python Like You Mean It</title>
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="../_static/my_theme.css" type="text/css" />
<!--[if lt IE 9]>
<script src="../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
<script src="../_static/jquery.js"></script>
<script src="../_static/underscore.js"></script>
<script src="../_static/doctools.js"></script>
<script async="async" src="https://www.googletagmanager.com/gtag/js?id=UA-115029372-1"></script>
<script src="../_static/gtag.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script>window.MathJax = {"tex": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true}, "options": {"ignoreHtmlClass": "tex2jax_ignore|mathjax_ignore|document", "processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
<script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
<script src="../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="Introducing Basic and Advanced Indexing" href="BasicIndexing.html" />
<link rel="prev" title="“Vectorized” Operations: Optimized Computations on NumPy Arrays" href="VectorizedOperations.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../index.html" class="icon icon-home"> Python Like You Mean It
</a>
<div class="version">
1.4
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Table of Contents:</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../intro.html">Python Like You Mean It</a></li>
<li class="toctree-l1"><a class="reference internal" href="../module_1.html">Module 1: Getting Started with Python</a></li>
<li class="toctree-l1"><a class="reference internal" href="../module_2.html">Module 2: The Essentials of Python</a></li>
<li class="toctree-l1"><a class="reference internal" href="../module_2_problems.html">Module 2: Problems</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="../module_3.html">Module 3: The Essentials of NumPy</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="IntroducingTheNDarray.html">Introducing the ND-array</a></li>
<li class="toctree-l2"><a class="reference internal" href="AccessingDataAlongMultipleDimensions.html">Accessing Data Along Multiple Dimensions in an Array</a></li>
<li class="toctree-l2"><a class="reference internal" href="BasicArrayAttributes.html">Basic Array Attributes</a></li>
<li class="toctree-l2"><a class="reference internal" href="FunctionsForCreatingNumpyArrays.html">Functions for Creating NumPy Arrays</a></li>
<li class="toctree-l2"><a class="reference internal" href="ArrayTraversal.html">Iterating Over Arrays & Array-Traversal Order</a></li>
<li class="toctree-l2"><a class="reference internal" href="VectorizedOperations.html">“Vectorized” Operations: Optimized Computations on NumPy Arrays</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">Array Broadcasting</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#Rules-of-Broadcasting">Rules of Broadcasting</a></li>
<li class="toctree-l3"><a class="reference internal" href="#A-Simple-Application-of-Array-Broadcasting">A Simple Application of Array Broadcasting</a></li>
<li class="toctree-l3"><a class="reference internal" href="#Size-1-Axes-&-The-newaxis-Object">Size-1 Axes & The <code class="docutils literal notranslate"><span class="pre">newaxis</span></code> Object</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#Inserting-Size-1-Dimensions-into-An-Array">Inserting Size-1 Dimensions into An Array</a></li>
<li class="toctree-l4"><a class="reference internal" href="#Utilizing-Size-1-Dimensions-for-Broadcasting">Utilizing Size-1 Dimensions for Broadcasting</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#An-Advanced-Application-of-Broadcasting:-Pairwise-Distances">An Advanced Application of Broadcasting: Pairwise Distances</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#Pairwise-Distances-Using-For-Loops">Pairwise Distances Using For-Loops</a></li>
<li class="toctree-l4"><a class="reference internal" href="#Pairwise-Distances-Using-Broadcasting-(Unoptimized)">Pairwise Distances Using Broadcasting (Unoptimized)</a></li>
<li class="toctree-l4"><a class="reference internal" href="#Optimized-Pairwise-Distances">Optimized Pairwise Distances</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#Links-to-Official-Documentation">Links to Official Documentation</a></li>
<li class="toctree-l3"><a class="reference internal" href="#Reading-Comprehension-Solutions">Reading Comprehension Solutions</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="BasicIndexing.html">Introducing Basic and Advanced Indexing</a></li>
<li class="toctree-l2"><a class="reference internal" href="AdvancedIndexing.html">Advanced Indexing</a></li>
<li class="toctree-l2"><a class="reference internal" href="AutoDiff.html">Automatic Differentiation</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../module_3_problems.html">Module 3: Problems</a></li>
<li class="toctree-l1"><a class="reference internal" href="../module_4.html">Module 4: Object Oriented Programming</a></li>
<li class="toctree-l1"><a class="reference internal" href="../module_5.html">Module 5: Odds and Ends</a></li>
<li class="toctree-l1"><a class="reference internal" href="../changes.html">Changelog</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../index.html">Python Like You Mean It</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../index.html" class="icon icon-home"></a> »</li>
<li><a href="../module_3.html">Module 3: The Essentials of NumPy</a> »</li>
<li>Array Broadcasting</li>
<li class="wy-breadcrumbs-aside">
<a href="../_sources/Module3_IntroducingNumpy/Broadcasting.md.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<style>
/* CSS overrides for sphinx_rtd_theme */
/* 24px margin */
.nbinput.nblast.container,
.nboutput.nblast.container {
margin-bottom: 19px; /* padding has already 5px */
}
/* ... except between code cells! */
.nblast.container + .nbinput.container {
margin-top: -19px;
}
.admonition > p:before {
margin-right: 4px; /* make room for the exclamation icon */
}
/* Fix math alignment, see https://github.com/rtfd/sphinx_rtd_theme/pull/686 */
.math {
text-align: unset;
}
</style>
<div class="section" id="Array-Broadcasting">
<h1>Array Broadcasting<a class="headerlink" href="#Array-Broadcasting" title="Permalink to this headline"></a></h1>
<p>NumPy provides a mechanism for performing mathematical operations on arrays of <em>unequal</em> shapes:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="go"># a shape-(3, 4) array</span>
<span class="gp">>>> </span><span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="o">-</span><span class="mf">0.</span> <span class="p">,</span> <span class="o">-</span><span class="mf">0.1</span><span class="p">,</span> <span class="o">-</span><span class="mf">0.2</span><span class="p">,</span> <span class="o">-</span><span class="mf">0.3</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="o">-</span><span class="mf">0.4</span><span class="p">,</span> <span class="o">-</span><span class="mf">0.5</span><span class="p">,</span> <span class="o">-</span><span class="mf">0.6</span><span class="p">,</span> <span class="o">-</span><span class="mf">0.7</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="o">-</span><span class="mf">0.8</span><span class="p">,</span> <span class="o">-</span><span class="mf">0.9</span><span class="p">,</span> <span class="o">-</span><span class="mf">1.</span> <span class="p">,</span> <span class="o">-</span><span class="mf">1.1</span><span class="p">]])</span>
<span class="go"># a shape-(4,) array</span>
<span class="gp">>>> </span><span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">])</span>
<span class="go"># multiplying a shape-(4,) array with a shape-(3, 4) array</span>
<span class="go"># `y` is multiplied by each row of `x`</span>
<span class="gp">>>> </span><span class="n">x</span> <span class="o">*</span> <span class="n">y</span>
<span class="go">array([[-0. , -0.2, -0.6, -1.2],</span>
<span class="go"> [-0.4, -1. , -1.8, -2.8],</span>
<span class="go"> [-0.8, -1.8, -3. , -4.4]])</span>
</pre></div>
</div>
<p>In effect, NumPy treated <code class="docutils literal notranslate"><span class="pre">y</span></code> as if its contents had been broadcasted along a new dimension, such that <code class="docutils literal notranslate"><span class="pre">y</span></code> was a shape-(3, 4) 2D array, which makes it compatible for multiplying with <code class="docutils literal notranslate"><span class="pre">x</span></code>:</p>
<div class="math notranslate nohighlight">
\begin{equation}
\left( \begin{array}{*{3}{X}}
-0.0 & -0.1 & -0.2 & -0.3 \\
-0.4 & -0.5 & -0.6 & -0.7 \\
-0.8 & -0.9 & -1.0 & -1.1
\end{array} \right)
%
\cdot \left( \begin{array}{*{3}{X}}
1 & 2 & 3 & 4
\end{array}\right)
%
\rightarrow \left( \begin{array}{*{3}{X}}
-0.0 & -0.1 & -0.2 & -0.3 \\
-0.4 & -0.5 & -0.6 & -0.7 \\
-0.8 & -0.9 & -1.0 & -1.1
\end{array} \right)
%
\cdot \left( \begin{array}{*{3}{X}}
1 & 2 & 3 & 4 \\
1 & 2 & 3 & 4 \\
1 & 2 & 3 & 4
\end{array}\right)
\
\end{equation}</div><p>It is important to note that NumPy doesn’t really create this broadcasted version of <code class="docutils literal notranslate"><span class="pre">y</span></code> behind the scenes; it is able to do the necessary computations without having to redundantly copy its contents into a shape-(3,4) array. Doing so would be a waste of memory and computation. That being said, this replication process conveys exactly the mathematics of broadcast operations between arrays; thus the preceding diagram reflects how you should always envision broadcasting.</p>
<p>Broadcasting is not reserved for operations between 1-D and 2-D arrays, and furthermore both arrays in an operation may undergo broadcasting. That being said, not all pairs of arrays are broadcast-compatible.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Broadcast multiplications between a</span>
<span class="c1"># shape-(3, 1, 2) array and a shape-(3, 1)</span>
<span class="c1"># array.</span>
<span class="o">>>></span> <span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">]],</span>
<span class="o">...</span>
<span class="o">...</span> <span class="p">[[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">]],</span>
<span class="o">...</span>
<span class="o">...</span> <span class="p">[[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">]]])</span>
<span class="o">>>></span> <span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span> <span class="mi">0</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span> <span class="mi">1</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]])</span>
<span class="c1"># shape-(3, 1, 2) broadcast-multiply with</span>
<span class="c1"># shape-(3, 1) produces shape-(3, 3, 2)</span>
<span class="o">>>></span> <span class="n">x</span> <span class="o">*</span> <span class="n">y</span>
<span class="n">array</span><span class="p">([[[</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span>
<span class="p">[</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span>
<span class="p">[</span> <span class="mi">0</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">]],</span>
<span class="p">[[</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span>
<span class="p">[</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span>
<span class="p">[</span><span class="o">-</span><span class="mi">2</span><span class="p">,</span> <span class="o">-</span><span class="mi">3</span><span class="p">]],</span>
<span class="p">[[</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span>
<span class="p">[</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">],</span>
<span class="p">[</span><span class="o">-</span><span class="mi">4</span><span class="p">,</span> <span class="o">-</span><span class="mi">5</span><span class="p">]]])</span>
<span class="c1"># an example of broadcast-incompatible arrays</span>
<span class="c1"># a shape-(2,) array with a shape-(3,) array</span>
<span class="o">>>></span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">])</span> <span class="o">*</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">])</span>
<span class="ne">ValueError</span><span class="p">:</span> <span class="n">operands</span> <span class="n">could</span> <span class="ow">not</span> <span class="n">be</span> <span class="n">broadcast</span> <span class="n">together</span> <span class="k">with</span> <span class="n">shapes</span> <span class="p">(</span><span class="mi">2</span><span class="p">,)</span> <span class="p">(</span><span class="mi">3</span><span class="p">,)</span>
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title fa fa-exclamation-circle"><strong>Definition: Array Broadcasting</strong></p>
<p>Array Broadcasting is a mechanism used by NumPy to permit vectorized mathematical operations between arrays of unequal, but compatible shapes. Specifically, an array will be treated as if its contents have been replicated along the appropriate dimensions, such that the shape of this new, higher-dimensional array suits the mathematical operation being performed.</p>
</div>
<p>We will now summarize the rules that determine if two arrays are broadcast-compatible with one another, and what the shape of the resulting array will be after the mathematical operation between the two arrays is performed.</p>
<div class="section" id="Rules-of-Broadcasting">
<h2>Rules of Broadcasting<a class="headerlink" href="#Rules-of-Broadcasting" title="Permalink to this headline"></a></h2>
<p>Array broadcasting cannot accommodate arbitrary combinations of array shapes. For example, a (7,5)-shape array is incompatible with a shape-(11,3) array. Trying to add two such arrays would produce a <code class="docutils literal notranslate"><span class="pre">ValueError</span></code>. The following rules determine if two arrays are broadcast-compatible:</p>
<div class="admonition warning">
<p class="admonition-title fa fa-exclamation-circle"><strong>Definition: Rules of Broadcasting</strong>:</p>
<p>To determine if two arrays are broadcast-compatible, align the entries of their shapes such that their trailing dimensions are aligned, and then check that each pair of aligned dimensions satisfy either of the following conditions:</p>
<ul class="simple">
<li><p>the aligned dimensions have the same size</p></li>
<li><p>one of the dimensions has a size of 1</p></li>
</ul>
<p>The two arrays are broadcast-compatible if either of these conditions are satisfied for each pair of aligned dimensions.</p>
</div>
<p>Note that it is okay to have one array with a higher-dimensionality and thus to have “dangling” leading dimensions. Any size-1 dimension or “missing” dimension will be filled-in by broadcasting the content of that array.</p>
<p>Considering the example from the preceding subsection, let’s see that the shape-(4,3) and shape-(3,) arrays satisfy these rules for broadcast-compatibility:</p>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span> array-1: 4 x 3
array-2: 3
result-shape: 4 x 3
</pre></div>
</div>
<p>Let’s look an assortment of pairs of array-shapes and see whether or not they are broadcast-compatible:</p>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span> array-1: 8
array-2: 5 x 2 x 8
result-shape: 5 x 2 x 8
array-1: 5 x 2
array-2: 5 x 4 x 2
result-shape: INCOMPATIBLE
array-1: 4 x 2
array-2: 5 x 4 x 2
result-shape: 5 x 4 x 2
array-1: 8 x 1 x 3
array-2: 8 x 5 x 3
result-shape: 8 x 5 x 3
array-1: 5 x 1 x 3 x 2
array-2: 9 x 1 x 2
result-shape: 5 x 9 x 3 x 2
array-1: 1 x 3 x 2
array-2: 8 x 2
result-shape: INCOMPATIBLE
array-1: 2 x 1
array-2: 1
result-shape: 2 x 1
</pre></div>
</div>
<p>NumPy provides the function <a class="reference external" href="https://numpy.org/doc/stable/reference/generated/numpy.broadcast_to.html#numpy.broadcast_to">broadcast_to</a>, which can be used to broadcast an array to a specified shape. This can help us build our intuition for broadcasting. Let’s broadcast a shape-(3,4) array to a shape-(2,3,4) array:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Demonstrating `np.broadcast_to`</span>
<span class="o">>>></span> <span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">11</span><span class="p">]])</span>
<span class="c1"># Explicitly broadcast a shape-(3,4) array</span>
<span class="c1"># to a shape-(2,3,4) array.</span>
<span class="o">>>></span> <span class="n">np</span><span class="o">.</span><span class="n">broadcast_to</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">))</span>
<span class="n">array</span><span class="p">([[[</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span>
<span class="p">[</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">],</span>
<span class="p">[</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">11</span><span class="p">]],</span>
<span class="p">[[</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span>
<span class="p">[</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">],</span>
<span class="p">[</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">11</span><span class="p">]]])</span>
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title fa fa-exclamation-circle"><strong>Reading Comprehension: Broadcast Compatibility</strong></p>
<p>Given the following pairs of array-shapes, determine what the resulting broadcasted shapes will be. Indicate if a pair is broadcast-incompatible.</p>
<ol class="arabic simple">
<li><p><code class="docutils literal notranslate"><span class="pre">7</span> <span class="pre">x</span> <span class="pre">2</span></code> with <code class="docutils literal notranslate"><span class="pre">7</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">4</span></code> with <code class="docutils literal notranslate"><span class="pre">3</span> <span class="pre">x</span> <span class="pre">4</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">1</span> <span class="pre">x</span> <span class="pre">3</span> <span class="pre">x</span> <span class="pre">1</span></code> with <code class="docutils literal notranslate"><span class="pre">8</span> <span class="pre">x</span> <span class="pre">1</span> <span class="pre">x</span> <span class="pre">1</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">9</span> <span class="pre">x</span> <span class="pre">2</span> <span class="pre">x</span> <span class="pre">5</span></code> with <code class="docutils literal notranslate"><span class="pre">2</span> <span class="pre">x</span> <span class="pre">5</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">3</span></code> with <code class="docutils literal notranslate"><span class="pre">3</span> <span class="pre">x</span> <span class="pre">3</span> <span class="pre">x</span> <span class="pre">2</span></code></p></li>
</ol>
</div>
</div>
<div class="section" id="A-Simple-Application-of-Array-Broadcasting">
<h2>A Simple Application of Array Broadcasting<a class="headerlink" href="#A-Simple-Application-of-Array-Broadcasting" title="Permalink to this headline"></a></h2>
<p>Here we provide a simple real-world example where broadcasting is useful. Suppose you have a grade book for 6 students, each of whom have taken 3 exams; naturally, you store these scores in a shape-(6,3) array:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># grades for 6 students who have taken 3 exams</span>
<span class="c1"># axis-0 (rows): student</span>
<span class="c1"># axis-1 (columns): exams</span>
<span class="o">>>></span> <span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="o">>>></span> <span class="n">grades</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span> <span class="mf">0.79</span><span class="p">,</span> <span class="mf">0.84</span><span class="p">,</span> <span class="mf">0.84</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span> <span class="mf">0.87</span><span class="p">,</span> <span class="mf">0.93</span><span class="p">,</span> <span class="mf">0.78</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span> <span class="mf">0.77</span><span class="p">,</span> <span class="mf">1.00</span><span class="p">,</span> <span class="mf">0.87</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span> <span class="mf">0.66</span><span class="p">,</span> <span class="mf">0.75</span><span class="p">,</span> <span class="mf">0.82</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span> <span class="mf">0.84</span><span class="p">,</span> <span class="mf">0.89</span><span class="p">,</span> <span class="mf">0.76</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span> <span class="mf">0.83</span><span class="p">,</span> <span class="mf">0.71</span><span class="p">,</span> <span class="mf">0.85</span><span class="p">]])</span>
</pre></div>
</div>
<p>We might be interested to see how each of these scores compare to the mean score for that specific exam. Based on our discussion from the last section, we can easily compute the mean-score for each exam (rounded to 2 decimal places):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># compute the mean score for each exam (rounded to 2 decimal places)</span>
<span class="o">>>></span> <span class="n">mean_exam_scores</span> <span class="o">=</span> <span class="n">grades</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="o">>>></span> <span class="n">mean_exam_scores</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="n">mean_exam_scores</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span>
<span class="o">>>></span> <span class="n">mean_exam_scores</span>
<span class="n">array</span><span class="p">([</span> <span class="mf">0.79</span><span class="p">,</span> <span class="mf">0.85</span><span class="p">,</span> <span class="mf">0.82</span><span class="p">])</span>
</pre></div>
</div>
<p><code class="docutils literal notranslate"><span class="pre">grades</span></code> is a shape-(6,3) array and <code class="docutils literal notranslate"><span class="pre">mean_exam_scores</span></code> is a shape-(3,) array, and we want to compute the offset of each exam score from its respective mean. At first glance, it seems like we will have to loop over each row of our <code class="docutils literal notranslate"><span class="pre">grades</span></code> array and subtract from it the <code class="docutils literal notranslate"><span class="pre">mean_exam_scores</span></code>, to compute the offset of each exam score from the respective mean-score:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Using a for-loop to compute score offsets.</span>
<span class="c1"># Shape-(6,3) array that will store (score - mean) for each</span>
<span class="c1"># exam score.</span>
<span class="n">score_offset</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros_like</span><span class="p">(</span><span class="n">grades</span><span class="p">)</span>
<span class="c1"># iterates over each row of `grades`</span>
<span class="k">for</span> <span class="n">n</span><span class="p">,</span> <span class="n">scores_per_student</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">grades</span><span class="p">):</span>
<span class="c1"># `scores_per_student` is a shape-(3,) array of exam scores</span>
<span class="c1"># for a given student. This matches the shape of</span>
<span class="c1"># `mean_exam_scores`, thus we can perform this subtraction</span>
<span class="n">score_offset</span><span class="p">[</span><span class="n">n</span><span class="p">]</span> <span class="o">=</span> <span class="n">scores_per_student</span> <span class="o">-</span> <span class="n">mean_exam_scores</span>
</pre></div>
</div>
<p>Given our discussion of vectorized operations from the last section, you should recoil at the sight of a for-loop in code that is performing array-arithmetic. We might as well get out our abacuses and spreadsheets at this point. Fortunately, we can make use of broadcasting to compute these offsets in a concise, vectorized way:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Using broadcasting to subtract a shape-(3,) array</span>
<span class="c1"># from a shape-(6,3) array.</span>
<span class="o">>>></span> <span class="n">score_offset</span> <span class="o">=</span> <span class="n">grades</span> <span class="o">-</span> <span class="n">mean_exam_scores</span>
<span class="o">>>></span> <span class="n">score_offset</span>
<span class="n">array</span><span class="p">([[</span> <span class="mf">0.</span> <span class="p">,</span> <span class="o">-</span><span class="mf">0.01</span><span class="p">,</span> <span class="mf">0.02</span><span class="p">],</span>
<span class="p">[</span> <span class="mf">0.08</span><span class="p">,</span> <span class="mf">0.08</span><span class="p">,</span> <span class="o">-</span><span class="mf">0.04</span><span class="p">],</span>
<span class="p">[</span><span class="o">-</span><span class="mf">0.02</span><span class="p">,</span> <span class="mf">0.15</span><span class="p">,</span> <span class="mf">0.05</span><span class="p">],</span>
<span class="p">[</span><span class="o">-</span><span class="mf">0.13</span><span class="p">,</span> <span class="o">-</span><span class="mf">0.1</span> <span class="p">,</span> <span class="mf">0.</span> <span class="p">],</span>
<span class="p">[</span> <span class="mf">0.05</span><span class="p">,</span> <span class="mf">0.04</span><span class="p">,</span> <span class="o">-</span><span class="mf">0.06</span><span class="p">],</span>
<span class="p">[</span> <span class="mf">0.04</span><span class="p">,</span> <span class="o">-</span><span class="mf">0.14</span><span class="p">,</span> <span class="mf">0.03</span><span class="p">]])</span>
</pre></div>
</div>
<p>According to the broadcasting rules detailed above, when you invoke <code class="docutils literal notranslate"><span class="pre">grades</span> <span class="pre">-</span> <span class="pre">mean_exam_scores</span></code>, NumPy will recognize that <code class="docutils literal notranslate"><span class="pre">mean_exam_scores</span></code> has the same shape as each row of <code class="docutils literal notranslate"><span class="pre">grades</span></code> and thus it will apply the subtraction operation on <em>each</em> row of <code class="docutils literal notranslate"><span class="pre">grades</span></code> with <code class="docutils literal notranslate"><span class="pre">mean_exam_scores</span></code>. In effect, the content of <code class="docutils literal notranslate"><span class="pre">mean_exam_scores</span></code> has been <em>broadcasted</em> to fill a shape-(6,3) array, so that the element-wise subtraction can be performed. Again, we emphasize that NumPy doesn’t actually
unnecessarily replicate the data of <code class="docutils literal notranslate"><span class="pre">mean_exam_scores</span></code>, and that this model of broadcasting merely conveys the mathematical process that is transpiring.</p>
<div class="admonition note">
<p class="admonition-title fa fa-exclamation-circle"><strong>Reading Comprehension: Basic Broadcasting</strong></p>
<p>Generate a random array of 10,000 2D points using <code class="docutils literal notranslate"><span class="pre">np.random.rand</span></code>. Compute the “center of mass” of these points, which is simply the average x-coordinate and the average y-coordinate of these 10,000 points. Then, use broadcasting to compute the shape-(10000,2) array that stores the position of the points <em>relative</em> to the center of mass. For example, if the center of mass is <span class="math notranslate nohighlight">\((0.5, 1)\)</span>, and the absolute position of a point is <span class="math notranslate nohighlight">\((2, 3)\)</span>, then the position of that point <em>relative</em> to
the center of mass is simply <span class="math notranslate nohighlight">\((2, 3) - (0.5, 1) = (1.5, 2)\)</span></p>
</div>
</div>
<div class="section" id="Size-1-Axes-&-The-newaxis-Object">
<h2>Size-1 Axes & The <code class="docutils literal notranslate"><span class="pre">newaxis</span></code> Object<a class="headerlink" href="#Size-1-Axes-&-The-newaxis-Object" title="Permalink to this headline"></a></h2>
<div class="section" id="Inserting-Size-1-Dimensions-into-An-Array">
<h3>Inserting Size-1 Dimensions into An Array<a class="headerlink" href="#Inserting-Size-1-Dimensions-into-An-Array" title="Permalink to this headline"></a></h3>
<p>As conveyed by the broadcasting rules, dimensions of size-1 are special in that they can be broadcasted to any size. Here we will learn about introducing size-1 dimensions into an array, for the purpose of tailoring its shape for broadcasting.</p>
<p>You can introduce size-1 dimensions to an array without changing the overall size (i.e. total number of entries in an array). Thus we are free to add size-1 dimensions to an array via the <code class="docutils literal notranslate"><span class="pre">reshape</span></code> function. Let’s reshape a shape-(3,) array into a shape-(1, 3, 1, 1) array:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="go"># Reshaping an array to introduce size-1 dimensions.</span>
<span class="go"># The size of the array is 3, regardless of introducing</span>
<span class="go"># these extra size-1 dimensions.</span>
<span class="gp">>>> </span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
<span class="go">array([[[[1]],</span>
<span class="go"> [[2]],</span>
<span class="go"> [[3]]]])</span>
</pre></div>
</div>
<p>Thus the 1-D array with three entries has been reshaped to possess 4-dimensions: “one stack of three sheets, each containing a single row and column”. There is another way to introduce size-1 dimensions. NumPy provides the <code class="docutils literal notranslate"><span class="pre">newaxis</span></code> object for this purpose. Let’s immediately demonstrate how <code class="docutils literal notranslate"><span class="pre">np.newaxis</span></code> can be used:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># demonstrating the usage of the `numpy.newaxis` object</span>
<span class="o">>>></span> <span class="n">x</span><span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span>
<span class="o">>>></span> <span class="n">y</span><span class="o">=</span> <span class="n">x</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">newaxis</span><span class="p">,</span> <span class="p">:,</span> <span class="n">np</span><span class="o">.</span><span class="n">newaxis</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">newaxis</span><span class="p">]</span>
<span class="o">>>></span> <span class="n">y</span>
<span class="n">array</span><span class="p">([[[[</span><span class="mi">1</span><span class="p">]],</span>
<span class="p">[[</span><span class="mi">2</span><span class="p">]],</span>
<span class="p">[[</span><span class="mi">3</span><span class="p">]]]])</span>
<span class="o">>>></span> <span class="n">y</span><span class="o">.</span><span class="n">shape</span>
<span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
</pre></div>
</div>
<p>Indexing <code class="docutils literal notranslate"><span class="pre">x</span></code> as <code class="docutils literal notranslate"><span class="pre">x[np.newaxis,</span> <span class="pre">:,</span> <span class="pre">np.newaxis,</span> <span class="pre">np.newaxis]</span></code> returns a “view” of <code class="docutils literal notranslate"><span class="pre">x</span></code> as a 4D array with size-1 dimensions inserted as axes 0, 2, and 3. The resulting array is not a copy of <code class="docutils literal notranslate"><span class="pre">x</span></code>; it points to the exact same data as <code class="docutils literal notranslate"><span class="pre">x</span></code>, but merely with a different indexing layout. This is no different than what we achieved via reshaping: <code class="docutils literal notranslate"><span class="pre">x.reshape(1,</span> <span class="pre">3,</span> <span class="pre">1,</span> <span class="pre">1)</span></code>.</p>
</div>
<div class="section" id="Utilizing-Size-1-Dimensions-for-Broadcasting">
<h3>Utilizing Size-1 Dimensions for Broadcasting<a class="headerlink" href="#Utilizing-Size-1-Dimensions-for-Broadcasting" title="Permalink to this headline"></a></h3>
<p>Moving on to a more pressing matter: why would we ever want to introduce these spurious dimensions into an array? Let’s take an example to demonstrate the utility of size-1 dimensions.</p>
<p>Suppose that we want to multiply all possible pairs of entries between two arrays: <code class="docutils literal notranslate"><span class="pre">array([1,</span> <span class="pre">2,</span> <span class="pre">3])</span></code> with <code class="docutils literal notranslate"><span class="pre">array([4,</span> <span class="pre">5,</span> <span class="pre">6,</span> <span class="pre">7])</span></code>. That is, we want to perform twelve multiplications, and have access to each result. At first glance, combining a shape-(3,) array with a shape-(4,) array seems inadmissible for broadcasting; we seem to be doomed to perform nested for-loops like a bunch of cavemen and cavewomen. Fortunately, we can make clever use of size-1 dimensions so that we can perform this
computation in a vectorized way.</p>
<p>Let’s introduce size-1 dimensions into <code class="docutils literal notranslate"><span class="pre">x</span></code>:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Inserting size-1 dimensions into `x` and `y` in</span>
<span class="c1"># preparation of broadcasting.</span>
<span class="o">>>></span> <span class="n">x_1d</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span>
<span class="o">>>></span> <span class="n">x</span> <span class="o">=</span> <span class="n">x_1d</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
<span class="o">>>></span> <span class="n">x</span>
<span class="n">array</span><span class="p">([[</span><span class="mi">1</span><span class="p">],</span>
<span class="p">[</span><span class="mi">2</span><span class="p">],</span>
<span class="p">[</span><span class="mi">3</span><span class="p">]])</span>
<span class="o">>>></span> <span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">])</span>
</pre></div>
</div>
<p><code class="docutils literal notranslate"><span class="pre">x</span></code> is now a shape-(3, 1) array and <code class="docutils literal notranslate"><span class="pre">y</span></code> is a shape-(4,) array. According to the broadcasting rules, these arrays are broadcast-compatible and will multiply to produce a shape-(3, 4) array. Let’s see that multiplying these two arrays will exactly produce the twelve numbers that we are after:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># broadcast-multiplying `x` and `y`</span>
<span class="o">>>></span> <span class="n">x</span> <span class="o">*</span> <span class="n">y</span>
<span class="n">array</span><span class="p">([[</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">],</span>
<span class="p">[</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">12</span><span class="p">,</span> <span class="mi">14</span><span class="p">],</span>
<span class="p">[</span><span class="mi">12</span><span class="p">,</span> <span class="mi">15</span><span class="p">,</span> <span class="mi">18</span><span class="p">,</span> <span class="mi">21</span><span class="p">]])</span>
</pre></div>
</div>
<div class="math notranslate nohighlight">
\begin{equation}
\left(
\begin{array}{*{1}{X}}
1 \\
2 \\
3
\end{array} \right)
%
\cdot \left( \begin{array}{*{4}{X}}
4 & 5 & 6 & 7
\end{array}\right)
%
\rightarrow \left( \begin{array}{*{4}{X}}
1 & 1 & 1 & 1 \\
2 & 2 & 2 & 2 \\
3 & 3 & 3 & 3
\end{array}\right)
%
\cdot \left( \begin{array}{*{4}{X}}
4 & 5 & 6 & 7 \\
4 & 5 & 6 & 7 \\
4 & 5 & 6 & 7
\end{array}\right)
%
= \left( \begin{array}{*{4}{X}}
1\cdot4 & 1\cdot5 & 1\cdot6 & 1\cdot7 \\
2\cdot4 & 2\cdot5 & 2\cdot6 & 2\cdot7 \\
3\cdot4 & 3\cdot5 & 3\cdot6 & 3\cdot7
\end{array}\right)
\
\end{equation}</div><p>See that entry <code class="docutils literal notranslate"><span class="pre">(i,</span> <span class="pre">j)</span></code> of the resulting array corresponds to <code class="docutils literal notranslate"><span class="pre">x_1d[i]</span> <span class="pre">*</span> <span class="pre">y[j]</span></code>.</p>
<p>Through the use of simple reshaping, shrewdly inserting size-1 dimensions allowed us to coerce NumPy into performing exactly the combination-multiplication that we desired. Furthermore, a keen understanding of what broadcasting is provides us with a clear interpretation of the structure of the result of this calculation. That is, if I reshape <code class="docutils literal notranslate"><span class="pre">x</span></code> to be a shape-<span class="math notranslate nohighlight">\((M, 1)\)</span> array, and <code class="docutils literal notranslate"><span class="pre">y</span></code> is a shape-<span class="math notranslate nohighlight">\((N,)\)</span> array, then (according to broadcasting rules) <code class="docutils literal notranslate"><span class="pre">x</span> <span class="pre">*</span> <span class="pre">y</span></code> would produce a
shape-<span class="math notranslate nohighlight">\((M, N)\)</span> array storing the product of each of <code class="docutils literal notranslate"><span class="pre">x</span></code>’s <span class="math notranslate nohighlight">\(M\)</span> numbers with each of <code class="docutils literal notranslate"><span class="pre">y</span></code>’s <span class="math notranslate nohighlight">\(N\)</span> numbers.</p>
<div class="admonition note">
<p class="admonition-title fa fa-exclamation-circle"><strong>Reading Comprehension: Basic Broadcasting II</strong></p>
<p>Given the shape-(2,3,4) array:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[[</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">11</span><span class="p">]],</span>
<span class="gp">...</span>
<span class="gp">... </span> <span class="p">[[</span><span class="mi">12</span><span class="p">,</span> <span class="mi">13</span><span class="p">,</span> <span class="mi">14</span><span class="p">,</span> <span class="mi">15</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">16</span><span class="p">,</span> <span class="mi">17</span><span class="p">,</span> <span class="mi">18</span><span class="p">,</span> <span class="mi">19</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">20</span><span class="p">,</span> <span class="mi">21</span><span class="p">,</span> <span class="mi">22</span><span class="p">,</span> <span class="mi">23</span><span class="p">]]])</span>
</pre></div>
</div>
<p>Normalize <code class="docutils literal notranslate"><span class="pre">x</span></code> such that <em>each of its rows, within each sheet, will sum to a value of 1</em>. Make use of the sequential function <code class="docutils literal notranslate"><span class="pre">np.sum</span></code>, which should be called only once, and broadcast-division.</p>
</div>
<div class="admonition note">
<p class="admonition-title fa fa-exclamation-circle"><strong>Reading Comprehension: Basic Broadcasting III</strong></p>
<p>A digital image is simply an array of numbers, which instructs a grid of pixels on a monitor to shine light of specific colors, according to the numerical values in that array.</p>
<p>An RGB-image can thus be stored as a 3D NumPy array of shape-<span class="math notranslate nohighlight">\((V, H, 3)\)</span>. <span class="math notranslate nohighlight">\(V\)</span> is the number of pixels along the vertical direction, <span class="math notranslate nohighlight">\(H\)</span> is the number of pixels along the horizontal, and the size-3 dimension stores the red, blue, and green color values for a given pixel. Thus a <span class="math notranslate nohighlight">\((32, 32, 3)\)</span> array would be a 32x32 RGB image.</p>
<p>You often work with a collection of images. Suppose we want to store N images in a single array; thus we now consider a 4D shape-(N, V, H, 3) array. For the sake of convenience, let’s simply generate a 4D-array of random numbers as a placeholder for real image data. We will generate 500, 48x48 RGB images:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">images</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="mi">500</span><span class="p">,</span> <span class="mi">48</span><span class="p">,</span> <span class="mi">48</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
</pre></div>
</div>
<p>Using the sequential function <code class="docutils literal notranslate"><span class="pre">np.max</span></code> and broadcasting, normalize <code class="docutils literal notranslate"><span class="pre">images</span></code> such that the largest value within <em>each color-channel of each image</em> is 1.</p>
</div>
</div>
</div>
<div class="section" id="An-Advanced-Application-of-Broadcasting:-Pairwise-Distances">
<h2>An Advanced Application of Broadcasting: Pairwise Distances<a class="headerlink" href="#An-Advanced-Application-of-Broadcasting:-Pairwise-Distances" title="Permalink to this headline"></a></h2>
<p>We will conclude this section by demonstrating an important, non-trivial example of array broadcasting. Here, we will find that the most straightforward use of broadcasting is <em>not</em> necessarily the right solution for our problem, and we will see that it can be important to first refactor the mathematical approach taken to perform a calculation before using broadcasting. Specifically, we will see that our initial approach for using of broadcasting is memory-inefficient.</p>
<p>Suppose we have two, 2D arrays. <code class="docutils literal notranslate"><span class="pre">x</span></code> has a shape of <span class="math notranslate nohighlight">\((M, D)\)</span> and <code class="docutils literal notranslate"><span class="pre">y</span></code> has a shape of <span class="math notranslate nohighlight">\((N, D)\)</span>. We want to compute the Euclidean distance (a.k.a. the <span class="math notranslate nohighlight">\(L_2\)</span>-distance) between <em>each pair</em> of rows between the two arrays. That is, if a given row of <code class="docutils literal notranslate"><span class="pre">x</span></code> is represented by <span class="math notranslate nohighlight">\(D\)</span> numbers <span class="math notranslate nohighlight">\((x_0, x_1, \ldots, x_{D-1})\)</span>, and similarly, a row <code class="docutils literal notranslate"><span class="pre">y</span></code> is represented by <span class="math notranslate nohighlight">\((y_0, y_1, \ldots, y_{D-1})\)</span>, and we want to compute the Euclidean distance between the two rows:</p>
<div class="math notranslate nohighlight">
\begin{equation}
\sqrt{(x_{0} - y_{0})^2 + (x_{1} - y_{1})^2 + \ldots + (x_{D-1} - y_{D-1})^2} = \sqrt{\sum_{i=0}^{D-1}{(x_{i} - y_{i})^2}}
\end{equation}</div><p>Doing this for each pair of rows should produce a total of <span class="math notranslate nohighlight">\(M\times N\)</span> distances. The previous subsection stepped us through a very similar calculation, albeit with lower-dimensional arrays. Let’s proceed by performing this computation in three different ways:</p>
<ol class="arabic simple">
<li><p>Using explicit for-loops</p></li>
<li><p>Using straight-forward broadcasting</p></li>
<li><p>Refactoring the problem and then using broadcasting</p></li>
</ol>
<p>For the sake of being concrete, we will compute all of the pairwise Euclidean distances between the rows of these two arrays:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># a shape-(5, 3) array</span>
<span class="o">>>></span> <span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span> <span class="mf">8.54</span><span class="p">,</span> <span class="mf">1.54</span><span class="p">,</span> <span class="mf">8.12</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span> <span class="mf">3.13</span><span class="p">,</span> <span class="mf">8.76</span><span class="p">,</span> <span class="mf">5.29</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span> <span class="mf">7.73</span><span class="p">,</span> <span class="mf">6.71</span><span class="p">,</span> <span class="mf">1.31</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span> <span class="mf">6.44</span><span class="p">,</span> <span class="mf">9.64</span><span class="p">,</span> <span class="mf">8.44</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span> <span class="mf">7.27</span><span class="p">,</span> <span class="mf">8.42</span><span class="p">,</span> <span class="mf">5.27</span><span class="p">]])</span>
<span class="c1"># a shape-(6, 3) array</span>
<span class="o">>>></span> <span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span> <span class="mf">8.65</span><span class="p">,</span> <span class="mf">0.27</span><span class="p">,</span> <span class="mf">4.67</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span> <span class="mf">7.73</span><span class="p">,</span> <span class="mf">7.26</span><span class="p">,</span> <span class="mf">1.95</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span> <span class="mf">1.27</span><span class="p">,</span> <span class="mf">7.27</span><span class="p">,</span> <span class="mf">3.59</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span> <span class="mf">4.05</span><span class="p">,</span> <span class="mf">5.16</span><span class="p">,</span> <span class="mf">3.53</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span> <span class="mf">4.77</span><span class="p">,</span> <span class="mf">6.48</span><span class="p">,</span> <span class="mf">8.01</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span> <span class="mf">7.85</span><span class="p">,</span> <span class="mf">6.68</span><span class="p">,</span> <span class="mf">6.13</span><span class="p">]])</span>
</pre></div>
</div>
<p>Thus we will want to compute a total of 30 distances, one for each pair of rows from <code class="docutils literal notranslate"><span class="pre">x</span></code> and <code class="docutils literal notranslate"><span class="pre">y</span></code>.</p>
<div class="section" id="Pairwise-Distances-Using-For-Loops">
<h3>Pairwise Distances Using For-Loops<a class="headerlink" href="#Pairwise-Distances-Using-For-Loops" title="Permalink to this headline"></a></h3>
<p>Performing this computation using for-loops proceeds as follows:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">pairwise_dists_looped</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
<span class="sd">""" Computing pairwise distances using for-loops</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> x : numpy.ndarray, shape=(M, D)</span>
<span class="sd"> y : numpy.ndarray, shape=(N, D)</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> numpy.ndarray, shape=(M, N)</span>
<span class="sd"> The Euclidean distance between each pair of</span>
<span class="sd"> rows between `x` and `y`."""</span>
<span class="c1"># `dists[i, j]` will store the Euclidean</span>
<span class="c1"># distance between `x[i]` and `y[j]`</span>
<span class="n">dists</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">empty</span><span class="p">((</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">))</span>
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">row_x</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">x</span><span class="p">):</span> <span class="c1"># loops over rows of `x`</span>
<span class="k">for</span> <span class="n">j</span><span class="p">,</span> <span class="n">row_y</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">y</span><span class="p">):</span> <span class="c1"># loops over rows of `y`</span>
<span class="c1"># Subtract corresponding entries of the rows,</span>
<span class="c1"># squares each difference, and then sums them. This</span>
<span class="c1"># exactly matches our equation for Euclidean</span>
<span class="c1"># distance (we will do the square root later)</span>
<span class="n">dists</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">((</span><span class="n">row_x</span> <span class="o">-</span> <span class="n">row_y</span><span class="p">)</span><span class="o">**</span><span class="mi">2</span><span class="p">)</span>
<span class="c1"># we still need to take the square root of</span>
<span class="c1"># each of our numbers</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">dists</span><span class="p">)</span>
</pre></div>
</div>
<p>Be sure to step through this code and see that <code class="docutils literal notranslate"><span class="pre">dists</span></code> stores each pair of Euclidean distances between the rows of <code class="docutils literal notranslate"><span class="pre">x</span></code> and <code class="docutils literal notranslate"><span class="pre">y</span></code>.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># produces a shape-(5, 6) result</span>
<span class="o">>>></span> <span class="n">pairwise_dists_looped</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
<span class="n">array</span><span class="p">([[</span> <span class="mf">3.678</span> <span class="p">,</span> <span class="mf">8.4524</span><span class="p">,</span> <span class="mf">10.3057</span><span class="p">,</span> <span class="mf">7.3711</span><span class="p">,</span> <span class="mf">6.2152</span><span class="p">,</span> <span class="mf">5.5548</span><span class="p">],</span>
<span class="p">[</span><span class="mf">10.1457</span><span class="p">,</span> <span class="mf">5.8793</span><span class="p">,</span> <span class="mf">2.9274</span><span class="p">,</span> <span class="mf">4.1114</span><span class="p">,</span> <span class="mf">3.9098</span><span class="p">,</span> <span class="mf">5.2259</span><span class="p">],</span>
<span class="p">[</span> <span class="mf">7.3219</span><span class="p">,</span> <span class="mf">0.8439</span><span class="p">,</span> <span class="mf">6.8734</span><span class="p">,</span> <span class="mf">4.5687</span><span class="p">,</span> <span class="mf">7.3283</span><span class="p">,</span> <span class="mf">4.8216</span><span class="p">],</span>
<span class="p">[</span><span class="mf">10.339</span> <span class="p">,</span> <span class="mf">7.032</span> <span class="p">,</span> <span class="mf">7.4745</span><span class="p">,</span> <span class="mf">7.0633</span><span class="p">,</span> <span class="mf">3.5999</span><span class="p">,</span> <span class="mf">4.0107</span><span class="p">],</span>
<span class="p">[</span> <span class="mf">8.2878</span><span class="p">,</span> <span class="mf">3.5468</span><span class="p">,</span> <span class="mf">6.336</span> <span class="p">,</span> <span class="mf">4.9014</span><span class="p">,</span> <span class="mf">4.1858</span><span class="p">,</span> <span class="mf">2.0257</span><span class="p">]])</span>
</pre></div>
</div>
</div>
<div class="section" id="Pairwise-Distances-Using-Broadcasting-(Unoptimized)">
<h3>Pairwise Distances Using Broadcasting (Unoptimized)<a class="headerlink" href="#Pairwise-Distances-Using-Broadcasting-(Unoptimized)" title="Permalink to this headline"></a></h3>
<p>Now, let’s make use of vectorization to perform this distance computation. It must be established immediately that the method that we are about to develop here is memory-inefficient. We will address this issue in detail at the end of this subsection.</p>
<p>We start off our vectorized computation by shrewdly inserting size-1 dimensions into <code class="docutils literal notranslate"><span class="pre">x</span></code> and <code class="docutils literal notranslate"><span class="pre">y</span></code>, so that we can perform <span class="math notranslate nohighlight">\(M \times N\)</span> subtractions between their pairs of length-<span class="math notranslate nohighlight">\(D\)</span> rows. <em>This creates a shape-</em><span class="math notranslate nohighlight">\((M, N, D)\)</span> <em>array</em>.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># subtract shape-(5, 1, 3) with shape-(1, 6, 3)</span>
<span class="c1"># produces shape-(5, 6, 3)</span>
<span class="o">>>></span> <span class="n">diffs</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span> <span class="o">-</span> <span class="n">y</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
<span class="o">>>></span> <span class="n">diffs</span><span class="o">.</span><span class="n">shape</span>
<span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
</pre></div>
</div>
<p>It is important to see, via broadcasting, that <code class="docutils literal notranslate"><span class="pre">diffs[i,</span> <span class="pre">j]</span></code> stores <code class="docutils literal notranslate"><span class="pre">x[i]</span> <span class="pre">-</span> <span class="pre">y[j]</span></code>. Thus we need to square each entry of <code class="docutils literal notranslate"><span class="pre">diffs</span></code>, sum over its last axis, and take the square root, in order to produce our <span class="math notranslate nohighlight">\(M \times N\)</span> Euclidean distances:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># producing the Euclidean distances</span>
<span class="o">>>></span> <span class="n">dists</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">diffs</span><span class="o">**</span><span class="mi">2</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">2</span><span class="p">))</span>
<span class="o">>>></span> <span class="n">dists</span><span class="o">.</span><span class="n">shape</span>
<span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span>
</pre></div>
</div>
<p>Voilà! We have produced the distances in a vectorized way. Let’s write this out formally as a function:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">pairwise_dists_crude</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
<span class="sd">""" Computing pairwise distances using vectorization.</span>
<span class="sd"> This method uses memory-inefficient broadcasting.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> x : numpy.ndarray, shape=(M, D)</span>
<span class="sd"> y : numpy.ndarray, shape=(N, D)</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> numpy.ndarray, shape=(M, N)</span>
<span class="sd"> The Euclidean distance between each pair of</span>
<span class="sd"> rows between `x` and `y`."""</span>
<span class="c1"># The use of `np.newaxis` here is equivalent to our</span>
<span class="c1"># use of the `reshape` function</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">((</span><span class="n">x</span><span class="p">[:,</span> <span class="n">np</span><span class="o">.</span><span class="n">newaxis</span><span class="p">]</span> <span class="o">-</span> <span class="n">y</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">newaxis</span><span class="p">])</span><span class="o">**</span><span class="mi">2</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">2</span><span class="p">))</span>
</pre></div>
</div>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># produces a shape-(5, 6) result</span>
<span class="o">>>></span> <span class="n">pairwise_dists_crude</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
<span class="n">array</span><span class="p">([[</span> <span class="mf">3.678</span> <span class="p">,</span> <span class="mf">8.4524</span><span class="p">,</span> <span class="mf">10.3057</span><span class="p">,</span> <span class="mf">7.3711</span><span class="p">,</span> <span class="mf">6.2152</span><span class="p">,</span> <span class="mf">5.5548</span><span class="p">],</span>
<span class="p">[</span><span class="mf">10.1457</span><span class="p">,</span> <span class="mf">5.8793</span><span class="p">,</span> <span class="mf">2.9274</span><span class="p">,</span> <span class="mf">4.1114</span><span class="p">,</span> <span class="mf">3.9098</span><span class="p">,</span> <span class="mf">5.2259</span><span class="p">],</span>
<span class="p">[</span> <span class="mf">7.3219</span><span class="p">,</span> <span class="mf">0.8439</span><span class="p">,</span> <span class="mf">6.8734</span><span class="p">,</span> <span class="mf">4.5687</span><span class="p">,</span> <span class="mf">7.3283</span><span class="p">,</span> <span class="mf">4.8216</span><span class="p">],</span>
<span class="p">[</span><span class="mf">10.339</span> <span class="p">,</span> <span class="mf">7.032</span> <span class="p">,</span> <span class="mf">7.4745</span><span class="p">,</span> <span class="mf">7.0633</span><span class="p">,</span> <span class="mf">3.5999</span><span class="p">,</span> <span class="mf">4.0107</span><span class="p">],</span>
<span class="p">[</span> <span class="mf">8.2878</span><span class="p">,</span> <span class="mf">3.5468</span><span class="p">,</span> <span class="mf">6.336</span> <span class="p">,</span> <span class="mf">4.9014</span><span class="p">,</span> <span class="mf">4.1858</span><span class="p">,</span> <span class="mf">2.0257</span><span class="p">]])</span>
</pre></div>
</div>
<p>Regrettably, there is a glaring issue with the vectorized computation that we just performed. Consider the largest sized array that is created in the for-loop computation, compared to that of this vectorized computation. The for-loop version need only create a shape-<span class="math notranslate nohighlight">\((M, N)\)</span> array, whereas the vectorized computation creates an intermediate array (i.e. <code class="docutils literal notranslate"><span class="pre">diffs</span></code>) of shape-<span class="math notranslate nohighlight">\((M, N, D)\)</span>. This intermediate array is even created in the one-line version of the code. This will create a
massive array if <span class="math notranslate nohighlight">\(D\)</span> is a large number!</p>
<p>Suppose, for instance, that you are finding the Euclidean between pairs of RGB images that each have a resolution of <span class="math notranslate nohighlight">\(32 \times 32\)</span> (in order to see if the images resemble one another). Thus in this scenario, each image is comprised of <span class="math notranslate nohighlight">\(D = 32 \times 32 \times 3 = 3072\)</span> numbers (<span class="math notranslate nohighlight">\(32^2\)</span> pixels, and each pixel has 3 values: a red, blue, and green-color value). Computing all the distances between a stack of 5000 images with a stack of 100 images would form an intermediate array of
shape-<span class="math notranslate nohighlight">\((5000, 100, 3072)\)</span>. Even though this large array only exists temporarily, it would have to consume over 6GB of RAM! The for-loop version requires <span class="math notranslate nohighlight">\(\frac{1}{3072}\)</span> as much memory (about 2MB).</p>
<p>Is our goose cooked? Are we doomed to pick between either slow for-loops, or a memory-inefficient use of vectorization? No! We can refactor the mathematical form of the Euclidean distance in order to avoid the creation of that bloated intermediate array.</p>
</div>
<div class="section" id="Optimized-Pairwise-Distances">
<h3>Optimized Pairwise Distances<a class="headerlink" href="#Optimized-Pairwise-Distances" title="Permalink to this headline"></a></h3>
<p>Performing the pairwise subtraction between the respective rows of <code class="docutils literal notranslate"><span class="pre">x</span></code> and <code class="docutils literal notranslate"><span class="pre">y</span></code> is what created the over-sized intermediate array in our previous calculation. Thus we want to rewrite the Euclidean distance equation such that none of the terms require broadcasting beyond the size of <span class="math notranslate nohighlight">\(M \times N\)</span>.</p>
<p>The Euclidean distance equation, ignoring the square root for now, can be refactored by multiplying out each squared term as so:</p>
<div class="math notranslate nohighlight">
\begin{equation}
\sum_{i=0}^{D-1}{(x_{i} - y_{i})^2} = \sum_{i=0}^{D-1}{x_{i}^2} + \sum_{i=0}^{D-1}{y_{i}^2} - 2\sum_{i=0}^{D-1}{x_{i} y_{i}}
\end{equation}</div><p>Keep in mind that we must compute this for each pair of rows in <code class="docutils literal notranslate"><span class="pre">x</span></code> and <code class="docutils literal notranslate"><span class="pre">y</span></code>. We will find that this formulation permits the use of matrix multiplication, such that we can avoid forming the shape-<span class="math notranslate nohighlight">\((M, N, D)\)</span> intermediate array.</p>
<p>The first two terms in this equation are straight-forward to calculate, and, when combined, will only produce a shape-<span class="math notranslate nohighlight">\((M, N)\)</span> array. For both <code class="docutils literal notranslate"><span class="pre">x</span></code> and <code class="docutils literal notranslate"><span class="pre">y</span></code>, we square each element in the array and then sum over the columns for each row:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Computing the first two terms of the</span>
<span class="c1"># refactored Euclidean distance equation</span>
<span class="c1"># creates a shape-(5,) array</span>
<span class="o">>>></span> <span class="n">x_sqrd_summed</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">x</span><span class="o">**</span><span class="mi">2</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="c1"># creates a shape-(6,) array</span>
<span class="o">>>></span> <span class="n">y_sqrd_summed</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">y</span><span class="o">**</span><span class="mi">2</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
</pre></div>
</div>
<p>We must insert a size-1 dimension in <code class="docutils literal notranslate"><span class="pre">x</span></code> so that we can add all pairs of numbers between the resulting shape-<span class="math notranslate nohighlight">\((M, 1)\)</span> and shape-<span class="math notranslate nohighlight">\((N,)\)</span> arrays. This will compute <span class="math notranslate nohighlight">\(\sum_{i=0}^{D-1}{x_{i}^2} + \sum_{i=0}^{D-1}{y_{i}^2}\)</span> for all of the <span class="math notranslate nohighlight">\(M \times N\)</span> pairs of rows:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># add a shape-(5, 1) array with a shape-(6, ) array</span>
<span class="c1"># to create a shape-(5, 6) array</span>
<span class="o">>>></span> <span class="n">x_y_sqrd</span> <span class="o">=</span> <span class="n">x_sqrd_summed</span><span class="p">[:,</span> <span class="n">np</span><span class="o">.</span><span class="n">newaxis</span><span class="p">]</span> <span class="o">+</span> <span class="n">y_sqrd_summed</span>
<span class="o">>>></span> <span class="n">x_y_sqrd</span><span class="o">.</span><span class="n">shape</span>
<span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span>
</pre></div>
</div>
<p>This leaves the third term to be computed. It is left to the reader to show that computing this sum of products for each pair of rows in <code class="docutils literal notranslate"><span class="pre">x</span></code> and <code class="docutils literal notranslate"><span class="pre">y</span></code> is equivalent to performing the matrix multiplication <span class="math notranslate nohighlight">\(-2\;(x \cdot y^{T})\)</span>, where <code class="docutils literal notranslate"><span class="pre">y</span></code> has been transposed so that it has a shape of <span class="math notranslate nohighlight">\((D, N)\)</span>. This matrix multiplication of a shape-<span class="math notranslate nohighlight">\((M, D)\)</span> array with a shape-<span class="math notranslate nohighlight">\((D, N)\)</span> array produces a shape-<span class="math notranslate nohighlight">\((M, N)\)</span> array. Therefore, we can compute this final term without
needing to create a larger, intermediate array.</p>
<p>Thus the third term in our equation, <span class="math notranslate nohighlight">\(-2\sum_{i=0}^{D-1}{x_{i} y_{i}}\)</span>, for all <span class="math notranslate nohighlight">\(M \times N\)</span> pairs of rows, is:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># computing the third term in the distance</span>
<span class="c1"># equation, for all pairs of rows</span>
<span class="o">>>></span> <span class="n">x_y_prod</span> <span class="o">=</span> <span class="mi">2</span> <span class="o">*</span> <span class="n">np</span><span class="o">.</span><span class="n">matmul</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="o">.</span><span class="n">T</span><span class="p">)</span> <span class="c1"># `np.dot` can also be used to the same effect</span>
<span class="o">>>></span> <span class="n">x_y_prod</span><span class="o">.</span><span class="n">shape</span>
<span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span>
</pre></div>
</div>
<p>Having accounted for all three terms, we can finally compute the Euclidean distances:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># computing all the distances</span>
<span class="o">>>></span> <span class="n">dists</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">x_y_sqrd</span> <span class="o">-</span> <span class="n">x_y_prod</span><span class="p">)</span>
<span class="o">>>></span> <span class="n">dists</span><span class="o">.</span><span class="n">shape</span>
<span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span>
</pre></div>
</div>
<div class="section" id="A-Subtle-Issue-with-Floating-point-Precision">
<h4>A Subtle Issue with Floating-point Precision<a class="headerlink" href="#A-Subtle-Issue-with-Floating-point-Precision" title="Permalink to this headline"></a></h4>
<p>There is one more important and very subtle detail that we have to deal with. In terms of pure mathematics, <code class="docutils literal notranslate"><span class="pre">x_y_sqrd</span> <span class="pre">-</span> <span class="pre">x_y_prod</span></code> must be a strictly non-negative value (i.e. its smallest possible value is <span class="math notranslate nohighlight">\(0\)</span>), since it is equivalent to</p>
<div class="math notranslate nohighlight">
\begin{equation}
\sum_{i=0}^{D-1}{(x_{i} - y_{i})^2}
\end{equation}</div><p>That being said, we are working with floating-point numbers, which do not always behave exactly like rational numbers when we do arithmetic with them. Indeed, we saw earlier that the <a class="reference external" href="https://www.pythonlikeyoumeanit.com/Module2_EssentialsOfPython/Basic_Objects.html#Understanding-Numerical-Precision">quirks of floating-point arithmetic</a> can lead to surprising results. Here, the strange behavior is that <code class="docutils literal notranslate"><span class="pre">x_y_sqrd</span> <span class="pre">-</span> <span class="pre">x_y_prod</span></code> <strong>can produce negative numbers</strong>!</p>
<p>Let’s see this in action. We’ll take the following shape-(2, 3) array</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># A carefully-selected input that will trigger surprising</span>
<span class="c1"># floating-point precision issues in our distance calculation</span>
<span class="o">>>></span> <span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mf">4.700867387959219</span><span class="p">,</span> <span class="mf">4.700867387959219</span><span class="p">,</span> <span class="mf">4.700867387959219</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span><span class="mf">4.700867387959219</span><span class="p">,</span> <span class="mf">4.700867387959219</span><span class="p">,</span> <span class="mf">4.700867387959219</span><span class="p">]])</span>
</pre></div>
</div>
<p>And compute the square distance between all combinations of rows <code class="docutils literal notranslate"><span class="pre">x</span></code>; the result should simply be a shape-(2, 2) array of zeros.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># The square-distances should be exactly zero, but they</span>
<span class="c1"># will instead be (*very* small) negative numbers!</span>
<span class="o">>>></span> <span class="n">sqr_dists</span> <span class="o">=</span> <span class="o">-</span><span class="mi">2</span> <span class="o">*</span> <span class="n">np</span><span class="o">.</span><span class="n">matmul</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">x</span><span class="o">.</span><span class="n">T</span><span class="p">)</span> <span class="c1"># `x_x_prod`</span>
<span class="o">>>></span> <span class="n">sqr_dists</span> <span class="o">+=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">x</span><span class="o">**</span><span class="mi">2</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)[:,</span> <span class="n">np</span><span class="o">.</span><span class="n">newaxis</span><span class="p">]</span>
<span class="o">>>></span> <span class="n">sqr_dists</span> <span class="o">+=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">x</span><span class="o">**</span><span class="mi">2</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="o">>>></span> <span class="n">sqr_dists</span>
<span class="n">array</span><span class="p">([[</span><span class="o">-</span><span class="mf">2.842170943040401e-14</span><span class="p">,</span> <span class="o">-</span><span class="mf">2.842170943040401e-14</span><span class="p">],</span>
<span class="p">[</span><span class="o">-</span><span class="mf">2.842170943040401e-14</span><span class="p">,</span> <span class="o">-</span><span class="mf">2.842170943040401e-14</span><span class="p">]])</span>
</pre></div>
</div>
<p>These values are <em>very</em> close to being zero, so it is not as if the magnitude of our result is wildly off, but the critical issue here is that the quirks of floating-point arithmetic produced (very small) negative numbers. We are going to be in for a rude awakening when we take the square-root of these values to get our final distances:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Taking the square-root of negative floats will produce NaNs</span>
<span class="o">>>></span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">sqr_dists</span><span class="p">)</span>
<span class="n">array</span><span class="p">([[</span><span class="n">nan</span><span class="p">,</span> <span class="n">nan</span><span class="p">],</span>
<span class="p">[</span><span class="n">nan</span><span class="p">,</span> <span class="n">nan</span><span class="p">]])</span>
</pre></div>
</div>
<p><code class="docutils literal notranslate"><span class="pre">nan</span></code> stands for “Not a Number”, which is to say that the square-root of a negative number cannot produce a real-valued float (the result will be an <a class="reference external" href="https://www.pythonlikeyoumeanit.com/Module2_EssentialsOfPython/Basic_Objects.html#Complex-Numbers">imaginary number</a>, but NumPy will not swap out real numbers for imaginary ones unless we explicitly permit it to).</p>
<p>The solution to this problem comes from the realization that this occurrence is truly an edge-case, and that it will only manifest when the result <em>ought</em> to have been zero and indeed is very close to zero. Thus we can <a class="reference external" href="https://numpy.org/doc/stable/reference/generated/numpy.clip.html">clip</a> <code class="docutils literal notranslate"><span class="pre">sqr_dists</span></code> such that any value that falls below zero gets set to zero and all other values will be left alone:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># "Clipping" all negative entries so that they are set to 0</span>
<span class="o">>>></span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">clip</span><span class="p">(</span><span class="n">sqr_dists</span><span class="p">,</span> <span class="n">a_min</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">a_max</span><span class="o">=</span><span class="kc">None</span><span class="p">))</span>
<span class="n">array</span><span class="p">([[</span><span class="mf">0.</span><span class="p">,</span> <span class="mf">0.</span><span class="p">],</span>
<span class="p">[</span><span class="mf">0.</span><span class="p">,</span> <span class="mf">0.</span><span class="p">]])</span>
</pre></div>
</div>
</div>
<div class="section" id="The-Final-Answer,-At-Last!">
<h4>The Final Answer, At Last!<a class="headerlink" href="#The-Final-Answer,-At-Last!" title="Permalink to this headline"></a></h4>
<p>In total, we have successfully used vectorization to compute all the pairs of distances, while only requiring an array of shape-<span class="math notranslate nohighlight">\((M, N)\)</span> to do so! This is the memory-efficient, vectorized form – the stuff that dreams are made of. Let’s write the function that performs this computation in full.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">pairwise_dists</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
<span class="sd">""" Computing pairwise distances using memory-efficient</span>
<span class="sd"> vectorization.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> x : numpy.ndarray, shape=(M, D)</span>
<span class="sd"> y : numpy.ndarray, shape=(N, D)</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> numpy.ndarray, shape=(M, N)</span>
<span class="sd"> The Euclidean distance between each pair of</span>
<span class="sd"> rows between `x` and `y`."""</span>
<span class="n">sqr_dists</span> <span class="o">=</span> <span class="o">-</span><span class="mi">2</span> <span class="o">*</span> <span class="n">np</span><span class="o">.</span><span class="n">matmul</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="o">.</span><span class="n">T</span><span class="p">)</span>
<span class="n">sqr_dists</span> <span class="o">+=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">x</span><span class="o">**</span><span class="mi">2</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)[:,</span> <span class="n">np</span><span class="o">.</span><span class="n">newaxis</span><span class="p">]</span>
<span class="n">sqr_dists</span> <span class="o">+=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">y</span><span class="o">**</span><span class="mi">2</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">clip</span><span class="p">(</span><span class="n">sqr_dists</span><span class="p">,</span> <span class="n">a_min</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">a_max</span><span class="o">=</span><span class="kc">None</span><span class="p">))</span>
</pre></div>
</div>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># produces a shape-(5, 6) result</span>
<span class="o">>>></span> <span class="n">pairwise_dists</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
<span class="n">array</span><span class="p">([[</span> <span class="mf">3.678</span> <span class="p">,</span> <span class="mf">8.4524</span><span class="p">,</span> <span class="mf">10.3057</span><span class="p">,</span> <span class="mf">7.3711</span><span class="p">,</span> <span class="mf">6.2152</span><span class="p">,</span> <span class="mf">5.5548</span><span class="p">],</span>
<span class="p">[</span><span class="mf">10.1457</span><span class="p">,</span> <span class="mf">5.8793</span><span class="p">,</span> <span class="mf">2.9274</span><span class="p">,</span> <span class="mf">4.1114</span><span class="p">,</span> <span class="mf">3.9098</span><span class="p">,</span> <span class="mf">5.2259</span><span class="p">],</span>
<span class="p">[</span> <span class="mf">7.3219</span><span class="p">,</span> <span class="mf">0.8439</span><span class="p">,</span> <span class="mf">6.8734</span><span class="p">,</span> <span class="mf">4.5687</span><span class="p">,</span> <span class="mf">7.3283</span><span class="p">,</span> <span class="mf">4.8216</span><span class="p">],</span>
<span class="p">[</span><span class="mf">10.339</span> <span class="p">,</span> <span class="mf">7.032</span> <span class="p">,</span> <span class="mf">7.4745</span><span class="p">,</span> <span class="mf">7.0633</span><span class="p">,</span> <span class="mf">3.5999</span><span class="p">,</span> <span class="mf">4.0107</span><span class="p">],</span>
<span class="p">[</span> <span class="mf">8.2878</span><span class="p">,</span> <span class="mf">3.5468</span><span class="p">,</span> <span class="mf">6.336</span> <span class="p">,</span> <span class="mf">4.9014</span><span class="p">,</span> <span class="mf">4.1858</span><span class="p">,</span> <span class="mf">2.0257</span><span class="p">]])</span>
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title fa fa-exclamation-circle"><strong>Takeaway</strong>:</p>
<p>The specific form of an equation can have a major impact on the memory-footprint of its vectorized implementation in NumPy. This issue can be safely overlooked in cases where you can be certain that the array shapes at play will not lead to substantial memory consumption. Otherwise, take care to study the form of the equation, to see if it can be recast in a way that alleviates its memory-consumption bottlenecks.</p>
</div>
<div class="admonition note">
<p class="admonition-title fa fa-exclamation-circle"><strong>Reading Comprehension: Checking the equivalence of the three pairwise distance functions</strong></p>
<p>Use the function <a class="reference external" href="https://numpy.org/doc/stable/reference/generated/numpy.allclose.html">numpy.allclose</a> to verify that the three methods for computing the pairwise distances produce the same numerical results.</p>
</div>
</div>
</div>
</div>
<div class="section" id="Links-to-Official-Documentation">
<h2>Links to Official Documentation<a class="headerlink" href="#Links-to-Official-Documentation" title="Permalink to this headline"></a></h2>
<ul class="simple">
<li><p><a class="reference external" href="https://numpy.org/doc/stable/user/basics.broadcasting.html#broadcasting">Basics of broadcasting</a></p></li>
<li><p><a class="reference external" href="https://numpy.org/doc/stable/reference/routines.array-manipulation.html#changing-number-of-dimensions">Broadcasting routines</a></p></li>
</ul>
</div>
<div class="section" id="Reading-Comprehension-Solutions">
<h2>Reading Comprehension Solutions<a class="headerlink" href="#Reading-Comprehension-Solutions" title="Permalink to this headline"></a></h2>
<p><strong>Broadcast Compatibility: Solution</strong></p>
<ol class="arabic simple">
<li><p>Incompatible</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">3</span> <span class="pre">x</span> <span class="pre">4</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">8</span> <span class="pre">x</span> <span class="pre">3</span> <span class="pre">x</span> <span class="pre">1</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">9</span> <span class="pre">x</span> <span class="pre">2</span> <span class="pre">x</span> <span class="pre">5</span></code></p></li>
<li><p>Incompatible</p></li>
</ol>
<p><strong>Basic Broadcasting: Solution</strong></p>
<p>Generating the random array of 10,000, 2D points, and their “center-of-mass”.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># find the mean x-coord and y-coord of the 10000 points</span>
<span class="o">>>></span> <span class="n">pts</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="mi">10000</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span>
<span class="o">>>></span> <span class="n">center_of_mass</span> <span class="o">=</span> <span class="n">pts</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span> <span class="c1"># -> array([mean_x, mean_y])</span>
<span class="o">>>></span> <span class="n">center_of_mass</span><span class="o">.</span><span class="n">shape</span>
<span class="p">(</span><span class="mi">2</span><span class="p">,)</span>
<span class="c1"># Use broadcasting to compute the position of each point relative</span>
<span class="c1"># to the center of mass. The center of mass coordinates are subtracted</span>
<span class="c1"># from each of the 10000 points, via broadcast-subtraction</span>
<span class="o">>>></span> <span class="n">relative_pos</span> <span class="o">=</span> <span class="n">pts</span> <span class="o">-</span> <span class="n">center_of_mass</span> <span class="c1"># shape-(10000,2) w/ shape-(2,)</span>
<span class="o">>>></span> <span class="n">relative_pos</span><span class="o">.</span><span class="n">shape</span>
<span class="p">(</span><span class="mi">10000</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span>
</pre></div>
</div>
<p><strong>Basic Broadcasting II: Solution</strong></p>
<p>Normalize <code class="docutils literal notranslate"><span class="pre">x</span></code> such that <em>each of its rows, within each sheet, will sum to a value of 1</em>. Make use of the sequential function <code class="docutils literal notranslate"><span class="pre">np.sum</span></code>, which should be called only once, and broadcast-division.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># a shape-(2, 3, 4) array</span>
<span class="o">>>></span> <span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[[</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">11</span><span class="p">]],</span>
<span class="o">...</span>
<span class="o">...</span> <span class="p">[[</span><span class="mi">12</span><span class="p">,</span> <span class="mi">13</span><span class="p">,</span> <span class="mi">14</span><span class="p">,</span> <span class="mi">15</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span><span class="mi">16</span><span class="p">,</span> <span class="mi">17</span><span class="p">,</span> <span class="mi">18</span><span class="p">,</span> <span class="mi">19</span><span class="p">],</span>
<span class="o">...</span> <span class="p">[</span><span class="mi">20</span><span class="p">,</span> <span class="mi">21</span><span class="p">,</span> <span class="mi">22</span><span class="p">,</span> <span class="mi">23</span><span class="p">]]])</span>
<span class="c1"># sum along each of the three rows within each sheet</span>
<span class="o">>>></span> <span class="n">summed_rows</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
<span class="o">>>></span> <span class="n">summed_rows</span>
<span class="n">array</span><span class="p">([[</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">22</span><span class="p">,</span> <span class="mi">38</span><span class="p">],</span>
<span class="p">[</span><span class="mi">54</span><span class="p">,</span> <span class="mi">70</span><span class="p">,</span> <span class="mi">86</span><span class="p">]])</span>
<span class="c1"># this shape-(2, 3) array can be broadcast-divided</span>
<span class="c1"># along the sheets and rows of `x`, if we insert a size-1 axis</span>
<span class="c1"># at dimension-2 of the summed array, where the columns used to</span>
<span class="c1"># be</span>
<span class="o">>>></span> <span class="n">x_norm</span> <span class="o">=</span> <span class="n">x</span> <span class="o">/</span> <span class="n">summed_rows</span><span class="p">[:,</span> <span class="p">:,</span> <span class="n">np</span><span class="o">.</span><span class="n">newaxis</span><span class="p">]</span>
<span class="c1"># verifying the solution</span>
<span class="o">>>></span> <span class="n">x_norm</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
<span class="n">array</span><span class="p">([[</span><span class="mf">1.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">],</span>
<span class="p">[</span><span class="mf">1.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">]])</span>
</pre></div>
</div>
<p><strong>Basic Broadcasting III: Solution</strong></p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># a collection of 500 48x48 RGB images</span>
<span class="o">>>></span> <span class="n">images</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="mi">500</span><span class="p">,</span> <span class="mi">48</span><span class="p">,</span> <span class="mi">48</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
<span class="c1"># finding the max-value within each color-channel of each image</span>
<span class="o">>>></span> <span class="n">max_vals</span> <span class="o">=</span> <span class="n">images</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">))</span>
<span class="o">>>></span> <span class="n">max_vals</span><span class="o">.</span><span class="n">shape</span>
<span class="p">(</span><span class="mi">500</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
<span class="c1"># we can insert size-1 dimensions so that we can</span>
<span class="c1"># broadcast-divide these max-values with</span>
<span class="c1"># the pixels of the images.</span>
<span class="c1"># broadcasting (500, 48, 48, 3) with (500, 1, 1, 3)</span>
<span class="o">>>></span> <span class="n">normed_images</span> <span class="o">=</span> <span class="n">images</span> <span class="o">/</span> <span class="n">max_vals</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">500</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
<span class="c1"># checking that all the max-values are 1</span>
<span class="o">>>></span> <span class="n">normed_images</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">))</span>
<span class="n">array</span><span class="p">([[</span> <span class="mf">1.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">],</span>
<span class="p">[</span> <span class="mf">1.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">],</span>
<span class="p">[</span> <span class="mf">1.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">],</span>
<span class="o">...</span><span class="p">,</span>
<span class="p">[</span> <span class="mf">1.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">],</span>
<span class="p">[</span> <span class="mf">1.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">],</span>
<span class="p">[</span> <span class="mf">1.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">]])</span>
<span class="c1"># a rigorous check</span>
<span class="o">>>></span> <span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="n">normed_images</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">))</span> <span class="o">==</span> <span class="mi">1</span><span class="p">)</span>
<span class="kc">True</span>
</pre></div>
</div>
<p><strong>Checking the equivalence of the three pairwise distance functions: Solution</strong></p>
<p><code class="docutils literal notranslate"><span class="pre">numpy.allclose</span></code> returns <code class="docutils literal notranslate"><span class="pre">True</span></code> if all pairwise elements between two arrays are almost-equal to one another.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span> <span class="mf">8.54</span><span class="p">,</span> <span class="mf">1.54</span><span class="p">,</span> <span class="mf">8.12</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span> <span class="mf">3.13</span><span class="p">,</span> <span class="mf">8.76</span><span class="p">,</span> <span class="mf">5.29</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span> <span class="mf">7.73</span><span class="p">,</span> <span class="mf">6.71</span><span class="p">,</span> <span class="mf">1.31</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span> <span class="mf">6.44</span><span class="p">,</span> <span class="mf">9.64</span><span class="p">,</span> <span class="mf">8.44</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span> <span class="mf">7.27</span><span class="p">,</span> <span class="mf">8.42</span><span class="p">,</span> <span class="mf">5.27</span><span class="p">]])</span>
<span class="gp">>>> </span><span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span> <span class="mf">8.65</span><span class="p">,</span> <span class="mf">0.27</span><span class="p">,</span> <span class="mf">4.67</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span> <span class="mf">7.73</span><span class="p">,</span> <span class="mf">7.26</span><span class="p">,</span> <span class="mf">1.95</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span> <span class="mf">1.27</span><span class="p">,</span> <span class="mf">7.27</span><span class="p">,</span> <span class="mf">3.59</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span> <span class="mf">4.05</span><span class="p">,</span> <span class="mf">5.16</span><span class="p">,</span> <span class="mf">3.53</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span> <span class="mf">4.77</span><span class="p">,</span> <span class="mf">6.48</span><span class="p">,</span> <span class="mf">8.01</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span> <span class="mf">7.85</span><span class="p">,</span> <span class="mf">6.68</span><span class="p">,</span> <span class="mf">6.13</span><span class="p">]])</span>
<span class="gp">>>> </span><span class="n">np</span><span class="o">.</span><span class="n">allclose</span><span class="p">(</span><span class="n">pairwise_dists_looped</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">),</span> <span class="n">pairwise_dists_crude</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">))</span>
<span class="go">True</span>
<span class="gp">>>> </span><span class="n">np</span><span class="o">.</span><span class="n">allclose</span><span class="p">(</span><span class="n">pairwise_dists_crude</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">),</span> <span class="n">pairwise_dists</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">))</span>
<span class="go">True</span>
</pre></div>
</div>
</div>
</div>
</div>
</div>
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="VectorizedOperations.html" class="btn btn-neutral float-left" title="“Vectorized” Operations: Optimized Computations on NumPy Arrays" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
<a href="BasicIndexing.html" class="btn btn-neutral float-right" title="Introducing Basic and Advanced Indexing" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
</div>
<hr/>
<div role="contentinfo">
<p>© Copyright 2021, Ryan Soklaski.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>