/
bidi_examples.html
604 lines (536 loc) · 51.3 KB
/
bidi_examples.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<title>Inline bidi markup examples</title>
<meta name="description" content="Provides background on the Unicode bidirectional algorithm and inline markup to help you implement Arabic, Hebrew and other right-to-left scripts in markup." />
<script>
// Per-document metadata for this article, collected in a single object literal.
var f = {
  // AUTHORS should fill in these properties:
  directory: 'articles/inline-bidi-markup' + '/', // the name of the directory this file is in
  filename: 'index', // the file name WITHOUT extensions
  authors: 'Richard Ishida, W3C, Aharon Lanin, Google', // author(s) and affiliations
  previousauthors: '', // as above
  modifiers: '', // people making substantive changes, and their affiliation
  searchString: 'article-inline-bidi-markup', // blog search string - usually the filename without extensions
  firstPubDate: '2003-09-29', // date of the first publication of the document (after review)
  lastSubstUpdate: { date: '2021-06-25', time: '12:43' }, // date and time of latest substantive changes to this document
  status: 'published', // should be one of draft, review, published, or notreviewed
  path: '../../', // what you need to prepend to a URL to get to the /International directory

  // AUTHORS AND TRANSLATORS should fill in these properties:
  thisVersion: { date: '2021-06-25', time: '12:43' }, // date and time of latest edits to this document/translation
  contributors: '', // people providing useful contributions or feedback during review or at other times
  // also make sure that the lang attribute on the html tag is correct!

  // TRANSLATORS should fill in these properties:
  translators: 'xxxNAME, ORG', // translator(s) and their affiliation - a elements allowed, but use double quotes for attributes
  breadcrumb: 'direction'
};
</script>
<script src="index-data/translations.js"> </script>
<script src="../../javascript/doc-structure/article-dt.js"> </script>
<script src="../../javascript/boilerplate-text/boilerplate-en.js"> </script><!--TRANSLATORS must change -en to the subtag for their language! -->
<script src="../../javascript/doc-structure/article.js"> </script>
<script src="../../javascript/articletoc-html5.js"></script>
<link rel="stylesheet" href="../../style/article-2016.css" />
<style>
/* Page-local overrides, applied on top of the shared article stylesheet. */

/* No outer margin on .injectionexamples; its cells are borderless,
   left-aligned and slightly smaller than body text. */
.injectionexamples {
  margin: 0;
}
.injectionexamples td {
  border: 0;
  padding-bottom: 12px;
  font-size: 90%;
  text-align: left;
}

/* Inserted text is highlighted in yellow, deleted text is greyed out. */
ins {
  background-color: #ff0;
}
del {
  color: #999;
}

/* Ordered lists nested in list items get extra space on the right. */
li ol {
  margin-right: 20px;
}

/* Both example classes share the same bottom spacing. */
.example,
.example1 {
  margin-bottom: 2em;
}

.hide {
  background-color: #fcc;
}
</style>
</head>
<body>
<header>
<nav id="mainNavigation"></nav><script>document.getElementById('mainNavigation').innerHTML = mainNavigation</script>
<h1>Inline bidi markup examples</h1>
</header>
<div>
<div id="audience">
<!--p><span id="intendedAudience" class="leadin">Intended audience:</span> content developers working with right-to-left scripts, HTML and SVG coders (using editors or scripting), script developers (PHP, JSP, etc.), schema developers (DTDs, XML Schema, RelaxNG, etc.), and anyone who is struggling to understand how to make their mixed direction text look right in markup.</p-->
<div id="updateInfo"></div><script>document.getElementById('updateInfo').innerHTML = g.updated</script>
</div>
<div class="insideinfonote noprint">
<p class="info">Many examples in this document are shown as images to avoid problems for those with a browser that doesn't produce what was intended or doesn't have non-ASCII fonts.</p>
<p>Code samples containing Arabic and Hebrew text <a class="print" href="http://www.w3.org/TR/i18n-html-tech-bidi/#bidisource"> may be displayed in different ways</a> depending on which editor is used. In this article right-to-left text in code samples is represented by UPPERCASE TRANSLATIONS, and left-to-right text by lowercase. All text in code samples reflects the direction of characters as stored in memory, rather than the displayed result. The original version of text in uppercase translations would be read from right-to-left.</p>
<p> To see the full source, click on the "Test in your browser" links and view the source of the page that displays.</p>
</div>
<p>It is common for content in Arabic, Hebrew, and other languages that use right-to-left scripts to include numerals or include text from other scripts. Both of these typically flow left-to-right within the overall right-to-left context. </p>
<p>This article provides a series of worked examples in HTML that support the advice in the companion article, <cite><a class="print" href="/International/articles/inline-bidi-markup/">Inline markup and bidirectional text in HTML</a></cite>.</p>
<p>It also describes a number of elements, attributes and characters that are commonly used for managing text direction in HTML.</p>
</div>
<section>
<h2 id="aboutmarkup"><a href="#aboutmarkup">Useful markup and control codes</a></h2>
<section>
<h3 id="dirattribute"><a href="#dirattribute">The <code translate="no" class="kw">dir</code> attribute</a></h3>
<div class="sidenoteGroup">
<p>The <code translate="no" class="kw">dir</code> attribute sets the base direction for the content of an element. </p>
</div>
<p>To set the default direction of the whole HTML document to right-to-left, add <code>dir="rtl"</code> to the <code translate="no" class="kw">html</code> tag. This will result in all elements in the document inheriting a base direction of RTL.</p>
<p> You can change the base direction for content within a page by surrounding that content with an element and adding a <code translate="no" class="kw">dir</code> attribute to indicate the desired direction. </p>
<p>In principle, the right thing to do for every opposite-direction phrase is to set its base direction by using the <code translate="no" class="kw">dir</code> attribute on an element tightly wrapping the phrase.</p>
<div class="sidenoteGroup">
<p>The <code translate="no" class="kw">dir</code> attribute also isolates the content of the element from the content surrounding it in terms of the bidi algorithm. Wrapping the opposite-direction phrases in an element with a <code translate="no" class="kw">dir</code> attribute helps address some bidi problems; adding isolation helps resolve some more.</p>
<aside class="implnote">
<p class="impl"><img src="../../icons/firefox_16x16.png" width="16" height="16" alt="Firefox" title="Firefox"/> <img src="../../icons/chrome_16x16.png" width="16" height="16" alt="Chrome" title="Chrome"/> <img src="../../icons/safari_16x16.png" width="16" height="16" alt="Safari" title="Safari"/> <img src="../../icons/edge_16x16.png" width="16" height="16" alt="Edge" title="Edge"/> <img src="../../icons/ok.png" alt="OK" style="margin-left: 0.5em;"/></p>
<p><a target="_blank" href="https://www.w3.org/International/i18n-tests/results/the-dir-attribute-isolation">See test results for major browsers</a>.</p>
</aside>
</div>
<p>Check out the worked examples below to see how this works.</p>
</section>
<section>
<h3 id="lrmrlm"><a href="#lrmrlm">LRM/RLM</a></h3>
<div class="sidenoteGroup">
<p>The visual order in which text is displayed can sometimes be modified using two invisible Unicode control characters: LRM (<span class="uname">U+200E LEFT-TO-RIGHT MARK</span>), which can be added to the source text using the character itself or the escapes <code>&amp;#x200E;</code> or <code>&amp;lrm;</code>, and RLM (<span class="uname">U+200F RIGHT-TO-LEFT MARK</span>), for which the escapes are <code>&amp;#x200F;</code> or <code>&amp;rlm;</code>. Each has the strong type indicated by its name, like an A or an א, but is invisible.</p>
<aside class="implnote">
<p class="impl"><img src="../../icons/firefox_16x16.png" width="16" height="16" alt="Firefox" title="Firefox"/> <img src="../../icons/chrome_16x16.png" width="16" height="16" alt="Chrome" title="Chrome"/> <img src="../../icons/safari_16x16.png" width="16" height="16" alt="Safari" title="Safari"/> <img src="../../icons/edge_16x16.png" width="16" height="16" alt="Edge" title="Edge"/> <img src="../../icons/ok.png" alt="OK" style="margin-left: 0.5em;"/></p>
</aside>
</div>
<p>One use of LRM and RLM is to <em>extend</em> a directional run through neutral or weak characters at the start or end of an opposite-direction phrase, by putting a mark of the same direction as the phrase on the other side of those neutral or weak characters. You can see an example of how it works in the advanced usage notes for <a href="#usecase1">use case 1</a> below.</p>
<p>Another use is to separate an opposite-direction phrase from some neighboring but independent text that would otherwise be incorrectly treated as the same directional run (see <a href="#usecase3">use case 3</a> for a good example). To do this you can put between them a directional mark with the same directionality as the overall context.</p>
<p>In HTML, where the <code translate="no" class="kw">dir</code> attribute is isolating, both cases are addressed by adding the <code translate="no" class="kw">dir</code> attribute to an element wrapping the opposite-direction phrase, so there may be no need to use LRM/RLM. See below for details.</p>
</section>
<section>
<h3 id="dirauto"><a href="#dirauto">dir="auto"</a></h3>
<div class="sidenoteGroup">
<p>HTML addresses another need: text dropped into a page, say from a database, when you don't know its base direction. Before HTML5, you could only set the <code translate="no" class="kw">dir</code> attribute to <code translate="no" class="kw">ltr</code> or <code translate="no" class="kw">rtl</code>, and had to somehow determine yourself which of them was appropriate. </p>
<aside class="implnote">
<p class="impl"><img src="../../icons/firefox_16x16.png" width="16" height="16" alt="Firefox" title="Firefox"/> <img src="../../icons/chrome_16x16.png" width="16" height="16" alt="Chrome" title="Chrome"/> <img src="../../icons/safari_16x16.png" width="16" height="16" alt="Safari" title="Safari"/> <img src="../../icons/edge_16x16.png" width="16" height="16" alt="Edge" title="Edge"/> <img src="../../icons/ok.png" alt="OK" style="margin-left: 0.5em;"/></p>
<p><a target="_blank" href="https://www.w3.org/International/i18n-tests/results/the-dir-attribute-auto">See test results for major browsers</a>.</p>
</aside>
</div>
<p>HTML5 provided a new value for the <code translate="no" class="kw">dir</code> attribute: <code translate="no" class="kw">auto</code>. The <code translate="no" class="kw">auto</code> value tells the browser to look at the first strongly typed character in the element. If it's a right-to-left typed character such as a Hebrew or Arabic letter, the element will get a direction of <code translate="no" class="kw">rtl</code>. If it's, say, a Latin character, the direction will be <code translate="no" class="kw">ltr</code>.</p>
<p>There are corner cases where this may not give the desired outcome, but it should usually produce the desired result.</p>
<p>Note that the browser ignores any neutral or weak characters at the beginning of the text when looking for the first strong character. It also ignores anything inside a <code translate="no" class="kw">bdi</code> element or an element with a <code translate="no" class="kw">dir</code> attribute of its own, including <code translate="no" class="kw">auto</code>.</p>
<p>Like any other use of the <code translate="no" class="kw">dir</code> attribute in HTML, <code>dir="auto"</code> also directionally isolates its content from its surroundings.<!-- The effect of the isolation is that you do not need to use LRM and RLM to separate a markup-wrapped opposite-direction phrase from a number or a logically separate opposite-direction phrase that happens to follow it. Thus, if you already have an element like <code translate="no" class="kw">a</code> or <code translate="no" class="kw">cite</code> wrapping a phrase of unknown direction, all your bidi wrapping needs are accomplished by adding <code>dir="auto"</code> on the existing element.--></p>
</section>
<section>
<h3 id="bdi"><a href="#bdi">The <code translate="no" class="kw">bdi</code> element</a></h3>
<div class="sidenoteGroup">
<p> The <code translate="no" class="kw">bdi</code> (bidirectional isolate) element is just like a <code translate="no" class="kw">span</code> that directionally isolates its content from the surrounding text, and uses first-strong heuristics to guess the base direction for its content. This is the same as using a <code class="kw" translate="no">dir</code> attribute set to <code class="kw" translate="no">auto</code> and wrapped around your content. It is most likely to be useful if you don't already have an element wrapping your content, as an alternative to adding a <code class="kw" translate="no">span</code> with the <code class="kw" translate="no">dir</code> attribute.</p>
<aside class="implnote">
<p class="impl"><img src="../../icons/firefox_16x16.png" width="16" height="16" alt="Firefox" title="Firefox"/> <img src="../../icons/chrome_16x16.png" width="16" height="16" alt="Chrome" title="Chrome"/> <img src="../../icons/safari_16x16.png" width="16" height="16" alt="Safari" title="Safari"/> <img src="../../icons/edge_16x16.png" width="16" height="16" alt="Edge" title="Edge"/> <img src="../../icons/ok.png" alt="OK" style="margin-left: 0.5em;"/></p>
<p><a target="_blank" href="https://www.w3.org/International/i18n-tests/results/the-bdi-element">See test results for major browsers</a>.</p>
</aside>
</div>
<!--p> Since it is actually quite rare not to want to isolate embedded phrases from its surroundings, <code translate="no" class="kw">bdi</code> (when the browser supports it) can be used instead of a <code translate="no" class="kw">span</code> for bidi-wrapping, and the use of LRMs and RLMs can be completely avoided.</p--></section>
<section>
<h3 id="bdo"><a href="#bdo">The <code translate="no" class="kw">bdo</code> element</a></h3>
<div class="sidenoteGroup">
<p>The <code class="kw" translate="no">bdo</code> (bidirectional override) element prevents the bidirectional algorithm from rearranging the sequence of characters it encloses, and allows you to display the sequence from right to left or from left to right in the order in which the characters are stored in memory. </p>
<aside class="implnote">
<p class="impl"><img src="../../icons/firefox_16x16.png" width="16" height="16" alt="Firefox" title="Firefox"/> <img src="../../icons/chrome_16x16.png" width="16" height="16" alt="Chrome" title="Chrome"/> <img src="../../icons/safari_16x16.png" width="16" height="16" alt="Safari" title="Safari"/> <img src="../../icons/edge_16x16.png" width="16" height="16" alt="Edge" title="Edge"/> <img src="../../icons/ok.png" alt="OK" style="margin-left: 0.5em;"/></p>
<p><a target="_blank" href="https://www.w3.org/International/i18n-tests/results/the-bdo-element">See test results for major browsers</a>.</p>
</aside>
</div>
<p>There are important use cases for <code class="kw" translate="no">bdo</code>, but they are rare. For more information see <a href="index#override">Overriding the algorithm</a>. <strong>Do not confuse this element with <code class="kw" translate="no">bdi</code>, and do not use it for managing normal bidi text.</strong></p>
</section>
<section>
<h3 id="cssshim"><a href="#cssshim">The CSS shim</a></h3>
<div class="sidenoteGroup">
<p>The CSS shim can be applied when a browser supports the CSS needed to isolate text, but doesn't support isolation for the <code class="kw" translate="no">dir</code> attribute. It was particularly useful during the transition period, while several major browsers had still not implemented isolation for the <code class="kw" translate="no">dir</code> attribute but did support the necessary CSS.</p>
<aside class="implnote">
<p class="impl"><img src="../../icons/firefox_16x16.png" width="16" height="16" alt="Firefox" title="Firefox"/> <img src="../../icons/chrome_16x16.png" width="16" height="16" alt="Chrome" title="Chrome"/> <img src="../../icons/safari_16x16.png" width="16" height="16" alt="Safari" title="Safari"/> <img src="../../icons/edge_16x16.png" width="16" height="16" alt="Edge" title="Edge"/> <img src="../../icons/ok.png" alt="OK" style="margin-left: 0.5em;"/></p>
<p><a target="_blank" href="https://www.w3.org/International/i18n-tests/results/the-dir-attribute-isolation#shim">See test results for major browsers</a>.</p>
</aside>
</div>
<p>Browsers that don't yet support the CSS will simply behave in the same way as before, but most recent versions of major browsers do support the desired behavior already.</p>
<p>The CSS shim is as follows: </p>
<figure>
<pre>[dir='ltr'], [dir='rtl'] {
unicode-bidi: -webkit-isolate;
unicode-bidi: -moz-isolate;
unicode-bidi: -ms-isolate;
unicode-bidi: isolate;
}
bdo[dir='ltr'], bdo[dir='rtl'] {
unicode-bidi: bidi-override;
unicode-bidi: -webkit-isolate-override;
unicode-bidi: -moz-isolate-override;
unicode-bidi: -ms-isolate-override;
unicode-bidi: isolate-override;
}</pre>
</figure>
<p>At the time of writing, all browser versions that support isolation in CSS also support the <code translate="no" class="kw">bdi</code> element.</p> </section>
</section>
<section>
<h2 id="staticexamples"><a href="#staticexamples">Worked examples for static use cases</a></h2>
<p>In this section we look at how to write code that addresses various use cases where the content is written by the author. The section following this deals with use cases where content is injected into the page.</p>
<section>
<h3 id="usecase1"><a href="#usecase1">Use case 1: Nested bidi</a></h3>
<p> A right-to-left book title is <strong>embedded</strong> in a left-to-right context, and the book title itself contains an embedded left-to-right phrase. Here is the code without any additional bidi markup:</p>
<figure class="example">
<div class="badcode"><span><img src="../../icons/dontcopy.png" alt=" "> Bad code. Don't copy!</span></div>
<p><code>&lt;p&gt;the title is "AN INTRODUCTION TO c++" in arabic.&lt;/p&gt;</code></p>
</figure>
<p>What one would expect to see is:</p>
<figure>
<p><img src="index-data/intro-to-C-right.png" alt="Displayed result of previous code" /></p>
</figure>
<p>Unfortunately, the bidirectional algorithm cannot tell where the boundaries of the nested changes in base direction should be. The result, without help in the markup, is:</p>
<figure>
<p><img src="index-data/intro-to-C-wrong.png" alt="Displayed result of previous code" /></p>
</figure>
</section>
<section>
<h4 id="usecase1html5" class="notoc"><a href="#usecase1html5">Fixing use case 1</a></h4>
<p> To address this in HTML, if there is no other markup around the opposite-direction phrases, wrap both in markup with the appropriate <code translate="no" class="kw">dir</code> value. (Note, by the way, how the markup appears <em>inside</em> the quotation marks, which are part of the English text.)</p>
<figure class="example">
<p><code>&lt;p&gt;the title is "&lt;span dir="rtl"&gt;AN INTRODUCTION TO &lt;span dir="ltr"&gt;c++&lt;/span&gt;&lt;/span&gt;" in arabic.&lt;/p&gt;</code></p>
<div class="demolink"><a href="index-data/uc1.html" target="_blank">Test in your browser.</a></div>
</figure>
<p> It is important to take into account that each phrase is <em>nested</em>. Just wrapping the Arabic in one <code translate="no" class="kw">span</code> followed by a <code translate="no" class="kw">span</code> containing the C++ would result in no improvement at all.</p>
<div class="advancedusage">
<p><strong>advanced usage notes:</strong> Note that two elements with <code translate="no" class="kw">dir</code> are needed in this case. This is because there are two opposite-direction phrases. If only one was used, like this:</p>
<figure class="example">
<div class="badcode"><span><img src="../../icons/dontcopy.png" alt=" "> Bad code. Don't copy!</span></div>
<p><code>&lt;p&gt;the title is "&lt;span dir="rtl"&gt;AN INTRODUCTION TO c++&lt;/span&gt;"&lt;/p&gt;</code></p>
</figure>
<p>the displayed text would be as shown below. This moves the C++ to the left, as needed, but the + signs appear on the wrong side of the C.</p>
<figure>
<p><img src="index-data/intro-to-C-attempt1.png" alt="Displayed result of previous code" /></p>
</figure>
<p>This fails because the "C++" is an opposite-direction (LTR) phrase within the title, ending in neutral characters and the phrase is now being displayed with an RTL base direction. The bidi algorithm has no way of knowing that the plus signs are part of an LTR phrase, not of the RTL context, and thus displays them to the left of the "C" instead of to its right.</p>
<p>To solve this problem, wrap the overall RTL phrase in a <code>&lt;span dir="rtl"&gt;</code>, and the LTR phrase nested inside it in its own <code>&lt;span dir="ltr"&gt;</code>, as shown.</p>
</div>
<p>If there is already suitable markup to surround the book title, such as a <code translate="no" class="kw">cite</code> element, add the <code translate="no" class="kw">dir</code> attribute to that.</p>
<figure class="example">
<p><code>&lt;p&gt;the title is &lt;cite dir="rtl"&gt;AN INTRODUCTION TO &lt;span dir="ltr"&gt;c++&lt;/span&gt;&lt;/cite&gt; in arabic.&lt;/p&gt;</code></p>
<div class="demolink"><a href="index-data/uc1_cite.html" target="_blank">Test in your browser.</a></div>
</figure>
<div class="advancedusage">
<p><strong>advanced usage notes:</strong> If the "C++" in this example was an ordinary Latin-script word, such as "Python", you wouldn't actually need to mark it up to get the right display. The bidi algorithm would take care of it. However, marking up text in this way saves you from having to understand why these two cases are different, and from having to work out which case applies for your content. </p>
<p>Similarly, if the title contained no embedded left-to-right text, you wouldn't actually need directional markup at all, but adding it avoids possible issues related to following inline text, such as where the text is edited to add a following number or another title, like this:</p>
<figure class="example1">
<p><code>&lt;p&gt;the titles are &lt;cite dir="rtl"&gt;AN INTRODUCTION TO ARABIC&lt;/cite&gt;, &lt;cite dir="rtl"&gt;FIRST STEPS IN URDU&lt;/cite&gt;, and &lt;cite dir="rtl"&gt;MASTERING HEBREW&lt;/cite&gt;.&lt;/p&gt;</code></p>
</figure>
</div>
</section>
<section>
<h4 id="usecase1soln" class="notoc"><a href="#usecase1soln">Bulletproofing for legacy browsers</a></h4>
<p>The solution outlined for modern browsers will work equally well for legacy browsers.</p>
<div class="advancedusage">
<p><strong>advanced usage notes:</strong> As noted earlier, one use of LRM and RLM is to <em>extend</em> a directional run through neutral or weak characters at the start or end of an opposite-direction phrase, by putting a mark of the same direction as the phrase on the other side of those neutral or weak characters. For this example, instead of wrapping the "C++" in a <code>&lt;span dir="ltr"&gt;</code>, we could add <code>&amp;lrm;</code> after the second plus:</p>
<figure class="example">
<p><code>&lt;p&gt;the title is &lt;cite dir="rtl"&gt;AN INTRODUCTION TO c++&amp;lrm;&lt;/cite&gt;&lt;/p&gt;</code> </p>
</figure>
<p>The result is what we need:</p>
<figure>
<p><img src="index-data/intro-to-C-lrm.png" alt="Displayed result of previous code" /></p>
</figure>
<p>Because the LRM is a strongly left-to-right character, the neutral pluses are now between two strong left-to-right characters (the C and the LRM). They therefore also become left-to-right in direction, making a single directional run of the four characters.</p>
<p>Used this way, however, LRM and RLM are a bit like gotos in programming languages: a quick hack that, unlike the <code translate="no" class="kw">dir</code> attribute, says nothing about the structure of the text. And they simply cannot be used to deal with an opposite-direction phrase that happens to contain a nested phrase in the original direction, like our complete "Introduction to C++" example above. That may seem like an esoteric case, but it is surprisingly common when displaying right-to-left data in a left-to-right page, because the use of left-to-right words (like "C++") is not uncommon in right-to-left text. </p>
<p>So, if you don't want to analyze whether LRM and RLM can replace the use of the <code translate="no" class="kw">dir</code> attribute in your case, just use the <code translate="no" class="kw">dir</code> attribute.</p>
</div>
<!--div class="advancedusage">
<p><strong>advanced usage notes:</strong> Nevertheless, it turns out that LRM and RLM do have an essential function dealing with opposite-direction phrases in HTML4: separating an opposite-direction phrase from a number, or from a separate opposite-direction phrase that happens to follow it. It does this by putting between them a mark of the same direction as the context. When used this way, LRM and RLM do not replace the use of the <code translate="no" class="kw">dir</code> attribute, but augment it. See <a href="#usecase2">use case 2</a> and <a href="#usecase3">use case 3</a> below.</p>
</div-->
</section>
<section>
<h3 id="usecase2"><a href="#usecase2">Use case 2: Following numbers</a></h3>
<p> In the next example, the opposite-direction phrase is <strong>followed by a logically separate number</strong>. This is the code without any bidi markup:</p>
<figure class="example">
<div class="badcode"><span><img src="../../icons/dontcopy.png" alt=" "> Bad code. Don't copy!</span></div>
<p><code>&lt;p&gt;we find the phrase 'INTERNATIONALIZATION ACTIVITY' 5 times on the page.&lt;/p&gt;</code></p>
</figure>
<p>You would expect to see:</p>
<figure>
<p><img src="index-data/number-following-right.png" alt="Displayed result of previous code" /></p>
</figure>
<p style="clear: both;">You would actually see:</p>
<figure>
<p><img src="index-data/number-following-wrong.png" alt="Displayed result of previous code" /></p>
</figure>
<p style="clear: both;">This happens because the bidi algorithm tells the browser to treat the "5" as part of the Hebrew text. Although this is often appropriate, it is not appropriate here. We need to find a way to say that the name and the number are separate things, i.e. to isolate the inserted name from the number.</p>
</section>
<section>
<h4 id="uc2html5" class="notoc"><a href="#uc2html5">Fixing use case 2</a></h4>
<p> Wrap the opposite-direction phrase (the title) in markup and add the appropriate <code translate="no" class="kw">dir</code> value. There is no need to add anything else, since the <code translate="no" class="kw">dir</code> attribute automatically isolates its content.</p>
<figure class="example">
<p><code>&lt;p&gt;we find the phrase '&lt;span dir="rtl"&gt;INTERNATIONALIZATION ACTIVITY&lt;/span&gt;' 5 times on the page.&lt;/p&gt;</code></p>
<div class="demolink"><a href="index-data/uc2.html" target="_blank">Test in your browser.</a></div>
</figure>
<p>If there is already suitable markup to surround the book title, such as an <code translate="no" class="kw">a</code> element, add the <code translate="no" class="kw">dir</code> attribute to it.</p>
<figure class="example">
<p><code>&lt;p&gt;we find the phrase '&lt;a href="..." dir="rtl"&gt;INTERNATIONALIZATION ACTIVITY&lt;/a&gt;' 5 times on the page.&lt;/p&gt;</code></p>
<div class="demolink"><a href="index-data/uc2_a.html" target="_blank">Test in your browser.</a></div>
</figure>
</section>
<section>
<h4 id="uc2html4" class="notoc"><a href="#uc2html4">Bulletproofing for legacy browsers</a></h4>
<p> For browsers where <code translate="no" class="kw">dir</code> doesn't isolate, you would fix this not only by adding the markup around the opposite-direction Hebrew text, but also by adding an LRM character after it. That would prevent the number being associated with the right-to-left text.</p>
<figure class="example">
<p><code>&lt;p&gt;we find the phrase '&lt;span dir="rtl"&gt;INTERNATIONALIZATION ACTIVITY&lt;/span&gt;' &amp;lrm;5 times on the page.&lt;/p&gt;</code></p>
<div class="demolink"><a href="index-data/uc2_bulletproof.html" target="_blank">Test in your browser.</a></div>
</figure>
<p>If the search string was already tightly wrapped by an element, use that element tag to add the <code translate="no" class="kw">dir</code> attribute, and add the LRM character after it.</p>
<p>Of course, if the overall context is right-to-left, eg. Arabic/Hebrew/etc. text, and the book title was in English, you would need to add an RLM character rather than an LRM character.</p>
</section>
<section>
<h3 id="usecase3"><a href="#usecase3">Use case 3: Lists</a></h3>
<p> Neutral characters between runs of the same direction can sometimes be misinterpreted by the bidi algorithm. In this use case we have several country names in Arabic listed in an LTR paragraph. This is an example of an opposite-direction phrase <strong>followed by another, but logically separate, opposite-direction phrase</strong>. Here is the source code without any bidi markup:</p>
<figure class="example">
<div class="badcode"><span><img src="../../icons/dontcopy.png" alt=" "> Bad code. Don't copy!</span></div>
<p><code><p>the names of these states in arabic are EGYPT, BAHRAIN and KUWAIT respectively.</p></code></p>
</figure>
<p>We expect to see the following:</p>
<figure>
<p><img src="index-data/comma-right.gif" alt="Egypt appears to the left of Bahrain." /></p>
</figure>
<p>In the actual result, the first two Arabic words are reversed and the intervening comma is moved to the right side of the space between the words.</p>
<figure>
<p><img src="index-data/comma-wrong.gif" alt="Bahrain appears to the left of Egypt." /></p>
</figure>
<p>The reason for the failure is that, with a strongly typed right-to-left (RTL) character on either side, the bidirectional algorithm
sees the neutral comma as part of the Arabic text. It interprets the first two Arabic words and the comma as a single directional run in Arabic. In fact the comma is part of the English text, and should mark the boundary between two separate right-to-left directional runs in
Arabic. </p>
<p>The solution for this use case is similar to that for the previous use case, so we will keep the notes below brief, and assume that you have read the solutions for use cases 1 and 2. We will present just the default markup approach.</p>
</section>
<section>
<h4 id="uc3html5" class="notoc"><a href="#uc3html5">Fixing use case 3</a></h4>
<p> Simply wrap each Arabic word with markup and add the appropriate <code translate="no" class="kw">dir</code> value.</p>
<figure class="example">
<p><code>&lt;p&gt;the names of these states in arabic are &lt;span dir="rtl"&gt;EGYPT&lt;/span&gt;, &lt;span dir="rtl"&gt;BAHRAIN&lt;/span&gt; and &lt;span dir="rtl"&gt;KUWAIT&lt;/span&gt; respectively.&lt;/p&gt;</code></p>
<div class="demolink"><a href="index-data/uc3.html" target="_blank">Test in your browser.</a></div>
</figure>
<p>If there is already markup surrounding the Arabic text, such as an <code translate="no" class="kw">a</code> element, add the <code translate="no" class="kw">dir</code> attribute to it.</p>
<figure class="example">
<p><code>&lt;p&gt;the names of these states in arabic are &lt;a href="..." dir="rtl"&gt;EGYPT&lt;/a&gt;, &lt;a href="..." dir="rtl"&gt;BAHRAIN&lt;/a&gt; and &lt;a href="..." dir="rtl"&gt;KUWAIT&lt;/a&gt; respectively.&lt;/p&gt;</code></p>
<div class="demolink"><a href="index-data/uc3_a.html" target="_blank">Test in your browser.</a></div>
</figure>
</section>
<section>
<h4 id="uc3html4" class="notoc"><a href="#uc3html4">Bulletproofing for legacy browsers</a></h4>
<p> Add markup around the Arabic text, but also add an LRM character after it whenever that text is followed by another opposite-direction phrase. Use an RLM character if the surrounding context is right-to-left.</p>
<figure class="example">
<p><code>&lt;p&gt;the names of these states in arabic are &lt;span dir="rtl"&gt;EGYPT&lt;/span&gt;&amp;lrm;, &lt;span dir="rtl"&gt;BAHRAIN&lt;/span&gt; and &lt;span dir="rtl"&gt;KUWAIT&lt;/span&gt; respectively.&lt;/p&gt;</code></p>
<div class="demolink"><a href="index-data/uc3_bulletproof.html" target="_blank">Test in your browser.</a></div>
</figure>
<p>As before, if the Arabic text was already tightly wrapped by an element, use that element tag to add the <code translate="no" class="kw">dir</code> attribute. </p>
</section>
</section>
<section>
<h2 id="dynamicexamples"><a href="#dynamicexamples">Worked examples for dynamic use cases</a></h2>
<p>In this section we will look at use cases that involve injecting content into a page at run time.</p>
<p>It is important to note that we cannot address markup <em>inside</em> the injected content. In all cases below, if the injected phrases contain embedded opposite-direction phrases themselves, these need to be already marked up when the phrase is injected into the page, either in the database, or added by scripting when the injected phrase is fetched. If this is not done, the injected text will look alright for simple cases, but may be problematic for more complex ones. </p>
<section>
<h3 id="uc4"><a href="#uc4">Use case 4: Nested bidi</a></h3>
<p> In the article <a class="print" href="https://www.w3.org/International/questions/qa-html-dir"><cite>Structural markup and right-to-left text in HTML</cite></a> there is an example of a page for an online book store that carries books in many languages and needs to display the original book titles regardless of the language of the user interface. Thus, a Hebrew or Arabic book title may appear in an English interface, and vice-versa.</p>
<p>Let us suppose that you searched for the book <cite lang="he" dir="rtl" style="white-space:nowrap;">הצהרות קידוד תװי CSS</cite> and let's further suppose that the book wasn't found. The bookstore might generate a message that says so. <span style="clear: both;">The image below shows what one would expect to see. </span></p>
<figure>
<p><img src="index-data/book-result-correct.png" alt="Book not found message."></p>
</figure>
<p style="clear: both;">Note how the 'CSS' is to the left of the Hebrew text because it is part of the book title. However with the following source code ...</p>
<figure class="example">
<div class="badcode"><span><img src="../../icons/dontcopy.png" alt=" "> Bad code. Don't copy!</span></div>
<p style="text-align:left;"><code>&lt;p&gt;your search - &lt;cite class="booktitle"&gt;CHARACTER ENCODING IN css&lt;/cite&gt; - did not match any documents.&lt;/p&gt;</code></p>
</figure>
<p>... here is the actual result. Note how the 'CSS' is now on the right of the Hebrew text.</p>
<figure>
<p><img src="index-data/book-result-incorrect.png" alt="Book not found message."></p>
</figure>
</section>
<section>
<h4 id="uc4html5" class="notoc"><a href="#uc4html5">Fixing use case 4</a></h4>
<p><span class="leadin"></span> The default rule when there is no other element around the injected text, is to wrap it in <code translate="no" class="kw">bdi</code>.</p>
<figure class="example">
<p style="text-align:left;"><code>&lt;p&gt;your search - &lt;bdi&gt;&lt;?php echo $theString; ?&gt;&lt;/bdi&gt; - did not match any documents.&lt;/p&gt;</code></p>
<div class="demolink"><a href="index-data/uc4.html" target="_blank">Test in your browser.</a></div>
</figure>
<p>The <code translate="no" class="kw">bdi</code> tag automatically assigns a direction based on the first strong character in the injected string.</p>
<div class="advancedusage">
<p><strong>advanced usage notes:</strong>It is possible that the search string in this example begins with a strong left-to-right character, for example, if the book title that we are searching for begins with 'CSS', rather than ending with it. In that case, there is not much we can do by default in the markup. To cover this case you would have to use scripting to detect the direction of the string as a whole and apply that to the markup. </p>
</div>
<p>If there <em>is</em> another element around the injected text, use <code>dir="auto"</code>.</p>
<figure class="example">
<p style="text-align:left;"><code>&lt;p&gt;your search - &lt;cite dir="auto"&gt;&lt;?php echo $theString; ?&gt;&lt;/cite&gt; - did not match any documents.&lt;/p&gt;</code></p>
<div class="demolink"><a href="index-data/uc4_cite.html" target="_blank">Test in your browser.</a></div>
</figure>
</section>
<section>
<h4 id="uc4html4" class="notoc"><a href="#uc4html4">Bulletproofing for legacy browsers</a></h4>
<p> Without the expected HTML markup behavior we can't really address this use case using markup, since we need to know in advance the direction of the text. This can only be achieved by knowing the direction of or examining the injected phrase before insertion, and applying the appropriate directional information by scripting.</p>
</section>
<section>
<h3 id="uc5"><a href="#uc5">Use case 5: Following numbers</a></h3>
<p> Here's an example where the names of restaurants are added to a page from a database and followed by a number. You don't know in advance the directionality of the injected text. This is the code produced by the script that injects the phrases, without bidi markup:</p>
<figure class="example">
<div class="badcode"><span><img src="../../icons/dontcopy.png" alt=" "> Bad code. Don't copy!</span></div>
<pre>&lt;p&gt;&lt;span class="name"&gt;aroma&lt;/span&gt; - 3 reviews&lt;/p&gt;
&lt;p&gt;&lt;span class="name"&gt;PURPLE PIZZA&lt;/span&gt; - 5 reviews&lt;/p&gt;
&lt;p&gt;&lt;span class="name"&gt;PURPLE PIZZA roma&lt;/span&gt; - 3 reviews&lt;/p&gt;</pre>
</figure>
<p>And here's what one would expect to see, and what you'd actually see.</p>
<figure style="width: 230px; float: left; text-align: center; display: block; margin-right: 10px; margin-left: 10%;">
<div class="figcaption">What it should look like.</div>
<p><img src="index-data/restaurant-right.png" title="AZZIP ELPRUP - 5 reviews" alt="AZZIP ELPRUP - 5 reviews"></p>
</figure>
<figure style="width: 230px; float: left; text-align: center; display: block; margin-left: 10px; margin-right: 10px;">
<div class="figcaption">What it actually looks like.</div>
<p><img src="index-data/restaurant-wrong.png" title="5 - AZZIP ELPRUP reviews" alt="5 - AZZIP ELPRUP reviews"></p>
</figure>
<p style="clear: both;">The problem with the second restaurant name arises because the browser thinks that the " - 5" is part of the Hebrew text. This is what the Unicode Bidi Algorithm tells it to do, and usually it is correct. Not here though. We need to find a way to say that the name and the number are separate things, ie. to isolate the inserted name from the number.</p>
<p style="clear: both;">In the third restaurant name the number is back in the right place, but the word 'Roma' is part of the Hebrew name, and should appear to the left of the Hebrew text. In other words, we need to apply a base direction of RTL to the whole of the injected text.</p>
</section>
<section>
<h4 id="uc5html5" class="notoc"><a href="#uc5html5">Fixing use case 5</a></h4>
<p><span class="leadin"></span> Once again, the default rule when there is no other element around the injected text, is to wrap it in <code translate="no" class="kw">bdi</code>. The <code translate="no" class="kw">bdi</code> element automatically isolates the injected phrase from the number, and sets the direction for the phrase based on its first strong character.</p>
<figure class="example">
<p><code class="code1">foreach $restaurant</code> <code class="code2">echo "&lt;p&gt;&lt;bdi&gt;$restaurant['name']&lt;/bdi&gt; - $restaurant['count'] reviews&lt;/p&gt;";</code> </p>
<div class="demolink"><a href="index-data/uc5.html" target="_blank">Test in your browser.</a></div>
</figure>
<p>The <code translate="no" class="kw">bdi</code> tag automatically assigns a direction based on the first strong character in the injected string.</p>
<p>You'll notice that the example above puts <code translate="no" class="kw">bdi</code> around the name <samp>Aroma</samp> too. Of course, you don't actually need that, but it won't do any harm. On the other hand, it simplifies the necessary script code, and means you can handle any name that comes out of the database, whatever script it is in.</p>
<p>If there <em>is</em> another element around the injected text, use <code>dir="auto"</code>.</p>
<figure class="example">
<p><code class="code1">foreach $restaurant</code> <code class="code2">echo "&lt;p&gt;&lt;a href='...' dir='auto' class='name'&gt;$restaurant['name']&lt;/a&gt; - $restaurant['count'] reviews&lt;/p&gt;";</code> </p>
<div class="demolink"><a href="index-data/uc5_auto.html" target="_blank">Test in your browser.</a></div>
</figure>
</section>
<section>
<h4 id="uc5html4" class="notoc"><a href="#uc5html4">Bulletproofing for legacy browsers</a></h4>
<p> Again, without HTML5 markup behavior, all we can do is add an LRM character after the injected phrase, to ensure that it is isolated from the number. This would be sufficient to correctly render the second item in the list, because it is a very simple case, with no embedded opposite-direction phrases or neutral characters. The third case, however, will not work so well, since the base direction has to be set to right-to-left for the word <samp>'Roma'</samp> to appear on the left. This can only be properly rendered if the injected phrase has markup added to it before insertion.</p>
<p>The code would look something like this.</p>
<figure class="example">
<p><code class="code1">foreach $restaurant</code> <code class="code2">echo "&lt;p&gt;&lt;span class='name' dir='auto'&gt;$restaurant['name']&lt;/span&gt;&amp;lrm; - $restaurant['count'] reviews&lt;/p&gt;";</code> </p>
<div class="demolink"><a href="index-data/uc5_bulletproof.html" target="_blank">Test in your browser.</a></div>
</figure>
</section>
</section>
<section>
<h2 id="additionalexamples"><a href="#additionalexamples">Additional examples</a></h2>
<section>
<h3 id="usecase6"><a href="#usecase6">Use case 6: Punctuation at the end of an opposite-direction phrase</a></h3>
<p> It is a very common situation for punctuation or some other neutral character to appear at the end of an opposite direction phrase and belong with that phrase.</p>
<p>Unfortunately, such neutrals between different directional runs are typically misinterpreted unless there is additional bidi markup. In the following example, the exclamation mark should appear at the end of the Arabic text, ie. to
the left, like this:</p>
<figure>
<p><img src="index-data/exclamation-right.gif" alt="An exclamation mark appearing to the left of Arabic text." /></p>
</figure>
<p>Unfortunately, if we rely solely on the bidirectional algorithm we see this:</p>
<figure>
<p><img src="index-data/exclamation-wrong.gif" alt="An exclamation mark appearing to the right of Arabic text." /></p>
</figure>
<p>Given our understanding of the bidi algorithm we can easily understand why this happened. Because the exclamation mark was typed in
between the last RTL letter 'ب' (on the left) and the LTR letter 'i' (of the word 'in') its directionality is determined by the base direction of
the paragraph, ie. LTR in this case. </p>
<p>Because the exclamation mark is seen as LTR it joins the directional run that includes the text
'in Arabic'.</p>
</section>
<section>
<h4 id="uc6html4" class="notoc"><a href="#uc6html4">Fixing use case 6 when the direction is known</a></h4>
<p>The general solution mentioned above works fine: just put the opposite-direction phrase in an element with a <code translate="no" class="kw">dir</code> attribute. If there isn't already an element present, use a <code translate="no" class="kw">span</code>.</p>
<figure class="example">
<p><code>&lt;p&gt;the title is "&lt;cite dir="rtl" lang="ar"&gt;INTERNATIONALIZATION ACTIVITY!&lt;/cite&gt;" in arabic.&lt;/p&gt;</code></p>
<div class="demolink"><a href="index-data/uc6_static.html" target="_blank">Test in your browser.</a></div>
</figure>
<div class="advancedusage">
<p><strong>advanced usage notes:</strong>You could also simply place an RLM after the exclamation mark, but we have already discussed earlier why that is a less ideal fix. Note, also, that when using this solution, without markup, the Arabic text is not marked up for language or styling. Adding markup around the embedded title is probably a better way to solve the problem.</p>
</div>
</section>
<section>
<h4 id="uc6html5" class="notoc"><a href="#uc6html5">Fixing use case 6 for injected text</a></h4>
<p> Use <code translate="no" class="kw">bdi</code> if there isn't already a surrounding element, otherwise put a <code>dir="auto"</code> on the surrounding element.</p>
<figure class="example">
<p><code>&lt;p&gt;the title is "&lt;bdi lang="ar"&gt;INTERNATIONALIZATION ACTIVITY!&lt;/bdi&gt;" in arabic.&lt;/p&gt;</code> </p>
<p><code>&lt;p&gt;the title is "&lt;cite dir="auto" lang="ar"&gt;INTERNATIONALIZATION ACTIVITY!&lt;/cite&gt;" in arabic.&lt;/p&gt;</code></p>
<div class="demolink"><a href="index-data/uc6_dynamic.html" target="_blank">Test in your browser.</a></div>
<!--p><code><p>the title is "<cite lang="ar"><bdi dir="rtl">INTERNATIONALIZATION ACTIVITY!</bdi></cite>" in arabic.</p></code></p-->
</figure>
</section>
<section>
<h3 id="usecase7"><a href="#usecase7">Use case 7: Telephone numbers, MAC addresses, etc.</a></h3>
<p> The picture below shows the expected result of displaying a telephone number in a right-to-left context, where the area code is surrounded by parentheses, and where the number appears at the beginning of a line or after some right-to-left text.</p>
<figure>
<p style="text-align: right;"><img src="index-data/phonenumber-correct.png" alt="Telephone number correctly ordered."></p>
</figure>
<p>The next picture shows what you actually see, if you rely solely on the bidi algorithm.</p>
<figure>
<p style="text-align: right;"><img src="index-data/phonenumber-incorrect.png" alt="Telephone number incorrectly ordered."></p>
</figure>
<p>Because these are numbers, the order applied by the bidirectional algorithm is slightly different from what we've seen before, but the fix is essentially the same. </p>
<p> Here is another, somewhat more problematic example of the same thing. The picture below shows a MAC address number as you would expect to see it displayed in a right-to-left context. The sequence <samp>01:02:aa:4a:bb:06</samp> looks exactly the same as it would in a left-to-right context.</p>
<figure>
<p style="text-align: right;"><img src="index-data/mac-correct.png" alt="MAC address correctly ordered."></p>
</figure>
<p>Here, however, is what you will see when relying solely on the bidirectional algorithm.</p>
<figure>
<p style="text-align: right;"><img src="index-data/mac-incorrect.png" alt="MAC address incorrectly ordered."></p>
</figure>
<p>This is particularly worrisome, since it's not obvious when the order is incorrect. Even if you did know it was incorrect, it is not at all clear how it should be read.</p>
<p>Although there are more characters involved, this problem is caused because the bidirectional algorithm assumes that the initial run of numbers (and colons, since they are neutral) is associated with the preceding Hebrew text, rather than being part of the MAC address.</p>
<p>This example indicates that you should always wrap MAC addresses, and similar numbers, with directional information.</p>
</section>
<section>
<h4 id="uc7html4" class="notoc"><a href="#uc7html4">Fixing use case 7 when the direction is known</a></h4>
<p> The solution is the same. Put the opposite-direction phrase in an element with a <code translate="no" class="kw">dir</code> attribute. If there isn't already an element present, use a <code translate="no" class="kw">span</code>. The following code would be used in an overall right-to-left context.</p>
<figure class="example">
<p><code>&lt;p&gt;... &lt;span dir="ltr"&gt;(012) 345 6789&lt;/span&gt; ...&lt;/p&gt;</code></p>
<p><code>&lt;p&gt;כתובת &lt;span dir="ltr"&gt;01:02:aa:4a:bb:06&lt;/span&gt; ...&lt;/p&gt;</code></p>
<div class="demolink"><a href="index-data/uc7_static.html" target="_blank">Test in your browser.</a></div>
</figure>
</section>
<section>
<h4 id="uc7html5" class="notoc"><a href="#uc7html5">Fixing use case 7 for injected text</a></h4>
<p>Use <code translate="no" class="kw">bdi</code> if there isn't already a surrounding element, or put <code>dir="auto"</code> on a surrounding element. We just show the simplest case here. The following code would be used in an overall right-to-left context.</p>
<figure class="example">
<p><code>&lt;p&gt;...&lt;bdi&gt;(012) 345 6789&lt;/bdi&gt; ...&lt;/p&gt;</code></p>
<p><code>&lt;p&gt;כתובת &lt;bdi&gt;01:02:aa:4a:bb:06&lt;/bdi&gt; ...&lt;/p&gt;</code> </p>
<div class="demolink"><a href="index-data/uc7_dynamic.html" target="_blank">Test in your browser.</a></div>
</figure>
<div class="advancedusage">
<p><strong>advanced usage notes:</strong>You could also solve both of these cases by simply inserting an RLM immediately before the number. Adding markup around the number is probably a safer way to solve the problem.</p>
</div>
</section>
</section>
<section>
<h2 id="endlinks"><a href="#endlinks">Further reading</a></h2>
<aside class="section" id="survey"> </aside><script>document.getElementById('survey').innerHTML = g.survey</script>
<ul id="full-links">
<li>
<p>Tutorial, <a href="https://www.w3.org/International/tutorials/bidi-xhtml/"><cite>Creating HTML Pages in Arabic, Hebrew and Other Right-to-left Scripts</cite></a></p>
</li>
<li>
<p>Related links, <cite>Authoring HTML & CSS</cite></p>
<ul>
<li><a href="https://www.w3.org/International/techniques/authoring-html#direction">Text direction</a></li>
<li><a href="https://www.w3.org/International/techniques/authoring-html#inline">Mixing text direction inline</a></li>
<li><a href="https://www.w3.org/International/techniques/authoring-html#mirrored">Handling parentheses and other mirrored characters</a></li>
<li><a href="https://www.w3.org/International/techniques/authoring-html#bdo">Overriding the Unicode bidirectional algorithm</a></li>
</ul>
</li>
<li>
<p>Related links, <cite>Authoring SVG</cite></p>
<ul>
<li>
<p><a href="https://www.w3.org/International/techniques/authoring-svg#direction">Specifying text direction</a></p>
</li>
</ul>
</li>
</ul>
</section>
<footer id="thefooter"></footer><script>document.getElementById('thefooter').innerHTML = g.bottomOfPage</script>
<script>completePage()</script>
</body>
</html>