<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
<version number="$Revision$"/>
<transforms>
<transform source="Arab" target="Latn" direction="both" alias="Arabic-Latin und-Latn-t-und-arab" backwardAlias="Latin-Arabic und-Arab-t-und-latn">
<tRule><![CDATA[
# Generally follows UNGEGN
# http://www.eki.ee/wgrs/rom1_ar.pdf
# Occasionally deviates in the direction of ISO 233
# http://homepage.mac.com/sirbinks/pdf/Arabic.pdf
# a) where required for disambiguation.
# b) with underdot instead of cedilla for letters like SAD,
# since those are explicitly in Unicode for transliteration.
# c) with extra non-Arabic-language letters, like PEH
#
# Does *not* do assimilation of "al", nor hyphenation.
# While it could be done, we need to determine whether a prefix "al" could
# occur other than as the definite article (since no space is used).
# Forward-direction filter: only characters in this set are handed to the Arabic-to-Latin rules.
:: [[:Arabic:][:block=ARABIC:][‎ⁿ،؛؟ـً-ٕ٠-٬۰-۹﷼ښ]] ;
# Normalize to NFKD before the forward rules run; the parenthesized NFC is the
# reverse-direction counterpart of this step.
:: NFKD (NFC);
# Marker characters appended to Latin output.
# $disambig / $disambig2 tag a Latin spelling that would otherwise collide with another rule
# (compare "k $disambig" for KEHEH with plain "k" for KAF further down).
$disambig = ̱ ;
$disambig2 = ̰ ;
# $under is the "underdot" mentioned in the header notes (used e.g. for SAD, DAD, TAH, ZAH, HAH).
$under = ̣ ;
# $descender is used by the "s $descender" rule in the non-Arabic-language letters.
$descender = ˌ;
# Characters whose canonical combining class is neither 0 nor 230;
# used by the reverse-only TEH MARBUTA rule to skip over intervening marks.
$notAbove = [[:^ccc=0:] & [:^ccc=230:]];
# non-letters
# Between two decimal digits ([:Nd:] on both sides, outside the braces) the separators map
# to a bare ',' or '.'; only the text inside { } is rewritten, the digits are context.
[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR
[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR
# Outside that digit context the separators carry $disambig; for the decimal separator this
# keeps its Latin form distinct from the ',' produced by the ARABIC COMMA rule below.
٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR
٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR
# ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
، ↔ ',' ; # ARABIC COMMA
؛ ↔ ';' ; # ARABIC SEMICOLON
؟ ↔ '?' ; # ARABIC QUESTION MARK
٪ ↔ '%' ; # ARABIC PERCENT SIGN
# Extended Arabic-Indic digits take $disambig; the Arabic-Indic digits further down map to
# the bare ASCII digit. The longer "digit $disambig" forms come first, presumably so the
# bare-digit rules do not mask them in the reverse direction.
۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO
۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE
٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO
١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE
٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO
٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE
٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR
٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE
٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX
٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN
٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT
٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE
# letters
# long vowels
# Each rule pairs a short-vowel mark with the letter that follows it and maps the pair
# to a single Latin long vowel.
َا↔ ā ; # ARABIC FATHA, ARABIC LETTER ALEF
ُو ↔ ū ; # ARABIC DAMMA, ARABIC LETTER WAW
ِي ↔ ī ; # ARABIC KASRA, ARABIC LETTER YEH
# longer items moved here to prevent masking
# (the Latin side of each rule below starts with a letter that also has a shorter rule
# later in the file, e.g. "s h $disambig" and "s $under" here versus plain "s" for SEEN)
ث ↔ t h $disambig ; # ARABIC LETTER THEH
ذ ↔ d h $disambig ; # ARABIC LETTER THAL
ش ↔ s h $disambig ; # ARABIC LETTER SHEEN
ص ↔ s $under ; # ARABIC LETTER SAD
ض ↔ d $under ; # ARABIC LETTER DAD
ط ↔ t $under ; # ARABIC LETTER TAH
ظ ↔ z $under ; # ARABIC LETTER ZAH
غ ↔ g h $disambig ; # ARABIC LETTER GHAIN
# WARNING: special case
# ←t, umlaut, half-ring below→ will be canonically ordered as ←t, half-ring below, umlaut→
# so on the return, we have to skip over (but preserve) the half-ring below (or others like it)
# ةٕ ← ẗ̹ ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS
ة ↔ t ̈ ; # ARABIC LETTER TEH MARBUTA
# Reverse-only rule: matches t, one or more $notAbove marks (captured as $1), then the
# diaeresis. It emits TEH MARBUTA and leaves the cursor (|) in front of the captured marks
# so they are preserved and can still be converted by later rules.
ة | $1 ← t ($notAbove+) ̈ ; # ARABIC LETTER TEH MARBUTA
# non-Arabic language
# Letters outside the Arabic-language set; each Latin form carries a marker
# ($disambig, $disambig2 or $descender) so it does not collide with the plain-letter rules.
ژ ↔ z h $disambig ; # ARABIC LETTER JEH
# Here the marker sits on the n, before the g.
ڭ ↔ n $disambig g ; # ARABIC LETTER NG
ۋ ↔ v $disambig ; # ARABIC LETTER VE
ی ↔ y $disambig2 ; # ARABIC LETTER FARSI YEH
# NOTE(review): this rule has no character-name comment; the letter looks like
# ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE (U+069A) - confirm.
ښ ↔ s $descender;
# Arabic language
# One rule per basic letter or vowel mark. Letters whose longer Latin forms start with the
# same base letter (THEH, SHEEN, SAD, ...) were handled earlier in the file.
ء ↔ ʾ ; # ARABIC LETTER HAMZA
# Bare ALEF takes $under, presumably to stay distinct from FATHA, which maps to plain "a" below.
ا ↔ a $under; # ARABIC LETTER ALEF
ب ↔ b ; # ARABIC LETTER BEH
ت ↔ t ; # ARABIC LETTER TEH
ج ↔ j ; # ARABIC LETTER JEEM
ح ↔ h $under ; # ARABIC LETTER HAH
خ ↔ k h $disambig ; # ARABIC LETTER KHAH
د ↔ d ; # ARABIC LETTER DAL
ر ↔ r ; # ARABIC LETTER REH
ز ↔ z ; # ARABIC LETTER ZAIN
س ↔ s ; # ARABIC LETTER SEEN
ع ↔ ʿ ; # ARABIC LETTER AIN
# Forward-only rule with an empty result: TATWEEL is dropped when going to Latin
# and nothing maps back to it.
ـ → ; # ARABIC TATWEEL
ف ↔ f ; # ARABIC LETTER FEH
ق ↔ q ; # ARABIC LETTER QAF
# KEHEH ("k $disambig") is listed before KAF (plain "k"), the longer Latin form first.
ک ↔ k $disambig ; # ARABIC LETTER KEHEH
ك ↔ k ; # ARABIC LETTER KAF
ل ↔ l ; # ARABIC LETTER LAM
م ↔ m ; # ARABIC LETTER MEEM
ن ↔ n ; # ARABIC LETTER NOON
ه ↔ h ; # ARABIC LETTER HEH
و ↔ w ; # ARABIC LETTER WAW
# ALEF MAKSURA ("y $disambig") is listed before YEH (plain "y") for the same reason.
ى ↔ y $disambig ; # ARABIC LETTER ALEF MAKSURA
ي ↔ y ; # ARABIC LETTER YEH
# The three -TAN marks map to the matching short vowel followed by a superscript n;
# they precede the plain short-vowel rules so the two-character Latin form is tried first.
ً ↔ aⁿ ; # ARABIC FATHATAN
ٌ ↔ uⁿ ; # ARABIC DAMMATAN
ٍ ↔ iⁿ ; # ARABIC KASRATAN
َ ↔ a ; # ARABIC FATHA
ُ ↔ u ; # ARABIC DAMMA
ِ ↔ i ; # ARABIC KASRA
# SHADDA and SUKUN map to Latin combining marks rather than to letters.
ّ ↔ ̃ ; # ARABIC SHADDA
ْ ↔ ̊ ; # ARABIC SUKUN
# special combining marks
# Each Arabic combining mark maps to a Latin combining mark.
ٓ ↔ ̂ ; # ARABIC MADDAH ABOVE
ٔ ↔ ̉ ; # ARABIC HAMZA ABOVE
ٕ ↔ ̹ ; # ARABIC HAMZA BELOW
# Some non-Arabic language (not in UNGEGN)
پ ↔ p ; # ARABIC LETTER PEH
چ ↔ c h $disambig ; # ARABIC LETTER TCHEH
ڤ ↔ v ; # ARABIC LETTER VEH
# The next two rules are disabled (commented out).
# NOTE(review): if the first one is re-enabled, its Latin side "v $disambig" would duplicate
# the Latin side of the ARABIC LETTER VE rule in the earlier non-Arabic-language section.
# ڥ ↔ v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
# ڢ ↔ f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
گ ↔ g ; # ARABIC LETTER GAF
# fallbacks
# Reverse-only rules (←) for Latin input not covered by the rules above. Each one rewrites
# the input into letters that do have rules; the leading | leaves the cursor in front of the
# replacement text so that it is matched again by the rule set.
# c followed by e, i or y becomes s (the text after } is context only); any other c becomes k.
| s ← c } [eiy];
| k ← c ;
| i ← e ;
| u ← o ;
| ks ← x ;
| n ← ‎ⁿ;
# In the :: lines below, the parenthesized part is the reverse (Latin-to-Arabic) step:
# lowercase the input, and decompose it with NFD. The unparenthesized NFC is the
# forward-direction step, composing the Latin output.
:: (lower) ;
::NFC (NFD);
# Reverse-direction filter: only Latin-script characters plus the listed punctuation, digits
# and marker characters are handed to the Latin-to-Arabic rules.
# NOTE(review): ';' is listed twice in the set below; the second occurrence is redundant.
:: ( [[:Latin:] [%,.0-9;?ʾ-ʿ̂-̄̈-̣̰̊-̱̹;ˌ]] );
]]></tRule>
</transform>
</transforms>
</supplementalData>