diff --git a/src/Markdig.Tests/TestHtmlHelper.cs b/src/Markdig.Tests/TestHtmlHelper.cs index bed65281e..d6e788edd 100644 --- a/src/Markdig.Tests/TestHtmlHelper.cs +++ b/src/Markdig.Tests/TestHtmlHelper.cs @@ -14,7 +14,7 @@ public void TestParseHtmlTagSimple() { var inputTag = ""; var text = new StringSlice(inputTag); - Assert.True(HtmlHelper.TryParseHtmlTag(text, out string outputTag)); + Assert.True(HtmlHelper.TryParseHtmlTag(ref text, out string outputTag)); Assert.AreEqual(inputTag, outputTag); } @@ -23,7 +23,7 @@ public void TestParseHtmlTagSimpleWithAttribute() { var inputTag = ""; var text = new StringSlice(inputTag); - Assert.True(HtmlHelper.TryParseHtmlTag(text, out string outputTag)); + Assert.True(HtmlHelper.TryParseHtmlTag(ref text, out string outputTag)); Assert.AreEqual(inputTag, outputTag); } } diff --git a/src/Markdig.Tests/TestStringSliceList.cs b/src/Markdig.Tests/TestStringSliceList.cs index cc833ab27..0279d58fa 100644 --- a/src/Markdig.Tests/TestStringSliceList.cs +++ b/src/Markdig.Tests/TestStringSliceList.cs @@ -153,5 +153,33 @@ public void TestStringLineGroupIteratorPeekChar() Assert.Throws(() => iterator.PeekChar(-1)); } + + [Test] + public void TestIteratorSkipChar() + { + var lineGroup = new StringLineGroup(4) + { + new StringSlice("ABC", NewLine.LineFeed), + new StringSlice("E", NewLine.LineFeed) + }; + + Test(lineGroup.ToCharIterator()); + + Test(new StringSlice("ABC\nE\n")); + + Test(new StringSlice("Foo\nABC\nE\n", 4, 9)); + + static void Test(T iterator) where T : ICharIterator + { + Assert.AreEqual('A', iterator.CurrentChar); iterator.SkipChar(); + Assert.AreEqual('B', iterator.CurrentChar); iterator.SkipChar(); + Assert.AreEqual('C', iterator.CurrentChar); iterator.SkipChar(); + Assert.AreEqual('\n', iterator.CurrentChar); iterator.SkipChar(); + Assert.AreEqual('E', iterator.CurrentChar); iterator.SkipChar(); + Assert.AreEqual('\n', iterator.CurrentChar); iterator.SkipChar(); + Assert.AreEqual('\0', iterator.CurrentChar); iterator.SkipChar(); + Assert.AreEqual('\0', iterator.CurrentChar); iterator.SkipChar(); + } + } } } \ No newline at end of file diff --git a/src/Markdig/Extensions/Abbreviations/AbbreviationParser.cs b/src/Markdig/Extensions/Abbreviations/AbbreviationParser.cs index eef41768b..56cd07be3 100644 --- a/src/Markdig/Extensions/Abbreviations/AbbreviationParser.cs +++ b/src/Markdig/Extensions/Abbreviations/AbbreviationParser.cs @@ -51,7 +51,7 @@ public override BlockState TryOpen(BlockProcessor processor) { return BlockState.None; } - slice.NextChar(); + slice.SkipChar(); slice.Trim(); diff --git a/src/Markdig/Extensions/GenericAttributes/GenericAttributesParser.cs b/src/Markdig/Extensions/GenericAttributes/GenericAttributesParser.cs index 90020ff95..f5c2cad51 100644 --- a/src/Markdig/Extensions/GenericAttributes/GenericAttributesParser.cs +++ b/src/Markdig/Extensions/GenericAttributes/GenericAttributesParser.cs @@ -109,7 +109,7 @@ public static bool TryParse(ref StringSlice slice, [NotNullWhen(true)] out HtmlA if (c == '}') { isValid = true; - line.NextChar(); // skip } + line.SkipChar(); // skip } break; } @@ -191,7 +191,7 @@ public static bool TryParse(ref StringSlice slice, [NotNullWhen(true)] out HtmlA } // Go to next char, skip any spaces - line.NextChar(); + line.SkipChar(); line.TrimStart(); int startValue = -1; diff --git a/src/Markdig/Extensions/Globalization/GlobalizationExtension.cs b/src/Markdig/Extensions/Globalization/GlobalizationExtension.cs index 880ebc944..328f7ed03 100644 --- a/src/Markdig/Extensions/Globalization/GlobalizationExtension.cs +++ b/src/Markdig/Extensions/Globalization/GlobalizationExtension.cs @@ -10,6 +10,7 @@ using Markdig.Syntax; using Markdig.Syntax.Inlines; using System.Collections.Generic; +using System.Diagnostics; namespace Markdig.Extensions.Globalization { @@ -51,7 +52,7 @@ public void Setup(MarkdownPipeline pipeline, IMarkdownRenderer renderer) } - private bool ShouldBeRightToLeft(MarkdownObject item) + private static bool ShouldBeRightToLeft(MarkdownObject item) { if (item is IEnumerable container) { @@ -88,14 +89,30 @@ private bool ShouldBeRightToLeft(MarkdownObject item) return false; } - private bool StartsWithRtlCharacter(StringSlice slice) + private static bool StartsWithRtlCharacter(StringSlice slice) { - foreach (var c in CharHelper.ToUtf32(slice)) + for (int i = slice.Start; i <= slice.End; i++) { - if (CharHelper.IsRightToLeft(c)) + if (slice[i] < 128) + { + continue; + } + + int rune; + if (CharHelper.IsHighSurrogate(slice[i]) && i < slice.End && CharHelper.IsLowSurrogate(slice[i + 1])) + { + Debug.Assert(char.IsSurrogatePair(slice[i], slice[i + 1])); + rune = char.ConvertToUtf32(slice[i], slice[i + 1]); + } + else + { + rune = slice[i]; + } + + if (CharHelper.IsRightToLeft(rune)) return true; - else if (CharHelper.IsLeftToRight(c)) + if (CharHelper.IsLeftToRight(rune)) return false; } diff --git a/src/Markdig/Extensions/SmartyPants/SmartyPantsInlineParser.cs b/src/Markdig/Extensions/SmartyPants/SmartyPantsInlineParser.cs index e9b1265b2..3aa732966 100644 --- a/src/Markdig/Extensions/SmartyPants/SmartyPantsInlineParser.cs +++ b/src/Markdig/Extensions/SmartyPants/SmartyPantsInlineParser.cs @@ -52,7 +52,7 @@ public override bool Match(InlineProcessor processor, ref StringSlice slice) type = SmartyPantType.Quote; // We will resolve them at the end of parsing all inlines if (slice.PeekChar() == '\'') { - slice.NextChar(); + slice.SkipChar(); type = SmartyPantType.DoubleQuote; // We will resolve them at the end of parsing all inlines } break; diff --git a/src/Markdig/Extensions/Tables/GridTableParser.cs b/src/Markdig/Extensions/Tables/GridTableParser.cs index 44417f23d..49be8cae1 100644 --- a/src/Markdig/Extensions/Tables/GridTableParser.cs +++ b/src/Markdig/Extensions/Tables/GridTableParser.cs @@ -33,7 +33,7 @@ public override BlockState TryOpen(BlockProcessor processor) while (c == '+') { var columnStart = line.Start; - line.NextChar(); + line.SkipChar(); line.TrimStart(); // if we have reached the end of the line, exit @@ -161,15 +161,16 @@ private static void SetRowSpanState(List columns, St private static bool IsRowSeperator(StringSlice slice) { - while (slice.Length > 0) + char c = slice.CurrentChar; + do { - if (slice.CurrentChar != '-' && slice.CurrentChar != '=' && slice.CurrentChar != ':') + if (c != '-' && c != '=' && c != ':') { - return false; + return c == '\0'; } - slice.NextChar(); + c = slice.NextChar(); } - return true; + while (true); } private static void TerminateCurrentRow(BlockProcessor processor, GridTableState tableState, Table gridTable, bool isLastRow) diff --git a/src/Markdig/Extensions/Tables/PipeTableParser.cs b/src/Markdig/Extensions/Tables/PipeTableParser.cs index 94a0eca1a..205283406 100644 --- a/src/Markdig/Extensions/Tables/PipeTableParser.cs +++ b/src/Markdig/Extensions/Tables/PipeTableParser.cs @@ -112,7 +112,7 @@ public override bool Match(InlineProcessor processor, ref StringSlice slice) } tableState.LineHasPipe = true; tableState.LineIndex = localLineIndex; - slice.NextChar(); // Skip the `|` character + slice.SkipChar(); // Skip the `|` character tableState.ColumnAndLineDelimiters.Add(processor.Inline); } diff --git a/src/Markdig/Extensions/Tables/TableHelper.cs b/src/Markdig/Extensions/Tables/TableHelper.cs index aff49cce0..3526e1e2d 100644 --- a/src/Markdig/Extensions/Tables/TableHelper.cs +++ b/src/Markdig/Extensions/Tables/TableHelper.cs @@ -60,7 +60,7 @@ public static bool ParseColumnHeaderDetect(ref StringSlice slice, ref char delim if (c == ':') { hasLeft = true; - slice.NextChar(); + slice.SkipChar(); } slice.TrimStart(); @@ -91,7 +91,7 @@ public static bool ParseColumnHeaderDetect(ref StringSlice slice, ref char delim if (c == ':') { hasRight = true; - slice.NextChar(); + slice.SkipChar(); } slice.TrimStart(); diff --git a/src/Markdig/Extensions/TaskLists/TaskListInlineParser.cs b/src/Markdig/Extensions/TaskLists/TaskListInlineParser.cs index 5514e4912..faf7abdf0 100644 --- a/src/Markdig/Extensions/TaskLists/TaskListInlineParser.cs +++ b/src/Markdig/Extensions/TaskLists/TaskListInlineParser.cs @@ -56,7 +56,7 @@ public override bool Match(InlineProcessor processor, ref StringSlice slice) return false; } // Skip last ] - slice.NextChar(); + slice.SkipChar(); // Create the TaskList var taskItem = new TaskList() diff --git a/src/Markdig/Helpers/CharHelper.cs b/src/Markdig/Helpers/CharHelper.cs index c23293930..98b41a362 100644 --- a/src/Markdig/Helpers/CharHelper.cs +++ b/src/Markdig/Helpers/CharHelper.cs @@ -355,22 +355,6 @@ private static bool IsInInclusiveRange(char c, char min, char max) internal static bool IsInInclusiveRange(int value, uint min, uint max) => ((uint)value - min) <= (max - min); - public static IEnumerable ToUtf32(StringSlice text) - { - for (int i = text.Start; i <= text.End; i++) - { - if (IsHighSurrogate(text[i]) && i < text.End && IsLowSurrogate(text[i + 1])) - { - Debug.Assert(char.IsSurrogatePair(text[i], text[i + 1])); - yield return char.ConvertToUtf32(text[i], text[i + 1]); - } - else - { - yield return text[i]; - } - } - } - public static bool IsRightToLeft(int c) { // Generated from Table D.1 of RFC3454 diff --git a/src/Markdig/Helpers/CharNormalizer.cs b/src/Markdig/Helpers/CharNormalizer.cs index 02ee8842d..82ab74652 100644 --- a/src/Markdig/Helpers/CharNormalizer.cs +++ b/src/Markdig/Helpers/CharNormalizer.cs @@ -11,8 +11,6 @@ namespace Markdig.Helpers /// public static class CharNormalizer { - private static readonly Dictionary CodeToAscii; - /// /// Converts a unicode char to a simple ASCII string. /// @@ -20,1284 +18,1281 @@ public static class CharNormalizer /// The simple ASCII string or null if the char itself cannot be simplified public static string? ConvertToAscii(char c) { - return CodeToAscii.TryGetValue(c, out string? str) ? str : null; + return c >= 160 && CodeToAscii.TryGetValue(c, out string? str) ? str : null; } - static CharNormalizer() + // This table was generated by the app UnicodeNormDApp + private static readonly Dictionary CodeToAscii = new(1269) { - // This table was generated by the app UnicodeNormDApp - CodeToAscii = new Dictionary(1269) - { - {'Ḋ', "D"}, - {'Ḍ', "D"}, - {'È', "E"}, - {'Ē', "E"}, - {'Ḕ', "E"}, - {'ª', "a"}, - {'²', "2"}, - {'³', "3"}, - {'¹', "1"}, - {'º', "o"}, - {'¼', "14"}, - {'½', "12"}, - {'¾', "34"}, - {'À', "A"}, - {'Á', "A"}, - {'Â', "A"}, - {'Ã', "A"}, - {'Ä', "A"}, - {'Å', "A"}, - {'Ç', "C"}, - {'É', "E"}, - {'Ê', "E"}, - {'Ë', "E"}, - {'Ì', "I"}, - {'Í', "I"}, - {'Î', "I"}, - {'Ï', "I"}, - {'Ñ', "N"}, - {'Ò', "O"}, - {'Ó', "O"}, - {'Ô', "O"}, - {'Õ', "O"}, - {'Ö', "O"}, - {'Ù', "U"}, - {'Ú', "U"}, - {'Û', "U"}, - {'Ü', "U"}, - {'Ý', "Y"}, - {'à', "a"}, - {'á', "a"}, - {'â', "a"}, - {'ã', "a"}, - {'ä', "a"}, - {'å', "a"}, - {'ç', "c"}, - {'è', "e"}, - {'é', "e"}, - {'ê', "e"}, - {'ë', "e"}, - {'ì', "i"}, - {'í', "i"}, - {'î', "i"}, - {'ï', "i"}, - {'ñ', "n"}, - {'ò', "o"}, - {'ó', "o"}, - {'ô', "o"}, - {'õ', "o"}, - {'ö', "o"}, - {'ù', "u"}, - {'ú', "u"}, - {'û', "u"}, - {'ü', "u"}, - {'ý', "y"}, - {'ÿ', "y"}, - {'Ā', "A"}, - {'ā', "a"}, - {'Ă', "A"}, - {'ă', "a"}, - {'Ą', "A"}, - {'ą', "a"}, - {'Ć', "C"}, - {'ć', "c"}, - {'Ĉ', "C"}, - {'ĉ', "c"}, - {'Ċ', "C"}, - {'ċ', "c"}, - {'Č', "C"}, - {'č', "c"}, - {'Ď', "D"}, - {'ď', "d"}, - {'ē', "e"}, - {'Ĕ', "E"}, - {'ĕ', "e"}, - {'Ė', "E"}, - {'ė', "e"}, - {'Ę', "E"}, - {'ę', "e"}, - {'Ě', "E"}, - {'ě', "e"}, - {'Ĝ', "G"}, - {'ĝ', "g"}, - {'Ğ', "G"}, - {'ğ', "g"}, - {'Ġ', "G"}, - {'ġ', "g"}, - {'Ģ', "G"}, - {'ģ', "g"}, - {'Ĥ', "H"}, - {'ĥ', "h"}, - {'Ĩ', "I"}, - {'ĩ', "i"}, - {'Ī', "I"}, - {'ī', "i"}, - {'Ĭ', "I"}, - {'ĭ', "i"}, - {'Į', "I"}, - {'į', "i"}, - {'İ', "I"}, - {'IJ', "IJ"}, - {'ij', "ij"}, - {'Ĵ', "J"}, - {'ĵ', "j"}, - {'Ķ', "K"}, - {'ķ', "k"}, - {'Ĺ', "L"}, - {'ĺ', "l"}, - {'Ļ', "L"}, - {'ļ', "l"}, - {'Ľ', "L"}, - {'ľ', "l"}, - {'Ŀ', "L"}, - {'ŀ', "l"}, - {'Ń', "N"}, - {'ń', "n"}, - {'Ņ', "N"}, - {'ņ', "n"}, - {'Ň', "N"}, - {'ň', "n"}, - {'ʼn', "n"}, - {'Ō', "O"}, - {'ō', "o"}, - {'Ŏ', "O"}, - {'ŏ', "o"}, - {'Ő', "O"}, - {'ő', "o"}, - {'Ŕ', "R"}, - {'ŕ', "r"}, - {'Ŗ', "R"}, - {'ŗ', "r"}, - {'Ř', "R"}, - {'ř', "r"}, - {'Ś', "S"}, - {'ś', "s"}, - {'Ŝ', "S"}, - {'ŝ', "s"}, - {'Ş', "S"}, - {'ş', "s"}, - {'Š', "S"}, - {'š', "s"}, - {'Ţ', "T"}, - {'ţ', "t"}, - {'Ť', "T"}, - {'ť', "t"}, - {'Ũ', "U"}, - {'ũ', "u"}, - {'Ū', "U"}, - {'ū', "u"}, - {'Ŭ', "U"}, - {'ŭ', "u"}, - {'Ů', "U"}, - {'ů', "u"}, - {'Ű', "U"}, - {'ű', "u"}, - {'Ų', "U"}, - {'ų', "u"}, - {'Ŵ', "W"}, - {'ŵ', "w"}, - {'Ŷ', "Y"}, - {'ŷ', "y"}, - {'Ÿ', "Y"}, - {'Ź', "Z"}, - {'ź', "z"}, - {'Ż', "Z"}, - {'ż', "z"}, - {'Ž', "Z"}, - {'ž', "z"}, - {'ſ', "s"}, - {'Ơ', "O"}, - {'ơ', "o"}, - {'Ư', "U"}, - {'ư', "u"}, - {'DŽ', "DZ"}, - {'Dž', "Dz"}, - {'dž', "dz"}, - {'LJ', "LJ"}, - {'Lj', "Lj"}, - {'lj', "lj"}, - {'NJ', "NJ"}, - {'Nj', "Nj"}, - {'nj', "nj"}, - {'Ǎ', "A"}, - {'ǎ', "a"}, - {'Ǐ', "I"}, - {'ǐ', "i"}, - {'Ǒ', "O"}, - {'ǒ', "o"}, - {'Ǔ', "U"}, - {'ǔ', "u"}, - {'Ǖ', "U"}, - {'ǖ', "u"}, - {'Ǘ', "U"}, - {'ǘ', "u"}, - {'Ǚ', "U"}, - {'ǚ', "u"}, - {'Ǜ', "U"}, - {'ǜ', "u"}, - {'Ǟ', "A"}, - {'ǟ', "a"}, - {'Ǡ', "A"}, - {'ǡ', "a"}, - {'Ǧ', "G"}, - {'ǧ', "g"}, - {'Ǩ', "K"}, - {'ǩ', "k"}, - {'Ǫ', "O"}, - {'ǫ', "o"}, - {'Ǭ', "O"}, - {'ǭ', "o"}, - {'ǰ', "j"}, - {'DZ', "DZ"}, - {'Dz', "Dz"}, - {'dz', "dz"}, - {'Ǵ', "G"}, - {'ǵ', "g"}, - {'Ǹ', "N"}, - {'ǹ', "n"}, - {'Ǻ', "A"}, - {'ǻ', "a"}, - {'Ȁ', "A"}, - {'ȁ', "a"}, - {'Ȃ', "A"}, - {'ȃ', "a"}, - {'Ȅ', "E"}, - {'ȅ', "e"}, - {'Ȇ', "E"}, - {'ȇ', "e"}, - {'Ȉ', "I"}, - {'ȉ', "i"}, - {'Ȋ', "I"}, - {'ȋ', "i"}, - {'Ȍ', "O"}, - {'ȍ', "o"}, - {'Ȏ', "O"}, - {'ȏ', "o"}, - {'Ȑ', "R"}, - {'ȑ', "r"}, - {'Ȓ', "R"}, - {'ȓ', "r"}, - {'Ȕ', "U"}, - {'ȕ', "u"}, - {'Ȗ', "U"}, - {'ȗ', "u"}, - {'Ș', "S"}, - {'ș', "s"}, - {'Ț', "T"}, - {'ț', "t"}, - {'Ȟ', "H"}, - {'ȟ', "h"}, - {'Ȧ', "A"}, - {'ȧ', "a"}, - {'Ȩ', "E"}, - {'ȩ', "e"}, - {'Ȫ', "O"}, - {'ȫ', "o"}, - {'Ȭ', "O"}, - {'ȭ', "o"}, - {'Ȯ', "O"}, - {'ȯ', "o"}, - {'Ȱ', "O"}, - {'ȱ', "o"}, - {'Ȳ', "Y"}, - {'ȳ', "y"}, - {'ʰ', "h"}, - {'ʲ', "j"}, - {'ʳ', "r"}, - {'ʷ', "w"}, - {'ʸ', "y"}, - {'ˡ', "l"}, - {'ˢ', "s"}, - {'ˣ', "x"}, - {';', ";"}, - {'ᴬ', "A"}, - {'ᴮ', "B"}, - {'ᴰ', "D"}, - {'ᴱ', "E"}, - {'ᴳ', "G"}, - {'ᴴ', "H"}, - {'ᴵ', "I"}, - {'ᴶ', "J"}, - {'ᴷ', "K"}, - {'ᴸ', "L"}, - {'ᴹ', "M"}, - {'ᴺ', "N"}, - {'ᴼ', "O"}, - {'ᴾ', "P"}, - {'ᴿ', "R"}, - {'ᵀ', "T"}, - {'ᵁ', "U"}, - {'ᵂ', "W"}, - {'ᵃ', "a"}, - {'ᵇ', "b"}, - {'ᵈ', "d"}, - {'ᵉ', "e"}, - {'ᵍ', "g"}, - {'ᵏ', "k"}, - {'ᵐ', "m"}, - {'ᵒ', "o"}, - {'ᵖ', "p"}, - {'ᵗ', "t"}, - {'ᵘ', "u"}, - {'ᵛ', "v"}, - {'ᵢ', "i"}, - {'ᵣ', "r"}, - {'ᵤ', "u"}, - {'ᵥ', "v"}, - {'ᶜ', "c"}, - {'ᶠ', "f"}, - {'ᶻ', "z"}, - {'Ḁ', "A"}, - {'ḁ', "a"}, - {'Ḃ', "B"}, - {'ḃ', "b"}, - {'Ḅ', "B"}, - {'ḅ', "b"}, - {'Ḇ', "B"}, - {'ḇ', "b"}, - {'Ḉ', "C"}, - {'ḉ', "c"}, - {'ḋ', "d"}, - {'ḍ', "d"}, - {'Ḏ', "D"}, - {'ḏ', "d"}, - {'Ḑ', "D"}, - {'ḑ', "d"}, - {'Ḓ', "D"}, - {'ḓ', "d"}, - {'ḕ', "e"}, - {'Ḗ', "E"}, - {'ḗ', "e"}, - {'Ḙ', "E"}, - {'ḙ', "e"}, - {'Ḛ', "E"}, - {'ḛ', "e"}, - {'Ḝ', "E"}, - {'ḝ', "e"}, - {'Ḟ', "F"}, - {'ḟ', "f"}, - {'Ḡ', "G"}, - {'ḡ', "g"}, - {'Ḣ', "H"}, - {'ḣ', "h"}, - {'Ḥ', "H"}, - {'ḥ', "h"}, - {'Ḧ', "H"}, - {'ḧ', "h"}, - {'Ḩ', "H"}, - {'ḩ', "h"}, - {'Ḫ', "H"}, - {'ḫ', "h"}, - {'Ḭ', "I"}, - {'ḭ', "i"}, - {'Ḯ', "I"}, - {'ḯ', "i"}, - {'Ḱ', "K"}, - {'ḱ', "k"}, - {'Ḳ', "K"}, - {'ḳ', "k"}, - {'Ḵ', "K"}, - {'ḵ', "k"}, - {'Ḷ', "L"}, - {'ḷ', "l"}, - {'Ḹ', "L"}, - {'ḹ', "l"}, - {'Ḻ', "L"}, - {'ḻ', "l"}, - {'Ḽ', "L"}, - {'ḽ', "l"}, - {'Ḿ', "M"}, - {'ḿ', "m"}, - {'Ṁ', "M"}, - {'ṁ', "m"}, - {'Ṃ', "M"}, - {'ṃ', "m"}, - {'Ṅ', "N"}, - {'ṅ', "n"}, - {'Ṇ', "N"}, - {'ṇ', "n"}, - {'Ṉ', "N"}, - {'ṉ', "n"}, - {'Ṋ', "N"}, - {'ṋ', "n"}, - {'Ṍ', "O"}, - {'ṍ', "o"}, - {'Ṏ', "O"}, - {'ṏ', "o"}, - {'Ṑ', "O"}, - {'ṑ', "o"}, - {'Ṓ', "O"}, - {'ṓ', "o"}, - {'Ṕ', "P"}, - {'ṕ', "p"}, - {'Ṗ', "P"}, - {'ṗ', "p"}, - {'Ṙ', "R"}, - {'ṙ', "r"}, - {'Ṛ', "R"}, - {'ṛ', "r"}, - {'Ṝ', "R"}, - {'ṝ', "r"}, - {'Ṟ', "R"}, - {'ṟ', "r"}, - {'Ṡ', "S"}, - {'ṡ', "s"}, - {'Ṣ', "S"}, - {'ṣ', "s"}, - {'Ṥ', "S"}, - {'ṥ', "s"}, - {'Ṧ', "S"}, - {'ṧ', "s"}, - {'Ṩ', "S"}, - {'ṩ', "s"}, - {'Ṫ', "T"}, - {'ṫ', "t"}, - {'Ṭ', "T"}, - {'ṭ', "t"}, - {'Ṯ', "T"}, - {'ṯ', "t"}, - {'Ṱ', "T"}, - {'ṱ', "t"}, - {'Ṳ', "U"}, - {'ṳ', "u"}, - {'Ṵ', "U"}, - {'ṵ', "u"}, - {'Ṷ', "U"}, - {'ṷ', "u"}, - {'Ṹ', "U"}, - {'ṹ', "u"}, - {'Ṻ', "U"}, - {'ṻ', "u"}, - {'Ṽ', "V"}, - {'ṽ', "v"}, - {'Ṿ', "V"}, - {'ṿ', "v"}, - {'Ẁ', "W"}, - {'ẁ', "w"}, - {'Ẃ', "W"}, - {'ẃ', "w"}, - {'Ẅ', "W"}, - {'ẅ', "w"}, - {'Ẇ', "W"}, - {'ẇ', "w"}, - {'Ẉ', "W"}, - {'ẉ', "w"}, - {'Ẋ', "X"}, - {'ẋ', "x"}, - {'Ẍ', "X"}, - {'ẍ', "x"}, - {'Ẏ', "Y"}, - {'ẏ', "y"}, - {'Ẑ', "Z"}, - {'ẑ', "z"}, - {'Ẓ', "Z"}, - {'ẓ', "z"}, - {'Ẕ', "Z"}, - {'ẕ', "z"}, - {'ẖ', "h"}, - {'ẗ', "t"}, - {'ẘ', "w"}, - {'ẙ', "y"}, - {'ẚ', "a"}, - {'ẛ', "s"}, - {'Ạ', "A"}, - {'ạ', "a"}, - {'Ả', "A"}, - {'ả', "a"}, - {'Ấ', "A"}, - {'ấ', "a"}, - {'Ầ', "A"}, - {'ầ', "a"}, - {'Ẩ', "A"}, - {'ẩ', "a"}, - {'Ẫ', "A"}, - {'ẫ', "a"}, - {'Ậ', "A"}, - {'ậ', "a"}, - {'Ắ', "A"}, - {'ắ', "a"}, - {'Ằ', "A"}, - {'ằ', "a"}, - {'Ẳ', "A"}, - {'ẳ', "a"}, - {'Ẵ', "A"}, - {'ẵ', "a"}, - {'Ặ', "A"}, - {'ặ', "a"}, - {'Ẹ', "E"}, - {'ẹ', "e"}, - {'Ẻ', "E"}, - {'ẻ', "e"}, - {'Ẽ', "E"}, - {'ẽ', "e"}, - {'Ế', "E"}, - {'ế', "e"}, - {'Ề', "E"}, - {'ề', "e"}, - {'Ể', "E"}, - {'ể', "e"}, - {'Ễ', "E"}, - {'ễ', "e"}, - {'Ệ', "E"}, - {'ệ', "e"}, - {'Ỉ', "I"}, - {'ỉ', "i"}, - {'Ị', "I"}, - {'ị', "i"}, - {'Ọ', "O"}, - {'ọ', "o"}, - {'Ỏ', "O"}, - {'ỏ', "o"}, - {'Ố', "O"}, - {'ố', "o"}, - {'Ồ', "O"}, - {'ồ', "o"}, - {'Ổ', "O"}, - {'ổ', "o"}, - {'Ỗ', "O"}, - {'ỗ', "o"}, - {'Ộ', "O"}, - {'ộ', "o"}, - {'Ớ', "O"}, - {'ớ', "o"}, - {'Ờ', "O"}, - {'ờ', "o"}, - {'Ở', "O"}, - {'ở', "o"}, - {'Ỡ', "O"}, - {'ỡ', "o"}, - {'Ợ', "O"}, - {'ợ', "o"}, - {'Ụ', "U"}, - {'ụ', "u"}, - {'Ủ', "U"}, - {'ủ', "u"}, - {'Ứ', "U"}, - {'ứ', "u"}, - {'Ừ', "U"}, - {'ừ', "u"}, - {'Ử', "U"}, - {'ử', "u"}, - {'Ữ', "U"}, - {'ữ', "u"}, - {'Ự', "U"}, - {'ự', "u"}, - {'Ỳ', "Y"}, - {'ỳ', "y"}, - {'Ỵ', "Y"}, - {'ỵ', "y"}, - {'Ỷ', "Y"}, - {'ỷ', "y"}, - {'Ỹ', "Y"}, - {'ỹ', "y"}, - {'`', "`"}, - {'․', "."}, - {'‥', ".."}, - {'…', "..."}, - {'‼', "!!"}, - {'⁇', "??"}, - {'⁈', "?!"}, - {'⁉', "!?"}, - {'⁰', "0"}, - {'ⁱ', "i"}, - {'⁴', "4"}, - {'⁵', "5"}, - {'⁶', "6"}, - {'⁷', "7"}, - {'⁸', "8"}, - {'⁹', "9"}, - {'⁺', "+"}, - {'⁼', "="}, - {'⁽', "("}, - {'⁾', ")"}, - {'ⁿ', "n"}, - {'₀', "0"}, - {'₁', "1"}, - {'₂', "2"}, - {'₃', "3"}, - {'₄', "4"}, - {'₅', "5"}, - {'₆', "6"}, - {'₇', "7"}, - {'₈', "8"}, - {'₉', "9"}, - {'₊', "+"}, - {'₌', "="}, - {'₍', "("}, - {'₎', ")"}, - {'ₐ', "a"}, - {'ₑ', "e"}, - {'ₒ', "o"}, - {'ₓ', "x"}, - {'ₕ', "h"}, - {'ₖ', "k"}, - {'ₗ', "l"}, - {'ₘ', "m"}, - {'ₙ', "n"}, - {'ₚ', "p"}, - {'ₛ', "s"}, - {'ₜ', "t"}, - {'₨', "Rs"}, - {'℀', "a/c"}, - {'℁', "a/s"}, - {'ℂ', "C"}, - {'℃', "C"}, - {'℅', "c/o"}, - {'℆', "c/u"}, - {'℉', "F"}, - {'ℊ', "g"}, - {'ℋ', "H"}, - {'ℌ', "H"}, - {'ℍ', "H"}, - {'ℎ', "h"}, - {'ℐ', "I"}, - {'ℑ', "I"}, - {'ℒ', "L"}, - {'ℓ', "l"}, - {'ℕ', "N"}, - {'№', "No"}, - {'ℙ', "P"}, - {'ℚ', "Q"}, - {'ℛ', "R"}, - {'ℜ', "R"}, - {'ℝ', "R"}, - {'℠', "SM"}, - {'℡', "TEL"}, - {'™', "TM"}, - {'ℤ', "Z"}, - {'ℨ', "Z"}, - {'K', "K"}, - {'Å', "A"}, - {'ℬ', "B"}, - {'ℭ', "C"}, - {'ℯ', "e"}, - {'ℰ', "E"}, - {'ℱ', "F"}, - {'ℳ', "M"}, - {'ℴ', "o"}, - {'ℹ', "i"}, - {'℻', "FAX"}, - {'ⅅ', "D"}, - {'ⅆ', "d"}, - {'ⅇ', "e"}, - {'ⅈ', "i"}, - {'ⅉ', "j"}, - {'⅐', "17"}, - {'⅑', "19"}, - {'⅒', "110"}, - {'⅓', "13"}, - {'⅔', "23"}, - {'⅕', "15"}, - {'⅖', "25"}, - {'⅗', "35"}, - {'⅘', "45"}, - {'⅙', "16"}, - {'⅚', "56"}, - {'⅛', "18"}, - {'⅜', "38"}, - {'⅝', "58"}, - {'⅞', "78"}, - {'⅟', "1"}, - {'Ⅰ', "I"}, - {'Ⅱ', "II"}, - {'Ⅲ', "III"}, - {'Ⅳ', "IV"}, - {'Ⅴ', "V"}, - {'Ⅵ', "VI"}, - {'Ⅶ', "VII"}, - {'Ⅷ', "VIII"}, - {'Ⅸ', "IX"}, - {'Ⅹ', "X"}, - {'Ⅺ', "XI"}, - {'Ⅻ', "XII"}, - {'Ⅼ', "L"}, - {'Ⅽ', "C"}, - {'Ⅾ', "D"}, - {'Ⅿ', "M"}, - {'ⅰ', "i"}, - {'ⅱ', "ii"}, - {'ⅲ', "iii"}, - {'ⅳ', "iv"}, - {'ⅴ', "v"}, - {'ⅵ', "vi"}, - {'ⅶ', "vii"}, - {'ⅷ', "viii"}, - {'ⅸ', "ix"}, - {'ⅹ', "x"}, - {'ⅺ', "xi"}, - {'ⅻ', "xii"}, - {'ⅼ', "l"}, - {'ⅽ', "c"}, - {'ⅾ', "d"}, - {'ⅿ', "m"}, - {'↉', "03"}, - {'≠', "="}, - {'≮', "<"}, - {'≯', ">"}, - {'①', "1"}, - {'②', "2"}, - {'③', "3"}, - {'④', "4"}, - {'⑤', "5"}, - {'⑥', "6"}, - {'⑦', "7"}, - {'⑧', "8"}, - {'⑨', "9"}, - {'⑩', "10"}, - {'⑪', "11"}, - {'⑫', "12"}, - {'⑬', "13"}, - {'⑭', "14"}, - {'⑮', "15"}, - {'⑯', "16"}, - {'⑰', "17"}, - {'⑱', "18"}, - {'⑲', "19"}, - {'⑳', "20"}, - {'⑴', "(1)"}, - {'⑵', "(2)"}, - {'⑶', "(3)"}, - {'⑷', "(4)"}, - {'⑸', "(5)"}, - {'⑹', "(6)"}, - {'⑺', "(7)"}, - {'⑻', "(8)"}, - {'⑼', "(9)"}, - {'⑽', "(10)"}, - {'⑾', "(11)"}, - {'⑿', "(12)"}, - {'⒀', "(13)"}, - {'⒁', "(14)"}, - {'⒂', "(15)"}, - {'⒃', "(16)"}, - {'⒄', "(17)"}, - {'⒅', "(18)"}, - {'⒆', "(19)"}, - {'⒇', "(20)"}, - {'⒈', "1."}, - {'⒉', "2."}, - {'⒊', "3."}, - {'⒋', "4."}, - {'⒌', "5."}, - {'⒍', "6."}, - {'⒎', "7."}, - {'⒏', "8."}, - {'⒐', "9."}, - {'⒑', "10."}, - {'⒒', "11."}, - {'⒓', "12."}, - {'⒔', "13."}, - {'⒕', "14."}, - {'⒖', "15."}, - {'⒗', "16."}, - {'⒘', "17."}, - {'⒙', "18."}, - {'⒚', "19."}, - {'⒛', "20."}, - {'⒜', "(a)"}, - {'⒝', "(b)"}, - {'⒞', "(c)"}, - {'⒟', "(d)"}, - {'⒠', "(e)"}, - {'⒡', "(f)"}, - {'⒢', "(g)"}, - {'⒣', "(h)"}, - {'⒤', "(i)"}, - {'⒥', "(j)"}, - {'⒦', "(k)"}, - {'⒧', "(l)"}, - {'⒨', "(m)"}, - {'⒩', "(n)"}, - {'⒪', "(o)"}, - {'⒫', "(p)"}, - {'⒬', "(q)"}, - {'⒭', "(r)"}, - {'⒮', "(s)"}, - {'⒯', "(t)"}, - {'⒰', "(u)"}, - {'⒱', "(v)"}, - {'⒲', "(w)"}, - {'⒳', "(x)"}, - {'⒴', "(y)"}, - {'⒵', "(z)"}, - {'Ⓐ', "A"}, - {'Ⓑ', "B"}, - {'Ⓒ', "C"}, - {'Ⓓ', "D"}, - {'Ⓔ', "E"}, - {'Ⓕ', "F"}, - {'Ⓖ', "G"}, - {'Ⓗ', "H"}, - {'Ⓘ', "I"}, - {'Ⓙ', "J"}, - {'Ⓚ', "K"}, - {'Ⓛ', "L"}, - {'Ⓜ', "M"}, - {'Ⓝ', "N"}, - {'Ⓞ', "O"}, - {'Ⓟ', "P"}, - {'Ⓠ', "Q"}, - {'Ⓡ', "R"}, - {'Ⓢ', "S"}, - {'Ⓣ', "T"}, - {'Ⓤ', "U"}, - {'Ⓥ', "V"}, - {'Ⓦ', "W"}, - {'Ⓧ', "X"}, - {'Ⓨ', "Y"}, - {'Ⓩ', "Z"}, - {'ⓐ', "a"}, - {'ⓑ', "b"}, - {'ⓒ', "c"}, - {'ⓓ', "d"}, - {'ⓔ', "e"}, - {'ⓕ', "f"}, - {'ⓖ', "g"}, - {'ⓗ', "h"}, - {'ⓘ', "i"}, - {'ⓙ', "j"}, - {'ⓚ', "k"}, - {'ⓛ', "l"}, - {'ⓜ', "m"}, - {'ⓝ', "n"}, - {'ⓞ', "o"}, - {'ⓟ', "p"}, - {'ⓠ', "q"}, - {'ⓡ', "r"}, - {'ⓢ', "s"}, - {'ⓣ', "t"}, - {'ⓤ', "u"}, - {'ⓥ', "v"}, - {'ⓦ', "w"}, - {'ⓧ', "x"}, - {'ⓨ', "y"}, - {'ⓩ', "z"}, - {'⓪', "0"}, - {'⩴', "::="}, - {'⩵', "=="}, - {'⩶', "==="}, - {'ⱼ', "j"}, - {'ⱽ', "V"}, - {'㈀', "()"}, - {'㈁', "()"}, - {'㈂', "()"}, - {'㈃', "()"}, - {'㈄', "()"}, - {'㈅', "()"}, - {'㈆', "()"}, - {'㈇', "()"}, - {'㈈', "()"}, - {'㈉', "()"}, - {'㈊', "()"}, - {'㈋', "()"}, - {'㈌', "()"}, - {'㈍', "()"}, - {'㈎', "()"}, - {'㈏', "()"}, - {'㈐', "()"}, - {'㈑', "()"}, - {'㈒', "()"}, - {'㈓', "()"}, - {'㈔', "()"}, - {'㈕', "()"}, - {'㈖', "()"}, - {'㈗', "()"}, - {'㈘', "()"}, - {'㈙', "()"}, - {'㈚', "()"}, - {'㈛', "()"}, - {'㈜', "()"}, - {'㈝', "()"}, - {'㈞', "()"}, - {'㈠', "()"}, - {'㈡', "()"}, - {'㈢', "()"}, - {'㈣', "()"}, - {'㈤', "()"}, - {'㈥', "()"}, - {'㈦', "()"}, - {'㈧', "()"}, - {'㈨', "()"}, - {'㈩', "()"}, - {'㈪', "()"}, - {'㈫', "()"}, - {'㈬', "()"}, - {'㈭', "()"}, - {'㈮', "()"}, - {'㈯', "()"}, - {'㈰', "()"}, - {'㈱', "()"}, - {'㈲', "()"}, - {'㈳', "()"}, - {'㈴', "()"}, - {'㈵', "()"}, - {'㈶', "()"}, - {'㈷', "()"}, - {'㈸', "()"}, - {'㈹', "()"}, - {'㈺', "()"}, - {'㈻', "()"}, - {'㈼', "()"}, - {'㈽', "()"}, - {'㈾', "()"}, - {'㈿', "()"}, - {'㉀', "()"}, - {'㉁', "()"}, - {'㉂', "()"}, - {'㉃', "()"}, - {'㉐', "PTE"}, - {'㉑', "21"}, - {'㉒', "22"}, - {'㉓', "23"}, - {'㉔', "24"}, - {'㉕', "25"}, - {'㉖', "26"}, - {'㉗', "27"}, - {'㉘', "28"}, - {'㉙', "29"}, - {'㉚', "30"}, - {'㉛', "31"}, - {'㉜', "32"}, - {'㉝', "33"}, - {'㉞', "34"}, - {'㉟', "35"}, - {'㊱', "36"}, - {'㊲', "37"}, - {'㊳', "38"}, - {'㊴', "39"}, - {'㊵', "40"}, - {'㊶', "41"}, - {'㊷', "42"}, - {'㊸', "43"}, - {'㊹', "44"}, - {'㊺', "45"}, - {'㊻', "46"}, - {'㊼', "47"}, - {'㊽', "48"}, - {'㊾', "49"}, - {'㊿', "50"}, - {'㋀', "1"}, - {'㋁', "2"}, - {'㋂', "3"}, - {'㋃', "4"}, - {'㋄', "5"}, - {'㋅', "6"}, - {'㋆', "7"}, - {'㋇', "8"}, - {'㋈', "9"}, - {'㋉', "10"}, - {'㋊', "11"}, - {'㋋', "12"}, - {'㋌', "Hg"}, - {'㋍', "erg"}, - {'㋎', "eV"}, - {'㋏', "LTD"}, - {'㍘', "0"}, - {'㍙', "1"}, - {'㍚', "2"}, - {'㍛', "3"}, - {'㍜', "4"}, - {'㍝', "5"}, - {'㍞', "6"}, - {'㍟', "7"}, - {'㍠', "8"}, - {'㍡', "9"}, - {'㍢', "10"}, - {'㍣', "11"}, - {'㍤', "12"}, - {'㍥', "13"}, - {'㍦', "14"}, - {'㍧', "15"}, - {'㍨', "16"}, - {'㍩', "17"}, - {'㍪', "18"}, - {'㍫', "19"}, - {'㍬', "20"}, - {'㍭', "21"}, - {'㍮', "22"}, - {'㍯', "23"}, - {'㍰', "24"}, - {'㍱', "hPa"}, - {'㍲', "da"}, - {'㍳', "AU"}, - {'㍴', "bar"}, - {'㍵', "oV"}, - {'㍶', "pc"}, - {'㍷', "dm"}, - {'㍸', "dm2"}, - {'㍹', "dm3"}, - {'㍺', "IU"}, - {'㎀', "pA"}, - {'㎁', "nA"}, - {'㎂', "A"}, - {'㎃', "mA"}, - {'㎄', "kA"}, - {'㎅', "KB"}, - {'㎆', "MB"}, - {'㎇', "GB"}, - {'㎈', "cal"}, - {'㎉', "kcal"}, - {'㎊', "pF"}, - {'㎋', "nF"}, - {'㎌', "F"}, - {'㎍', "g"}, - {'㎎', "mg"}, - {'㎏', "kg"}, - {'㎐', "Hz"}, - {'㎑', "kHz"}, - {'㎒', "MHz"}, - {'㎓', "GHz"}, - {'㎔', "THz"}, - {'㎕', "l"}, - {'㎖', "ml"}, - {'㎗', "dl"}, - {'㎘', "kl"}, - {'㎙', "fm"}, - {'㎚', "nm"}, - {'㎛', "m"}, - {'㎜', "mm"}, - {'㎝', "cm"}, - {'㎞', "km"}, - {'㎟', "mm2"}, - {'㎠', "cm2"}, - {'㎡', "m2"}, - {'㎢', "km2"}, - {'㎣', "mm3"}, - {'㎤', "cm3"}, - {'㎥', "m3"}, - {'㎦', "km3"}, - {'㎧', "ms"}, - {'㎨', "ms2"}, - {'㎩', "Pa"}, - {'㎪', "kPa"}, - {'㎫', "MPa"}, - {'㎬', "GPa"}, - {'㎭', "rad"}, - {'㎮', "rads"}, - {'㎯', "rads2"}, - {'㎰', "ps"}, - {'㎱', "ns"}, - {'㎲', "s"}, - {'㎳', "ms"}, - {'㎴', "pV"}, - {'㎵', "nV"}, - {'㎶', "V"}, - {'㎷', "mV"}, - {'㎸', "kV"}, - {'㎹', "MV"}, - {'㎺', "pW"}, - {'㎻', "nW"}, - {'㎼', "W"}, - {'㎽', "mW"}, - {'㎾', "kW"}, - {'㎿', "MW"}, - {'㏀', "k"}, - {'㏁', "M"}, - {'㏂', "a.m."}, - {'㏃', "Bq"}, - {'㏄', "cc"}, - {'㏅', "cd"}, - {'㏆', "Ckg"}, - {'㏇', "Co."}, - {'㏈', "dB"}, - {'㏉', "Gy"}, - {'㏊', "ha"}, - {'㏋', "HP"}, - {'㏌', "in"}, - {'㏍', "KK"}, - {'㏎', "KM"}, - {'㏏', "kt"}, - {'㏐', "lm"}, - {'㏑', "ln"}, - {'㏒', "log"}, - {'㏓', "lx"}, - {'㏔', "mb"}, - {'㏕', "mil"}, - {'㏖', "mol"}, - {'㏗', "PH"}, - {'㏘', "p.m."}, - {'㏙', "PPM"}, - {'㏚', "PR"}, - {'㏛', "sr"}, - {'㏜', "Sv"}, - {'㏝', "Wb"}, - {'㏞', "Vm"}, - {'㏟', "Am"}, - {'㏠', "1"}, - {'㏡', "2"}, - {'㏢', "3"}, - {'㏣', "4"}, - {'㏤', "5"}, - {'㏥', "6"}, - {'㏦', "7"}, - {'㏧', "8"}, - {'㏨', "9"}, - {'㏩', "10"}, - {'㏪', "11"}, - {'㏫', "12"}, - {'㏬', "13"}, - {'㏭', "14"}, - {'㏮', "15"}, - {'㏯', "16"}, - {'㏰', "17"}, - {'㏱', "18"}, - {'㏲', "19"}, - {'㏳', "20"}, - {'㏴', "21"}, - {'㏵', "22"}, - {'㏶', "23"}, - {'㏷', "24"}, - {'㏸', "25"}, - {'㏹', "26"}, - {'㏺', "27"}, - {'㏻', "28"}, - {'㏼', "29"}, - {'㏽', "30"}, - {'㏾', "31"}, - {'㏿', "gal"}, - {'ff', "ff"}, - {'fi', "fi"}, - {'fl', "fl"}, - {'ffi', "ffi"}, - {'ffl', "ffl"}, - {'ſt', "st"}, - {'st', "st"}, - {'﬩', "+"}, - {'︐', ","}, - {'︓', ":"}, - {'︔', ";"}, - {'︕', "!"}, - {'︖', "?"}, - {'︙', "..."}, - {'︰', ".."}, - {'︳', "_"}, - {'︴', "_"}, - {'︵', "("}, - {'︶', ")"}, - {'︷', "{"}, - {'︸', "}"}, - {'﹇', "["}, - {'﹈', "]"}, - {'﹍', "_"}, - {'﹎', "_"}, - {'﹏', "_"}, - {'﹐', ","}, - {'﹒', "."}, - {'﹔', ";"}, - {'﹕', ":"}, - {'﹖', "?"}, - {'﹗', "!"}, - {'﹙', "("}, - {'﹚', ")"}, - {'﹛', "{"}, - {'﹜', "}"}, - {'﹟', "#"}, - {'﹠', "&"}, - {'﹡', "*"}, - {'﹢', "+"}, - {'﹣', "-"}, - {'﹤', "<"}, - {'﹥', ">"}, - {'﹦', "="}, - {'﹨', "\\"}, - {'﹩', "$"}, - {'﹪', "%"}, - {'﹫', "@"}, - {'!', "!"}, - {'"', "\""}, - {'#', "#"}, - {'$', "$"}, - {'%', "%"}, - {'&', "&"}, - {''', "'"}, - {'(', "("}, - {')', ")"}, - {'*', "*"}, - {'+', "+"}, - {',', ","}, - {'-', "-"}, - {'.', "."}, - {'/', "/"}, - {'0', "0"}, - {'1', "1"}, - {'2', "2"}, - {'3', "3"}, - {'4', "4"}, - {'5', "5"}, - {'6', "6"}, - {'7', "7"}, - {'8', "8"}, - {'9', "9"}, - {':', ":"}, - {';', ";"}, - {'<', "<"}, - {'=', "="}, - {'>', ">"}, - {'?', "?"}, - {'@', "@"}, - {'A', "A"}, - {'B', "B"}, - {'C', "C"}, - {'D', "D"}, - {'E', "E"}, - {'F', "F"}, - {'G', "G"}, - {'H', "H"}, - {'I', "I"}, - {'J', "J"}, - {'K', "K"}, - {'L', "L"}, - {'M', "M"}, - {'N', "N"}, - {'O', "O"}, - {'P', "P"}, - {'Q', "Q"}, - {'R', "R"}, - {'S', "S"}, - {'T', "T"}, - {'U', "U"}, - {'V', "V"}, - {'W', "W"}, - {'X', "X"}, - {'Y', "Y"}, - {'Z', "Z"}, - {'[', "["}, - {'\', "\\"}, - {']', "]"}, - {'^', "^"}, - {'_', "_"}, - {'`', "`"}, - {'a', "a"}, - {'b', "b"}, - {'c', "c"}, - {'d', "d"}, - {'e', "e"}, - {'f', "f"}, - {'g', "g"}, - {'h', "h"}, - {'i', "i"}, - {'j', "j"}, - {'k', "k"}, - {'l', "l"}, - {'m', "m"}, - {'n', "n"}, - {'o', "o"}, - {'p', "p"}, - {'q', "q"}, - {'r', "r"}, - {'s', "s"}, - {'t', "t"}, - {'u', "u"}, - {'v', "v"}, - {'w', "w"}, - {'x', "x"}, - {'y', "y"}, - {'z', "z"}, - {'{', "{"}, - {'|', "|"}, - {'}', "}"}, - {'~', "~"}, - }; - } + {'Ḋ', "D"}, + {'Ḍ', "D"}, + {'È', "E"}, + {'Ē', "E"}, + {'Ḕ', "E"}, + {'ª', "a"}, + {'²', "2"}, + {'³', "3"}, + {'¹', "1"}, + {'º', "o"}, + {'¼', "14"}, + {'½', "12"}, + {'¾', "34"}, + {'À', "A"}, + {'Á', "A"}, + {'Â', "A"}, + {'Ã', "A"}, + {'Ä', "A"}, + {'Å', "A"}, + {'Ç', "C"}, + {'É', "E"}, + {'Ê', "E"}, + {'Ë', "E"}, + {'Ì', "I"}, + {'Í', "I"}, + {'Î', "I"}, + {'Ï', "I"}, + {'Ñ', "N"}, + {'Ò', "O"}, + {'Ó', "O"}, + {'Ô', "O"}, + {'Õ', "O"}, + {'Ö', "O"}, + {'Ù', "U"}, + {'Ú', "U"}, + {'Û', "U"}, + {'Ü', "U"}, + {'Ý', "Y"}, + {'à', "a"}, + {'á', "a"}, + {'â', "a"}, + {'ã', "a"}, + {'ä', "a"}, + {'å', "a"}, + {'ç', "c"}, + {'è', "e"}, + {'é', "e"}, + {'ê', "e"}, + {'ë', "e"}, + {'ì', "i"}, + {'í', "i"}, + {'î', "i"}, + {'ï', "i"}, + {'ñ', "n"}, + {'ò', "o"}, + {'ó', "o"}, + {'ô', "o"}, + {'õ', "o"}, + {'ö', "o"}, + {'ù', "u"}, + {'ú', "u"}, + {'û', "u"}, + {'ü', "u"}, + {'ý', "y"}, + {'ÿ', "y"}, + {'Ā', "A"}, + {'ā', "a"}, + {'Ă', "A"}, + {'ă', "a"}, + {'Ą', "A"}, + {'ą', "a"}, + {'Ć', "C"}, + {'ć', "c"}, + {'Ĉ', "C"}, + {'ĉ', "c"}, + {'Ċ', "C"}, + {'ċ', "c"}, + {'Č', "C"}, + {'č', "c"}, + {'Ď', "D"}, + {'ď', "d"}, + {'ē', "e"}, + {'Ĕ', "E"}, + {'ĕ', "e"}, + {'Ė', "E"}, + {'ė', "e"}, + {'Ę', "E"}, + {'ę', "e"}, + {'Ě', "E"}, + {'ě', "e"}, + {'Ĝ', "G"}, + {'ĝ', "g"}, + {'Ğ', "G"}, + {'ğ', "g"}, + {'Ġ', "G"}, + {'ġ', "g"}, + {'Ģ', "G"}, + {'ģ', "g"}, + {'Ĥ', "H"}, + {'ĥ', "h"}, + {'Ĩ', "I"}, + {'ĩ', "i"}, + {'Ī', "I"}, + {'ī', "i"}, + {'Ĭ', "I"}, + {'ĭ', "i"}, + {'Į', "I"}, + {'į', "i"}, + {'İ', "I"}, + {'IJ', "IJ"}, + {'ij', "ij"}, + {'Ĵ', "J"}, + {'ĵ', "j"}, + {'Ķ', "K"}, + {'ķ', "k"}, + {'Ĺ', "L"}, + {'ĺ', "l"}, + {'Ļ', "L"}, + {'ļ', "l"}, + {'Ľ', "L"}, + {'ľ', "l"}, + {'Ŀ', "L"}, + {'ŀ', "l"}, + {'Ń', "N"}, + {'ń', "n"}, + {'Ņ', "N"}, + {'ņ', "n"}, + {'Ň', "N"}, + {'ň', "n"}, + {'ʼn', "n"}, + {'Ō', "O"}, + {'ō', "o"}, + {'Ŏ', "O"}, + {'ŏ', "o"}, + {'Ő', "O"}, + {'ő', "o"}, + {'Ŕ', "R"}, + {'ŕ', "r"}, + {'Ŗ', "R"}, + {'ŗ', "r"}, + {'Ř', "R"}, + {'ř', "r"}, + {'Ś', "S"}, + {'ś', "s"}, + {'Ŝ', "S"}, + {'ŝ', "s"}, + {'Ş', "S"}, + {'ş', "s"}, + {'Š', "S"}, + {'š', "s"}, + {'Ţ', "T"}, + {'ţ', "t"}, + {'Ť', "T"}, + {'ť', "t"}, + {'Ũ', "U"}, + {'ũ', "u"}, + {'Ū', "U"}, + {'ū', "u"}, + {'Ŭ', "U"}, + {'ŭ', "u"}, + {'Ů', "U"}, + {'ů', "u"}, + {'Ű', "U"}, + {'ű', "u"}, + {'Ų', "U"}, + {'ų', "u"}, + {'Ŵ', "W"}, + {'ŵ', "w"}, + {'Ŷ', "Y"}, + {'ŷ', "y"}, + {'Ÿ', "Y"}, + {'Ź', "Z"}, + {'ź', "z"}, + {'Ż', "Z"}, + {'ż', "z"}, + {'Ž', "Z"}, + {'ž', "z"}, + {'ſ', "s"}, + {'Ơ', "O"}, + {'ơ', "o"}, + {'Ư', "U"}, + {'ư', "u"}, + {'DŽ', "DZ"}, + {'Dž', "Dz"}, + {'dž', "dz"}, + {'LJ', "LJ"}, + {'Lj', "Lj"}, + {'lj', "lj"}, + {'NJ', "NJ"}, + {'Nj', "Nj"}, + {'nj', "nj"}, + {'Ǎ', "A"}, + {'ǎ', "a"}, + {'Ǐ', "I"}, + {'ǐ', "i"}, + {'Ǒ', "O"}, + {'ǒ', "o"}, + {'Ǔ', "U"}, + {'ǔ', "u"}, + {'Ǖ', "U"}, + {'ǖ', "u"}, + {'Ǘ', "U"}, + {'ǘ', "u"}, + {'Ǚ', "U"}, + {'ǚ', "u"}, + {'Ǜ', "U"}, + {'ǜ', "u"}, + {'Ǟ', "A"}, + {'ǟ', "a"}, + {'Ǡ', "A"}, + {'ǡ', "a"}, + {'Ǧ', "G"}, + {'ǧ', "g"}, + {'Ǩ', "K"}, + {'ǩ', "k"}, + {'Ǫ', "O"}, + {'ǫ', "o"}, + {'Ǭ', "O"}, + {'ǭ', "o"}, + {'ǰ', "j"}, + {'DZ', "DZ"}, + {'Dz', "Dz"}, + {'dz', "dz"}, + {'Ǵ', "G"}, + {'ǵ', "g"}, + {'Ǹ', "N"}, + {'ǹ', "n"}, + {'Ǻ', "A"}, + {'ǻ', "a"}, + {'Ȁ', "A"}, + {'ȁ', "a"}, + {'Ȃ', "A"}, + {'ȃ', "a"}, + {'Ȅ', "E"}, + {'ȅ', "e"}, + {'Ȇ', "E"}, + {'ȇ', "e"}, + {'Ȉ', "I"}, + {'ȉ', "i"}, + {'Ȋ', "I"}, + {'ȋ', "i"}, + {'Ȍ', "O"}, + {'ȍ', "o"}, + {'Ȏ', "O"}, + {'ȏ', "o"}, + {'Ȑ', "R"}, + {'ȑ', "r"}, + {'Ȓ', "R"}, + {'ȓ', "r"}, + {'Ȕ', "U"}, + {'ȕ', "u"}, + {'Ȗ', "U"}, + {'ȗ', "u"}, + {'Ș', "S"}, + {'ș', "s"}, + {'Ț', "T"}, + {'ț', "t"}, + {'Ȟ', "H"}, + {'ȟ', "h"}, + {'Ȧ', "A"}, + {'ȧ', "a"}, + {'Ȩ', "E"}, + {'ȩ', "e"}, + {'Ȫ', "O"}, + {'ȫ', "o"}, + {'Ȭ', "O"}, + {'ȭ', "o"}, + {'Ȯ', "O"}, + {'ȯ', "o"}, + {'Ȱ', "O"}, + {'ȱ', "o"}, + {'Ȳ', "Y"}, + {'ȳ', "y"}, + {'ʰ', "h"}, + {'ʲ', "j"}, + {'ʳ', "r"}, + {'ʷ', "w"}, + {'ʸ', "y"}, + {'ˡ', "l"}, + {'ˢ', "s"}, + {'ˣ', "x"}, + {';', ";"}, + {'ᴬ', "A"}, + {'ᴮ', "B"}, + {'ᴰ', "D"}, + {'ᴱ', "E"}, + {'ᴳ', "G"}, + {'ᴴ', "H"}, + {'ᴵ', "I"}, + {'ᴶ', "J"}, + {'ᴷ', "K"}, + {'ᴸ', "L"}, + {'ᴹ', "M"}, + {'ᴺ', "N"}, + {'ᴼ', "O"}, + {'ᴾ', "P"}, + {'ᴿ', "R"}, + {'ᵀ', "T"}, + {'ᵁ', "U"}, + {'ᵂ', "W"}, + {'ᵃ', "a"}, + {'ᵇ', "b"}, + {'ᵈ', "d"}, + {'ᵉ', "e"}, + {'ᵍ', "g"}, + {'ᵏ', "k"}, + {'ᵐ', "m"}, + {'ᵒ', "o"}, + {'ᵖ', "p"}, + {'ᵗ', "t"}, + {'ᵘ', "u"}, + {'ᵛ', "v"}, + {'ᵢ', "i"}, + {'ᵣ', "r"}, + {'ᵤ', "u"}, + {'ᵥ', "v"}, + {'ᶜ', "c"}, + {'ᶠ', "f"}, + {'ᶻ', "z"}, + {'Ḁ', "A"}, + {'ḁ', "a"}, + {'Ḃ', "B"}, + {'ḃ', "b"}, + {'Ḅ', "B"}, + {'ḅ', "b"}, + {'Ḇ', "B"}, + {'ḇ', "b"}, + {'Ḉ', "C"}, + {'ḉ', "c"}, + {'ḋ', "d"}, + {'ḍ', "d"}, + {'Ḏ', "D"}, + {'ḏ', "d"}, + {'Ḑ', "D"}, + {'ḑ', "d"}, + {'Ḓ', "D"}, + {'ḓ', "d"}, + {'ḕ', "e"}, + {'Ḗ', "E"}, + {'ḗ', "e"}, + {'Ḙ', "E"}, + {'ḙ', "e"}, + {'Ḛ', "E"}, + {'ḛ', "e"}, + {'Ḝ', "E"}, + {'ḝ', "e"}, + {'Ḟ', "F"}, + {'ḟ', "f"}, + {'Ḡ', "G"}, + {'ḡ', "g"}, + {'Ḣ', "H"}, + {'ḣ', "h"}, + {'Ḥ', "H"}, + {'ḥ', "h"}, + {'Ḧ', "H"}, + {'ḧ', "h"}, + {'Ḩ', "H"}, + {'ḩ', "h"}, + {'Ḫ', "H"}, + {'ḫ', "h"}, + {'Ḭ', "I"}, + {'ḭ', "i"}, + {'Ḯ', "I"}, + {'ḯ', "i"}, + {'Ḱ', "K"}, + {'ḱ', "k"}, + {'Ḳ', "K"}, + {'ḳ', "k"}, + {'Ḵ', "K"}, + {'ḵ', "k"}, + {'Ḷ', "L"}, + {'ḷ', "l"}, + {'Ḹ', "L"}, + {'ḹ', "l"}, + {'Ḻ', "L"}, + {'ḻ', "l"}, + {'Ḽ', "L"}, + {'ḽ', "l"}, + {'Ḿ', "M"}, + {'ḿ', "m"}, + {'Ṁ', "M"}, + {'ṁ', "m"}, + {'Ṃ', "M"}, + {'ṃ', "m"}, + {'Ṅ', "N"}, + {'ṅ', "n"}, + {'Ṇ', "N"}, + {'ṇ', "n"}, + {'Ṉ', "N"}, + {'ṉ', "n"}, + {'Ṋ', "N"}, + {'ṋ', "n"}, + {'Ṍ', "O"}, + {'ṍ', "o"}, + {'Ṏ', "O"}, + {'ṏ', "o"}, + {'Ṑ', "O"}, + {'ṑ', "o"}, + {'Ṓ', "O"}, + {'ṓ', "o"}, + {'Ṕ', "P"}, + {'ṕ', "p"}, + {'Ṗ', "P"}, + {'ṗ', "p"}, + {'Ṙ', "R"}, + {'ṙ', "r"}, + {'Ṛ', "R"}, + {'ṛ', "r"}, + {'Ṝ', "R"}, + {'ṝ', "r"}, + {'Ṟ', "R"}, + {'ṟ', "r"}, + {'Ṡ', "S"}, + {'ṡ', "s"}, + {'Ṣ', "S"}, + {'ṣ', "s"}, + {'Ṥ', "S"}, + {'ṥ', "s"}, + {'Ṧ', "S"}, + {'ṧ', "s"}, + {'Ṩ', "S"}, + {'ṩ', "s"}, + {'Ṫ', "T"}, + {'ṫ', "t"}, + {'Ṭ', "T"}, + {'ṭ', "t"}, + {'Ṯ', "T"}, + {'ṯ', "t"}, + {'Ṱ', "T"}, + {'ṱ', "t"}, + {'Ṳ', "U"}, + {'ṳ', "u"}, + {'Ṵ', "U"}, + {'ṵ', "u"}, + {'Ṷ', "U"}, + {'ṷ', "u"}, + {'Ṹ', "U"}, + {'ṹ', "u"}, + {'Ṻ', "U"}, + {'ṻ', "u"}, + {'Ṽ', "V"}, + {'ṽ', "v"}, + {'Ṿ', "V"}, + {'ṿ', "v"}, + {'Ẁ', "W"}, + {'ẁ', "w"}, + {'Ẃ', "W"}, + {'ẃ', "w"}, + {'Ẅ', "W"}, + {'ẅ', "w"}, + {'Ẇ', "W"}, + {'ẇ', "w"}, + {'Ẉ', "W"}, + {'ẉ', "w"}, + {'Ẋ', "X"}, + {'ẋ', "x"}, + {'Ẍ', "X"}, + {'ẍ', "x"}, + {'Ẏ', "Y"}, + {'ẏ', "y"}, + {'Ẑ', "Z"}, + {'ẑ', "z"}, + {'Ẓ', "Z"}, + {'ẓ', "z"}, + {'Ẕ', "Z"}, + {'ẕ', "z"}, + {'ẖ', "h"}, + {'ẗ', "t"}, + {'ẘ', "w"}, + {'ẙ', "y"}, + {'ẚ', "a"}, + {'ẛ', "s"}, + {'Ạ', "A"}, + {'ạ', "a"}, + {'Ả', "A"}, + {'ả', "a"}, + {'Ấ', "A"}, + {'ấ', "a"}, + {'Ầ', "A"}, + {'ầ', "a"}, + {'Ẩ', "A"}, + {'ẩ', "a"}, + {'Ẫ', "A"}, + {'ẫ', "a"}, + {'Ậ', "A"}, + {'ậ', "a"}, + {'Ắ', "A"}, + {'ắ', "a"}, + {'Ằ', "A"}, + {'ằ', "a"}, + {'Ẳ', "A"}, + {'ẳ', "a"}, + {'Ẵ', "A"}, + {'ẵ', "a"}, + {'Ặ', "A"}, + {'ặ', "a"}, + {'Ẹ', "E"}, + {'ẹ', "e"}, + {'Ẻ', "E"}, + {'ẻ', "e"}, + {'Ẽ', "E"}, + {'ẽ', "e"}, + {'Ế', "E"}, + {'ế', "e"}, + {'Ề', "E"}, + {'ề', "e"}, + {'Ể', "E"}, + {'ể', "e"}, + {'Ễ', "E"}, + {'ễ', "e"}, + {'Ệ', "E"}, + {'ệ', "e"}, + {'Ỉ', "I"}, + {'ỉ', "i"}, + {'Ị', "I"}, + {'ị', "i"}, + {'Ọ', "O"}, + {'ọ', "o"}, + {'Ỏ', "O"}, + {'ỏ', "o"}, + {'Ố', "O"}, + {'ố', "o"}, + {'Ồ', "O"}, + {'ồ', "o"}, + {'Ổ', "O"}, + {'ổ', "o"}, + {'Ỗ', "O"}, + {'ỗ', "o"}, + {'Ộ', "O"}, + {'ộ', "o"}, + {'Ớ', "O"}, + {'ớ', "o"}, + {'Ờ', "O"}, + {'ờ', "o"}, + {'Ở', "O"}, + {'ở', "o"}, + {'Ỡ', "O"}, + {'ỡ', "o"}, + {'Ợ', "O"}, + {'ợ', "o"}, + {'Ụ', "U"}, + {'ụ', "u"}, + {'Ủ', "U"}, + {'ủ', "u"}, + {'Ứ', "U"}, + {'ứ', "u"}, + {'Ừ', "U"}, + {'ừ', "u"}, + {'Ử', "U"}, + {'ử', "u"}, + {'Ữ', "U"}, + {'ữ', "u"}, + {'Ự', "U"}, + {'ự', "u"}, + {'Ỳ', "Y"}, + {'ỳ', "y"}, + {'Ỵ', "Y"}, + {'ỵ', "y"}, + {'Ỷ', "Y"}, + {'ỷ', "y"}, + {'Ỹ', "Y"}, + {'ỹ', "y"}, + {'`', "`"}, + {'․', "."}, + {'‥', ".."}, + {'…', "..."}, + {'‼', "!!"}, + {'⁇', "??"}, + {'⁈', "?!"}, + {'⁉', "!?"}, + {'⁰', "0"}, + {'ⁱ', "i"}, + {'⁴', "4"}, + {'⁵', "5"}, + {'⁶', "6"}, + {'⁷', "7"}, + {'⁸', "8"}, + {'⁹', "9"}, + {'⁺', "+"}, + {'⁼', "="}, + {'⁽', "("}, + {'⁾', ")"}, + {'ⁿ', "n"}, + {'₀', "0"}, + {'₁', "1"}, + {'₂', "2"}, + {'₃', "3"}, + {'₄', "4"}, + {'₅', "5"}, + {'₆', "6"}, + {'₇', "7"}, + {'₈', "8"}, + {'₉', "9"}, + {'₊', "+"}, + {'₌', "="}, + {'₍', "("}, + {'₎', ")"}, + {'ₐ', "a"}, + {'ₑ', "e"}, + {'ₒ', "o"}, + {'ₓ', "x"}, + {'ₕ', "h"}, + {'ₖ', "k"}, + {'ₗ', "l"}, + {'ₘ', "m"}, + {'ₙ', "n"}, + {'ₚ', "p"}, + {'ₛ', "s"}, + {'ₜ', "t"}, + {'₨', "Rs"}, + {'℀', "a/c"}, + {'℁', "a/s"}, + {'ℂ', "C"}, + {'℃', "C"}, + {'℅', "c/o"}, + {'℆', "c/u"}, + {'℉', "F"}, + {'ℊ', "g"}, + {'ℋ', "H"}, + {'ℌ', "H"}, + {'ℍ', "H"}, + {'ℎ', "h"}, + {'ℐ', "I"}, + {'ℑ', "I"}, + {'ℒ', "L"}, + {'ℓ', "l"}, + {'ℕ', "N"}, + {'№', "No"}, + {'ℙ', "P"}, + {'ℚ', "Q"}, + {'ℛ', "R"}, + {'ℜ', "R"}, + {'ℝ', "R"}, + {'℠', "SM"}, + {'℡', "TEL"}, + {'™', "TM"}, + {'ℤ', "Z"}, + {'ℨ', "Z"}, + {'K', "K"}, + {'Å', "A"}, + {'ℬ', "B"}, + {'ℭ', "C"}, + {'ℯ', "e"}, + {'ℰ', "E"}, + {'ℱ', "F"}, + {'ℳ', "M"}, + {'ℴ', "o"}, + {'ℹ', "i"}, + {'℻', "FAX"}, + {'ⅅ', "D"}, + {'ⅆ', "d"}, + {'ⅇ', "e"}, + {'ⅈ', "i"}, + {'ⅉ', "j"}, + {'⅐', "17"}, + {'⅑', "19"}, + {'⅒', "110"}, + {'⅓', "13"}, + {'⅔', "23"}, + {'⅕', "15"}, + {'⅖', "25"}, + {'⅗', "35"}, + {'⅘', "45"}, + {'⅙', "16"}, + {'⅚', "56"}, + {'⅛', "18"}, + {'⅜', "38"}, + {'⅝', "58"}, + {'⅞', "78"}, + {'⅟', "1"}, + {'Ⅰ', "I"}, + {'Ⅱ', "II"}, + {'Ⅲ', "III"}, + {'Ⅳ', "IV"}, + {'Ⅴ', "V"}, + {'Ⅵ', "VI"}, + {'Ⅶ', "VII"}, + {'Ⅷ', "VIII"}, + {'Ⅸ', "IX"}, + {'Ⅹ', "X"}, + {'Ⅺ', "XI"}, + {'Ⅻ', "XII"}, + {'Ⅼ', "L"}, + {'Ⅽ', "C"}, + {'Ⅾ', "D"}, + {'Ⅿ', "M"}, + {'ⅰ', "i"}, + {'ⅱ', "ii"}, + {'ⅲ', "iii"}, + {'ⅳ', "iv"}, + {'ⅴ', "v"}, + {'ⅵ', "vi"}, + {'ⅶ', "vii"}, + {'ⅷ', "viii"}, + {'ⅸ', "ix"}, + {'ⅹ', "x"}, + {'ⅺ', "xi"}, + {'ⅻ', "xii"}, + {'ⅼ', "l"}, + {'ⅽ', "c"}, + {'ⅾ', "d"}, + {'ⅿ', "m"}, + {'↉', "03"}, + {'≠', "="}, + {'≮', "<"}, + {'≯', ">"}, + {'①', "1"}, + {'②', "2"}, + {'③', "3"}, + {'④', "4"}, + {'⑤', "5"}, + {'⑥', "6"}, + {'⑦', "7"}, + {'⑧', "8"}, + {'⑨', "9"}, + {'⑩', "10"}, + {'⑪', "11"}, + {'⑫', "12"}, + {'⑬', "13"}, + {'⑭', "14"}, + {'⑮', "15"}, + {'⑯', "16"}, + {'⑰', "17"}, + {'⑱', "18"}, + {'⑲', "19"}, + {'⑳', "20"}, + {'⑴', "(1)"}, + {'⑵', "(2)"}, + {'⑶', "(3)"}, + {'⑷', "(4)"}, + {'⑸', "(5)"}, + {'⑹', "(6)"}, + {'⑺', "(7)"}, + {'⑻', "(8)"}, + {'⑼', "(9)"}, + {'⑽', "(10)"}, + {'⑾', "(11)"}, + {'⑿', "(12)"}, + {'⒀', "(13)"}, + {'⒁', "(14)"}, + {'⒂', "(15)"}, + {'⒃', "(16)"}, + {'⒄', "(17)"}, + {'⒅', "(18)"}, + {'⒆', "(19)"}, + {'⒇', "(20)"}, + {'⒈', "1."}, + {'⒉', "2."}, + {'⒊', "3."}, + {'⒋', "4."}, + {'⒌', "5."}, + {'⒍', "6."}, + {'⒎', "7."}, + {'⒏', "8."}, + {'⒐', "9."}, + {'⒑', "10."}, + {'⒒', "11."}, + {'⒓', "12."}, + {'⒔', "13."}, + {'⒕', "14."}, + {'⒖', "15."}, + {'⒗', "16."}, + {'⒘', "17."}, + {'⒙', "18."}, + {'⒚', "19."}, + {'⒛', "20."}, + {'⒜', "(a)"}, + {'⒝', "(b)"}, + {'⒞', "(c)"}, + {'⒟', "(d)"}, + {'⒠', "(e)"}, + {'⒡', "(f)"}, + {'⒢', "(g)"}, + {'⒣', "(h)"}, + {'⒤', "(i)"}, + {'⒥', "(j)"}, + {'⒦', "(k)"}, + {'⒧', "(l)"}, + {'⒨', "(m)"}, + {'⒩', "(n)"}, + {'⒪', "(o)"}, + {'⒫', "(p)"}, + {'⒬', "(q)"}, + {'⒭', "(r)"}, + {'⒮', "(s)"}, + {'⒯', "(t)"}, + {'⒰', "(u)"}, + {'⒱', "(v)"}, + {'⒲', "(w)"}, + {'⒳', "(x)"}, + {'⒴', "(y)"}, + {'⒵', "(z)"}, + {'Ⓐ', "A"}, + {'Ⓑ', "B"}, + {'Ⓒ', "C"}, + {'Ⓓ', "D"}, + {'Ⓔ', "E"}, + {'Ⓕ', "F"}, + {'Ⓖ', "G"}, + {'Ⓗ', "H"}, + {'Ⓘ', "I"}, + {'Ⓙ', "J"}, + {'Ⓚ', "K"}, + {'Ⓛ', "L"}, + {'Ⓜ', "M"}, + {'Ⓝ', "N"}, + {'Ⓞ', "O"}, + {'Ⓟ', "P"}, + {'Ⓠ', "Q"}, + {'Ⓡ', "R"}, + {'Ⓢ', "S"}, + {'Ⓣ', "T"}, + {'Ⓤ', "U"}, + {'Ⓥ', "V"}, + {'Ⓦ', "W"}, + {'Ⓧ', "X"}, + {'Ⓨ', "Y"}, + {'Ⓩ', "Z"}, + {'ⓐ', "a"}, + {'ⓑ', "b"}, + {'ⓒ', "c"}, + {'ⓓ', "d"}, + {'ⓔ', "e"}, + {'ⓕ', "f"}, + {'ⓖ', "g"}, + {'ⓗ', "h"}, + {'ⓘ', "i"}, + {'ⓙ', "j"}, + {'ⓚ', "k"}, + {'ⓛ', "l"}, + {'ⓜ', "m"}, + {'ⓝ', "n"}, + {'ⓞ', "o"}, + {'ⓟ', "p"}, + {'ⓠ', "q"}, + {'ⓡ', "r"}, + {'ⓢ', "s"}, + {'ⓣ', "t"}, + {'ⓤ', "u"}, + {'ⓥ', "v"}, + {'ⓦ', "w"}, + {'ⓧ', "x"}, + {'ⓨ', "y"}, + {'ⓩ', "z"}, + {'⓪', "0"}, + {'⩴', "::="}, + {'⩵', "=="}, + {'⩶', "==="}, + {'ⱼ', "j"}, + {'ⱽ', "V"}, + {'㈀', "()"}, + {'㈁', "()"}, + {'㈂', "()"}, + {'㈃', "()"}, + {'㈄', "()"}, + {'㈅', "()"}, + {'㈆', "()"}, + {'㈇', "()"}, + {'㈈', "()"}, + {'㈉', "()"}, + {'㈊', "()"}, + {'㈋', "()"}, + {'㈌', "()"}, + {'㈍', "()"}, + {'㈎', "()"}, + {'㈏', "()"}, + {'㈐', "()"}, + {'㈑', "()"}, + {'㈒', "()"}, + {'㈓', "()"}, + {'㈔', "()"}, + {'㈕', "()"}, + {'㈖', "()"}, + {'㈗', "()"}, + {'㈘', "()"}, + {'㈙', "()"}, + {'㈚', "()"}, + {'㈛', "()"}, + {'㈜', "()"}, + {'㈝', "()"}, + {'㈞', "()"}, + {'㈠', "()"}, + {'㈡', "()"}, + {'㈢', "()"}, + {'㈣', "()"}, + {'㈤', "()"}, + {'㈥', "()"}, + {'㈦', "()"}, + {'㈧', "()"}, + {'㈨', "()"}, + {'㈩', "()"}, + {'㈪', "()"}, + {'㈫', "()"}, + {'㈬', "()"}, + {'㈭', "()"}, + {'㈮', "()"}, + {'㈯', "()"}, + {'㈰', "()"}, + {'㈱', "()"}, + {'㈲', "()"}, + {'㈳', "()"}, + {'㈴', "()"}, + {'㈵', "()"}, + {'㈶', "()"}, + {'㈷', "()"}, + {'㈸', "()"}, + {'㈹', "()"}, + {'㈺', "()"}, + {'㈻', "()"}, + {'㈼', "()"}, + {'㈽', "()"}, + {'㈾', "()"}, + {'㈿', "()"}, + {'㉀', "()"}, + {'㉁', "()"}, + {'㉂', "()"}, + {'㉃', "()"}, + {'㉐', "PTE"}, + {'㉑', "21"}, + {'㉒', "22"}, + {'㉓', "23"}, + {'㉔', "24"}, + {'㉕', "25"}, + {'㉖', "26"}, + {'㉗', "27"}, + {'㉘', "28"}, + {'㉙', "29"}, + {'㉚', "30"}, + {'㉛', "31"}, + {'㉜', "32"}, + {'㉝', "33"}, + {'㉞', "34"}, + {'㉟', "35"}, + {'㊱', "36"}, + {'㊲', "37"}, + {'㊳', "38"}, + {'㊴', "39"}, + {'㊵', "40"}, + {'㊶', "41"}, + {'㊷', "42"}, + {'㊸', "43"}, + {'㊹', "44"}, + {'㊺', "45"}, + {'㊻', "46"}, + {'㊼', "47"}, + {'㊽', "48"}, + {'㊾', "49"}, + {'㊿', "50"}, + {'㋀', "1"}, + {'㋁', "2"}, + {'㋂', "3"}, + {'㋃', "4"}, + {'㋄', "5"}, + {'㋅', "6"}, + {'㋆', "7"}, + {'㋇', "8"}, + {'㋈', "9"}, + {'㋉', "10"}, + {'㋊', "11"}, + {'㋋', "12"}, + {'㋌', "Hg"}, + {'㋍', "erg"}, + {'㋎', "eV"}, + {'㋏', "LTD"}, + {'㍘', "0"}, + {'㍙', "1"}, + {'㍚', "2"}, + {'㍛', "3"}, + {'㍜', "4"}, + {'㍝', "5"}, + {'㍞', "6"}, + {'㍟', "7"}, + {'㍠', "8"}, + {'㍡', "9"}, + {'㍢', "10"}, + {'㍣', "11"}, + {'㍤', "12"}, + {'㍥', "13"}, + {'㍦', "14"}, + {'㍧', "15"}, + {'㍨', "16"}, + {'㍩', "17"}, + {'㍪', "18"}, + {'㍫', "19"}, + {'㍬', "20"}, + {'㍭', "21"}, + {'㍮', "22"}, + {'㍯', "23"}, + {'㍰', "24"}, + {'㍱', "hPa"}, + {'㍲', "da"}, + {'㍳', "AU"}, + {'㍴', "bar"}, + {'㍵', "oV"}, + {'㍶', "pc"}, + {'㍷', "dm"}, + {'㍸', "dm2"}, + {'㍹', "dm3"}, + {'㍺', "IU"}, + {'㎀', "pA"}, + {'㎁', "nA"}, + {'㎂', "A"}, + {'㎃', "mA"}, + {'㎄', "kA"}, + {'㎅', "KB"}, + {'㎆', "MB"}, + {'㎇', "GB"}, + {'㎈', "cal"}, + {'㎉', "kcal"}, + {'㎊', "pF"}, + {'㎋', "nF"}, + {'㎌', "F"}, + {'㎍', "g"}, + {'㎎', "mg"}, + {'㎏', "kg"}, + {'㎐', "Hz"}, + {'㎑', "kHz"}, + {'㎒', "MHz"}, + {'㎓', "GHz"}, + {'㎔', "THz"}, + {'㎕', "l"}, + {'㎖', "ml"}, + {'㎗', "dl"}, + {'㎘', "kl"}, + {'㎙', "fm"}, + {'㎚', "nm"}, + {'㎛', "m"}, + {'㎜', "mm"}, + {'㎝', "cm"}, + {'㎞', "km"}, + {'㎟', "mm2"}, + {'㎠', "cm2"}, + {'㎡', "m2"}, + {'㎢', "km2"}, + {'㎣', "mm3"}, + {'㎤', "cm3"}, + {'㎥', "m3"}, + {'㎦', "km3"}, + {'㎧', "ms"}, + {'㎨', "ms2"}, + {'㎩', "Pa"}, + {'㎪', "kPa"}, + {'㎫', "MPa"}, + {'㎬', "GPa"}, + {'㎭', "rad"}, + {'㎮', "rads"}, + {'㎯', "rads2"}, + {'㎰', "ps"}, + {'㎱', "ns"}, + {'㎲', "s"}, + {'㎳', "ms"}, + {'㎴', "pV"}, + {'㎵', "nV"}, + {'㎶', "V"}, + {'㎷', "mV"}, + {'㎸', "kV"}, + {'㎹', "MV"}, + {'㎺', "pW"}, + {'㎻', "nW"}, + {'㎼', "W"}, + {'㎽', "mW"}, + {'㎾', "kW"}, + {'㎿', "MW"}, + {'㏀', "k"}, + {'㏁', "M"}, + {'㏂', "a.m."}, + {'㏃', "Bq"}, + {'㏄', "cc"}, + {'㏅', "cd"}, + {'㏆', "Ckg"}, + {'㏇', "Co."}, + {'㏈', "dB"}, + {'㏉', "Gy"}, + {'㏊', "ha"}, + {'㏋', "HP"}, + {'㏌', "in"}, + {'㏍', "KK"}, + {'㏎', "KM"}, + {'㏏', "kt"}, + {'㏐', "lm"}, + {'㏑', "ln"}, + {'㏒', "log"}, + {'㏓', "lx"}, + {'㏔', "mb"}, + {'㏕', "mil"}, + {'㏖', "mol"}, + {'㏗', "PH"}, + {'㏘', "p.m."}, + {'㏙', "PPM"}, + {'㏚', "PR"}, + {'㏛', "sr"}, + {'㏜', "Sv"}, + {'㏝', "Wb"}, + {'㏞', "Vm"}, + {'㏟', "Am"}, + {'㏠', "1"}, + {'㏡', "2"}, + {'㏢', "3"}, + {'㏣', "4"}, + {'㏤', "5"}, + {'㏥', "6"}, + {'㏦', "7"}, + {'㏧', "8"}, + {'㏨', "9"}, + {'㏩', "10"}, + {'㏪', "11"}, + {'㏫', "12"}, + {'㏬', "13"}, + {'㏭', "14"}, + {'㏮', "15"}, + {'㏯', "16"}, + {'㏰', "17"}, + {'㏱', "18"}, + {'㏲', "19"}, + {'㏳', "20"}, + {'㏴', "21"}, + {'㏵', "22"}, + {'㏶', "23"}, + {'㏷', "24"}, + {'㏸', "25"}, + {'㏹', "26"}, + {'㏺', "27"}, + {'㏻', "28"}, + {'㏼', "29"}, + {'㏽', "30"}, + {'㏾', "31"}, + {'㏿', "gal"}, + {'ff', "ff"}, + {'fi', "fi"}, + {'fl', "fl"}, + {'ffi', "ffi"}, + {'ffl', "ffl"}, + {'ſt', "st"}, + {'st', "st"}, + {'﬩', "+"}, + {'︐', ","}, + {'︓', ":"}, + {'︔', ";"}, + {'︕', "!"}, + {'︖', "?"}, + {'︙', "..."}, + {'︰', ".."}, + {'︳', "_"}, + {'︴', "_"}, + {'︵', "("}, + {'︶', ")"}, + {'︷', "{"}, + {'︸', "}"}, + {'﹇', "["}, + {'﹈', "]"}, + {'﹍', "_"}, + {'﹎', "_"}, + {'﹏', "_"}, + {'﹐', ","}, + {'﹒', "."}, + {'﹔', ";"}, + {'﹕', ":"}, + {'﹖', "?"}, + {'﹗', "!"}, + {'﹙', "("}, + {'﹚', ")"}, + {'﹛', "{"}, + {'﹜', "}"}, + {'﹟', "#"}, + {'﹠', "&"}, + {'﹡', "*"}, + {'﹢', "+"}, + {'﹣', "-"}, + {'﹤', "<"}, + {'﹥', ">"}, + {'﹦', "="}, + {'﹨', "\\"}, + {'﹩', "$"}, + {'﹪', "%"}, + {'﹫', "@"}, + {'!', "!"}, + {'"', "\""}, + {'#', "#"}, + {'$', "$"}, + {'%', "%"}, + {'&', "&"}, + {''', "'"}, + {'(', "("}, + {')', ")"}, + {'*', "*"}, + {'+', "+"}, + {',', ","}, + {'-', "-"}, + {'.', "."}, + {'/', "/"}, + {'0', "0"}, + {'1', "1"}, + {'2', "2"}, + {'3', "3"}, + {'4', "4"}, + {'5', "5"}, + {'6', "6"}, + {'7', "7"}, + {'8', "8"}, + {'9', "9"}, + {':', ":"}, + {';', ";"}, + {'<', "<"}, + {'=', "="}, + {'>', ">"}, + {'?', "?"}, + {'@', "@"}, + {'A', "A"}, + {'B', "B"}, + {'C', "C"}, + {'D', "D"}, + {'E', "E"}, + {'F', "F"}, + {'G', "G"}, + {'H', "H"}, + {'I', "I"}, + {'J', "J"}, + {'K', "K"}, + {'L', "L"}, + {'M', "M"}, + {'N', "N"}, + {'O', "O"}, + {'P', "P"}, + {'Q', "Q"}, + {'R', "R"}, + {'S', "S"}, + {'T', "T"}, + {'U', "U"}, + {'V', "V"}, + {'W', "W"}, + {'X', "X"}, + {'Y', "Y"}, + {'Z', "Z"}, + {'[', "["}, + {'\', "\\"}, + {']', "]"}, + {'^', "^"}, + {'_', "_"}, + {'`', "`"}, + {'a', "a"}, + {'b', "b"}, + {'c', "c"}, + {'d', "d"}, + {'e', "e"}, + {'f', "f"}, + {'g', "g"}, + {'h', "h"}, + {'i', "i"}, + {'j', "j"}, + {'k', "k"}, + {'l', "l"}, + {'m', "m"}, + {'n', "n"}, + {'o', "o"}, + {'p', "p"}, + {'q', "q"}, + {'r', "r"}, + {'s', "s"}, + {'t', "t"}, + {'u', "u"}, + {'v', "v"}, + {'w', "w"}, + {'x', "x"}, + {'y', "y"}, + {'z', "z"}, + {'{', "{"}, + {'|', "|"}, + {'}', "}"}, + {'~', "~"}, + }; } } \ No newline at end of file diff --git a/src/Markdig/Helpers/HtmlHelper.cs b/src/Markdig/Helpers/HtmlHelper.cs index 2895be3d7..f25ac3c77 100644 --- a/src/Markdig/Helpers/HtmlHelper.cs +++ b/src/Markdig/Helpers/HtmlHelper.cs @@ -3,6 +3,7 @@ // See the license.txt file in the project root for more information. using System; +using System.Diagnostics.CodeAnalysis; using System.Text; namespace Markdig.Helpers @@ -36,18 +37,19 @@ static HtmlHelper() return c < 128 ? EscapeUrlsForAscii[c] : null; } - public static bool TryParseHtmlTag(StringSlice text, out string htmlTag) - { - return TryParseHtmlTag(ref text, out htmlTag); - } - - public static bool TryParseHtmlTag(ref StringSlice text, out string htmlTag) + public static bool TryParseHtmlTag(ref StringSlice text, [NotNullWhen(true)] out string? htmlTag) { var builder = StringBuilderCache.Local(); - var result = TryParseHtmlTag(ref text, builder); - htmlTag = builder.ToString(); - builder.Length = 0; - return result; + if (TryParseHtmlTag(ref text, builder)) + { + htmlTag = builder.GetStringAndReset(); + return true; + } + else + { + htmlTag = null; + return false; + } } public static bool TryParseHtmlTag(ref StringSlice text, StringBuilder builder) @@ -128,7 +130,7 @@ internal static bool TryParseHtmlTagOpenTag(ref StringSlice text, StringBuilder case '\0': return false; case '>': - text.NextChar(); + text.SkipChar(); builder.Append(c); return true; case '/': @@ -138,7 +140,7 @@ internal static bool TryParseHtmlTagOpenTag(ref StringSlice text, StringBuilder { return false; } - text.NextChar(); + text.SkipChar(); builder.Append('>'); return true; case '=': @@ -270,7 +272,7 @@ private static bool TryParseHtmlTagDeclaration(ref StringSlice text, StringBuild if (c == '>') { - text.NextChar(); + text.SkipChar(); builder.Append('>'); return true; } @@ -279,16 +281,12 @@ private static bool TryParseHtmlTagDeclaration(ref StringSlice text, StringBuild private static bool TryParseHtmlTagCData(ref StringSlice text, StringBuilder builder) { - builder.Append('['); - var c = text.NextChar(); - if (c == 'C' && - text.NextChar() == 'D' && - text.NextChar() == 'A' && - text.NextChar() == 'T' && - text.NextChar() == 'A' && - (c = text.NextChar()) == '[') + if (text.Match("[CDATA[")) { - builder.Append("CDATA["); + builder.Append("[CDATA["); + text.Start += 6; + + char c = '\0'; while (true) { var pc = c; @@ -298,23 +296,15 @@ private static bool TryParseHtmlTagCData(ref StringSlice text, StringBuilder bui return false; } - if (c == ']' && pc == ']') - { - builder.Append(']'); - c = text.NextChar(); - if (c == '>') - { - builder.Append('>'); - text.NextChar(); - return true; - } + builder.Append(c); - if (c == '\0') - { - return false; - } + if (c == ']' && pc == ']' && text.PeekChar() == '>') + { + text.SkipChar(); + text.SkipChar(); + builder.Append('>'); + return true; } - builder.Append(c); } } return false; @@ -338,7 +328,7 @@ internal static bool TryParseHtmlCloseTag(ref StringSlice text, StringBuilder bu c = text.NextChar(); if (c == '>') { - text.NextChar(); + text.SkipChar(); builder.Append('>'); return true; } @@ -393,7 +383,7 @@ private static bool TryParseHtmlTagHtmlComment(ref StringSlice text, StringBuild if (c == '>') { builder.Append('>'); - text.NextChar(); + text.SkipChar(); return true; } return false; @@ -418,7 +408,7 @@ private static bool TryParseHtmlTagProcessingInstruction(ref StringSlice text, S if (c == '>' && prevChar == '?') { builder.Append('>'); - text.NextChar(); + text.SkipChar(); return true; } prevChar = c; @@ -531,29 +521,24 @@ public static string Unescape(string? text, bool removeBackSlash = true) if (c == '#') { c = slice.PeekChar(); - if (c == 'x' || c == 'X') + if ((c | 0x20) == 'x') { c = slice.NextChar(); // skip # // expect 1-6 hex digits starting from pos+3 while (c != '\0') { c = slice.NextChar(); - if (c >= '0' && c <= '9') - { - if (++counter == 7) return 0; - numericEntity = numericEntity*16 + (c - '0'); - continue; - } - else if (c >= 'A' && c <= 'F') + + if (c.IsDigit()) { if (++counter == 7) return 0; - numericEntity = numericEntity*16 + (c - 'A' + 10); + numericEntity = numericEntity * 16 + (c - '0'); continue; } - else if (c >= 'a' && c <= 'f') + else if ((uint)((c - 'A') & ~0x20) <= ('F' - 'A')) { if (++counter == 7) return 0; - numericEntity = numericEntity*16 + (c - 'a' + 10); + numericEntity = numericEntity * 16 + ((c | 0x20) - 'a' + 10); continue; } @@ -570,10 +555,10 @@ public static string Unescape(string? text, bool removeBackSlash = true) { c = slice.NextChar(); - if (c >= '0' && c <= '9') + if (c.IsDigit()) { if (++counter == 8) return 0; - numericEntity = numericEntity*10 + (c - '0'); + numericEntity = numericEntity * 10 + (c - '0'); continue; } @@ -587,7 +572,7 @@ public static string Unescape(string? text, bool removeBackSlash = true) else { // expect a letter and 1-31 letters or digits - if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) + if (!c.IsAlpha()) return 0; namedEntityStart = slice.Start; @@ -596,7 +581,8 @@ public static string Unescape(string? text, bool removeBackSlash = true) while (c != '\0') { c = slice.NextChar(); - if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) + + if (c.IsAlphaNumeric()) { if (++counter == 32) return 0; diff --git a/src/Markdig/Helpers/ICharIterator.cs b/src/Markdig/Helpers/ICharIterator.cs index 2ea1f32f0..cb11ad1a5 100644 --- a/src/Markdig/Helpers/ICharIterator.cs +++ b/src/Markdig/Helpers/ICharIterator.cs @@ -31,12 +31,23 @@ public interface ICharIterator /// The next character. `\0` is end of the iteration. char NextChar(); + /// + /// Goes to the next character, incrementing the position. + /// + void SkipChar(); + + /// + /// Peeks at the next character, without incrementing the position. + /// + /// The next character. `\0` is end of the iteration. + char PeekChar(); + /// /// Peeks at the next character, without incrementing the position. /// /// /// The next character. `\0` is end of the iteration. - char PeekChar(int offset = 1); + char PeekChar(int offset); /// /// Gets a value indicating whether this instance is empty. diff --git a/src/Markdig/Helpers/LinkHelper.cs b/src/Markdig/Helpers/LinkHelper.cs index 4db69d821..29b5b7456 100644 --- a/src/Markdig/Helpers/LinkHelper.cs +++ b/src/Markdig/Helpers/LinkHelper.cs @@ -247,7 +247,7 @@ public static bool TryParseAutolink(ref StringSlice text, [NotNullWhen(true)] ou break; } - text.NextChar(); + text.SkipChar(); link = builder.ToString(); builder.Length = 0; return true; @@ -295,7 +295,7 @@ public static bool TryParseAutolink(ref StringSlice text, [NotNullWhen(true)] ou if (c == '>') { - text.NextChar(); + text.SkipChar(); link = builder.ToString(); builder.Length = 0; return true; @@ -356,7 +356,7 @@ public static bool TryParseInlineLink(ref StringSlice text, out string? link, ou // 1. An inline link consists of a link text followed immediately by a left parenthesis (, if (c == '(') { - text.NextChar(); + text.SkipChar(); text.TrimStart(); // this breaks whitespace before an uri var pos = text.Start; @@ -408,7 +408,7 @@ public static bool TryParseInlineLink(ref StringSlice text, out string? link, ou if (isValid) { // Skip ')' - text.NextChar(); + text.SkipChar(); title ??= string.Empty; } @@ -453,7 +453,7 @@ public static bool TryParseInlineLink(ref StringSlice text, out string? link, ou // 1. An inline link consists of a link text followed immediately by a left parenthesis (, if (c == '(') { - text.NextChar(); + text.SkipChar(); var sourcePosition = text.Start; text.TrimStart(); triviaBeforeLink = new SourceSpan(sourcePosition, text.Start - 1); @@ -515,7 +515,7 @@ public static bool TryParseInlineLink(ref StringSlice text, out string? link, ou if (isValid) { // Skip ')' - text.NextChar(); + text.SkipChar(); title ??= string.Empty; } return isValid; @@ -581,7 +581,7 @@ public static bool TryParseInlineLink(ref StringSlice text, out string? link, ou } // Skip last quote - text.NextChar(); + text.SkipChar(); isValid = true; break; } @@ -675,7 +675,7 @@ public static bool TryParseInlineLink(ref StringSlice text, out string? link, ou } // Skip last quote - text.NextChar(); + text.SkipChar(); isValid = true; break; } @@ -737,7 +737,7 @@ public static bool TryParseInlineLink(ref StringSlice text, out string? link, ou c = text.NextChar(); if (!hasEscape && c == '>') { - text.NextChar(); + text.SkipChar(); hasPointyBrackets = true; isValid = true; break; @@ -878,7 +878,7 @@ public static bool TryParseInlineLink(ref StringSlice text, out string? link, ou c = text.NextChar(); if (!hasEscape && c == '>') { - text.NextChar(); + text.SkipChar(); hasPointyBrackets = true; isValid = true; break; @@ -1093,7 +1093,7 @@ public static bool IsValidDomain(string link, int prefixLength) label = null; return false; } - text.NextChar(); // Skip ':' + text.SkipChar(); // Skip ':' // Skip any whitespace before the url text.TrimStart(); @@ -1128,7 +1128,7 @@ public static bool IsValidDomain(string link, int prefixLength) } else { - if (text.CurrentChar == '\0' || newLineCount > 0) + if (text.IsEmpty || newLineCount > 0) { return true; } @@ -1210,7 +1210,7 @@ public static bool IsValidDomain(string link, int prefixLength) label = null; return false; } - text.NextChar(); // Skip ':' + text.SkipChar(); // Skip ':' var triviaBeforeUrlStart = text.Start; // Skip any whitespace before the url @@ -1254,7 +1254,7 @@ public static bool IsValidDomain(string link, int prefixLength) } else { - if (text.CurrentChar == '\0' || newLineCount > 0) + if (text.IsEmpty || newLineCount > 0) { triviaAfterTitle = new SourceSpan(text.Start, text.Start - 1); return true; @@ -1376,7 +1376,7 @@ public static bool IsValidDomain(string link, int prefixLength) if (c == ']') { - lines.NextChar(); // Skip ] + lines.SkipChar(); // Skip ] if (allowEmpty || hasNonWhiteSpace) { // Remove trailing spaces @@ -1491,7 +1491,7 @@ public static bool IsValidDomain(string link, int prefixLength) if (c == ']') { - lines.NextChar(); // Skip ] + lines.SkipChar(); // Skip ] if (allowEmpty || hasNonWhiteSpace) { // Remove trailing spaces diff --git a/src/Markdig/Helpers/Newline.cs b/src/Markdig/Helpers/Newline.cs index f2a048074..b3c25e8e0 100644 --- a/src/Markdig/Helpers/Newline.cs +++ b/src/Markdig/Helpers/Newline.cs @@ -12,39 +12,24 @@ namespace Markdig.Helpers /// public enum NewLine : byte { - None, - CarriageReturn, - LineFeed, - CarriageReturnLineFeed + // Values have the length encoded in last 2 bits + None = 0, + CarriageReturn = 4 | 1, + LineFeed = 8 | 1, + CarriageReturnLineFeed = 16 | 2 } public static class NewLineExtensions { - public static string AsString(this NewLine newLine) + public static string AsString(this NewLine newLine) => newLine switch { - if (newLine == NewLine.CarriageReturnLineFeed) - { - return "\r\n"; - } - if (newLine == NewLine.LineFeed) - { - return "\n"; - } - if (newLine == NewLine.CarriageReturn) - { - return "\r"; - } - return string.Empty; - } - - public static int Length(this NewLine newLine) => newLine switch - { - NewLine.None => 0, - NewLine.CarriageReturn => 1, - NewLine.LineFeed => 1, - NewLine.CarriageReturnLineFeed => 2, - _ => throw new NotSupportedException(), + NewLine.CarriageReturnLineFeed => "\r\n", + NewLine.LineFeed => "\n", + NewLine.CarriageReturn => "\r", + _ => string.Empty, }; + + public static int Length(this NewLine newLine) => (int)newLine & 3; } } diff --git a/src/Markdig/Helpers/StringLineGroup.cs b/src/Markdig/Helpers/StringLineGroup.cs index e05d5ae59..ab238fd75 100644 --- a/src/Markdig/Helpers/StringLineGroup.cs +++ b/src/Markdig/Helpers/StringLineGroup.cs @@ -122,8 +122,8 @@ public void Add(StringSlice slice) // Optimization case for a single line. if (Count == 1) { - var l = Lines[0]; - lineOffsets?.Add(new LineOffset(l.Position, l.Column, l.Slice.Start - l.Position, l.Slice.Start, l.Slice.End + 1)); + ref StringLine line = ref Lines[0]; + lineOffsets?.Add(new LineOffset(line.Position, line.Column, line.Slice.Start - line.Position, line.Slice.Start, line.Slice.End + 1)); return Lines[0]; } @@ -149,7 +149,7 @@ public void Add(StringSlice slice) builder.Append(newLine.AsString()); previousStartOfLine = builder.Length; } - ref var line = ref Lines[i]; + ref StringLine line = ref Lines[i]; if (!line.Slice.IsEmpty) { builder.Append(line.Slice.Text, line.Slice.Start, line.Slice.Length); @@ -225,10 +225,10 @@ public Iterator(StringLineGroup lines) End = -1; for (int i = 0; i < lines.Count; i++) { - var line = lines.Lines[i]; + ref StringLine line = ref lines.Lines[i]; End += line.Slice.Length + line.NewLine.Length(); // Add chars } - NextChar(); + SkipChar(); } public int Start { get; private set; } @@ -244,7 +244,7 @@ public Iterator(StringLineGroup lines) public StringLineGroup Remaining() { var lines = _lines; - if (CurrentChar == '\0') + if (IsEmpty) { lines.Clear(); } @@ -257,8 +257,9 @@ public StringLineGroup Remaining() if (lines.Count > 0 && _offset > 0) { - lines.Lines[0].Column += _offset; - lines.Lines[0].Slice.Start += _offset; + ref StringLine line = ref lines.Lines[0]; + line.Column += _offset; + line.Slice.Start += _offset; } } @@ -318,7 +319,11 @@ public char NextChar() return CurrentChar; } - public readonly char PeekChar(int offset = 1) + public void SkipChar() => NextChar(); + + public readonly char PeekChar() => PeekChar(1); + + public readonly char PeekChar(int offset) { if (offset < 0) ThrowHelper.ArgumentOutOfRangeException("Negative offset are not supported for StringLineGroup", nameof(offset)); @@ -330,8 +335,8 @@ public char NextChar() offset += _offset; int sliceIndex = SliceIndex; - var line = _lines.Lines[sliceIndex]; - var slice = line.Slice; + ref StringLine line = ref _lines.Lines[sliceIndex]; + ref StringSlice slice = ref line.Slice; if (!(line.NewLine == NewLine.CarriageReturnLineFeed && offset == slice.Length + 1)) { while (offset > slice.Length) @@ -340,7 +345,7 @@ public char NextChar() offset -= slice.Length + 1; // + 1 for new line Debug.Assert(sliceIndex + 1 < _lines.Lines.Length, "'Start + offset > End' check above should prevent us from indexing out of range"); - slice = _lines.Lines[++sliceIndex].Slice; + slice = ref _lines.Lines[++sliceIndex].Slice; } } else diff --git a/src/Markdig/Helpers/StringSlice.cs b/src/Markdig/Helpers/StringSlice.cs index e805119ba..79979a17c 100644 --- a/src/Markdig/Helpers/StringSlice.cs +++ b/src/Markdig/Helpers/StringSlice.cs @@ -155,6 +155,17 @@ public char NextChar() return Text[start]; } + /// + /// Goes to the next character, incrementing the position. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void SkipChar() + { + int start = Start; + if (start <= End) + Start = start + 1; + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] internal int CountAndSkipChar(char matchChar) { @@ -461,7 +472,7 @@ public void Trim() public bool Overlaps(StringSlice other) { - if (Length == 0 || other.Length == 0) + if (IsEmpty || other.IsEmpty) { return false; } diff --git a/src/Markdig/Parsers/BlockProcessor.cs b/src/Markdig/Parsers/BlockProcessor.cs index d852d55a1..cf6e931b6 100644 --- a/src/Markdig/Parsers/BlockProcessor.cs +++ b/src/Markdig/Parsers/BlockProcessor.cs @@ -120,7 +120,7 @@ public BlockProcessor(MarkdownDocument document, BlockParserList parsers, Markdo /// /// Gets a value indicating whether the line is blank (valid only after has been called). /// - public bool IsBlankLine => CurrentChar == '\0'; + public bool IsBlankLine => Line.IsEmpty; /// /// Gets the current character being processed. diff --git a/src/Markdig/Parsers/HtmlBlockParser.cs b/src/Markdig/Parsers/HtmlBlockParser.cs index 047775c39..bf99d02d7 100644 --- a/src/Markdig/Parsers/HtmlBlockParser.cs +++ b/src/Markdig/Parsers/HtmlBlockParser.cs @@ -49,7 +49,7 @@ private BlockState MatchStart(BlockProcessor state) var line = state.Line; var startPosition = line.Start; - line.NextChar(); + line.SkipChar(); var result = TryParseTagType16(state, line, state.ColumnBeforeIndent, startPosition); // HTML blocks of type 7 cannot interrupt a paragraph: diff --git a/src/Markdig/Parsers/Inlines/AutolinkInlineParser.cs b/src/Markdig/Parsers/Inlines/AutolinkInlineParser.cs index 563f73aa9..5993a4c1e 100644 --- a/src/Markdig/Parsers/Inlines/AutolinkInlineParser.cs +++ b/src/Markdig/Parsers/Inlines/AutolinkInlineParser.cs @@ -47,7 +47,7 @@ public override bool Match(InlineProcessor processor, ref StringSlice slice) else if (EnableHtmlParsing) { slice = saved; - if (!HtmlHelper.TryParseHtmlTag(ref slice, out string htmlTag)) + if (!HtmlHelper.TryParseHtmlTag(ref slice, out string? htmlTag)) { return false; } diff --git a/src/Markdig/Parsers/Inlines/EscapeInlineParser.cs b/src/Markdig/Parsers/Inlines/EscapeInlineParser.cs index 1cd4b7e65..a85c6d82e 100644 --- a/src/Markdig/Parsers/Inlines/EscapeInlineParser.cs +++ b/src/Markdig/Parsers/Inlines/EscapeInlineParser.cs @@ -36,7 +36,7 @@ public override bool Match(InlineProcessor processor, ref StringSlice slice) IsFirstCharacterEscaped = true, }; processor.Inline.Span.End = processor.Inline.Span.Start + 1; - slice.NextChar(); + slice.SkipChar(); return true; } @@ -60,7 +60,7 @@ public override bool Match(InlineProcessor processor, ref StringSlice slice) NewLine = newLine }; processor.Inline.Span.End = processor.Inline.Span.Start + 1; - slice.NextChar(); + slice.SkipChar(); return true; } } @@ -77,7 +77,7 @@ public override bool Match(InlineProcessor processor, ref StringSlice slice) Column = column }; processor.Inline.Span.End = processor.Inline.Span.Start + 1; - slice.NextChar(); + slice.SkipChar(); return true; } } diff --git a/src/Markdig/Parsers/Inlines/LineBreakInlineParser.cs b/src/Markdig/Parsers/Inlines/LineBreakInlineParser.cs index 123d65b97..308033892 100644 --- a/src/Markdig/Parsers/Inlines/LineBreakInlineParser.cs +++ b/src/Markdig/Parsers/Inlines/LineBreakInlineParser.cs @@ -45,7 +45,7 @@ public override bool Match(InlineProcessor processor, ref StringSlice slice) if (slice.PeekChar() == '\n') { newLine = NewLine.CarriageReturnLineFeed; - slice.NextChar(); // Skip \n + slice.SkipChar(); // Skip \n } else { @@ -61,10 +61,10 @@ public override bool Match(InlineProcessor processor, ref StringSlice slice) { if (slice.CurrentChar == '\r' && slice.PeekChar() == '\n') { - slice.NextChar(); // Skip \n + slice.SkipChar(); // Skip \n } } - slice.NextChar(); // Skip \r or \n + slice.SkipChar(); // Skip \r or \n processor.Inline = new LineBreakInline { diff --git a/src/Markdig/Parsers/Inlines/LinkInlineParser.cs b/src/Markdig/Parsers/Inlines/LinkInlineParser.cs index 373ebd632..875f70284 100644 --- a/src/Markdig/Parsers/Inlines/LinkInlineParser.cs +++ b/src/Markdig/Parsers/Inlines/LinkInlineParser.cs @@ -77,7 +77,7 @@ public override bool Match(InlineProcessor processor, ref StringSlice slice) slice = saved; // Else we insert a LinkDelimiter - slice.NextChar(); + slice.SkipChar(); var labelWithTrivia = new StringSlice(slice.Text, labelWithTriviaSpan.Start, labelWithTriviaSpan.End); processor.Inline = new LinkDelimiterInline(this) { @@ -93,7 +93,7 @@ public override bool Match(InlineProcessor processor, ref StringSlice slice) return true; case ']': - slice.NextChar(); + slice.SkipChar(); if (processor.Inline != null) { if (TryProcessLinkOrImage(processor, ref slice)) @@ -353,8 +353,8 @@ private bool TryProcessLinkOrImage(InlineProcessor inlineState, ref StringSlice labelSpan = openParent.LabelSpan; isLabelSpanLocal = false; localLabel = LocalLabel.Empty; - text.NextChar(); // Skip [ - text.NextChar(); // Skip ] + text.SkipChar(); // Skip [ + text.SkipChar(); // Skip ] } } else diff --git a/src/Markdig/Renderers/Roundtrip/CodeBlockRenderer.cs b/src/Markdig/Renderers/Roundtrip/CodeBlockRenderer.cs index 69b52b734..6deb11f57 100644 --- a/src/Markdig/Renderers/Roundtrip/CodeBlockRenderer.cs +++ b/src/Markdig/Renderers/Roundtrip/CodeBlockRenderer.cs @@ -2,6 +2,7 @@ // This file is licensed under the BSD-Clause 2 license. // See the license.txt file in the project root for more information. +using Markdig.Helpers; using Markdig.Syntax; using System.Collections.Generic; @@ -90,8 +91,8 @@ public void WriteLeafRawLines(RoundtripRenderer renderer, LeafBlock leafBlock) var slices = lines.Lines; for (int i = 0; i < lines.Count; i++) { - var slice = slices[i].Slice; - renderer.Write(ref slices[i].Slice); + ref StringSlice slice = ref slices[i].Slice; + renderer.Write(ref slice); renderer.WriteLine(slice.NewLine); } }