Skip to content

Commit

Permalink
add all special chars supported by YAML
Browse files Browse the repository at this point in the history
  • Loading branch information
mruoss committed Nov 20, 2023
1 parent bdcccd5 commit 536da82
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 58 deletions.
90 changes: 45 additions & 45 deletions BENCHMARK.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Benchmark

Benchmark run from 2023-11-19 13:22:29.792152Z UTC
Benchmark run from 2023-11-20 14:52:22.679140Z UTC

## System

Expand Down Expand Up @@ -65,20 +65,20 @@ Run Time

<tr>
<td style="white-space: nowrap">Jason</td>
<td style="white-space: nowrap; text-align: right">284.78</td>
<td style="white-space: nowrap; text-align: right">3.51 ms</td>
<td style="white-space: nowrap; text-align: right">&plusmn;13.65%</td>
<td style="white-space: nowrap; text-align: right">3.25 ms</td>
<td style="white-space: nowrap; text-align: right">4.71 ms</td>
<td style="white-space: nowrap; text-align: right">279.16</td>
<td style="white-space: nowrap; text-align: right">3.58 ms</td>
<td style="white-space: nowrap; text-align: right">&plusmn;13.81%</td>
<td style="white-space: nowrap; text-align: right">3.34 ms</td>
<td style="white-space: nowrap; text-align: right">4.81 ms</td>
</tr>

<tr>
<td style="white-space: nowrap">Ymlr</td>
<td style="white-space: nowrap; text-align: right">1.58</td>
<td style="white-space: nowrap; text-align: right">632.01 ms</td>
<td style="white-space: nowrap; text-align: right">&plusmn;0.31%</td>
<td style="white-space: nowrap; text-align: right">632.22 ms</td>
<td style="white-space: nowrap; text-align: right">634.95 ms</td>
<td style="white-space: nowrap; text-align: right">3.64</td>
<td style="white-space: nowrap; text-align: right">274.95 ms</td>
<td style="white-space: nowrap; text-align: right">&plusmn;0.71%</td>
<td style="white-space: nowrap; text-align: right">275.05 ms</td>
<td style="white-space: nowrap; text-align: right">278.58 ms</td>
</tr>

</table>
Expand All @@ -93,14 +93,14 @@ Run Time Comparison
<th style="text-align: right">Slower</th>
<tr>
<td style="white-space: nowrap">Jason</td>
<td style="white-space: nowrap;text-align: right">284.78</td>
<td style="white-space: nowrap;text-align: right">279.16</td>
<td>&nbsp;</td>
</tr>

<tr>
<td style="white-space: nowrap">Ymlr</td>
<td style="white-space: nowrap; text-align: right">1.58</td>
<td style="white-space: nowrap; text-align: right">179.98x</td>
<td style="white-space: nowrap; text-align: right">3.64</td>
<td style="white-space: nowrap; text-align: right">76.76x</td>
</tr>

</table>
Expand All @@ -122,8 +122,8 @@ Memory Usage
</tr>
<tr>
<td style="white-space: nowrap">Ymlr</td>
<td style="white-space: nowrap">104.88 MB</td>
<td>21.83x</td>
<td style="white-space: nowrap">65.74 MB</td>
<td>13.68x</td>
</tr>
</table>

Expand All @@ -145,20 +145,20 @@ Run Time

<tr>
<td style="white-space: nowrap">Jason</td>
<td style="white-space: nowrap; text-align: right">127.80</td>
<td style="white-space: nowrap; text-align: right">7.82 ms</td>
<td style="white-space: nowrap; text-align: right">&plusmn;14.23%</td>
<td style="white-space: nowrap; text-align: right">7.43 ms</td>
<td style="white-space: nowrap; text-align: right">13.36 ms</td>
<td style="white-space: nowrap; text-align: right">122.70</td>
<td style="white-space: nowrap; text-align: right">8.15 ms</td>
<td style="white-space: nowrap; text-align: right">&plusmn;14.44%</td>
<td style="white-space: nowrap; text-align: right">7.75 ms</td>
<td style="white-space: nowrap; text-align: right">13.64 ms</td>
</tr>

<tr>
<td style="white-space: nowrap">Ymlr</td>
<td style="white-space: nowrap; text-align: right">22.00</td>
<td style="white-space: nowrap; text-align: right">45.46 ms</td>
<td style="white-space: nowrap; text-align: right">&plusmn;19.58%</td>
<td style="white-space: nowrap; text-align: right">45.77 ms</td>
<td style="white-space: nowrap; text-align: right">58.62 ms</td>
<td style="white-space: nowrap; text-align: right">21.67</td>
<td style="white-space: nowrap; text-align: right">46.15 ms</td>
<td style="white-space: nowrap; text-align: right">&plusmn;20.12%</td>
<td style="white-space: nowrap; text-align: right">46.06 ms</td>
<td style="white-space: nowrap; text-align: right">61.63 ms</td>
</tr>

</table>
Expand All @@ -173,14 +173,14 @@ Run Time Comparison
<th style="text-align: right">Slower</th>
<tr>
<td style="white-space: nowrap">Jason</td>
<td style="white-space: nowrap;text-align: right">127.80</td>
<td style="white-space: nowrap;text-align: right">122.70</td>
<td>&nbsp;</td>
</tr>

<tr>
<td style="white-space: nowrap">Ymlr</td>
<td style="white-space: nowrap; text-align: right">22.00</td>
<td style="white-space: nowrap; text-align: right">5.81x</td>
<td style="white-space: nowrap; text-align: right">21.67</td>
<td style="white-space: nowrap; text-align: right">5.66x</td>
</tr>

</table>
Expand All @@ -202,7 +202,7 @@ Memory Usage
</tr>
<tr>
<td style="white-space: nowrap">Ymlr</td>
<td style="white-space: nowrap">50.11 MB</td>
<td style="white-space: nowrap">50.09 MB</td>
<td>5.43x</td>
</tr>
</table>
Expand All @@ -225,20 +225,20 @@ Run Time

<tr>
<td style="white-space: nowrap">Jason</td>
<td style="white-space: nowrap; text-align: right">351.13</td>
<td style="white-space: nowrap; text-align: right">2.85 ms</td>
<td style="white-space: nowrap; text-align: right">&plusmn;246.31%</td>
<td style="white-space: nowrap; text-align: right">2.60 ms</td>
<td style="white-space: nowrap; text-align: right">4.17 ms</td>
<td style="white-space: nowrap; text-align: right">376.78</td>
<td style="white-space: nowrap; text-align: right">2.65 ms</td>
<td style="white-space: nowrap; text-align: right">&plusmn;3.50%</td>
<td style="white-space: nowrap; text-align: right">2.64 ms</td>
<td style="white-space: nowrap; text-align: right">2.87 ms</td>
</tr>

<tr>
<td style="white-space: nowrap">Ymlr</td>
<td style="white-space: nowrap; text-align: right">2.35</td>
<td style="white-space: nowrap; text-align: right">426.22 ms</td>
<td style="white-space: nowrap; text-align: right">&plusmn;0.78%</td>
<td style="white-space: nowrap; text-align: right">425.46 ms</td>
<td style="white-space: nowrap; text-align: right">435.53 ms</td>
<td style="white-space: nowrap; text-align: right">5.40</td>
<td style="white-space: nowrap; text-align: right">185.32 ms</td>
<td style="white-space: nowrap; text-align: right">&plusmn;0.98%</td>
<td style="white-space: nowrap; text-align: right">184.82 ms</td>
<td style="white-space: nowrap; text-align: right">190.55 ms</td>
</tr>

</table>
Expand All @@ -253,14 +253,14 @@ Run Time Comparison
<th style="text-align: right">Slower</th>
<tr>
<td style="white-space: nowrap">Jason</td>
<td style="white-space: nowrap;text-align: right">351.13</td>
<td style="white-space: nowrap;text-align: right">376.78</td>
<td>&nbsp;</td>
</tr>

<tr>
<td style="white-space: nowrap">Ymlr</td>
<td style="white-space: nowrap; text-align: right">2.35</td>
<td style="white-space: nowrap; text-align: right">149.66x</td>
<td style="white-space: nowrap; text-align: right">5.40</td>
<td style="white-space: nowrap; text-align: right">69.83x</td>
</tr>

</table>
Expand All @@ -282,7 +282,7 @@ Memory Usage
</tr>
<tr>
<td style="white-space: nowrap">Ymlr</td>
<td style="white-space: nowrap">78.99 MB</td>
<td>31.06x</td>
<td style="white-space: nowrap">52.62 MB</td>
<td>20.69x</td>
</tr>
</table>
33 changes: 25 additions & 8 deletions lib/ymlr/encode.ex
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,28 @@ defmodule Ymlr.Encode do
":"
]

@escape_chars ~c"\b\f\r\v\0\"\\"
@escape_char_mapping Enum.zip(@escape_chars, ~c"bfrv0\"\\")
@unicode_chars Enum.to_list(0x00..0x1F) ++ Enum.to_list(0x7F..0xFF)
@unicode_char_mapping Enum.reject(@unicode_chars, &Kernel.in(&1, ~c"\n\t" ++ @escape_chars))
@require_double_quotes Enum.map(~c"\b\f\r\v\0" ++ @unicode_char_mapping, &<<&1>>)
# Code points in 0x00..0xFF that can be emitted without escaping.
# Based on the YAML character set: https://yaml.org/spec/1.2.2/#51-character-set
@printable_chars List.flatten([
  # Tab (\t)
  0x09,
  # Line feed (LF \n)
  0x0A,
  # Carriage Return (CR \r)
  # 0x0D, theoretically printable, seems to require double quotes.
  # Next Line (NEL)
  0x85,
  # Printable ASCII
  Enum.to_list(0x20..0x7E),
  # Lower end of the spec's Basic Multilingual Plane range; only the code
  # points up to 0xFF are enumerated here.
  Enum.to_list(0xA0..0xFF)
])

# Every code point in 0x00..0xA0 that is not listed as printable above.
@non_printable_chars Enum.to_list(0x00..0xA0) -- @printable_chars

# Search patterns for String.contains?/2, one per non-printable code point.
# Each code point is UTF-8 encoded (`::utf8`): String.contains?/2 matches raw
# bytes, so a bare `<<0x80>>` would also match the continuation byte of
# unrelated characters such as "À" (<<0xC3, 0x80>>) and quote them needlessly.
@non_printable_chars_strings Enum.map(@non_printable_chars, &<<&1::utf8>>)

# see https://yaml.org/spec/1.2.2/#57-escaped-characters
# Characters that are written as a backslash followed by a single character.
@escape_chars ~c"\a\b\e\f\r\v\0\u00a0\u0085\u2028\u2029\"\\"
# Pairs each character above, position by position, with the character that
# follows the backslash in the output (e.g. \u00a0 -> "_", \u0085 -> "N").
@escape_char_mapping Enum.zip(@escape_chars, ~c"abefrv0_NLP\"\\")
# Non-printable characters that have no short escape in the list above.
@non_printable_special_chars @non_printable_chars -- @escape_chars

@doc ~S"""
Encodes the given data as YAML string. Raises if it cannot be encoded.
Expand Down Expand Up @@ -162,7 +179,7 @@ defmodule Ymlr.Encode do
data == "True" -> ~S('True')
data == "False" -> ~S('False')
String.contains?(data, "\n") -> multiline(data, indent_level)
String.contains?(data, @require_double_quotes) -> with_double_quotes(data)
String.contains?(data, @non_printable_chars_strings) -> with_double_quotes(data)
String.at(data, 0) in @quote_when_first -> with_quotes(data)
String.at(data, -1) in @quote_when_last -> with_quotes(data)
String.starts_with?(data, "- ") -> with_quotes(data)
Expand Down Expand Up @@ -216,8 +233,8 @@ defmodule Ymlr.Encode do
defp escape_char(unquote(char)), do: <<?\\, unquote(escaped)>>
end

for uchar <- @unicode_char_mapping do
unicode_sequence = List.to_string(:io_lib.format("\\u~4.16.0B", [uchar]))
# Generates one escape_char/1 clause per remaining non-printable code point,
# each returning a two-digit hexadecimal escape (e.g. 0x13 becomes \x13).
for code_point <- @non_printable_special_chars do
  hex_escape = "\\x~2.16.0B" |> :io_lib.format([code_point]) |> List.to_string()
  defp escape_char(unquote(code_point)), do: unquote(hex_escape)
end

Expand Down
17 changes: 12 additions & 5 deletions test/ymlr/encode_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -115,16 +115,23 @@ defmodule Ymlr.EncodeTest do

# https://yaml.org/spec/1.2.2/#example-escaped-characters
test "quoted strings - example-escaped-characters from 1.2.2 spec" do
assert_identity_and_output("Fun with \\", "Fun with \\")
assert_identity_and_output("\r \t \u000b \u0000", "\"\\r \t \\v \\0\"")
assert_identity_and_output(~S(Fun with \\), ~S(Fun with \\))
assert_identity_and_output("\" \u0007 \b \u001b \f", ~S("\" \a \b \e \f"))
# Line breaks inside scalar content must be normalized by the YAML processor.
# Each such line break must be parsed into a single line feed character.
# The original line break format is a presentation detail and must not be
# used to convey content information.
# I.e. the following cannot be tested for identity as \r will be parsed as \n.
assert_output("\n\r \t \u000b \u0000", "|-\n\n \r \t \v \0")
assert_identity_and_output("\r \t \u000b \u0000", ~s("\\r \t \\v \\0"))

assert_identity_and_output(
"\u0020 \u00a0 \u0085 \u2028 \u2029",
"\" \\u00A0 \\u0085 \u2028 \u2029\""
~S(" \_ \N \L \P")
)

assert_identity_and_output("\" \u0007 \b \u001b \f", "\"\\\" \\u0007 \\b \\u001B \\f\"")
assert_identity_and_output("\r \t \u000b \u0000", "\"\\r \t \\v \\0\"")
# Possible formats: \x13 \u0013 \U00000013. We use \x13
assert_identity_and_output("\u0013", "\"\\x13\"")
end

test "quoted strings - in map key (requires escape char)" do
Expand Down

0 comments on commit 536da82

Please sign in to comment.