Skip to content

Commit bdafaaa

Browse files
ShubhamChaturvedi7Shubham Chaturvedi
and
Shubham Chaturvedi
authored
fix(go): utf8-utf16 encoding (#792)
*Issue #, if available:* *Description of changes:* aws/private-aws-encryption-sdk-javascript-staging#764 By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. --------- Co-authored-by: Shubham Chaturvedi <scchatur@amazon.com>
1 parent d59ef8b commit bdafaaa

File tree

11 files changed

+206
-117
lines changed

11 files changed

+206
-117
lines changed

Diff for: .gitmodules

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@
44
[submodule "TestModels/dafny-dependencies/dafny"]
55
path = TestModels/dafny-dependencies/dafny
66
url = https://github.com/dafny-lang/dafny.git
7-
branch = actions-and-streaming-stdlibs
7+
branch = actions-and-streaming-stdlibs

Diff for: TestModels/SimpleTypes/SimpleString/runtimes/go/TestsFromDafny-go/WrappedSimpleTypesStringService/shim.go

-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: TestModels/SimpleTypes/SimpleString/runtimes/go/TestsFromDafny-go/go.mod

+1
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,5 @@ require (
1010
)
1111

1212
replace github.com/smithy-lang/smithy-dafny/TestModels/SimpleTypes/SimpleString v0.0.0 => ../ImplementationFromDafny-go
13+
1314
replace github.com/aws/aws-cryptographic-material-providers-library/releases/go/smithy-dafny-standard-library => ../../../../../dafny-dependencies/StandardLibrary/runtimes/go/ImplementationFromDafny-go/

Diff for: TestModels/SimpleTypes/SimpleString/test/SimpleStringImplTest.dfy

+54-1
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,20 @@ module SimpleStringImplTest {
99
import UTF8
1010
method{:test} GetString(){
1111
var client :- expect SimpleString.SimpleString();
12-
TestGetString(client);
12+
TestAllCases(client);
13+
}
14+
15+
method TestAllCases(client: ISimpleTypesStringClient)
16+
requires client.ValidState()
17+
modifies client.Modifies
18+
ensures client.ValidState()
19+
{
20+
TestGetString(client);
1321
TestGetStringKnownValue(client);
1422
TestGetStringNonAscii(client);
23+
TestGetStringSurrogatePair(client);
24+
TestGetStringMultipleSurrogatePair(client);
25+
TestGetStringMultipleSurrogatePairAsString(client);
1526
TestGetStringUTF8(client);
1627
TestGetStringUTF8KnownValue(client);
1728
}
@@ -45,6 +56,48 @@ module SimpleStringImplTest {
4556
expect ret.value.UnwrapOr("") == utf16EncodedString;
4657
print ret;
4758
}
59+
method TestGetStringSurrogatePair(client: ISimpleTypesStringClient)
60+
requires client.ValidState()
61+
modifies client.Modifies
62+
ensures client.ValidState()
63+
{
64+
// Formula to combine high and low surrogate to get the unicode codepoint:
65+
// codepoint = (highSurrogate - 0xD800) * 0x400 + (lowSurrogate - 0xDC00) + 0x10000
66+
// The unicode codepoint of 𐀂 is 65538.
67+
// 𐀂 has high surrogate 55296 (0xD800) and low surrogate 56322 (0xDC02)
68+
// So,
69+
// 1. (55296 - 0xD800) * 0x400 = 0 * 1024 = 0
70+
// 2. (56322 - 0xDC00) = 2
71+
// 3. 0 + 2 + 0x10000 = 65538
72+
var surrogatePair: seq<char> := [0xD800 as char, 0xDC02 as char];
73+
var ret :- expect client.GetString(SimpleString.Types.GetStringInput(value:= Some(surrogatePair)));
74+
expect ret.value.UnwrapOr("") == surrogatePair;
75+
print ret;
76+
}
77+
method TestGetStringMultipleSurrogatePair(client: ISimpleTypesStringClient)
78+
requires client.ValidState()
79+
modifies client.Modifies
80+
ensures client.ValidState()
81+
{
82+
// 𐀂 := [0xD800 as char, 0xDC02 as char], 𐐷 := [0xD801 as char, 0xDC37 as char]
83+
// => 𐀂𐐷 := [0xD800 as char, 0xDC02 as char, 0xD801 as char, 0xDC37 as char]
84+
var surrogatePair: seq<char> := [0xD800 as char, 0xDC02 as char, 0xD801 as char, 0xDC37 as char];
85+
var ret :- expect client.GetString(SimpleString.Types.GetStringInput(value:= Some(surrogatePair)));
86+
expect ret.value.UnwrapOr("") == surrogatePair;
87+
print ret;
88+
}
89+
method TestGetStringMultipleSurrogatePairAsString(client: ISimpleTypesStringClient)
90+
requires client.ValidState()
91+
modifies client.Modifies
92+
ensures client.ValidState()
93+
{
94+
// 𐀂 := [0xD800 as char, 0xDC02 as char], 𐐷 := [0xD801 as char, 0xDC37 as char]
95+
// => 𐀂𐐷 := [0xD800 as char, 0xDC02 as char, 0xD801 as char, 0xDC37 as char]
96+
var surrogatePair := "\uD800\uDC02\uD801\uDC37";
97+
var ret :- expect client.GetString(SimpleString.Types.GetStringInput(value:= Some(surrogatePair)));
98+
expect ret.value.UnwrapOr("") == surrogatePair;
99+
print ret;
100+
}
48101
method TestGetStringUTF8(client: ISimpleTypesStringClient)
49102
requires client.ValidState()
50103
modifies client.Modifies

Diff for: TestModels/SimpleTypes/SimpleString/test/WrappedSimpleStringTest.dfy

+1-3
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ module WrappedSimpleTypesStringTest {
99
import opened Wrappers
1010
method{:test} GetString() {
1111
var client :- expect WrappedSimpleTypesStringService.WrappedSimpleString();
12-
SimpleStringImplTest.TestGetString(client);
13-
SimpleStringImplTest.TestGetStringKnownValue(client);
14-
SimpleStringImplTest. TestGetStringUTF8(client);
12+
SimpleStringImplTest.TestAllCases(client);
1513
}
1614
}

Diff for: TestModels/dafny-dependencies/StandardLibrary/runtimes/go/ImplementationFromDafny-go/UTF8/externs.go

+86-28
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,24 @@ package UTF8
22

33
import (
44
"fmt"
5-
"unicode"
5+
"math"
66
"unicode/utf16"
77
"unicode/utf8"
88

99
"github.com/aws/aws-cryptographic-material-providers-library/releases/go/smithy-dafny-standard-library/Wrappers"
1010
"github.com/dafny-lang/DafnyRuntimeGo/v4/dafny"
1111
)
1212

13+
// The following constants are copied from the Go utf16 lib and are used
14+
// to check the validity of the utf16 surrogate pairs.
15+
const (
16+
// 0xd800-0xdc00 encodes the high 10 bits of a pair.
17+
// 0xdc00-0xe000 encodes the low 10 bits of a pair.
18+
surr1 = 0xd800
19+
surr2 = 0xdc00
20+
surr3 = 0xe000
21+
)
22+
1323
//IMP: The below extern implementations are only compatible
1424
//with unicode-char:false transpiled code.
1525

@@ -19,46 +29,94 @@ import (
1929
// we need to encode the result in compatible dafny utf16 string before returning
2030
// the result.
2131
func Decode(utf8EncodedDafnySeq dafny.Sequence) Wrappers.Result {
22-
utf8EncodedByteArray := dafny.ToByteArray(utf8EncodedDafnySeq)
23-
24-
utf16Encoded := utf16.Encode([]rune(string(utf8EncodedByteArray)))
25-
var dafnyCharArray []dafny.Char
26-
for _, c := range utf16Encoded {
27-
if c == unicode.ReplacementChar {
28-
err := fmt.Errorf("Encountered Not Allowed Replacement Character: %s ", unicode.ReplacementChar)
29-
return Wrappers.Companion_Result_.Create_Failure_(dafny.SeqOfString(err.Error()))
30-
}
31-
dafnyCharArray = append(dafnyCharArray, dafny.Char(c))
32+
res, err := DecodeFromNativeGoByteArray(dafny.ToByteArray(utf8EncodedDafnySeq))
33+
if err != nil {
34+
return Wrappers.Companion_Result_.Create_Failure_(dafny.SeqOfString(err.Error()))
3235
}
33-
return Wrappers.Companion_Result_.Default(dafny.SeqOfChars(dafnyCharArray...))
36+
37+
return Wrappers.Companion_Result_.Create_Success_(res)
3438
}
3539

3640
// Encode encodes utf16 encoded dafny char (rune) to utf-8 Go rune sequence.
3741
// Anything we receive here is supposed to be utf16 encoded Go rune
3842
// since this extern is for unicode-char:false.
3943
func Encode(utf16EncodedDafnySeq dafny.Sequence) Wrappers.Result {
40-
encodedUtf16 := utf16EncodedDafnySeqToUint16(utf16EncodedDafnySeq)
41-
decodedUtf16 := utf16.Decode(encodedUtf16)
42-
var utf8EncodedBytes []byte
43-
for _, r := range decodedUtf16 {
44-
if !utf8.ValidRune(r) || r == unicode.ReplacementChar {
45-
return Wrappers.Companion_Result_.Create_Failure_(dafny.SeqOfString("Failed to utf8 encode rune"))
46-
}
47-
buf := make([]byte, utf8.RuneLen(r))
48-
n := utf8.EncodeRune(buf, r)
49-
utf8EncodedBytes = append(utf8EncodedBytes, buf[:n]...)
44+
utf8EncodedBytes, err := decodeUtf16(utf16EncodedDafnySeq)
45+
if err != nil {
46+
return Wrappers.Companion_Result_.Create_Failure_(dafny.SeqOfString(err.Error()))
5047
}
5148
return Wrappers.Companion_Result_.Create_Success_(dafny.SeqOfBytes(utf8EncodedBytes))
5249
}
5350

54-
func utf16EncodedDafnySeqToUint16(seq dafny.Sequence) []uint16 {
55-
var r []uint16
51+
// This method is to be called from the Type Conversion layer.
52+
// We reuse the same method so that all conversions are consistent.
53+
func DecodeFromNativeGoByteArray(utf8EncodedByteArray []byte) (dafny.Sequence, error) {
54+
if !utf8.Valid(utf8EncodedByteArray) {
55+
return nil, fmt.Errorf("invalid utf8 encoded sequence: %v", utf8EncodedByteArray)
56+
}
57+
utf16Encoded := utf16.Encode([]rune(string(utf8EncodedByteArray)))
58+
var dafnyCharArray []dafny.Char
59+
for _, c := range utf16Encoded {
60+
dafnyCharArray = append(dafnyCharArray, dafny.Char(c))
61+
}
62+
return dafny.SeqOfChars(dafnyCharArray...), nil
63+
}
64+
65+
// decode appends to buf the Unicode code point sequence represented
66+
// by the UTF-16 encoding seq, then encode the code point as utf8 and return the utf8 buffer
67+
func decodeUtf16(seq dafny.Sequence) ([]byte, error) {
68+
utf8EncodedBytes := []byte{}
69+
5670
for i := dafny.Iterate(seq); ; {
57-
val, ok := i()
58-
if !ok {
59-
return r
71+
firstVal, firstValExists := i()
72+
if !firstValExists {
73+
// Iterator has finished, return the buffer
74+
return utf8EncodedBytes, nil
6075
} else {
61-
r = append(r, uint16(val.(dafny.Char)))
76+
var ar rune
77+
78+
// We should be able to rely on dafny that anything inside the seq is utf16 encoded
79+
// with unicode-char: false. But given the Long Psi issue, it's better to be safe.
80+
// First check if it's a dafny.Char type, then check if it's within the limits of uint16.
81+
firstChar, firstValIsAChar := firstVal.(dafny.Char)
82+
if !firstValIsAChar || firstChar > math.MaxUint16 || firstChar < 0 {
83+
return nil, fmt.Errorf("invalid utf16 encoded sequence: %v", seq)
84+
}
85+
86+
// Downcast to uint16
87+
switch r1 := uint16(firstChar); {
88+
89+
case r1 < surr1, surr3 <= r1:
90+
// normal rune
91+
ar = rune(r1)
92+
93+
case utf16.IsSurrogate(rune(r1)):
94+
// If firstVal is surrogate, then we need the secondVal to construct the pair
95+
secondVal, ok := i()
96+
97+
// Same sanity check as line 84
98+
secondChar, secondValIsAChar := secondVal.(dafny.Char)
99+
if !ok || !secondValIsAChar || secondChar > math.MaxUint16 || secondChar < 0 {
100+
return nil, fmt.Errorf("invalid utf16 encoded sequence: %v", seq)
101+
}
102+
103+
// Check if the secondVal is within the valid low surrogate range
104+
if surr2 <= uint16(secondChar) && uint16(secondChar) < surr3 {
105+
// valid surrogate sequence
106+
ar = utf16.DecodeRune(rune(r1), rune(uint16(secondChar)))
107+
} else {
108+
return nil, fmt.Errorf("invalid utf16 encoded sequence: %v", seq)
109+
}
110+
default:
111+
return nil, fmt.Errorf("invalid utf16 encoded sequence: %v", seq)
112+
}
113+
114+
// Create the buffer (upto 4 bytes) to hold the utf8 rune
115+
buf := make([]byte, utf8.RuneLen(ar))
116+
n := utf8.EncodeRune(buf, ar)
117+
118+
// Append to the result
119+
utf8EncodedBytes = append(utf8EncodedBytes, buf[:n]...)
62120
}
63121
}
64122
}

Diff for: TestModels/dafny-dependencies/StandardLibrary/runtimes/go/TestsFromDafny-go/go.mod

-1
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,3 @@ require github.com/aws/aws-cryptographic-material-providers-library/releases/go/
77
replace github.com/aws/aws-cryptographic-material-providers-library/releases/go/smithy-dafny-standard-library v0.0.0 => ../ImplementationFromDafny-go
88

99
require github.com/dafny-lang/DafnyRuntimeGo/v4 v4.9.1
10-

Diff for: codegen/smithy-dafny-codegen/src/main/java/software/amazon/polymorph/smithygo/awssdk/shapevisitor/AwsSdkToDafnyShapeVisitor.java

+12-20
Original file line numberDiff line numberDiff line change
@@ -434,28 +434,20 @@ var enum interface{}
434434
}
435435

436436
if (shape.hasTrait(DafnyUtf8BytesTrait.class)) {
437-
writer.addUseImports(SmithyGoDependency.stdlib("unicode/utf8"));
437+
throw new UnsupportedOperationException(
438+
"Dafny utf8bytes trait is not supported in aws sdk models: " +
439+
shape.toShapeId().getName()
440+
);
438441
}
439-
final var underlyingType = shape.hasTrait(DafnyUtf8BytesTrait.class)
440-
? """
441-
dafny.SeqOf(func () []interface{} {
442-
utf8.ValidString(%s%s)
443-
b := []byte(%s%s)
444-
f := make([]interface{}, len(b))
445-
for i, v := range b {
446-
f[i] = v
442+
final var underlyingType =
443+
"""
444+
func () dafny.Sequence {
445+
res, err := UTF8.DecodeFromNativeGoByteArray([]byte(%s%s))
446+
if err != nil {
447+
panic("invalid utf8 input provided")
447448
}
448-
return f
449-
}()...)""".formatted(
450-
dereferenceIfRequired,
451-
dataSource,
452-
dereferenceIfRequired,
453-
dataSource
454-
)
455-
: "dafny.SeqOfChars([]dafny.Char(%s%s)...)".formatted(
456-
dereferenceIfRequired,
457-
dataSource
458-
);
449+
return res
450+
}()""".formatted(dereferenceIfRequired, dataSource);
459451

460452
return """
461453
func () %s {

Diff for: codegen/smithy-dafny-codegen/src/main/java/software/amazon/polymorph/smithygo/awssdk/shapevisitor/DafnyToAwsSdkShapeVisitor.java

+14-18
Original file line numberDiff line numberDiff line change
@@ -466,10 +466,12 @@ public String stringShape(final StringShape shape) {
466466
);
467467
}
468468

469-
//Handle the @utf8Bytes Trait
470-
final var underlyingType = shape.hasTrait(DafnyUtf8BytesTrait.class)
471-
? "uint8"
472-
: "dafny.Char";
469+
if (shape.hasTrait(DafnyUtf8BytesTrait.class)) {
470+
throw new UnsupportedOperationException(
471+
"Dafny utf8bytes trait is not supported in aws sdk models: " +
472+
shape.toShapeId().getName()
473+
);
474+
}
473475

474476
var nilCheck = "";
475477
final String unAssertedDataSource = dataSource.startsWith("input.(")
@@ -481,28 +483,22 @@ public String stringShape(final StringShape shape) {
481483
nilCheck =
482484
"if %s == nil { return nil }".formatted(unAssertedDataSource);
483485
} else {
484-
nilCheck = "if %s == nil { return s }".formatted(unAssertedDataSource);
486+
nilCheck =
487+
"if %s == nil { return \"\" }".formatted(unAssertedDataSource);
485488
}
486489
}
487490

488491
return """
489-
func() (%sstring) {
490-
var s string
491-
%s
492-
for i := dafny.Iterate(%s) ; ; {
493-
val, ok := i()
494-
if !ok {
495-
return %s[]string{s}[0]
496-
} else {
497-
s = s + string(val.(%s))
498-
}
499-
}
492+
func() (%sstring) {
493+
%s
494+
a := UTF8.Encode(%s.(dafny.Sequence)).Dtor_value()
495+
s := string(dafny.ToByteArray(a.(dafny.Sequence)))
496+
return %ss
500497
}()""".formatted(
501498
this.isPointable ? "*" : "",
502499
nilCheck,
503500
dataSource,
504-
this.isPointable ? "&" : "",
505-
underlyingType
501+
this.isPointable ? "&" : ""
506502
);
507503
}
508504

0 commit comments

Comments
 (0)