diff --git a/doc/site/values.markdown b/doc/site/values.markdown index f56232e87..389babe74 100644 --- a/doc/site/values.markdown +++ b/doc/site/values.markdown @@ -50,6 +50,16 @@ String literals are surrounded in double quotes: "hi there" +They can also span multiple lines: + +
+"hi
+there,
+again"
+
+ +### Escaping + A handful of escape characters are supported:
@@ -109,6 +119,59 @@ System.print("wow %((1..3).map {|n| n * n}.join())") //> wow 149
 An interpolated expression can even contain a string literal which in turn has
 its own nested interpolations, but doing that gets unreadable pretty quickly.
 
+### Raw strings
+
+A string literal can also be created using triple quotes `"""`, which is
+parsed as a raw string. A raw string is no different
+from any other string; it's just parsed in a different way.
+
+**Raw strings do not process escapes and do not apply any interpolation**.
+
+
+"""hi there"""
+
+ +When a raw string spans multiple lines, the newline immediately +after the opening triple quote is ignored, and the last newline, along with any +whitespace after it (before the closing triple quote), is ignored too. + +
+  """
+    Hello world
+  """
+
+ +The value in the string above is ` Hello world`; it contains no newlines. +The newline after `"""` and the whitespace on the closing line are ignored. +Note that the leading whitespace on the `Hello world` line itself is preserved. + +A raw string is parsed exactly as it appears in the file, unmodified. +This means it can contain quotes, invalid syntax, other data formats, +and so on without being modified by Wren. + +
+"""
+  {
+    "hello": "wren",
+    "from" : "json"
+  }
+"""
+
+ +One more example: safely embedding Wren code inside a string. + +
+"""
+A markdown string with embedded wren code example.
+
+    class Example {
+      construct code() {
+        //
+      }
+    }
+"""
+
+
 ## Ranges
 
 A range is a little object that represents a consecutive range of numbers. They
diff --git a/src/vm/wren_compiler.c b/src/vm/wren_compiler.c
index 95c470dde..d3b567a67 100644
--- a/src/vm/wren_compiler.c
+++ b/src/vm/wren_compiler.c
@@ -849,6 +849,94 @@ static void readUnicodeEscape(Parser* parser, ByteBuffer* string, int length)
   }
 }
 
+// Finishes lexing a raw string literal: a string delimited by triple quotes
+// (`"""`) whose contents are taken verbatim, with no escape processing and no
+// interpolation.
+//
+// Expects the first `"` of the opening delimiter to have been consumed already.
+// A newline directly after the opening delimiter is dropped. If nothing but
+// spaces and tabs sits between the last newline and the closing delimiter, that
+// newline and the whitespace after it are dropped too. Reports a lex error if
+// the source ends before the closing delimiter.
+static void readRawString(Parser* parser)
+{
+  ByteBuffer string;
+  wrenByteBufferInit(&string);
+  TokenType type = TOKEN_STRING;
+
+  // Consume the second and third `"` of the opening delimiter.
+  nextChar(parser);
+  nextChar(parser);
+
+  // If there's a newline immediately after, discard it so it's not part of
+  // the literal.
+  if (peekChar(parser) == '\n') nextChar(parser);
+
+  // Offset in [string] of the most recent newline, or -1 if there is none.
+  int lastNewline = -1;
+
+  // Equal to [lastNewline] while only spaces and tabs have followed that
+  // newline; any other character resets it to -1.
+  int whitespace = -1;
+
+  for (;;)
+  {
+    // Look before consuming so we never step past the '\0' that ends the
+    // source.
+    if (peekChar(parser) == '\0')
+    {
+      lexError(parser, "Unterminated raw string.");
+      break;
+    }
+
+    char c = nextChar(parser);
+    char c1 = peekChar(parser);
+    char c2 = peekNextChar(parser);
+
+    if (c == '"' && c1 == '"' && c2 == '"')
+    {
+      // Consume the second and third `"` of the closing delimiter.
+      nextChar(parser);
+      nextChar(parser);
+      break;
+    }
+
+    if (c == '\n')
+    {
+      lastNewline = string.count;
+      whitespace = lastNewline;
+    }
+    else if (c != ' ' && c != '\t')
+    {
+      whitespace = -1;
+    }
+
+    // The source ends within the next two characters, so a closing delimiter
+    // can no longer fit.
+    if (c1 == '\0' || c2 == '\0')
+    {
+      lexError(parser, "Unterminated raw string.");
+
+      // Stop exactly on the '\0' so the lexer sees the end of the source next
+      // instead of reading beyond it.
+      if (c1 != '\0') nextChar(parser);
+      break;
+    }
+
+    wrenByteBufferWrite(parser->vm, &string, c);
+  }
+
+  // Leave out the final newline and the indentation of the closing delimiter.
+  int count = string.count;
+  if (lastNewline != -1 && whitespace == lastNewline) count = lastNewline;
+
+  parser->next.value = wrenNewStringLength(parser->vm,
+                                           (char*)string.data, count);
+
+  wrenByteBufferClear(parser->vm, &string);
+  makeToken(parser, type);
+}
+
 // Finishes lexing a string literal.
static void readString(Parser* parser) { @@ -1051,7 +1110,13 @@ static void nextToken(Parser* parser) } break; - case '"': readString(parser); return; + case '"': { + if(peekChar(parser) == '"' && peekNextChar(parser) == '"') { + readRawString(parser); + return; + } + readString(parser); return; + } case '_': readName(parser, peekChar(parser) == '_' ? TOKEN_STATIC_FIELD : TOKEN_FIELD); diff --git a/test/language/string/literals.wren b/test/language/string/literals.wren index a51124f9d..8cf8c8544 100644 --- a/test/language/string/literals.wren +++ b/test/language/string/literals.wren @@ -3,3 +3,28 @@ System.print("a string") // expect: a string // Non-ASCII. System.print("A~¶Þॐஃ") // expect: A~¶Þॐஃ + +// Raw strings. +System.print("""A raw string""") // expect: A raw string + +var long = " + A + multi line + regular string +" +System.print(long) // expect: + // expect: A + // expect: multi line + // expect: regular string + // expect: + +var raw = """ + A if*()* + multi line /{}() + raw string [\]/ + "json": "value" +""" +System.print(raw) // expect: A if*()* + // expect: multi line /{}() + // expect: raw string [\]/ + // expect: "json": "value" \ No newline at end of file diff --git a/test/language/string/unterminated_raw.wren b/test/language/string/unterminated_raw.wren new file mode 100644 index 000000000..be05432dc --- /dev/null +++ b/test/language/string/unterminated_raw.wren @@ -0,0 +1,2 @@ +// expect error line 2 +"""this string has no close quote \ No newline at end of file