Add raw string literals

Also document + test multi line literals.
wren-lang · Apr 4, 2021 · 981ea4a · 981ea4a
1 parent 345f919
commit 981ea4a
Show file tree

Hide file tree

Showing 4 changed files with 156 additions and 1 deletion.
diff --git a/doc/site/values.markdown b/doc/site/values.markdown
@@ -50,6 +50,16 @@ String literals are surrounded in double quotes:
 "hi there"
 </pre>
 
+They can also span multiple lines:
+
+<pre class="snippet">
+"hi
+there,
+again"
+</pre>
+
+### Escaping
+
 A handful of escape characters are supported:
 
 <pre class="snippet">
@@ -109,6 +119,59 @@ System.print("wow %((1..3).map {|n| n * n}.join())") //> wow 149
 An interpolated expression can even contain a string literal which in turn has
 its own nested interpolations, but doing that gets unreadable pretty quickly.
 
+### Raw strings
+
+A string literal can also be created using triple quotes `"""`, which is
+parsed as a raw string. A raw string produces the same kind of value as
+any other string; only the way the literal is parsed differs.
+
+**Raw strings do not process escapes and do not apply any interpolation**.
+
+<pre class="snippet">
+"""hi there"""
+</pre>
+
+When a raw string spans multiple lines, the newline immediately after the
+opening triple quote is ignored. If only whitespace separates the last
+newline from the closing triple quote, that newline and the whitespace
+after it are ignored too.
+
+<pre class="snippet">
+  """
+    Hello world
+  """
+</pre>
+
+The value of the string above is `    Hello world`; it contains no newlines.
+The newline after the opening `"""`, the final newline, and the whitespace on
+the closing line are all ignored. Note that the leading whitespace on the
+`Hello world` line itself is preserved.
+
+Apart from that trimming, the contents of a raw string are taken exactly as
+they appear in the file. This means it can contain quotes, invalid syntax,
+other data formats and so on without being modified by Wren.
+
+<pre class="snippet">
+"""
+  {
+    "hello": "wren",
+    "from" : "json"
+  }
+"""
+</pre>
+
+One more example: safely embedding Wren code inside a string.
+
+<pre class="snippet">
+"""
+A markdown string with embedded wren code example.
+
+    class Example {
+      construct code() {
+        //
+      }
+    }
+"""
+</pre>
+
 ## Ranges
 
 A range is a little object that represents a consecutive range of numbers. They

diff --git a/src/vm/wren_compiler.c b/src/vm/wren_compiler.c
@@ -849,6 +849,65 @@ static void readUnicodeEscape(Parser* parser, ByteBuffer* string, int length)
   }
 }
 
+// Finishes lexing a raw string literal, i.e. one delimited by `"""`.
+//
+// Expects the first opening quote to have been consumed already, with the
+// next two characters being the rest of the opening delimiter. Characters are
+// copied into the literal verbatim: no escapes and no interpolation.
+//
+// A newline directly after the opening delimiter is dropped. If only spaces
+// or tabs sit between the last newline and the closing delimiter, that
+// newline and the whitespace after it are dropped as well.
+//
+// On an unterminated literal a lex error is reported and the parser is left
+// positioned on the source's terminating '\0', never beyond it.
+static void readRawString(Parser* parser)
+{
+  ByteBuffer string;
+  wrenByteBufferInit(&string);
+
+  // Consume the second and third opening quotes.
+  nextChar(parser);
+  nextChar(parser);
+
+  // If there's a newline immediately after, discard it so it's not part of
+  // the literal.
+  if (peekChar(parser) == '\n') nextChar(parser);
+
+  // Offset in [string] of the most recent newline, or -1 if there is none.
+  int lastNewline = -1;
+
+  // Equal to [lastNewline] for as long as only whitespace has followed that
+  // newline. Any other character resets it to -1.
+  int whitespace = -1;
+
+  for (;;)
+  {
+    char c = nextChar(parser);
+
+    // Check for the end of the source before peeking any further, so that we
+    // never look at memory after the terminator.
+    if (c == '\0')
+    {
+      lexError(parser, "Unterminated raw string.");
+
+      // Step back onto the terminator so the caller sees the end of input.
+      parser->currentChar--;
+      break;
+    }
+
+    char c1 = peekChar(parser);
+    char c2 = peekNextChar(parser);
+
+    if (c == '\n')
+    {
+      lastNewline = string.count;
+      whitespace = lastNewline;
+    }
+
+    if (c == '"' && c1 == '"' && c2 == '"')
+    {
+      // Consume the second and third closing quotes. This happens only when
+      // the delimiter was really found, so an unterminated literal cannot
+      // advance the parser past the end of the source.
+      nextChar(parser);
+      nextChar(parser);
+      break;
+    }
+
+    if (c != '\n' && c != ' ' && c != '\t') whitespace = -1;
+
+    // With fewer than two characters left, a closing delimiter can no longer
+    // fit, so the literal is unterminated.
+    if (c1 == '\0' || c2 == '\0')
+    {
+      lexError(parser, "Unterminated raw string.");
+
+      // Skip what little remains and stop exactly on the terminator.
+      while (peekChar(parser) != '\0') nextChar(parser);
+      break;
+    }
+
+    wrenByteBufferWrite(parser->vm, &string, c);
+  }
+
+  // Drop the final newline and the indentation of the closing delimiter when
+  // nothing but whitespace followed that newline.
+  int count = string.count;
+  if (lastNewline != -1 && whitespace == lastNewline) count = lastNewline;
+
+  parser->next.value = wrenNewStringLength(parser->vm,
+                                           (char*)string.data, count);
+
+  wrenByteBufferClear(parser->vm, &string);
+  makeToken(parser, TOKEN_STRING);
+}
+
 // Finishes lexing a string literal.
 static void readString(Parser* parser)
 {
@@ -1051,7 +1110,13 @@ static void nextToken(Parser* parser)
         }
         break;
 
-      case '"': readString(parser); return;
+      case '"': {
+        if(peekChar(parser) == '"' && peekNextChar(parser)  == '"') {
+          readRawString(parser);
+          return;
+        }
+        readString(parser); return;
+      }
       case '_':
         readName(parser,
                  peekChar(parser) == '_' ? TOKEN_STATIC_FIELD : TOKEN_FIELD);

diff --git a/test/language/string/literals.wren b/test/language/string/literals.wren
@@ -3,3 +3,28 @@ System.print("a string") // expect: a string
 
 // Non-ASCII.
 System.print("A~¶Þॐஃ") // expect: A~¶Þॐஃ
+
+// Raw strings.
+System.print("""A raw string""") // expect: A raw string
+
+var long = "
+  A
+  multi line
+  regular string
+"
+System.print(long) // expect: 
+                   // expect:   A
+                   // expect:   multi line
+                   // expect:   regular string
+                   // expect: 
+
+var raw = """
+  A if*(<invalid>)*
+  multi line /{}()
+  raw string [\]/
+  "json": "value"
+"""
+System.print(raw) // expect:   A if*(<invalid>)*
+                  // expect:   multi line /{}()
+                  // expect:   raw string [\]/
+                  // expect:   "json": "value"
diff --git a/test/language/string/unterminated_raw.wren b/test/language/string/unterminated_raw.wren
@@ -0,0 +1,2 @@
+// expect error line 2
+"""this string has no close quote