Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

String parsing bug fix.

  • Loading branch information...
commit 96bc340d54c09a770581ccc7116ae0489dc0ae56 1 parent c351ba2
@jmis jmis authored
Showing with 872 additions and 863 deletions.
  1. +379 −379 ClojureExtension.Parsing/Lexer.cs
  2. +493 −484 ClojureExtension.Tests/Parsing/LexerTests.cs
View
758 ClojureExtension.Parsing/Lexer.cs
@@ -1,380 +1,380 @@
-using System;
-using System.Collections.Generic;
-using System.Text;
-
-namespace ClojureExtension.Parsing
-{
- public class Lexer
- {
- private static readonly List<string> BuiltInFunctions = new List<string>()
- {
- "def", "if", "do", "let", "quote", "var", "fn", "loop",
- "recur", "throw", "try", "monitor-enter", "monitor-exit",
- "new", "set!", "."
- };
-
- private static readonly List<char> ValidNonLetterSymbolPrefixes = new List<char>()
- {
- '*', '+', '!', '-', '_', '?', '>', '<', '=', '$', '&'
- };
-
- private readonly PushBackCharacterStream _source;
-
- public Lexer(PushBackCharacterStream inputText)
- {
- _source = inputText;
- }
-
- public Token Next()
- {
- if (!_source.HasMore) return null;
-
- char currentChar = _source.Next();
- Token nextToken = null;
-
- if (currentChar == '(')
- {
- nextToken = new Token(TokenType.ListStart, currentChar.ToString(), _source.CurrentIndex - 1, 1);
- }
- else if (currentChar == ')')
- {
- nextToken = new Token(TokenType.ListEnd, currentChar.ToString(), _source.CurrentIndex - 1, 1);
- }
- else if (currentChar == '[')
- {
- nextToken = new Token(TokenType.VectorStart, currentChar.ToString(), _source.CurrentIndex - 1, 1);
- }
- else if (currentChar == ']')
- {
- nextToken = new Token(TokenType.VectorEnd, currentChar.ToString(), _source.CurrentIndex - 1, 1);
- }
- else if (currentChar == '{')
- {
- nextToken = new Token(TokenType.MapStart, currentChar.ToString(), _source.CurrentIndex - 1, 1);
- }
- else if (currentChar == '}')
- {
- nextToken = new Token(TokenType.MapEnd, currentChar.ToString(), _source.CurrentIndex - 1, 1);
- }
- else if (currentChar == ':')
- {
- _source.Push(currentChar);
- string keyword = ReadKeyword();
- nextToken = new Token(TokenType.Keyword, keyword, _source.CurrentIndex - keyword.Length, keyword.Length);
- }
- else if (IsString(currentChar, "#_"))
- {
- ReadChars(1);
- nextToken = new Token(TokenType.IgnoreReaderMacro, "#_", _source.CurrentIndex - 2, 2);
- }
- else if (BuiltInFunctions.Find(f => IsString(currentChar, f)) != null)
- {
- string match = BuiltInFunctions.Find(f => IsString(currentChar, f));
- ReadChars(match.Length-1);
- nextToken = new Token(TokenType.BuiltIn, match, _source.CurrentIndex - match.Length, match.Length);
- }
- else if (currentChar == '\\' && !IsNextCharWhitespace())
- {
- string character = ReadCharacter(currentChar);
- nextToken = new Token(TokenType.Character, character, _source.CurrentIndex - character.Length, character.Length);
- }
- else if (IsPrefix(currentChar, "0x"))
- {
- ReadChars(1);
- string number = "0x" + ReadNumber();
- nextToken = new Token(TokenType.HexNumber, number, _source.CurrentIndex - number.Length, number.Length);
- }
- else if (Char.IsNumber(currentChar))
- {
- _source.Push(currentChar);
- string number = ReadNumber();
- nextToken = new Token(TokenType.Number, number, _source.CurrentIndex - number.Length, number.Length);
- }
- else if (currentChar == '"')
- {
- _source.Push(currentChar);
- string str = ReadString();
- nextToken = new Token(TokenType.String, str, _source.CurrentIndex - str.Length, str.Length);
- }
- else if (IsWhitespace(currentChar))
- {
- _source.Push(currentChar);
- string str = ReadWhitespace();
- nextToken = new Token(TokenType.Whitespace, str, _source.CurrentIndex - str.Length, str.Length);
- }
- else if (currentChar == ';')
- {
- _source.Push(currentChar);
- string str = ReadComment();
- nextToken = new Token(TokenType.Comment, str, _source.CurrentIndex - str.Length, str.Length);
- }
- else if (IsString(currentChar, "true"))
- {
- ReadChars(3);
- nextToken = new Token(TokenType.Boolean, "true", _source.CurrentIndex - 4, 4);
- }
- else if (IsString(currentChar, "false"))
- {
- ReadChars(4);
- nextToken = new Token(TokenType.Boolean, "false", _source.CurrentIndex - 5, 5);
- }
- else if (IsString(currentChar, "nil"))
- {
- ReadChars(2);
- nextToken = new Token(TokenType.Nil, "nil", _source.CurrentIndex - 3, 3);
- }
- else if (IsSymbolPrefix(currentChar))
- {
- _source.Push(currentChar);
- string str = ReadSymbol();
- nextToken = new Token(TokenType.Symbol, str, _source.CurrentIndex - str.Length, str.Length);
- }
- else
- {
- nextToken = new Token(TokenType.Unknown, currentChar.ToString(), _source.CurrentIndex - 1, 1);
- }
-
- return nextToken;
- }
-
- private string ReadCharacter(char currentChar)
- {
- if (IsString(currentChar, "\\newline")) return currentChar + ReadChars(7);
- else if (IsString(currentChar, "\\space")) return currentChar + ReadChars(5);
- else if (IsString(currentChar, "\\tab")) return currentChar + ReadChars(3);
-
- string firstCharacter = ReadChars(1);
-
- if (firstCharacter == "u")
- {
- string nextFourCharacters = ReadChars(4);
- if (IsCharacterDefinedByHexDigits(nextFourCharacters)) return currentChar + firstCharacter + nextFourCharacters;
- _source.Push(nextFourCharacters);
- }
-
- return currentChar + firstCharacter;
- }
-
- private static bool IsCharacterDefinedByHexDigits(string str)
- {
- if (str.Length != 4) return false;
- string hexChars = "abcdefABCDEF0123456789";
-
- foreach (char c in str)
- if (!hexChars.Contains(c.ToString()))
- return false;
-
- return true;
- }
-
- private bool IsNextCharWhitespace()
- {
- string nextChar = ReadChars(1);
- bool nextCharIsWhitespace = string.IsNullOrEmpty(nextChar) && IsWhitespace(nextChar[0]);
- _source.Push(nextChar);
- return nextCharIsWhitespace;
- }
-
- private bool IsPrefix(char currentChar, string stringToMatch)
- {
- StringBuilder str = new StringBuilder(currentChar.ToString());
- str.Append(ReadChars(stringToMatch.Length - 1));
- string nextChar = ReadChars(1);
-
- if (nextChar.Length > 0 && !IsDataStructureStart(nextChar[0]) && !IsTerminatingChar(nextChar[0]))
- {
- _source.Push(nextChar);
- bool isMatch = str.ToString() == stringToMatch;
- _source.Push(str.ToString().Substring(1));
- return isMatch;
- }
-
- _source.Push(nextChar);
- _source.Push(str.ToString().Substring(1));
- return false;
- }
-
- private bool IsString(char currentChar, string stringToMatch)
- {
- StringBuilder str = new StringBuilder(currentChar.ToString());
- str.Append(ReadChars(stringToMatch.Length - 1));
- string nextChar = ReadChars(1);
-
- if (nextChar.Length == 0 || IsDataStructureStart(nextChar[0]) || IsTerminatingChar(nextChar[0]))
- {
- _source.Push(nextChar);
- bool isMatch = str.ToString() == stringToMatch;
- _source.Push(str.ToString().Substring(1));
- return isMatch;
- }
-
- _source.Push(nextChar);
- _source.Push(str.ToString().Substring(1));
- return false;
- }
-
- private string ReadChars(int charCount)
- {
- var chars = new StringBuilder();
-
- for (int i = 0; i < charCount; i++)
- {
- if (_source.HasMore) chars.Append(_source.Next());
- else return chars.ToString();
- }
-
- return chars.ToString();
- }
-
- private string ReadKeyword()
- {
- var parsedKeyword = new StringBuilder();
- char currentChar = _source.Next();
-
- while (!IsTerminatingChar(currentChar) && !IsDataStructureStart(currentChar))
- {
- parsedKeyword.Append(currentChar);
- if (_source.HasMore) currentChar = _source.Next();
- else return parsedKeyword.ToString();
- }
-
- _source.Push(currentChar);
- return parsedKeyword.ToString();
- }
-
- private string ReadSymbol()
- {
- var parsedSymbol = new StringBuilder();
- char currentChar = _source.Next();
-
- while (!IsTerminatingChar(currentChar))
- {
- parsedSymbol.Append(currentChar);
- if (_source.HasMore) currentChar = _source.Next();
- else return parsedSymbol.ToString();
- }
-
- _source.Push(currentChar);
- return parsedSymbol.ToString();
- }
-
- private string ReadComment()
- {
- return PutBackTrailingReturnCharacters(ReadToEndOfLineIncludingReturnCharacters());
- }
-
- private string PutBackTrailingReturnCharacters(string text)
- {
- for (int i=text.Length-1; i>=0; i--)
- {
- if (text[i] == '\r' || text[i] == '\n')
- {
- _source.Push(text[i]);
- }
- else
- {
- return text.Substring(0, i + 1);
- }
- }
-
- return string.Empty;
- }
-
- private string ReadToEndOfLineIncludingReturnCharacters()
- {
- var parsedLine = new StringBuilder();
- char currentChar = _source.Next();
-
- while (currentChar != '\r' && currentChar != '\n')
- {
- parsedLine.Append(currentChar);
- if (_source.HasMore) currentChar = _source.Next();
- else return parsedLine.ToString();
- }
-
- parsedLine.Append(currentChar);
-
- if (currentChar == '\r')
- {
- currentChar = _source.Next();
- if (currentChar == '\n') parsedLine.Append(currentChar);
- else _source.Push(currentChar);
- }
-
- return parsedLine.ToString();
- }
-
- private string ReadWhitespace()
- {
- var parsedWhitespace = new StringBuilder();
- char currentChar = _source.Next();
-
- while (IsWhitespace(currentChar))
- {
- parsedWhitespace.Append(currentChar);
- if (_source.HasMore) currentChar = _source.Next();
- else return parsedWhitespace.ToString();
- }
-
- _source.Push(currentChar);
- return parsedWhitespace.ToString();
- }
-
- private string ReadNumber()
- {
- var parsedNumber = new StringBuilder();
- char currentChar = _source.Next();
-
- while (!IsTerminatingChar(currentChar))
- {
- parsedNumber.Append(currentChar);
- if (_source.HasMore) currentChar = _source.Next();
- else return parsedNumber.ToString();
- }
-
- _source.Push(currentChar);
- return parsedNumber.ToString();
- }
-
- private string ReadString()
- {
- var parsedString = new StringBuilder();
- char currentChar = _source.Next();
- parsedString.Append(currentChar);
- if (!_source.HasMore) return parsedString.ToString();
- currentChar = _source.Next();
- bool previousCharWasBackslash = false;
-
- while (currentChar != '"' || (currentChar == '"' && previousCharWasBackslash))
- {
- parsedString.Append(currentChar);
- previousCharWasBackslash = currentChar == '\\';
- if (_source.HasMore) currentChar = _source.Next();
- else return parsedString.ToString();
- }
-
- parsedString.Append(currentChar);
- return parsedString.ToString();
- }
-
- private static bool IsWhitespace(char c)
- {
- return Char.IsWhiteSpace(c) || c == ',';
- }
-
- private static bool IsTerminatingChar(char c)
- {
- return c == ')' || c == '}' || c == ']' || IsWhitespace(c) || c == ';' || c == '"' || c == '\\';
- }
-
- private static bool IsDataStructureStart(char c)
- {
- return c == '(' || c == '{' || c == '[';
- }
-
- private static bool IsSymbolPrefix(char c)
- {
- return Char.IsLetter(c) || ValidNonLetterSymbolPrefixes.Contains(c);
- }
- }
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace ClojureExtension.Parsing
+{
+ public class Lexer
+ {
+ private static readonly List<string> BuiltInFunctions = new List<string>()
+ {
+ "def", "if", "do", "let", "quote", "var", "fn", "loop",
+ "recur", "throw", "try", "monitor-enter", "monitor-exit",
+ "new", "set!", "."
+ };
+
+ private static readonly List<char> ValidNonLetterSymbolPrefixes = new List<char>()
+ {
+ '*', '+', '!', '-', '_', '?', '>', '<', '=', '$', '&'
+ };
+
+ private readonly PushBackCharacterStream _source;
+
+ public Lexer(PushBackCharacterStream inputText)
+ {
+ _source = inputText;
+ }
+
+ public Token Next()
+ {
+ if (!_source.HasMore) return null;
+
+ char currentChar = _source.Next();
+ Token nextToken = null;
+
+ if (currentChar == '(')
+ {
+ nextToken = new Token(TokenType.ListStart, currentChar.ToString(), _source.CurrentIndex - 1, 1);
+ }
+ else if (currentChar == ')')
+ {
+ nextToken = new Token(TokenType.ListEnd, currentChar.ToString(), _source.CurrentIndex - 1, 1);
+ }
+ else if (currentChar == '[')
+ {
+ nextToken = new Token(TokenType.VectorStart, currentChar.ToString(), _source.CurrentIndex - 1, 1);
+ }
+ else if (currentChar == ']')
+ {
+ nextToken = new Token(TokenType.VectorEnd, currentChar.ToString(), _source.CurrentIndex - 1, 1);
+ }
+ else if (currentChar == '{')
+ {
+ nextToken = new Token(TokenType.MapStart, currentChar.ToString(), _source.CurrentIndex - 1, 1);
+ }
+ else if (currentChar == '}')
+ {
+ nextToken = new Token(TokenType.MapEnd, currentChar.ToString(), _source.CurrentIndex - 1, 1);
+ }
+ else if (currentChar == ':')
+ {
+ _source.Push(currentChar);
+ string keyword = ReadKeyword();
+ nextToken = new Token(TokenType.Keyword, keyword, _source.CurrentIndex - keyword.Length, keyword.Length);
+ }
+ else if (IsString(currentChar, "#_"))
+ {
+ ReadChars(1);
+ nextToken = new Token(TokenType.IgnoreReaderMacro, "#_", _source.CurrentIndex - 2, 2);
+ }
+ else if (BuiltInFunctions.Find(f => IsString(currentChar, f)) != null)
+ {
+ string match = BuiltInFunctions.Find(f => IsString(currentChar, f));
+ ReadChars(match.Length-1);
+ nextToken = new Token(TokenType.BuiltIn, match, _source.CurrentIndex - match.Length, match.Length);
+ }
+ else if (currentChar == '\\' && !IsNextCharWhitespace())
+ {
+ string character = ReadCharacter(currentChar);
+ nextToken = new Token(TokenType.Character, character, _source.CurrentIndex - character.Length, character.Length);
+ }
+ else if (IsPrefix(currentChar, "0x"))
+ {
+ ReadChars(1);
+ string number = "0x" + ReadNumber();
+ nextToken = new Token(TokenType.HexNumber, number, _source.CurrentIndex - number.Length, number.Length);
+ }
+ else if (Char.IsNumber(currentChar))
+ {
+ _source.Push(currentChar);
+ string number = ReadNumber();
+ nextToken = new Token(TokenType.Number, number, _source.CurrentIndex - number.Length, number.Length);
+ }
+ else if (currentChar == '"')
+ {
+ _source.Push(currentChar);
+ string str = ReadString();
+ nextToken = new Token(TokenType.String, str, _source.CurrentIndex - str.Length, str.Length);
+ }
+ else if (IsWhitespace(currentChar))
+ {
+ _source.Push(currentChar);
+ string str = ReadWhitespace();
+ nextToken = new Token(TokenType.Whitespace, str, _source.CurrentIndex - str.Length, str.Length);
+ }
+ else if (currentChar == ';')
+ {
+ _source.Push(currentChar);
+ string str = ReadComment();
+ nextToken = new Token(TokenType.Comment, str, _source.CurrentIndex - str.Length, str.Length);
+ }
+ else if (IsString(currentChar, "true"))
+ {
+ ReadChars(3);
+ nextToken = new Token(TokenType.Boolean, "true", _source.CurrentIndex - 4, 4);
+ }
+ else if (IsString(currentChar, "false"))
+ {
+ ReadChars(4);
+ nextToken = new Token(TokenType.Boolean, "false", _source.CurrentIndex - 5, 5);
+ }
+ else if (IsString(currentChar, "nil"))
+ {
+ ReadChars(2);
+ nextToken = new Token(TokenType.Nil, "nil", _source.CurrentIndex - 3, 3);
+ }
+ else if (IsSymbolPrefix(currentChar))
+ {
+ _source.Push(currentChar);
+ string str = ReadSymbol();
+ nextToken = new Token(TokenType.Symbol, str, _source.CurrentIndex - str.Length, str.Length);
+ }
+ else
+ {
+ nextToken = new Token(TokenType.Unknown, currentChar.ToString(), _source.CurrentIndex - 1, 1);
+ }
+
+ return nextToken;
+ }
+
+ private string ReadCharacter(char currentChar)
+ {
+ if (IsString(currentChar, "\\newline")) return currentChar + ReadChars(7);
+ else if (IsString(currentChar, "\\space")) return currentChar + ReadChars(5);
+ else if (IsString(currentChar, "\\tab")) return currentChar + ReadChars(3);
+
+ string firstCharacter = ReadChars(1);
+
+ if (firstCharacter == "u")
+ {
+ string nextFourCharacters = ReadChars(4);
+ if (IsCharacterDefinedByHexDigits(nextFourCharacters)) return currentChar + firstCharacter + nextFourCharacters;
+ _source.Push(nextFourCharacters);
+ }
+
+ return currentChar + firstCharacter;
+ }
+
+ private static bool IsCharacterDefinedByHexDigits(string str)
+ {
+ if (str.Length != 4) return false;
+ string hexChars = "abcdefABCDEF0123456789";
+
+ foreach (char c in str)
+ if (!hexChars.Contains(c.ToString()))
+ return false;
+
+ return true;
+ }
+
+ private bool IsNextCharWhitespace()
+ {
+ string nextChar = ReadChars(1);
+ bool nextCharIsWhitespace = string.IsNullOrEmpty(nextChar) && IsWhitespace(nextChar[0]);
+ _source.Push(nextChar);
+ return nextCharIsWhitespace;
+ }
+
+ private bool IsPrefix(char currentChar, string stringToMatch)
+ {
+ StringBuilder str = new StringBuilder(currentChar.ToString());
+ str.Append(ReadChars(stringToMatch.Length - 1));
+ string nextChar = ReadChars(1);
+
+ if (nextChar.Length > 0 && !IsDataStructureStart(nextChar[0]) && !IsTerminatingChar(nextChar[0]))
+ {
+ _source.Push(nextChar);
+ bool isMatch = str.ToString() == stringToMatch;
+ _source.Push(str.ToString().Substring(1));
+ return isMatch;
+ }
+
+ _source.Push(nextChar);
+ _source.Push(str.ToString().Substring(1));
+ return false;
+ }
+
+ private bool IsString(char currentChar, string stringToMatch)
+ {
+ StringBuilder str = new StringBuilder(currentChar.ToString());
+ str.Append(ReadChars(stringToMatch.Length - 1));
+ string nextChar = ReadChars(1);
+
+ if (nextChar.Length == 0 || IsDataStructureStart(nextChar[0]) || IsTerminatingChar(nextChar[0]))
+ {
+ _source.Push(nextChar);
+ bool isMatch = str.ToString() == stringToMatch;
+ _source.Push(str.ToString().Substring(1));
+ return isMatch;
+ }
+
+ _source.Push(nextChar);
+ _source.Push(str.ToString().Substring(1));
+ return false;
+ }
+
+ private string ReadChars(int charCount)
+ {
+ var chars = new StringBuilder();
+
+ for (int i = 0; i < charCount; i++)
+ {
+ if (_source.HasMore) chars.Append(_source.Next());
+ else return chars.ToString();
+ }
+
+ return chars.ToString();
+ }
+
+ private string ReadKeyword()
+ {
+ var parsedKeyword = new StringBuilder();
+ char currentChar = _source.Next();
+
+ while (!IsTerminatingChar(currentChar) && !IsDataStructureStart(currentChar))
+ {
+ parsedKeyword.Append(currentChar);
+ if (_source.HasMore) currentChar = _source.Next();
+ else return parsedKeyword.ToString();
+ }
+
+ _source.Push(currentChar);
+ return parsedKeyword.ToString();
+ }
+
+ private string ReadSymbol()
+ {
+ var parsedSymbol = new StringBuilder();
+ char currentChar = _source.Next();
+
+ while (!IsTerminatingChar(currentChar))
+ {
+ parsedSymbol.Append(currentChar);
+ if (_source.HasMore) currentChar = _source.Next();
+ else return parsedSymbol.ToString();
+ }
+
+ _source.Push(currentChar);
+ return parsedSymbol.ToString();
+ }
+
+ private string ReadComment()
+ {
+ return PutBackTrailingReturnCharacters(ReadToEndOfLineIncludingReturnCharacters());
+ }
+
+ private string PutBackTrailingReturnCharacters(string text)
+ {
+ for (int i=text.Length-1; i>=0; i--)
+ {
+ if (text[i] == '\r' || text[i] == '\n')
+ {
+ _source.Push(text[i]);
+ }
+ else
+ {
+ return text.Substring(0, i + 1);
+ }
+ }
+
+ return string.Empty;
+ }
+
+ private string ReadToEndOfLineIncludingReturnCharacters()
+ {
+ var parsedLine = new StringBuilder();
+ char currentChar = _source.Next();
+
+ while (currentChar != '\r' && currentChar != '\n')
+ {
+ parsedLine.Append(currentChar);
+ if (_source.HasMore) currentChar = _source.Next();
+ else return parsedLine.ToString();
+ }
+
+ parsedLine.Append(currentChar);
+
+ if (currentChar == '\r')
+ {
+ currentChar = _source.Next();
+ if (currentChar == '\n') parsedLine.Append(currentChar);
+ else _source.Push(currentChar);
+ }
+
+ return parsedLine.ToString();
+ }
+
+ private string ReadWhitespace()
+ {
+ var parsedWhitespace = new StringBuilder();
+ char currentChar = _source.Next();
+
+ while (IsWhitespace(currentChar))
+ {
+ parsedWhitespace.Append(currentChar);
+ if (_source.HasMore) currentChar = _source.Next();
+ else return parsedWhitespace.ToString();
+ }
+
+ _source.Push(currentChar);
+ return parsedWhitespace.ToString();
+ }
+
+ private string ReadNumber()
+ {
+ var parsedNumber = new StringBuilder();
+ char currentChar = _source.Next();
+
+ while (!IsTerminatingChar(currentChar))
+ {
+ parsedNumber.Append(currentChar);
+ if (_source.HasMore) currentChar = _source.Next();
+ else return parsedNumber.ToString();
+ }
+
+ _source.Push(currentChar);
+ return parsedNumber.ToString();
+ }
+
+ private string ReadString()
+ {
+ var parsedString = new StringBuilder();
+ char currentChar = _source.Next();
+ parsedString.Append(currentChar);
+ if (!_source.HasMore) return parsedString.ToString();
+ currentChar = _source.Next();
+ bool previousCharWasBackslash = false;
+
+ while (currentChar != '"' || (currentChar == '"' && previousCharWasBackslash))
+ {
+ parsedString.Append(currentChar);
+ previousCharWasBackslash = currentChar == '\\' && !previousCharWasBackslash;
+ if (_source.HasMore) currentChar = _source.Next();
+ else return parsedString.ToString();
+ }
+
+ parsedString.Append(currentChar);
+ return parsedString.ToString();
+ }
+
+ private static bool IsWhitespace(char c)
+ {
+ return Char.IsWhiteSpace(c) || c == ',';
+ }
+
+ private static bool IsTerminatingChar(char c)
+ {
+ return c == ')' || c == '}' || c == ']' || IsWhitespace(c) || c == ';' || c == '"' || c == '\\';
+ }
+
+ private static bool IsDataStructureStart(char c)
+ {
+ return c == '(' || c == '{' || c == '[';
+ }
+
+ private static bool IsSymbolPrefix(char c)
+ {
+ return Char.IsLetter(c) || ValidNonLetterSymbolPrefixes.Contains(c);
+ }
+ }
}
View
977 ClojureExtension.Tests/Parsing/LexerTests.cs
@@ -1,484 +1,493 @@
-using System.IO;
-using ClojureExtension.Parsing;
-using Microsoft.VisualStudio.TestTools.UnitTesting;
-
-namespace ClojureExtension.Tests.Parsing
-{
- [TestClass]
- public class LexerTests
- {
- [TestMethod]
- public void ShouldReturnNullWhenAtEndOfStream()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("")));
- Assert.IsNull(lexer.Next());
- }
-
- [TestMethod]
- public void ShouldReturnNumberTokenTypeWhenInputIsNumber()
- {
- var stream = new PushBackCharacterStream(new StringReader("123"));
- Lexer lexer = new Lexer(stream);
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Number, token.Type);
- Assert.AreEqual("123", token.Text);
- Assert.IsFalse(stream.HasMore);
- }
-
- [TestMethod]
- public void ShouldReturnNumberTokenTypeWhenInputIsInvalidNumber()
- {
- var stream = new PushBackCharacterStream(new StringReader("123asdf"));
- Lexer lexer = new Lexer(stream);
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Number, token.Type);
- Assert.AreEqual("123asdf", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnListStartTokenTypeWhenInputIsAnOpenParen()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("(")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.ListStart, token.Type);
- Assert.AreEqual("(", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnListEndTokenTypeWhenInputIsAClosedParen()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader(")")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.ListEnd, token.Type);
- Assert.AreEqual(")", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnVectorStartTokenTypeWhenInputIsAnOpenBracket()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("[")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.VectorStart, token.Type);
- Assert.AreEqual("[", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnVectorEndTokenTypeWhenInputIsAClosedBracket()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("]")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.VectorEnd, token.Type);
- Assert.AreEqual("]", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnMapStartTokenTypeWhenInputIsAnOpenCurlyBrace()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("{")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.MapStart, token.Type);
- Assert.AreEqual("{", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnMapEndTokenTypeWhenInputIsAClosedCurlyBrace()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("}")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.MapEnd, token.Type);
- Assert.AreEqual("}", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnStringForProperlyTerminatingString()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("\"asdf\"")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.String, token.Type);
- Assert.AreEqual("\"asdf\"", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnStringForRunOnString()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("\"asdfasdf")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.String, token.Type);
- Assert.AreEqual("\"asdfasdf", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnStringThatDoesNotTerminateOnBackslashQuote()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("\"asdf\\\"asdf\"")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.String, token.Type);
- Assert.AreEqual("\"asdf\\\"asdf\"", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnWhitespaceForTabsSpacesCommasAndReturnCharacters()
- {
- string input = " \t \r\n , ";
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader(input)));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Whitespace, token.Type);
- Assert.AreEqual(input, token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnNumberFollowByWhitespaceAndAString()
- {
- var stream = new PushBackCharacterStream(new StringReader("123 \"asdf\""));
- Lexer lexer = new Lexer(stream);
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Number, token.Type);
- Assert.AreEqual("123", token.Text);
- Assert.AreEqual(0, token.StartIndex);
-
- token = lexer.Next();
- Assert.AreEqual(TokenType.Whitespace, token.Type);
- Assert.AreEqual(" ", token.Text);
- Assert.AreEqual(3, token.StartIndex);
-
- token = lexer.Next();
- Assert.AreEqual(TokenType.String, token.Type);
- Assert.AreEqual("\"asdf\"", token.Text);
- Assert.AreEqual(4, token.StartIndex);
- }
-
- [TestMethod]
- public void ShouldReturnRealNumber()
- {
- var stream = new PushBackCharacterStream(new StringReader("123.321"));
- Lexer lexer = new Lexer(stream);
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Number, token.Type);
- Assert.AreEqual("123.321", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnHexNumber()
- {
- var stream = new PushBackCharacterStream(new StringReader("0x123A"));
- Lexer lexer = new Lexer(stream);
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.HexNumber, token.Type);
- Assert.AreEqual("0x123A", token.Text);
- Assert.IsFalse(stream.HasMore);
- }
-
- [TestMethod]
- public void ShouldReturnCommentWithTrailingWhitespace()
- {
- var stream = new PushBackCharacterStream(new StringReader("; test text \r\n"));
- Lexer lexer = new Lexer(stream);
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Comment, token.Type);
- Assert.AreEqual("; test text ", token.Text);
-
- token = lexer.Next();
- Assert.AreEqual(TokenType.Whitespace, token.Type);
- Assert.AreEqual("\r\n", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnCommentThatExtendsToEndOfInput()
- {
- var stream = new PushBackCharacterStream(new StringReader("; test"));
- Lexer lexer = new Lexer(stream);
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Comment, token.Type);
- Assert.AreEqual("; test", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnCommentToEndOfLineOnly()
- {
- var stream = new PushBackCharacterStream(new StringReader("; test\r\n123"));
- Lexer lexer = new Lexer(stream);
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Comment, token.Type);
- Assert.AreEqual("; test", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnSymbol()
- {
- var stream = new PushBackCharacterStream(new StringReader("test"));
- Lexer lexer = new Lexer(stream);
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Symbol, token.Type);
- Assert.AreEqual("test", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnSymbolWhenItHasADot()
- {
- var stream = new PushBackCharacterStream(new StringReader("namespace.test"));
- Lexer lexer = new Lexer(stream);
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Symbol, token.Type);
- Assert.AreEqual("namespace.test", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnSymbolImmediatelyFollowedByComment()
- {
- var stream = new PushBackCharacterStream(new StringReader("test;comment"));
- Lexer lexer = new Lexer(stream);
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Symbol, token.Type);
- Assert.AreEqual("test", token.Text);
-
- token = lexer.Next();
- Assert.AreEqual(TokenType.Comment, token.Type);
- Assert.AreEqual(";comment", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnTwoSymbolsSeparatedByWhitespace()
- {
- var stream = new PushBackCharacterStream(new StringReader("symbol1 symbol2"));
- Lexer lexer = new Lexer(stream);
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Symbol, token.Type);
- Assert.AreEqual("symbol1", token.Text);
-
- token = lexer.Next();
- Assert.AreEqual(TokenType.Whitespace, token.Type);
-
- token = lexer.Next();
- Assert.AreEqual(TokenType.Symbol, token.Type);
- Assert.AreEqual("symbol2", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnKeyword()
- {
- var stream = new PushBackCharacterStream(new StringReader(":asdf"));
- Lexer lexer = new Lexer(stream);
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Keyword, token.Type);
- Assert.AreEqual(":asdf", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnKeywordWithNoName()
- {
- var stream = new PushBackCharacterStream(new StringReader(":"));
- Lexer lexer = new Lexer(stream);
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Keyword, token.Type);
- Assert.AreEqual(":", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnKeywordFollowByListStart()
- {
- var stream = new PushBackCharacterStream(new StringReader(":asdf("));
- Lexer lexer = new Lexer(stream);
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Keyword, token.Type);
- Assert.AreEqual(":asdf", token.Text);
- Assert.AreEqual(TokenType.ListStart, lexer.Next().Type);
- }
-
- [TestMethod]
- public void ShouldReturnBooleanWhenTrueIsInput()
- {
- var stream = new PushBackCharacterStream(new StringReader("true"));
- Lexer lexer = new Lexer(stream);
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Boolean, token.Type);
- Assert.AreEqual("true", token.Text);
- Assert.IsFalse(stream.HasMore);
- }
-
- [TestMethod]
- public void ShouldReturnBooleanWhenFalseIsInput()
- {
- var stream = new PushBackCharacterStream(new StringReader("false"));
- Lexer lexer = new Lexer(stream);
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Boolean, token.Type);
- Assert.AreEqual("false", token.Text);
- Assert.IsFalse(stream.HasMore);
- }
-
- [TestMethod]
- public void ShouldReturnNil()
- {
- var stream = new PushBackCharacterStream(new StringReader("nil"));
- Lexer lexer = new Lexer(stream);
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Nil, token.Type);
- Assert.AreEqual("nil", token.Text);
- Assert.IsFalse(stream.HasMore);
- }
-
- [TestMethod]
- public void ShouldStopParsingSymbolWhenDoubleQuoteFound()
- {
- var stream = new PushBackCharacterStream(new StringReader("asdf\"str\""));
- Lexer lexer = new Lexer(stream);
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Symbol, token.Type);
- Assert.AreEqual("asdf", token.Text);
-
- token = lexer.Next();
- Assert.AreEqual(TokenType.String, token.Type);
- Assert.AreEqual("\"str\"", token.Text);
- }
-
- [TestMethod]
- public void ShouldReadBackslashNewLineAsCharacter()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("\\newline")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Character, token.Type);
- Assert.AreEqual("\\newline", token.Text);
- }
-
- [TestMethod]
- public void ShouldReadBackslashTabAsCharacter()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("\\tab")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Character, token.Type);
- Assert.AreEqual("\\tab", token.Text);
- }
-
- [TestMethod]
- public void ShouldReadBackslashSpaceAsCharacter()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("\\space")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Character, token.Type);
- Assert.AreEqual("\\space", token.Text);
- }
-
- [TestMethod]
- public void ShouldReadBackslashUFollowedByFourHexDigitsAsCharacter()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("\\uF04A")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Character, token.Type);
- Assert.AreEqual("\\uF04A", token.Text);
- }
-
- [TestMethod]
- public void ShouldReadBackslashUAsChar()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("\\u")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Character, token.Type);
- Assert.AreEqual("\\u", token.Text);
- }
-
- [TestMethod]
- public void ShouldReadBackslashAAsChar()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("\\a")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Character, token.Type);
- Assert.AreEqual("\\a", token.Text);
- }
-
- [TestMethod]
- public void ShouldReadBackslashABackSlashFAsTwoCharacters()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("\\a\\f")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Character, token.Type);
- Assert.AreEqual("\\a", token.Text);
- token = lexer.Next();
- Assert.AreEqual(TokenType.Character, token.Type);
- Assert.AreEqual("\\f", token.Text);
- }
-
- [TestMethod]
- public void ShouldReadBackslashUFollowedByTwoHexDigitsAsSingleUCharacterFollowedByANumber()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("\\u19")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Character, token.Type);
- Assert.AreEqual("\\u", token.Text);
- token = lexer.Next();
- Assert.AreEqual(TokenType.Number, token.Type);
- Assert.AreEqual("19", token.Text);
- }
-
- [TestMethod]
- public void ShouldReadBackslashUFollowedByThreeHexDigitsAndAZAsSingleUCharacterFollowedByASymbol()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("\\uAF9Z")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Character, token.Type);
- Assert.AreEqual("\\u", token.Text);
- token = lexer.Next();
- Assert.AreEqual(TokenType.Symbol, token.Type);
- Assert.AreEqual("AF9Z", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnSymbolFollowedByCharacter()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("asdf\\s")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Symbol, token.Type);
- Assert.AreEqual("asdf", token.Text);
- token = lexer.Next();
- Assert.AreEqual(TokenType.Character, token.Type);
- Assert.AreEqual("\\s", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnNumberFollowedByCharacter()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("123\\s")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Number, token.Type);
- Assert.AreEqual("123", token.Text);
- token = lexer.Next();
- Assert.AreEqual(TokenType.Character, token.Type);
- Assert.AreEqual("\\s", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnSymbolStartingWithAmpersand()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("&123asdf")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Symbol, token.Type);
- Assert.AreEqual("&123asdf", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnSymbolWithOnlyASingleAmpersand()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("&")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.Symbol, token.Type);
- Assert.AreEqual("&", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnIgnoreReaderMacro()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("#_(defn")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.IgnoreReaderMacro, token.Type);
- Assert.AreEqual("#_", token.Text);
- }
-
- [TestMethod]
- public void ShouldReturnStringTokenWhenInputIsOnlyADoubleQuote()
- {
- Lexer lexer = new Lexer(new PushBackCharacterStream(new StringReader("\"")));
- Token token = lexer.Next();
- Assert.AreEqual(TokenType.String, token.Type);
- Assert.AreEqual("\"", token.Text);
- }
- }
-}
+using System.IO;
+using ClojureExtension.Parsing;
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+
+namespace ClojureExtension.Tests.Parsing
+{
+ /// <summary>
+ /// Unit tests for <see cref="Lexer"/>. Each test feeds a short input string to the
+ /// lexer through a <see cref="PushBackCharacterStream"/> and checks the type and text
+ /// of the token(s) returned by successive calls to <c>Next()</c>.
+ /// </summary>
+ [TestClass]
+ public class LexerTests
+ {
+     /// <summary>
+     /// Wraps <paramref name="input"/> in the push-back stream the lexer consumes.
+     /// Used directly by tests that also need to inspect the stream afterwards.
+     /// </summary>
+     private static PushBackCharacterStream CreateStream(string input)
+     {
+         return new PushBackCharacterStream(new StringReader(input));
+     }
+
+     /// <summary>Builds a lexer positioned at the start of <paramref name="input"/>.</summary>
+     private static Lexer CreateLexer(string input)
+     {
+         return new Lexer(CreateStream(input));
+     }
+
+     /// <summary>
+     /// Asserts the token's type first and then its text, so a failure reports the
+     /// token kind before the exact characters.
+     /// </summary>
+     private static void AssertToken(TokenType expectedType, string expectedText, Token actual)
+     {
+         Assert.AreEqual(expectedType, actual.Type);
+         Assert.AreEqual(expectedText, actual.Text);
+     }
+
+     // ---- End of input ----
+
+     [TestMethod]
+     public void ShouldReturnNullWhenAtEndOfStream()
+     {
+         Assert.IsNull(CreateLexer("").Next());
+     }
+
+     // ---- Numbers ----
+
+     [TestMethod]
+     public void ShouldReturnNumberTokenTypeWhenInputIsNumber()
+     {
+         PushBackCharacterStream stream = CreateStream("123");
+         Token first = new Lexer(stream).Next();
+
+         AssertToken(TokenType.Number, "123", first);
+         // The whole input must have been consumed by the single token.
+         Assert.IsFalse(stream.HasMore);
+     }
+
+     [TestMethod]
+     public void ShouldReturnNumberTokenTypeWhenInputIsInvalidNumber()
+     {
+         // Digits followed by letters are expected back as one Number token.
+         AssertToken(TokenType.Number, "123asdf", CreateLexer("123asdf").Next());
+     }
+
+     // ---- Delimiters ----
+
+     [TestMethod]
+     public void ShouldReturnListStartTokenTypeWhenInputIsAnOpenParen()
+     {
+         AssertToken(TokenType.ListStart, "(", CreateLexer("(").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReturnListEndTokenTypeWhenInputIsAClosedParen()
+     {
+         AssertToken(TokenType.ListEnd, ")", CreateLexer(")").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReturnVectorStartTokenTypeWhenInputIsAnOpenBracket()
+     {
+         AssertToken(TokenType.VectorStart, "[", CreateLexer("[").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReturnVectorEndTokenTypeWhenInputIsAClosedBracket()
+     {
+         AssertToken(TokenType.VectorEnd, "]", CreateLexer("]").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReturnMapStartTokenTypeWhenInputIsAnOpenCurlyBrace()
+     {
+         AssertToken(TokenType.MapStart, "{", CreateLexer("{").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReturnMapEndTokenTypeWhenInputIsAClosedCurlyBrace()
+     {
+         AssertToken(TokenType.MapEnd, "}", CreateLexer("}").Next());
+     }
+
+     // ---- Strings ----
+
+     [TestMethod]
+     public void ShouldReturnStringForProperlyTerminatingString()
+     {
+         AssertToken(TokenType.String, "\"asdf\"", CreateLexer("\"asdf\"").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReturnStringForRunOnString()
+     {
+         // No closing quote: the string token is expected to run to the end of the input.
+         AssertToken(TokenType.String, "\"asdfasdf", CreateLexer("\"asdfasdf").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReturnStringThatDoesNotTerminateOnBackslashQuote()
+     {
+         // The escaped quote in the middle must not end the string.
+         string input = "\"asdf\\\"asdf\"";
+         AssertToken(TokenType.String, input, CreateLexer(input).Next());
+     }
+
+     // ---- Whitespace ----
+
+     [TestMethod]
+     public void ShouldReturnWhitespaceForTabsSpacesCommasAndReturnCharacters()
+     {
+         string input = " \t \r\n , ";
+         AssertToken(TokenType.Whitespace, input, CreateLexer(input).Next());
+     }
+
+     [TestMethod]
+     public void ShouldReturnNumberFollowByWhitespaceAndAString()
+     {
+         Lexer lexer = CreateLexer("123 \"asdf\"");
+
+         Token current = lexer.Next();
+         AssertToken(TokenType.Number, "123", current);
+         Assert.AreEqual(0, current.StartIndex);
+
+         current = lexer.Next();
+         AssertToken(TokenType.Whitespace, " ", current);
+         Assert.AreEqual(3, current.StartIndex);
+
+         current = lexer.Next();
+         AssertToken(TokenType.String, "\"asdf\"", current);
+         Assert.AreEqual(4, current.StartIndex);
+     }
+
+     // ---- More numbers ----
+
+     [TestMethod]
+     public void ShouldReturnRealNumber()
+     {
+         AssertToken(TokenType.Number, "123.321", CreateLexer("123.321").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReturnHexNumber()
+     {
+         PushBackCharacterStream stream = CreateStream("0x123A");
+         Token first = new Lexer(stream).Next();
+
+         AssertToken(TokenType.HexNumber, "0x123A", first);
+         Assert.IsFalse(stream.HasMore);
+     }
+
+     // ---- Comments ----
+
+     [TestMethod]
+     public void ShouldReturnCommentWithTrailingWhitespace()
+     {
+         Lexer lexer = CreateLexer("; test text \r\n");
+
+         // Trailing spaces belong to the comment; the line break is a separate token.
+         AssertToken(TokenType.Comment, "; test text ", lexer.Next());
+         AssertToken(TokenType.Whitespace, "\r\n", lexer.Next());
+     }
+
+     [TestMethod]
+     public void ShouldReturnCommentThatExtendsToEndOfInput()
+     {
+         AssertToken(TokenType.Comment, "; test", CreateLexer("; test").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReturnCommentToEndOfLineOnly()
+     {
+         AssertToken(TokenType.Comment, "; test", CreateLexer("; test\r\n123").Next());
+     }
+
+     // ---- Symbols ----
+
+     [TestMethod]
+     public void ShouldReturnSymbol()
+     {
+         AssertToken(TokenType.Symbol, "test", CreateLexer("test").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReturnSymbolWhenItHasADot()
+     {
+         AssertToken(TokenType.Symbol, "namespace.test", CreateLexer("namespace.test").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReturnSymbolImmediatelyFollowedByComment()
+     {
+         Lexer lexer = CreateLexer("test;comment");
+
+         AssertToken(TokenType.Symbol, "test", lexer.Next());
+         AssertToken(TokenType.Comment, ";comment", lexer.Next());
+     }
+
+     [TestMethod]
+     public void ShouldReturnTwoSymbolsSeparatedByWhitespace()
+     {
+         Lexer lexer = CreateLexer("symbol1 symbol2");
+
+         AssertToken(TokenType.Symbol, "symbol1", lexer.Next());
+         // Only the kind of the separator is checked, not its text.
+         Assert.AreEqual(TokenType.Whitespace, lexer.Next().Type);
+         AssertToken(TokenType.Symbol, "symbol2", lexer.Next());
+     }
+
+     // ---- Keywords ----
+
+     [TestMethod]
+     public void ShouldReturnKeyword()
+     {
+         AssertToken(TokenType.Keyword, ":asdf", CreateLexer(":asdf").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReturnKeywordWithNoName()
+     {
+         AssertToken(TokenType.Keyword, ":", CreateLexer(":").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReturnKeywordFollowByListStart()
+     {
+         Lexer lexer = CreateLexer(":asdf(");
+
+         AssertToken(TokenType.Keyword, ":asdf", lexer.Next());
+         Assert.AreEqual(TokenType.ListStart, lexer.Next().Type);
+     }
+
+     // ---- Literals: true / false / nil ----
+
+     [TestMethod]
+     public void ShouldReturnBooleanWhenTrueIsInput()
+     {
+         PushBackCharacterStream stream = CreateStream("true");
+         Token first = new Lexer(stream).Next();
+
+         AssertToken(TokenType.Boolean, "true", first);
+         Assert.IsFalse(stream.HasMore);
+     }
+
+     [TestMethod]
+     public void ShouldReturnBooleanWhenFalseIsInput()
+     {
+         PushBackCharacterStream stream = CreateStream("false");
+         Token first = new Lexer(stream).Next();
+
+         AssertToken(TokenType.Boolean, "false", first);
+         Assert.IsFalse(stream.HasMore);
+     }
+
+     [TestMethod]
+     public void ShouldReturnNil()
+     {
+         PushBackCharacterStream stream = CreateStream("nil");
+         Token first = new Lexer(stream).Next();
+
+         AssertToken(TokenType.Nil, "nil", first);
+         Assert.IsFalse(stream.HasMore);
+     }
+
+     [TestMethod]
+     public void ShouldStopParsingSymbolWhenDoubleQuoteFound()
+     {
+         Lexer lexer = CreateLexer("asdf\"str\"");
+
+         AssertToken(TokenType.Symbol, "asdf", lexer.Next());
+         AssertToken(TokenType.String, "\"str\"", lexer.Next());
+     }
+
+     // ---- Character literals ----
+
+     [TestMethod]
+     public void ShouldReadBackslashNewLineAsCharacter()
+     {
+         AssertToken(TokenType.Character, "\\newline", CreateLexer("\\newline").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReadBackslashTabAsCharacter()
+     {
+         AssertToken(TokenType.Character, "\\tab", CreateLexer("\\tab").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReadBackslashSpaceAsCharacter()
+     {
+         AssertToken(TokenType.Character, "\\space", CreateLexer("\\space").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReadBackslashUFollowedByFourHexDigitsAsCharacter()
+     {
+         AssertToken(TokenType.Character, "\\uF04A", CreateLexer("\\uF04A").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReadBackslashUAsChar()
+     {
+         AssertToken(TokenType.Character, "\\u", CreateLexer("\\u").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReadBackslashAAsChar()
+     {
+         AssertToken(TokenType.Character, "\\a", CreateLexer("\\a").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReadBackslashABackSlashFAsTwoCharacters()
+     {
+         Lexer lexer = CreateLexer("\\a\\f");
+
+         AssertToken(TokenType.Character, "\\a", lexer.Next());
+         AssertToken(TokenType.Character, "\\f", lexer.Next());
+     }
+
+     [TestMethod]
+     public void ShouldReadBackslashUFollowedByTwoHexDigitsAsSingleUCharacterFollowedByANumber()
+     {
+         Lexer lexer = CreateLexer("\\u19");
+
+         // Fewer than four hex digits: expected as the character \u, then the digits on their own.
+         AssertToken(TokenType.Character, "\\u", lexer.Next());
+         AssertToken(TokenType.Number, "19", lexer.Next());
+     }
+
+     [TestMethod]
+     public void ShouldReadBackslashUFollowedByThreeHexDigitsAndAZAsSingleUCharacterFollowedByASymbol()
+     {
+         Lexer lexer = CreateLexer("\\uAF9Z");
+
+         // 'Z' is not a hex digit, so the four characters after \u are expected as a symbol.
+         AssertToken(TokenType.Character, "\\u", lexer.Next());
+         AssertToken(TokenType.Symbol, "AF9Z", lexer.Next());
+     }
+
+     [TestMethod]
+     public void ShouldReturnSymbolFollowedByCharacter()
+     {
+         Lexer lexer = CreateLexer("asdf\\s");
+
+         AssertToken(TokenType.Symbol, "asdf", lexer.Next());
+         AssertToken(TokenType.Character, "\\s", lexer.Next());
+     }
+
+     [TestMethod]
+     public void ShouldReturnNumberFollowedByCharacter()
+     {
+         Lexer lexer = CreateLexer("123\\s");
+
+         AssertToken(TokenType.Number, "123", lexer.Next());
+         AssertToken(TokenType.Character, "\\s", lexer.Next());
+     }
+
+     // ---- Ampersand symbols and reader macros ----
+
+     [TestMethod]
+     public void ShouldReturnSymbolStartingWithAmpersand()
+     {
+         AssertToken(TokenType.Symbol, "&123asdf", CreateLexer("&123asdf").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReturnSymbolWithOnlyASingleAmpersand()
+     {
+         AssertToken(TokenType.Symbol, "&", CreateLexer("&").Next());
+     }
+
+     [TestMethod]
+     public void ShouldReturnIgnoreReaderMacro()
+     {
+         AssertToken(TokenType.IgnoreReaderMacro, "#_", CreateLexer("#_(defn").Next());
+     }
+
+     // ---- String edge cases ----
+
+     [TestMethod]
+     public void ShouldReturnStringTokenWhenInputIsOnlyADoubleQuote()
+     {
+         AssertToken(TokenType.String, "\"", CreateLexer("\"").Next());
+     }
+
+     [TestMethod]
+     public void ShouldAllowStringToEndWithAnEscapedBackslash()
+     {
+         // The string ends in an escaped backslash, so the quote after it closes the
+         // string and the text that follows must not be part of the token.
+         Lexer lexer = CreateLexer("\"string\\\\\"not string");
+
+         AssertToken(TokenType.String, "\"string\\\\\"", lexer.Next());
+     }
+ }
+}
Please sign in to comment.
Something went wrong with that request. Please try again.