@@ -70,6 +70,7 @@ pub const Token = struct {
7070 Identifier ,
7171 StringLiteral : StrLitKind ,
7272 Eof ,
73+ NoEolAtEof ,
7374 Builtin ,
7475 Bang ,
7576 Equal ,
@@ -139,6 +140,7 @@ pub const Token = struct {
139140pub const Tokenizer = struct {
140141 buffer : []const u8 ,
141142 index : usize ,
143+ actual_file_end : usize ,
142144
143145 pub const Location = struct {
144146 line : usize ,
@@ -177,10 +179,24 @@ pub const Tokenizer = struct {
177179 }
178180
/// Create a tokenizer over `buffer`, positioned at its first byte.
///
/// Only the part of `buffer` up to and including its last '\n' is exposed
/// through the `buffer` field; any trailing bytes that are not terminated by
/// a newline are cut off. `actual_file_end` always records the full length
/// of the input, so `next()` can tell the two cases apart and report
/// `Token.Id.NoEolAtEof` instead of `Token.Id.Eof` when something was cut.
pub fn init(buffer: []const u8) -> Tokenizer {
    // Walk backwards until we sit just past a '\n' (or at the very start).
    // For an empty buffer, or one that already ends in '\n', this stops
    // immediately and the whole buffer is kept.
    var complete_len = buffer.len;
    while (complete_len > 0 and buffer[complete_len - 1] != '\n') {
        complete_len -= 1;
    }

    return Tokenizer {
        .buffer = buffer[0..complete_len],
        .index = 0,
        .actual_file_end = buffer.len,
    };
}
185201
186202 const State = enum {
@@ -497,7 +513,11 @@ pub const Tokenizer = struct {
497513 }
498514 }
499515 result .end = self .index ;
500- // TODO check state when returning EOF
516+ if (result .id == Token .Id .Eof and self .actual_file_end != self .buffer .len ) {
517+ // instead of an Eof, give an error token
518+ result .id = Token .Id .NoEolAtEof ;
519+ result .end = self .actual_file_end ;
520+ }
501521 return result ;
502522 }
503523
@@ -507,3 +527,30 @@ pub const Tokenizer = struct {
507527};
508528
509529
530+
test "tokenizer" {
    // Source must end with an end-of-line. The last argument of
    // testTokenize says whether the terminating token should be Eof (true)
    // or NoEolAtEof (false).
    const no_tokens = []Token.Id {};
    const only_test_keyword = []Token.Id {
        Token.Id.Keyword_test,
    };

    // Nothing but an unterminated line: no tokens, then the error token.
    testTokenize("no newline", no_tokens, false);

    // Properly terminated source.
    testTokenize("test\n", only_test_keyword, true);

    // A complete line followed by an unterminated one.
    testTokenize("test\nno newline", only_test_keyword, false);
}
542+
/// Tokenize `source` and assert that the tags of the produced tokens match
/// `expected_tokens`, in order. After those, the next token must be
/// `Token.Id.Eof` when `expected_eol_at_eof` is true and
/// `Token.Id.NoEolAtEof` otherwise.
fn testTokenize(source: []const u8, expected_tokens: []const Token.Id, expected_eol_at_eof: bool) {
    var tokenizer = Tokenizer.init(source);

    for (expected_tokens) |expected_token_id| {
        const token = tokenizer.next();

        // Only the tags are compared here; payloads are not inspected.
        const actual_tag = @TagType(Token.Id)(token.id);
        const expected_tag = @TagType(Token.Id)(expected_token_id);
        std.debug.assert(actual_tag == expected_tag);

        switch (expected_token_id) {
            // Checking the string-literal payload is not implemented yet.
            Token.Id.StringLiteral => |kind| {
                @panic("TODO: how do i test this?");
            },
            else => {},
        }
    }

    // The token after the expected ones signals how the input ended.
    const expected_last_id = if (expected_eol_at_eof) Token.Id.Eof else Token.Id.NoEolAtEof;
    std.debug.assert(tokenizer.next().id == expected_last_id);
}
0 commit comments