Skip to content

Commit 45ab752

Browse files
committed
source files must end with newline
1 parent 39c7bd2 commit 45ab752

File tree

1 file changed

+52
-5
lines changed

1 file changed

+52
-5
lines changed

src-self-hosted/tokenizer.zig

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ pub const Token = struct {
7070
Identifier,
7171
StringLiteral: StrLitKind,
7272
Eof,
73+
NoEolAtEof,
7374
Builtin,
7475
Bang,
7576
Equal,
@@ -139,6 +140,7 @@ pub const Token = struct {
139140
pub const Tokenizer = struct {
140141
buffer: []const u8,
141142
index: usize,
143+
actual_file_end: usize,
142144

143145
pub const Location = struct {
144146
line: usize,
@@ -177,10 +179,24 @@ pub const Tokenizer = struct {
177179
}
178180

179181
/// Create a tokenizer over `buffer`.
/// Source files must end with a newline: when the final line of `buffer`
/// lacks a terminating '\n', that incomplete line is excluded from the
/// tokenized region, and `actual_file_end` records the true buffer length
/// so `next()` can report the missing newline instead of a plain Eof.
pub fn init(buffer: []const u8) -> Tokenizer {
    if (buffer.len == 0 or buffer[buffer.len - 1] == '\n') {
        // Empty input or properly terminated: tokenize the whole buffer.
        return Tokenizer {
            .buffer = buffer,
            .index = 0,
            .actual_file_end = buffer.len,
        };
    }
    // The last line is incomplete, so skip it, and give an error when we
    // get there: walk back to the final newline and tokenize only up to it,
    // remembering the real end of the file for the diagnostic.
    var truncated_len = buffer.len;
    while (truncated_len > 0) : (truncated_len -= 1) {
        if (buffer[truncated_len - 1] == '\n') break;
    }
    return Tokenizer {
        .buffer = buffer[0..truncated_len],
        .index = 0,
        .actual_file_end = buffer.len,
    };
}
185201

186202
const State = enum {
@@ -497,7 +513,11 @@ pub const Tokenizer = struct {
497513
}
498514
}
499515
result.end = self.index;
500-
// TODO check state when returning EOF
516+
if (result.id == Token.Id.Eof and self.actual_file_end != self.buffer.len) {
517+
// instead of an Eof, give an error token
518+
result.id = Token.Id.NoEolAtEof;
519+
result.end = self.actual_file_end;
520+
}
501521
return result;
502522
}
503523

@@ -507,3 +527,30 @@ pub const Tokenizer = struct {
507527
};
508528

509529

530+
531+
// Verify that sources lacking a trailing newline are reported through the
// NoEolAtEof token rather than ending in a normal Eof.
test "tokenizer" {
    // Source with no newline at all: the incomplete line yields no tokens.
    testTokenize("no newline", []Token.Id {}, false);
    // Properly terminated source tokenizes normally and ends with Eof.
    testTokenize("test\n", []Token.Id {
        Token.Id.Keyword_test,
    }, true);
    // Only the complete first line is tokenized; the dangling tail is dropped.
    testTokenize("test\nno newline", []Token.Id {
        Token.Id.Keyword_test,
    }, false);
}
542+
543+
/// Drive `tokenizer.next()` over `source` and assert that the produced token
/// ids match `expected_tokens`, followed by a final Eof token (when
/// `expected_eol_at_eof` is true) or a NoEolAtEof error token (when false).
fn testTokenize(source: []const u8, expected_tokens: []const Token.Id, expected_eol_at_eof: bool) {
    var tokenizer = Tokenizer.init(source);
    for (expected_tokens) |expected_token_id| {
        const token = tokenizer.next();
        // Compare only the union tags; payloads are checked separately below.
        std.debug.assert(@TagType(Token.Id)(token.id) == @TagType(Token.Id)(expected_token_id));
        switch (expected_token_id) {
            // Fix: the original captured `|kind|` here without using it,
            // which is an unused-capture error. The StrLitKind payload is
            // not yet comparable, so no capture is needed.
            Token.Id.StringLiteral => {
                @panic("TODO: how do i test this?");
            },
            else => {},
        }
    }
    // The trailing token encodes whether the source was newline-terminated.
    std.debug.assert(tokenizer.next().id == if (expected_eol_at_eof) Token.Id.Eof else Token.Id.NoEolAtEof);
}

0 commit comments

Comments
 (0)