From dea7c348df1ddd4f912dff5e950dfb1bcc29b8c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Wed, 29 Jun 2022 09:23:09 +0300 Subject: [PATCH] mem: add splitBackwards (#11908) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * mem: refactor tests of split() - add a few cases for .rest() - use expectEqualSlices() * mem: add splitBackwards Over the last couple of weeks I needed to iterate over a collection backwards at least twice. Do we want to have this in stdlib? If yes, click "Merge" and start using today! Free shipping and returns (before 1.0). Why is this useful? ------------------- I need this for building an error wrapper: errors are added in the wrapper from "lowest" level to "highest" level, and then printed in reverse order. Imagine an `UpdateUsers` call, which needs to return `error.InvalidInput` and a wrappable error context. In Go we would add a context to the error when returning it: // if update_user fails, add context on which user we are operating if err := update_user(user); err != nil { return fmt.Errorf("user id=%d: %w", user.id, err) } Since Zig cannot pass anything other than a u16 with an error (#2647), I will pass an `err_ctx: *Err` to the callees, where they can, besides returning an error, augment it with auxiliary data. `Err` is a preallocated array that can add zero-byte-separated strings. For a concrete example, imagine such a call graph: update_user(User, *Err) error{InvalidInput}!<...> validate_user([]const u8, *Err) error{InvalidInput}!<...> Where `validate_user` would like, besides the error, to signal the invalid field. And `update_user`, besides the error, would signal the offending user id. We also don't want the low-level functions to know in which context they are operating to construct a meaningful error message: if validation fails, they append their "context" to the buffer. 
To translate/augment the Go example above: pub fn validate_user(err_ctx: *Err, user: User) error{InvalidInput}!void { const name = user.name; if (!ascii.isAlpha(name)) { err_ctx.print("name '{s}' must be ascii-letters only", .{name}); return error.InvalidInput; } <...> } // update_user validates each user and does something with it. pub fn update_user(err_ctx: *Err, user: User) error{InvalidInput}!void { // validate the user before updating it validate_user(err_ctx, user) catch { err_ctx.print("user id={d}", .{user.id}); return error.InvalidInput; }; <...> } Then the top-level function (in my case, CLI) will read the buffer backwards (splitting on `"\x00"`) and print: user id=123: name 'Žvangalas' must be ascii-letters only To read that buffer backwards, dear readers of this commit message, I need `mem.splitBackwards`. --- lib/std/mem.zig | 165 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 146 insertions(+), 19 deletions(-) diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 046339b6fcc7..f54610f06091 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -1707,23 +1707,32 @@ pub fn split(comptime T: type, buffer: []const T, delimiter: []const T) SplitIte test "split" { var it = split(u8, "abc|def||ghi", "|"); - try testing.expect(eql(u8, it.next().?, "abc")); - try testing.expect(eql(u8, it.next().?, "def")); - try testing.expect(eql(u8, it.next().?, "")); - try testing.expect(eql(u8, it.next().?, "ghi")); + try testing.expectEqualSlices(u8, it.rest(), "abc|def||ghi"); + try testing.expectEqualSlices(u8, it.next().?, "abc"); + + try testing.expectEqualSlices(u8, it.rest(), "def||ghi"); + try testing.expectEqualSlices(u8, it.next().?, "def"); + + try testing.expectEqualSlices(u8, it.rest(), "|ghi"); + try testing.expectEqualSlices(u8, it.next().?, ""); + + try testing.expectEqualSlices(u8, it.rest(), "ghi"); + try testing.expectEqualSlices(u8, it.next().?, "ghi"); + + try testing.expectEqualSlices(u8, it.rest(), ""); try testing.expect(it.next() == null); 
it = split(u8, "", "|"); - try testing.expect(eql(u8, it.next().?, "")); + try testing.expectEqualSlices(u8, it.next().?, ""); try testing.expect(it.next() == null); it = split(u8, "|", "|"); - try testing.expect(eql(u8, it.next().?, "")); - try testing.expect(eql(u8, it.next().?, "")); + try testing.expectEqualSlices(u8, it.next().?, ""); + try testing.expectEqualSlices(u8, it.next().?, ""); try testing.expect(it.next() == null); it = split(u8, "hello", " "); - try testing.expect(eql(u8, it.next().?, "hello")); + try testing.expectEqualSlices(u8, it.next().?, "hello"); try testing.expect(it.next() == null); var it16 = split( @@ -1731,17 +1740,18 @@ test "split" { std.unicode.utf8ToUtf16LeStringLiteral("hello"), std.unicode.utf8ToUtf16LeStringLiteral(" "), ); - try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("hello"))); + try testing.expectEqualSlices(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("hello")); try testing.expect(it16.next() == null); } test "split (multibyte)" { var it = split(u8, "a, b ,, c, d, e", ", "); - try testing.expect(eql(u8, it.next().?, "a")); - try testing.expect(eql(u8, it.next().?, "b ,")); - try testing.expect(eql(u8, it.next().?, "c")); - try testing.expect(eql(u8, it.next().?, "d")); - try testing.expect(eql(u8, it.next().?, "e")); + try testing.expectEqualSlices(u8, it.next().?, "a"); + try testing.expectEqualSlices(u8, it.rest(), "b ,, c, d, e"); + try testing.expectEqualSlices(u8, it.next().?, "b ,"); + try testing.expectEqualSlices(u8, it.next().?, "c"); + try testing.expectEqualSlices(u8, it.next().?, "d"); + try testing.expectEqualSlices(u8, it.next().?, "e"); try testing.expect(it.next() == null); var it16 = split( @@ -1749,11 +1759,99 @@ test "split (multibyte)" { std.unicode.utf8ToUtf16LeStringLiteral("a, b ,, c, d, e"), std.unicode.utf8ToUtf16LeStringLiteral(", "), ); - try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("a"))); - try 
testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("b ,"))); - try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("c"))); - try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("d"))); - try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("e"))); + try testing.expectEqualSlices(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("a")); + try testing.expectEqualSlices(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("b ,")); + try testing.expectEqualSlices(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("c")); + try testing.expectEqualSlices(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("d")); + try testing.expectEqualSlices(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("e")); + try testing.expect(it16.next() == null); +} + +/// Returns an iterator that iterates backwards over the slices of `buffer` +/// that are separated by the full sequence in `delimiter`. +/// splitBackwards(u8, "abc|def||ghi", "|") +/// will return slices for "ghi", "", "def", "abc", null, in that order. +/// If `delimiter` does not exist in buffer, +/// the iterator will return `buffer`, null, in that order. +/// The delimiter length must not be zero. 
+pub fn splitBackwards(comptime T: type, buffer: []const T, delimiter: []const T) SplitBackwardsIterator(T) { + assert(delimiter.len != 0); + return SplitBackwardsIterator(T){ + .index = buffer.len, + .buffer = buffer, + .delimiter = delimiter, + }; +} + +test "splitBackwards" { + var it = splitBackwards(u8, "abc|def||ghi", "|"); + try testing.expectEqualSlices(u8, it.rest(), "abc|def||ghi"); + try testing.expectEqualSlices(u8, it.next().?, "ghi"); + + try testing.expectEqualSlices(u8, it.rest(), "abc|def|"); + try testing.expectEqualSlices(u8, it.next().?, ""); + + try testing.expectEqualSlices(u8, it.rest(), "abc|def"); + try testing.expectEqualSlices(u8, it.next().?, "def"); + + try testing.expectEqualSlices(u8, it.rest(), "abc"); + try testing.expectEqualSlices(u8, it.next().?, "abc"); + + try testing.expectEqualSlices(u8, it.rest(), ""); + try testing.expect(it.next() == null); + + it = splitBackwards(u8, "", "|"); + try testing.expectEqualSlices(u8, it.next().?, ""); + try testing.expect(it.next() == null); + + it = splitBackwards(u8, "|", "|"); + try testing.expectEqualSlices(u8, it.next().?, ""); + try testing.expectEqualSlices(u8, it.next().?, ""); + try testing.expect(it.next() == null); + + it = splitBackwards(u8, "hello", " "); + try testing.expectEqualSlices(u8, it.next().?, "hello"); + try testing.expect(it.next() == null); + + var it16 = splitBackwards( + u16, + std.unicode.utf8ToUtf16LeStringLiteral("hello"), + std.unicode.utf8ToUtf16LeStringLiteral(" "), + ); + try testing.expectEqualSlices(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("hello")); + try testing.expect(it16.next() == null); +} + +test "splitBackwards (multibyte)" { + var it = splitBackwards(u8, "a, b ,, c, d, e", ", "); + try testing.expectEqualSlices(u8, it.rest(), "a, b ,, c, d, e"); + try testing.expectEqualSlices(u8, it.next().?, "e"); + + try testing.expectEqualSlices(u8, it.rest(), "a, b ,, c, d"); + try testing.expectEqualSlices(u8, it.next().?, "d"); + + try 
testing.expectEqualSlices(u8, it.rest(), "a, b ,, c"); + try testing.expectEqualSlices(u8, it.next().?, "c"); + + try testing.expectEqualSlices(u8, it.rest(), "a, b ,"); + try testing.expectEqualSlices(u8, it.next().?, "b ,"); + + try testing.expectEqualSlices(u8, it.rest(), "a"); + try testing.expectEqualSlices(u8, it.next().?, "a"); + + try testing.expectEqualSlices(u8, it.rest(), ""); + try testing.expect(it.next() == null); + + var it16 = splitBackwards( + u16, + std.unicode.utf8ToUtf16LeStringLiteral("a, b ,, c, d, e"), + std.unicode.utf8ToUtf16LeStringLiteral(", "), + ); + try testing.expectEqualSlices(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("e")); + try testing.expectEqualSlices(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("d")); + try testing.expectEqualSlices(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("c")); + try testing.expectEqualSlices(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("b ,")); + try testing.expectEqualSlices(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("a")); try testing.expect(it16.next() == null); } @@ -1862,6 +1960,35 @@ pub fn SplitIterator(comptime T: type) type { }; } +pub fn SplitBackwardsIterator(comptime T: type) type { + return struct { + buffer: []const T, + index: ?usize, + delimiter: []const T, + + const Self = @This(); + + /// Returns a slice of the next field, or null if splitting is complete. + pub fn next(self: *Self) ?[]const T { + const end = self.index orelse return null; + const start = if (lastIndexOf(T, self.buffer[0..end], self.delimiter)) |delim_start| blk: { + self.index = delim_start; + break :blk delim_start + self.delimiter.len; + } else blk: { + self.index = null; + break :blk 0; + }; + return self.buffer[start..end]; + } + + /// Returns a slice of the remaining bytes. Does not affect iterator state. 
+ pub fn rest(self: Self) []const T { + const end = self.index orelse 0; + return self.buffer[0..end]; + } + }; +} + /// Naively combines a series of slices with a separator. /// Allocates memory for the result, which must be freed by the caller. pub fn join(allocator: Allocator, separator: []const u8, slices: []const []const u8) ![]u8 {