From bc5c4677558799696251b489d318847d92186aad Mon Sep 17 00:00:00 2001 From: Jens Goldberg Date: Thu, 30 Dec 2021 15:12:20 +0000 Subject: [PATCH 1/5] Support parsing tz timezone data --- lib/std/tz.zig | 218 +++++++++++++++++++++++++++++++++++++ lib/std/tz/asia_tokyo.tzif | Bin 0 -> 537 bytes 2 files changed, 218 insertions(+) create mode 100644 lib/std/tz.zig create mode 100644 lib/std/tz/asia_tokyo.tzif diff --git a/lib/std/tz.zig b/lib/std/tz.zig new file mode 100644 index 000000000000..2680a8d48c23 --- /dev/null +++ b/lib/std/tz.zig @@ -0,0 +1,218 @@ +const std = @import("std.zig"); +const builtin = @import("builtin"); + +pub const Transition = struct { + ts: i64, + timetype: *Timetype, +}; + +pub const Timetype = struct { + offset: i32, + flags: u8, + name_data: [6:0]u8, + + pub fn name(self: Timetype) [:0]const u8 { + return std.mem.sliceTo(self.name_data[0..], 0); + } + + pub fn isDst(self: Timetype) bool { + return (self.flags & 0x01) > 0; + } + + pub fn standardTimeIndicator(self: Timetype) bool { + return (self.flags & 0x02) > 0; + } + + pub fn utIndicator(self: Timetype) bool { + return (self.flags & 0x04) > 0; + } +}; + +pub const Leapsecond = struct { + occurrence: i48, + correction: i16, +}; + +pub const Tz = struct { + allocator: std.mem.Allocator, + transitions: []const Transition, + timetypes: []const Timetype, + leapseconds: []const Leapsecond, + footer: []const u8, + + pub fn parse(allocator: std.mem.Allocator, data: []const u8) !Tz { + const header_size = 4 + 1 + 15 + 6 * 4; + if (data.len < header_size) return error.BadSize; + + const magic_l = data[0..4]; + const version_l = data[4]; + if (!std.mem.eql(u8, magic_l, "TZif")) return error.BadHeader; + if (version_l != '2' and version_l != '3') return error.BadVersion; + + // Parse the legacy header and skip the entire thing + const isutcnt_l = std.mem.readIntBig(u32, data[20..24]); + const isstdcnt_l = std.mem.readIntBig(u32, data[24..28]); + const leapcnt_l = std.mem.readIntBig(u32, data[28..32]); + const timecnt_l = std.mem.readIntBig(u32, data[32..36]); + const typecnt_l = std.mem.readIntBig(u32, data[36..40]); + const charcnt_l = std.mem.readIntBig(u32, data[40..44]); + const data_block_size_legacy = timecnt_l * 5 + typecnt_l * 6 + charcnt_l + leapcnt_l * 8 + isstdcnt_l + isutcnt_l; + if (data.len < header_size + data_block_size_legacy) return error.BadSize; + + const data2 = data[header_size + data_block_size_legacy ..]; + if (data2.len < header_size) return error.BadSize; + + const magic = data2[0..4]; + const version = data2[4]; + if (!std.mem.eql(u8, magic, "TZif")) return error.BadHeader; + if (version != '2' and version != '3') return error.BadVersion; + + const isutcnt = std.mem.readIntBig(u32, data2[20..24]); + const isstdcnt = std.mem.readIntBig(u32, data2[24..28]); + const leapcnt = std.mem.readIntBig(u32, data2[28..32]); + const timecnt = std.mem.readIntBig(u32, data2[32..36]); + const typecnt = std.mem.readIntBig(u32, data2[36..40]); + const charcnt = std.mem.readIntBig(u32, data2[40..44]); + + if (isstdcnt != 0 and isstdcnt != typecnt) return error.Malformed; // rfc8536: isstdcnt [...] MUST either be zero or equal to "typecnt" + if (isutcnt != 0 and isutcnt != typecnt) return error.Malformed; // rfc8536: isutcnt [...] MUST either be zero or equal to "typecnt" + if (typecnt == 0) return error.Malformed; // rfc8536: typecnt [...] MUST NOT be zero + if (charcnt == 0) return error.Malformed; // rfc8536: charcnt [...] MUST NOT be zero + + const data_block_size = timecnt * 9 + typecnt * 6 + charcnt + leapcnt * 12 + isstdcnt + isutcnt; + if (data2.len < header_size + data_block_size) return error.BadSize; + + var leapseconds = try allocator.alloc(Leapsecond, leapcnt); + errdefer allocator.free(leapseconds); + var transitions = try allocator.alloc(Transition, timecnt); + errdefer allocator.free(transitions); + var timetypes = try allocator.alloc(Timetype, typecnt); + errdefer allocator.free(timetypes); + + var p: usize = header_size; + + // First, parse timezone designators ahead of time so that we can reject malformed files early + const designators = data2[header_size + timecnt * 9 + typecnt * 6 .. header_size + timecnt * 9 + typecnt * 6 + charcnt]; + if (designators[designators.len - 1] != 0) return error.Malformed; // rfc8536: charcnt [...] includes the trailing NUL (0x00) octet + + // Parse transition types + var i: usize = 0; + while (i < timecnt) : (i += 1) { + transitions[i].ts = std.mem.readIntSliceBig(i64, data2[p .. p + 8]); + p += 8; + } + + i = 0; + while (i < timecnt) : (i += 1) { + const tt = data2[p]; + if (tt >= timetypes.len) return error.Malformed; // rfc8536: Each type index MUST be in the range [0, "typecnt" - 1] + transitions[i].timetype = &timetypes[tt]; + p += 1; + } + + // Parse time types + i = 0; + while (i < typecnt) : (i += 1) { + const offset = std.mem.readIntSliceBig(i32, data2[p .. p + 4]); + if (offset < -2147483648) return error.Malformed; // rfc8536: utoff [...] MUST NOT be -2**31 + const dst = data2[p + 4]; + if (dst != 0 and dst != 1) return error.Malformed; // rfc8536: (is)dst [...] The value MUST be 0 or 1. + const idx = data2[p + 5]; + if (idx > designators.len - 1) return error.Malformed; // rfc8536: (desig)idx [...] Each index MUST be in the range [0, "charcnt" - 1] + + const name = std.mem.sliceTo(designators[idx..], 0); + + // We are mandating the "SHOULD" 6-character limit so we can pack the struct better, and to conform to POSIX. + if (name.len > 6) return error.Malformed; // rfc8536: Time zone designations SHOULD consist of at least three (3) and no more than six (6) ASCII characters. + + timetypes[i] = .{ + .offset = offset, + .flags = dst, + .name_data = undefined, + }; + + std.mem.copy(u8, timetypes[i].name_data[0..], name); + timetypes[i].name_data[name.len] = 0; + + p += 6; + } + + // Skip the designators we got earlier + p += charcnt; + + // Parse leap seconds + i = 0; + while (i < leapcnt) : (i += 1) { + const occur = std.mem.readIntSliceBig(i64, data2[p .. p + 8]); + if (occur < 0) return error.Malformed; // rfc8536: occur [...] MUST be nonnegative + if (i > 0 and leapseconds[i - 1].occurrence + 2419199 > occur) return error.Malformed; // rfc8536: occur [...] each later value MUST be at least 2419199 greater than the previous value + if (occur > std.math.maxInt(i48)) return error.Malformed; // Unreasonably far into the future + + const corr = std.mem.readIntSliceBig(i32, data2[p + 8 .. p + 12]); + if (i == 0 and corr != -1 and corr != 1) return error.Malformed; // rfc8536: The correction value in the first leap-second record, if present, MUST be either one (1) or minus one (-1) + if (i > 0 and leapseconds[i - 1].correction != corr + 1 and leapseconds[i - 1].correction != corr - 1) return error.Malformed; // rfc8536: The correction values in adjacent leap- second records MUST differ by exactly one (1) + if (corr > std.math.maxInt(i16)) return error.Malformed; // Unreasonably large correction + + leapseconds[i] = .{ + .occurrence = @intCast(i48, occur), + .correction = @intCast(i16, corr), + }; + p += 12; + } + + // Parse standard/wall indicators + i = 0; + while (i < isstdcnt) : (i += 1) { + const stdtime = data2[p]; + if (stdtime == 1) { + timetypes[i].flags |= 0x02; + } + p += 1; + } + + // Parse UT/local indicators + i = 0; + while (i < isutcnt) : (i += 1) { + const ut = data2[p]; + if (ut == 1) { + timetypes[i].flags |= 0x04; + if (!timetypes[i].standardTimeIndicator()) return error.Malformed; // rfc8536: standard/wall value MUST be one (1) if the UT/local value is one (1) + } + p += 1; + } + + // Footer + if (data2[p..].len < 2) return error.Malformed; // rfc8536 requires at least 2 newlines + if (data2[p] != '\n') return error.Malformed; // Not a rfc8536 footer + const footer_end = std.mem.indexOfScalar(u8, data2[p + 1 ..], '\n') orelse return error.Malformed; // No 2nd rfc8536 newline + const footer = try allocator.dupe(u8, data2[p + 1 .. p + 1 + footer_end]); + errdefer allocator.free(footer); + + return Tz{ + .allocator = allocator, + .transitions = transitions, + .timetypes = timetypes, + .leapseconds = leapseconds, + .footer = footer, + }; + } + + pub fn deinit(self: *Tz) void { + self.allocator.free(self.footer); + self.allocator.free(self.leapseconds); + self.allocator.free(self.transitions); + self.allocator.free(self.timetypes); + } +}; + +test "parse" { + // Asia/Tokyo is good for embedding, as Japan only had DST for a short while during the US occupation + const data = @embedFile("tz/asia_tokyo.tzif"); + var tz = try Tz.parse(std.testing.allocator, data); + defer tz.deinit(); + + try std.testing.expectEqual(tz.transitions.len, 9); + try std.testing.expect(std.mem.eql(u8, tz.transitions[3].timetype.name(), "JDT")); + try std.testing.expectEqual(tz.transitions[5].ts, -620298000); // 1950-05-06 15:00:00 (UTC) + try std.testing.expectEqual(tz.leapseconds[13].occurrence, 567993613); // 1988-01-01 00:00:13 (IAT) +} diff --git a/lib/std/tz/asia_tokyo.tzif b/lib/std/tz/asia_tokyo.tzif new file mode 100644 index 0000000000000000000000000000000000000000..15d780f7a80124f35f91b495e915f7098b3fd268 GIT binary patch literal 537 zcmWHE%1kq2AP5+NDnJ+nLhx&m1}fnMVrC%b`40rChn5t8=<9Y&5c+N82N3_pk6&Q^ z&9l~E{%v*!2)$MoOy7C-2u$DY`Tv255egX?nn4CL^ekXxVPL46z`)_-8^Yk_0>r@~ zAPEMRO%Wgp>{`|(Z$LB?NPxXfz6nTyb#tZ%gJ_@$3=G^JZ7o2G6~yM%=57U2Y#=s2 zk3EQH2eAd-7PSK@4iH=D+%gai@*;!i{8ODkiVMURZ}@O1)^5(Kezj5I;C5QwcO95V?>34_=M z&s9LQ2#9UG!($4N5(Tl%dIUhU7>Mn>U3waj5(lw8W7wwyDG3nU|NUeTEeT?WPS%|X Vq@+OXhzz$`KuVem7?8S_TmWhRZW{mq literal 0 HcmV?d00001 From e4672c95f116eefd0a87a1f857062017e6a7fd97 Mon Sep 17 00:00:00 2001 From: Jens Goldberg Date: Thu, 30 Dec 2021 18:20:29 +0000 Subject: [PATCH 2/5] Actually expose the tz file --- lib/std/std.zig | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/std/std.zig b/lib/std/std.zig index f94185f2fb29..5773cc646c60 100644 --- a/lib/std/std.zig +++ b/lib/std/std.zig @@ -81,6 +81,7 @@ pub const sort = @import("sort.zig"); pub const ascii = @import("ascii.zig"); pub const testing = @import("testing.zig"); pub const time = @import("time.zig"); +pub const tz = @import("tz.zig"); pub const unicode = @import("unicode.zig"); pub const valgrind = @import("valgrind.zig"); pub const wasm = @import("wasm.zig"); From 9a564356661d31bc7fad5b670bad8ebeecc5dad4 Mon Sep 17 00:00:00 2001 From: Jens Goldberg Date: Fri, 31 Dec 2021 17:17:49 +0000 Subject: [PATCH 3/5] tz parsing reader interface, test thicc files, and exclude tzif --- build.zig | 1 + lib/std/std.zig | 2 +- lib/std/tz.zig | 208 +++++++++++++++++-------------- lib/std/tz/antarctica_davis.tzif | Bin 0 -> 837 bytes 4 files changed, 114 insertions(+), 97 deletions(-) create mode 100644 lib/std/tz/antarctica_davis.tzif diff --git a/build.zig b/build.zig index 700cf403bc96..a11460962826 100644 --- a/build.zig +++ b/build.zig @@ -98,6 +98,7 @@ pub fn build(b: *Builder) !void { ".z.9", ".gz", "rfc1951.txt", + ".tzif", }, .blank_extensions = &[_][]const u8{ "test.zig", diff --git a/lib/std/std.zig b/lib/std/std.zig index 5773cc646c60..5ae09a7f5b69 100644 --- a/lib/std/std.zig +++ b/lib/std/std.zig @@ -39,6 +39,7 @@ pub const StringArrayHashMapUnmanaged = array_hash_map.StringArrayHashMapUnmanag pub const TailQueue = @import("linked_list.zig").TailQueue; pub const Target = @import("target.zig").Target; pub const Thread = @import("Thread.zig"); +pub const Tz = @import("tz.zig").Tz; pub const array_hash_map = @import("array_hash_map.zig"); pub const atomic = @import("atomic.zig"); @@ -81,7 +82,6 @@ pub const sort = @import("sort.zig"); pub const ascii = @import("ascii.zig"); pub const testing = @import("testing.zig"); pub const time = @import("time.zig"); -pub const tz = @import("tz.zig"); pub const unicode = @import("unicode.zig"); pub const valgrind = @import("valgrind.zig"); pub const wasm = @import("wasm.zig"); diff --git a/lib/std/tz.zig b/lib/std/tz.zig index 2680a8d48c23..528150d1a2f3 100644 --- a/lib/std/tz.zig +++ b/lib/std/tz.zig @@ -40,160 +40,163 @@ pub const Tz = struct { leapseconds: []const Leapsecond, footer: []const u8, - pub fn parse(allocator: std.mem.Allocator, data: []const u8) !Tz { - const header_size = 4 + 1 + 15 + 6 * 4; - if (data.len < header_size) return error.BadSize; - - const magic_l = data[0..4]; - const version_l = data[4]; - if (!std.mem.eql(u8, magic_l, "TZif")) return error.BadHeader; - if (version_l != '2' and version_l != '3') return error.BadVersion; - - // Parse the legacy header and skip the entire thing - const isutcnt_l = std.mem.readIntBig(u32, data[20..24]); - const isstdcnt_l = std.mem.readIntBig(u32, data[24..28]); - const leapcnt_l = std.mem.readIntBig(u32, data[28..32]); - const timecnt_l = std.mem.readIntBig(u32, data[32..36]); - const typecnt_l = std.mem.readIntBig(u32, data[36..40]); - const charcnt_l = std.mem.readIntBig(u32, data[40..44]); - const data_block_size_legacy = timecnt_l * 5 + typecnt_l * 6 + charcnt_l + leapcnt_l * 8 + isstdcnt_l + isutcnt_l; - if (data.len < header_size + data_block_size_legacy) return error.BadSize; - - const data2 = data[header_size + data_block_size_legacy ..]; - if (data2.len < header_size) return error.BadSize; - - const magic = data2[0..4]; - const version = data2[4]; - if (!std.mem.eql(u8, magic, "TZif")) return error.BadHeader; - if (version != '2' and version != '3') return error.BadVersion; - - const isutcnt = std.mem.readIntBig(u32, data2[20..24]); - const isstdcnt = std.mem.readIntBig(u32, data2[24..28]); - const leapcnt = std.mem.readIntBig(u32, data2[28..32]); - const timecnt = std.mem.readIntBig(u32, data2[32..36]); - const typecnt = std.mem.readIntBig(u32, data2[36..40]); - const charcnt = std.mem.readIntBig(u32, data2[40..44]); - - if (isstdcnt != 0 and isstdcnt != typecnt) return error.Malformed; // rfc8536: isstdcnt [...] MUST either be zero or equal to "typecnt" - if (isutcnt != 0 and isutcnt != typecnt) return error.Malformed; // rfc8536: isutcnt [...] MUST either be zero or equal to "typecnt" - if (typecnt == 0) return error.Malformed; // rfc8536: typecnt [...] MUST NOT be zero - if (charcnt == 0) return error.Malformed; // rfc8536: charcnt [...] MUST NOT be zero - - const data_block_size = timecnt * 9 + typecnt * 6 + charcnt + leapcnt * 12 + isstdcnt + isutcnt; - if (data2.len < header_size + data_block_size) return error.BadSize; - - var leapseconds = try allocator.alloc(Leapsecond, leapcnt); + pub fn parse(allocator: std.mem.Allocator, reader: anytype) !Tz { + _ = allocator; + const Header = extern struct { + magic: [4]u8, + version: u8, + reserved: [15]u8, + }; + + const Counts = extern struct { + isutcnt: u32, + isstdcnt: u32, + leapcnt: u32, + timecnt: u32, + typecnt: u32, + charcnt: u32, + }; + + // Parse and skip the legacy header and data + { + const header = try reader.readStruct(Header); + if (!std.mem.eql(u8, &header.magic, "TZif")) return error.BadHeader; + if (header.version == 0) return error.UnsupportedLegacyFormat; + if (header.version != '2' and header.version != '3') return error.BadVersion; + + var counts = try reader.readStruct(Counts); + if (builtin.target.cpu.arch.endian() != std.builtin.Endian.Big) { + std.mem.bswapAllFields(Counts, &counts); + } + + const skipv = counts.timecnt * 5 + counts.typecnt * 6 + counts.charcnt + counts.leapcnt * 8 + counts.isstdcnt + counts.isutcnt; + try reader.skipBytes(skipv, .{}); + } + + const header = try reader.readStruct(Header); + if (!std.mem.eql(u8, &header.magic, "TZif")) return error.BadHeader; + if (header.version != '2' and header.version != '3') return error.BadVersion; + + var counts = try reader.readStruct(Counts); + if (builtin.target.cpu.arch.endian() != std.builtin.Endian.Big) { + std.mem.bswapAllFields(Counts, &counts); + } + + if (counts.isstdcnt != 0 and counts.isstdcnt != counts.typecnt) return error.Malformed; // rfc8536: isstdcnt [...] MUST either be zero or equal to "typecnt" + if (counts.isutcnt != 0 and counts.isutcnt != counts.typecnt) return error.Malformed; // rfc8536: isutcnt [...] MUST either be zero or equal to "typecnt" + if (counts.typecnt == 0) return error.Malformed; // rfc8536: typecnt [...] MUST NOT be zero + if (counts.charcnt == 0) return error.Malformed; // rfc8536: charcnt [...] MUST NOT be zero + if (counts.charcnt > 256 + 6) return error.Malformed; // Not explicitly banned by rfc8536 but nonsensical + + var leapseconds = try allocator.alloc(Leapsecond, counts.leapcnt); errdefer allocator.free(leapseconds); - var transitions = try allocator.alloc(Transition, timecnt); + var transitions = try allocator.alloc(Transition, counts.timecnt); errdefer allocator.free(transitions); - var timetypes = try allocator.alloc(Timetype, typecnt); + var timetypes = try allocator.alloc(Timetype, counts.typecnt); errdefer allocator.free(timetypes); - var p: usize = header_size; - - // First, parse timezone designators ahead of time so that we can reject malformed files early - const designators = data2[header_size + timecnt * 9 + typecnt * 6 .. header_size + timecnt * 9 + typecnt * 6 + charcnt]; - if (designators[designators.len - 1] != 0) return error.Malformed; // rfc8536: charcnt [...] includes the trailing NUL (0x00) octet - // Parse transition types var i: usize = 0; - while (i < timecnt) : (i += 1) { - transitions[i].ts = std.mem.readIntSliceBig(i64, data2[p .. p + 8]); - p += 8; + while (i < counts.timecnt) : (i += 1) { + transitions[i].ts = try reader.readIntBig(i64); } i = 0; - while (i < timecnt) : (i += 1) { - const tt = data2[p]; + while (i < counts.timecnt) : (i += 1) { + const tt = try reader.readByte(); if (tt >= timetypes.len) return error.Malformed; // rfc8536: Each type index MUST be in the range [0, "typecnt" - 1] transitions[i].timetype = &timetypes[tt]; - p += 1; } // Parse time types i = 0; - while (i < typecnt) : (i += 1) { - const offset = std.mem.readIntSliceBig(i32, data2[p .. p + 4]); + while (i < counts.typecnt) : (i += 1) { + const offset = try reader.readIntBig(i32); if (offset < -2147483648) return error.Malformed; // rfc8536: utoff [...] MUST NOT be -2**31 - const dst = data2[p + 4]; + const dst = try reader.readByte(); if (dst != 0 and dst != 1) return error.Malformed; // rfc8536: (is)dst [...] The value MUST be 0 or 1. - const idx = data2[p + 5]; - if (idx > designators.len - 1) return error.Malformed; // rfc8536: (desig)idx [...] Each index MUST be in the range [0, "charcnt" - 1] - - const name = std.mem.sliceTo(designators[idx..], 0); - - // We are mandating the "SHOULD" 6-character limit so we can pack the struct better, and to conform to POSIX. - if (name.len > 6) return error.Malformed; // rfc8536: Time zone designations SHOULD consist of at least three (3) and no more than six (6) ASCII characters. - + const idx = try reader.readByte(); + if (idx > counts.charcnt - 1) return error.Malformed; // rfc8536: (desig)idx [...] Each index MUST be in the range [0, "charcnt" - 1] timetypes[i] = .{ .offset = offset, .flags = dst, .name_data = undefined, }; - std.mem.copy(u8, timetypes[i].name_data[0..], name); - timetypes[i].name_data[name.len] = 0; - - p += 6; + // Temporarily cache idx in name_data to be processed after we've read the designator names below + timetypes[i].name_data[0] = idx; } - // Skip the designators we got earlier - p += charcnt; + var designators_data: [256 + 6]u8 = undefined; + try reader.readNoEof(designators_data[0..counts.charcnt]); + const designators = designators_data[0..counts.charcnt]; + if (designators[designators.len - 1] != 0) return error.Malformed; // rfc8536: charcnt [...] includes the trailing NUL (0x00) octet + + // Iterate through the timetypes again, setting the designator names + for (timetypes) |*tt| { + const name = std.mem.sliceTo(designators[tt.name_data[0]..], 0); + // We are mandating the "SHOULD" 6-character limit so we can pack the struct better, and to conform to POSIX. + if (name.len > 6) return error.Malformed; // rfc8536: Time zone designations SHOULD consist of at least three (3) and no more than six (6) ASCII characters. + std.mem.copy(u8, tt.name_data[0..], name); + tt.name_data[name.len] = 0; + } // Parse leap seconds i = 0; - while (i < leapcnt) : (i += 1) { - const occur = std.mem.readIntSliceBig(i64, data2[p .. p + 8]); + while (i < counts.leapcnt) : (i += 1) { + const occur = try reader.readIntBig(i64); if (occur < 0) return error.Malformed; // rfc8536: occur [...] MUST be nonnegative if (i > 0 and leapseconds[i - 1].occurrence + 2419199 > occur) return error.Malformed; // rfc8536: occur [...] each later value MUST be at least 2419199 greater than the previous value if (occur > std.math.maxInt(i48)) return error.Malformed; // Unreasonably far into the future - const corr = std.mem.readIntSliceBig(i32, data2[p + 8 .. p + 12]); + const corr = try reader.readIntBig(i32); if (i == 0 and corr != -1 and corr != 1) return error.Malformed; // rfc8536: The correction value in the first leap-second record, if present, MUST be either one (1) or minus one (-1) - if (i > 0 and leapseconds[i - 1].correction != corr + 1 and leapseconds[i - 1].correction != corr - 1) return error.Malformed; // rfc8536: The correction values in adjacent leap- second records MUST differ by exactly one (1) + if (i > 0 and leapseconds[i - 1].correction != corr + 1 and leapseconds[i - 1].correction != corr - 1) return error.Malformed; // rfc8536: The correction values in adjacent leap-second records MUST differ by exactly one (1) if (corr > std.math.maxInt(i16)) return error.Malformed; // Unreasonably large correction leapseconds[i] = .{ .occurrence = @intCast(i48, occur), .correction = @intCast(i16, corr), }; - p += 12; } // Parse standard/wall indicators i = 0; - while (i < isstdcnt) : (i += 1) { - const stdtime = data2[p]; + while (i < counts.isstdcnt) : (i += 1) { + const stdtime = try reader.readByte(); if (stdtime == 1) { timetypes[i].flags |= 0x02; } - p += 1; } // Parse UT/local indicators i = 0; - while (i < isutcnt) : (i += 1) { - const ut = data2[p]; + while (i < counts.isutcnt) : (i += 1) { + const ut = try reader.readByte(); if (ut == 1) { timetypes[i].flags |= 0x04; if (!timetypes[i].standardTimeIndicator()) return error.Malformed; // rfc8536: standard/wall value MUST be one (1) if the UT/local value is one (1) } - p += 1; } + if ((try reader.readByte()) != '\n') return error.Malformed; // An rfc8536 footer must start with a newline + // Footer - if (data2[p..].len < 2) return error.Malformed; // rfc8536 requires at least 2 newlines - if (data2[p] != '\n') return error.Malformed; // Not a rfc8536 footer - const footer_end = std.mem.indexOfScalar(u8, data2[p + 1 ..], '\n') orelse return error.Malformed; // No 2nd rfc8536 newline - const footer = try allocator.dupe(u8, data2[p + 1 .. p + 1 + footer_end]); - errdefer allocator.free(footer); + var footerdata_buf: [128]u8 = undefined; + const footer = reader.readUntilDelimiter(&footerdata_buf, '\n') catch |err| switch (err) { + error.StreamTooLong => return error.OverlargeFooter, // Read more than 128 bytes, much larger than any reasonable POSIX TZ string + else => return err, + }; + + const footer_dup = try allocator.dupe(u8, footer); + errdefer allocator.free(footer_dup); return Tz{ .allocator = allocator, .transitions = transitions, .timetypes = timetypes, .leapseconds = leapseconds, - .footer = footer, + .footer = footer_dup, }; } @@ -205,14 +208,27 @@ pub const Tz = struct { } }; -test "parse" { - // Asia/Tokyo is good for embedding, as Japan only had DST for a short while during the US occupation +test "slim" { const data = @embedFile("tz/asia_tokyo.tzif"); - var tz = try Tz.parse(std.testing.allocator, data); + var in_stream = std.io.fixedBufferStream(data); + + var tz = try std.Tz.parse(std.testing.allocator, in_stream.reader()); defer tz.deinit(); try std.testing.expectEqual(tz.transitions.len, 9); try std.testing.expect(std.mem.eql(u8, tz.transitions[3].timetype.name(), "JDT")); - try std.testing.expectEqual(tz.transitions[5].ts, -620298000); // 1950-05-06 15:00:00 (UTC) - try std.testing.expectEqual(tz.leapseconds[13].occurrence, 567993613); // 1988-01-01 00:00:13 (IAT) + try std.testing.expectEqual(tz.transitions[5].ts, -620298000); // 1950-05-06 15:00:00 UTC + try std.testing.expectEqual(tz.leapseconds[13].occurrence, 567993613); // 1988-01-01 00:00:00 UTC (+23s in TAI, and +13 in the data since it doesn't store the initial 10 second offset) +} + +test "fat" { + const data = @embedFile("tz/antarctica_davis.tzif"); + var in_stream = std.io.fixedBufferStream(data); + + var tz = try std.Tz.parse(std.testing.allocator, in_stream.reader()); + defer tz.deinit(); + + try std.testing.expectEqual(tz.transitions.len, 8); + try std.testing.expect(std.mem.eql(u8, tz.transitions[3].timetype.name(), "+05")); + try std.testing.expectEqual(tz.transitions[4].ts, 1268251224); // 2010-03-10 20:00:00 UTC } diff --git a/lib/std/tz/antarctica_davis.tzif b/lib/std/tz/antarctica_davis.tzif new file mode 100644 index 0000000000000000000000000000000000000000..662aa0098a8284e11dfee3ccd44dcaf8dcba34e9 GIT binary patch literal 837 zcmb7?y-Nad9Eb0G_tZS!Qj>@vi8A9d$|Wig^eRXp0|_UGUbg5TFrthoA~KSqmnDK~ zYpK7XA!rMirVyfNs-VH^`-#3wO&$Dr9^~%O&trCKZaOCXxeW9PPKb`>$L+O)Vc|XX zAlFkD!f4g2j#WN$<7ek;I`MH|)HF>LM4euz*VFyqS79FSNM`T`cvMk@h!UsMCm1es zxJNXeJc~q~xzw^m)LUmJ@VvpMmX?V|N5r;5G&$`Sy&}n+_$IrURK}n%^Fei zFV-cZEm-JTC)%6CPj;fCwfg8FHiVB3H;K;n;+>NycV^zUh_0?!hl}X$_T;yT9;Mdi zA$ohS)g7X*e?#^W8xvQdU7~*^Z>|sn>DO|A7@RCe_lQlo+2lSEd*Szp*?;Ux(`nci z#J(Ul#)X|hYz<;>5SxQVy7xc3;~Cx_&ili;0h~L)xdohiz_|&WyTG{(ocqAJ5u7{0 xxfPsy!MPcnyTQ2~ocqDKA)GtHxh0%?!nrA&yTZ9GocqFmH Date: Fri, 31 Dec 2021 22:57:06 +0000 Subject: [PATCH 4/5] Remove a no-op line that was left in by mistake --- lib/std/tz.zig | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/std/tz.zig b/lib/std/tz.zig index 528150d1a2f3..f1d19601efc4 100644 --- a/lib/std/tz.zig +++ b/lib/std/tz.zig @@ -41,7 +41,6 @@ pub const Tz = struct { footer: []const u8, pub fn parse(allocator: std.mem.Allocator, reader: anytype) !Tz { - _ = allocator; const Header = extern struct { magic: [4]u8, version: u8, From a54788ba7af64e59d3fbaf8951e3164f1347a2f4 Mon Sep 17 00:00:00 2001 From: Jens Goldberg Date: Sat, 1 Jan 2022 12:47:08 +0000 Subject: [PATCH 5/5] Support legacy TZ format, expose header struct to a potential writer --- lib/std/tz.zig | 137 +++++++++++++++++++-------------- lib/std/tz/europe_vatican.tzif | Bin 0 -> 951 bytes 2 files changed, 78 insertions(+), 59 deletions(-) create mode 100644 lib/std/tz/europe_vatican.tzif diff --git a/lib/std/tz.zig b/lib/std/tz.zig index f1d19601efc4..d505a790b72f 100644 --- a/lib/std/tz.zig +++ b/lib/std/tz.zig @@ -38,70 +38,71 @@ pub const Tz = struct { transitions: []const Transition, timetypes: []const Timetype, leapseconds: []const Leapsecond, - footer: []const u8, + footer: ?[]const u8, - pub fn parse(allocator: std.mem.Allocator, reader: anytype) !Tz { - const Header = extern struct { - magic: [4]u8, - version: u8, - reserved: [15]u8, - }; - - const Counts = extern struct { + const Header = extern struct { + magic: [4]u8, + version: u8, + reserved: [15]u8, + counts: extern struct { isutcnt: u32, isstdcnt: u32, leapcnt: u32, timecnt: u32, typecnt: u32, charcnt: u32, - }; + }, + }; - // Parse and skip the legacy header and data - { - const header = try reader.readStruct(Header); - if (!std.mem.eql(u8, &header.magic, "TZif")) return error.BadHeader; - if (header.version == 0) return error.UnsupportedLegacyFormat; - if (header.version != '2' and header.version != '3') return error.BadVersion; + pub fn parse(allocator: std.mem.Allocator, reader: anytype) !Tz { + var legacy_header = try reader.readStruct(Header); + if (!std.mem.eql(u8, &legacy_header.magic, "TZif")) return error.BadHeader; + if (legacy_header.version != 0 and legacy_header.version != '2' and legacy_header.version != '3') return error.BadVersion; - var counts = try reader.readStruct(Counts); - if (builtin.target.cpu.arch.endian() != std.builtin.Endian.Big) { - std.mem.bswapAllFields(Counts, &counts); - } + if (builtin.target.cpu.arch.endian() != std.builtin.Endian.Big) { + std.mem.bswapAllFields(@TypeOf(legacy_header.counts), &legacy_header.counts); + } - const skipv = counts.timecnt * 5 + counts.typecnt * 6 + counts.charcnt + counts.leapcnt * 8 + counts.isstdcnt + counts.isutcnt; + if (legacy_header.version == 0) { + return parseBlock(allocator, reader, legacy_header, true); + } else { + // If the format is modern, just skip over the legacy data + const skipv = legacy_header.counts.timecnt * 5 + legacy_header.counts.typecnt * 6 + legacy_header.counts.charcnt + legacy_header.counts.leapcnt * 8 + legacy_header.counts.isstdcnt + legacy_header.counts.isutcnt; try reader.skipBytes(skipv, .{}); - } - const header = try reader.readStruct(Header); - if (!std.mem.eql(u8, &header.magic, "TZif")) return error.BadHeader; - if (header.version != '2' and header.version != '3') return error.BadVersion; + var header = try reader.readStruct(Header); + if (!std.mem.eql(u8, &header.magic, "TZif")) return error.BadHeader; + if (header.version != '2' and header.version != '3') return error.BadVersion; + if (builtin.target.cpu.arch.endian() != std.builtin.Endian.Big) { + std.mem.bswapAllFields(@TypeOf(header.counts), &header.counts); + } - var counts = try reader.readStruct(Counts); - if (builtin.target.cpu.arch.endian() != std.builtin.Endian.Big) { - std.mem.bswapAllFields(Counts, &counts); + return parseBlock(allocator, reader, header, false); } + } - if (counts.isstdcnt != 0 and counts.isstdcnt != counts.typecnt) return error.Malformed; // rfc8536: isstdcnt [...] MUST either be zero or equal to "typecnt" - if (counts.isutcnt != 0 and counts.isutcnt != counts.typecnt) return error.Malformed; // rfc8536: isutcnt [...] MUST either be zero or equal to "typecnt" - if (counts.typecnt == 0) return error.Malformed; // rfc8536: typecnt [...] MUST NOT be zero - if (counts.charcnt == 0) return error.Malformed; // rfc8536: charcnt [...] MUST NOT be zero - if (counts.charcnt > 256 + 6) return error.Malformed; // Not explicitly banned by rfc8536 but nonsensical + fn parseBlock(allocator: std.mem.Allocator, reader: anytype, header: Header, legacy: bool) !Tz { + if (header.counts.isstdcnt != 0 and header.counts.isstdcnt != header.counts.typecnt) return error.Malformed; // rfc8536: isstdcnt [...] MUST either be zero or equal to "typecnt" + if (header.counts.isutcnt != 0 and header.counts.isutcnt != header.counts.typecnt) return error.Malformed; // rfc8536: isutcnt [...] MUST either be zero or equal to "typecnt" + if (header.counts.typecnt == 0) return error.Malformed; // rfc8536: typecnt [...] MUST NOT be zero + if (header.counts.charcnt == 0) return error.Malformed; // rfc8536: charcnt [...] MUST NOT be zero + if (header.counts.charcnt > 256 + 6) return error.Malformed; // Not explicitly banned by rfc8536 but nonsensical - var leapseconds = try allocator.alloc(Leapsecond, counts.leapcnt); + var leapseconds = try allocator.alloc(Leapsecond, header.counts.leapcnt); errdefer allocator.free(leapseconds); - var transitions = try allocator.alloc(Transition, counts.timecnt); + var transitions = try allocator.alloc(Transition, header.counts.timecnt); errdefer allocator.free(transitions); - var timetypes = try allocator.alloc(Timetype, counts.typecnt); + var timetypes = try allocator.alloc(Timetype, header.counts.typecnt); errdefer allocator.free(timetypes); // Parse transition types var i: usize = 0; - while (i < counts.timecnt) : (i += 1) { - transitions[i].ts = try reader.readIntBig(i64); + while (i < header.counts.timecnt) : (i += 1) { + transitions[i].ts = if (legacy) try reader.readIntBig(i32) else try reader.readIntBig(i64); } i = 0; - while (i < counts.timecnt) : (i += 1) { + while (i < header.counts.timecnt) : (i += 1) { const tt = try reader.readByte(); if (tt >= timetypes.len) return error.Malformed; // rfc8536: Each type index MUST be in the range [0, "typecnt" - 1] transitions[i].timetype = &timetypes[tt]; @@ -109,13 +110,13 @@ pub const Tz = struct { // Parse time types i = 0; - while (i < counts.typecnt) : (i += 1) { + while (i < header.counts.typecnt) : (i += 1) { const offset = try reader.readIntBig(i32); if (offset < -2147483648) return error.Malformed; // rfc8536: utoff [...] MUST NOT be -2**31 const dst = try reader.readByte(); if (dst != 0 and dst != 1) return error.Malformed; // rfc8536: (is)dst [...] The value MUST be 0 or 1. const idx = try reader.readByte(); - if (idx > counts.charcnt - 1) return error.Malformed; // rfc8536: (desig)idx [...] Each index MUST be in the range [0, "charcnt" - 1] + if (idx > header.counts.charcnt - 1) return error.Malformed; // rfc8536: (desig)idx [...] Each index MUST be in the range [0, "charcnt" - 1] timetypes[i] = .{ .offset = offset, .flags = dst, @@ -127,8 +128,8 @@ pub const Tz = struct { } var designators_data: [256 + 6]u8 = undefined; - try reader.readNoEof(designators_data[0..counts.charcnt]); - const designators = designators_data[0..counts.charcnt]; + try reader.readNoEof(designators_data[0..header.counts.charcnt]); + const designators = designators_data[0..header.counts.charcnt]; if (designators[designators.len - 1] != 0) return error.Malformed; // rfc8536: charcnt [...] includes the trailing NUL (0x00) octet // Iterate through the timetypes again, setting the designator names @@ -142,8 +143,8 @@ pub const Tz = struct { // Parse leap seconds i = 0; - while (i < counts.leapcnt) : (i += 1) { - const occur = try reader.readIntBig(i64); + while (i < header.counts.leapcnt) : (i += 1) { + const occur: i64 = if (legacy) try reader.readIntBig(i32) else try reader.readIntBig(i64); if (occur < 0) return error.Malformed; // rfc8536: occur [...] MUST be nonnegative if (i > 0 and leapseconds[i - 1].occurrence + 2419199 > occur) return error.Malformed; // rfc8536: occur [...] each later value MUST be at least 2419199 greater than the previous value if (occur > std.math.maxInt(i48)) return error.Malformed; // Unreasonably far into the future @@ -161,7 +162,7 @@ pub const Tz = struct { // Parse standard/wall indicators i = 0; - while (i < counts.isstdcnt) : (i += 1) { + while (i < header.counts.isstdcnt) : (i += 1) { const stdtime = try reader.readByte(); if (stdtime == 1) { timetypes[i].flags |= 0x02; @@ -170,7 +171,7 @@ pub const Tz = struct { // Parse UT/local indicators i = 0; - while (i < counts.isutcnt) : (i += 1) { + while (i < header.counts.isutcnt) : (i += 1) { const ut = try reader.readByte(); if (ut == 1) { timetypes[i].flags |= 0x04; @@ -178,29 +179,34 @@ pub const Tz = struct { } } - if ((try reader.readByte()) != '\n') return error.Malformed; // An rfc8536 footer must start with a newline - // Footer - var footerdata_buf: [128]u8 = undefined; - const footer = reader.readUntilDelimiter(&footerdata_buf, '\n') catch |err| switch (err) { - error.StreamTooLong => return error.OverlargeFooter, // Read more than 128 bytes, much larger than any reasonable POSIX TZ string - else => return err, - }; - - const footer_dup = try allocator.dupe(u8, footer); - errdefer allocator.free(footer_dup); + var footer: ?[]u8 = null; + if (!legacy) { + if ((try reader.readByte()) != '\n') return error.Malformed; // An rfc8536 footer must start with a newline + var footerdata_buf: [128]u8 = undefined; + const footer_mem = reader.readUntilDelimiter(&footerdata_buf, '\n') catch |err| switch (err) { + error.StreamTooLong => return error.OverlargeFooter, // Read more than 128 bytes, much larger than any reasonable POSIX TZ string + else => return err, + }; + if (footer_mem.len != 0) { + footer = try allocator.dupe(u8, footer_mem); + } + } + errdefer if (footer) |ft| allocator.free(ft); return Tz{ .allocator = allocator, .transitions = transitions, .timetypes = timetypes, .leapseconds = leapseconds, - .footer = footer_dup, + .footer = footer, }; } pub fn deinit(self: *Tz) void { - self.allocator.free(self.footer); + if (self.footer) |footer| { + self.allocator.free(footer); + } self.allocator.free(self.leapseconds); self.allocator.free(self.transitions); self.allocator.free(self.timetypes); @@ -231,3 +237,16 @@ test "fat" { try std.testing.expect(std.mem.eql(u8, tz.transitions[3].timetype.name(), "+05")); try std.testing.expectEqual(tz.transitions[4].ts, 1268251224); // 2010-03-10 20:00:00 UTC } + +test "legacy" { + // Taken from Slackware 8.0, from 2001 + const data = @embedFile("tz/europe_vatican.tzif"); + var in_stream = std.io.fixedBufferStream(data); + + var tz = try std.Tz.parse(std.testing.allocator, in_stream.reader()); + defer tz.deinit(); + + try std.testing.expectEqual(tz.transitions.len, 170); + try std.testing.expect(std.mem.eql(u8, tz.transitions[69].timetype.name(), "CET")); + try std.testing.expectEqual(tz.transitions[123].ts, 1414285200); // 2014-10-26 01:00:00 UTC +} diff --git a/lib/std/tz/europe_vatican.tzif b/lib/std/tz/europe_vatican.tzif new file mode 100644 index 0000000000000000000000000000000000000000..fe55064d1d4788234e64254137fb507d4d68c291 GIT binary patch literal 951 zcmchWeN4=89LImxIVH)bs6&^KROF#&r95=57AFrKlF}jN;he9Nl&3sDt=St*YZVbU zk1;D|g?Y%b>ZBY(bCc)Hz8jX*q1$YR`Pcis*Jtm&_x|^zieHi<{H?B?l?97Vbz;fg z@`Lp}-}smpR95rCvI>4^_cwO}sy!)%;i#PM z?hl|k>m+rZHiF&LIa9r|o;Qs3#r^1dGP<_12j02-LCYpQ>UNOY{L6S-<1JIWte&-B z)|)yyN|`xtm+9NfI{tmjakWr+fr}RGR9h9qGjYTk)@6G-mkir(>Z(1!th=Y0Y!so~ zrX|{BJ7PAotq$RK_94vfv>nWt-T1Y=$MW5z#=-W_)emD2QrQ!<7&XkaA!C~s34RTw*nFeTdG%#Ty4N|Je zJxoD^eS*kC=1D{B`jDqY4$sd4@OtS1Z=*AYR$60N=@`s_WJp;?63U=38tD9n_`V3v5~SFaA=MQ~eUppD zW*wGPWnyWQ3d>H+h3583TD~hBT74|7SnLM2Bal{34