From f0c499593e02c1eee237aeb6edb82d60b744a229 Mon Sep 17 00:00:00 2001 From: yuankunzhang Date: Sun, 21 Sep 2025 19:42:25 +0800 Subject: [PATCH] feat: add support for `TZ="timezone"` date spec --- src/items/builder.rs | 41 +++-- src/items/mod.rs | 86 ++++++++--- src/items/timezone.rs | 340 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 435 insertions(+), 32 deletions(-) create mode 100644 src/items/timezone.rs diff --git a/src/items/builder.rs b/src/items/builder.rs index da74e84..b98db6a 100644 --- a/src/items/builder.rs +++ b/src/items/builder.rs @@ -18,6 +18,7 @@ pub(crate) struct DateTimeBuilder { time: Option, weekday: Option, offset: Option, + timezone: Option, relative: Vec, } @@ -33,6 +34,20 @@ impl DateTimeBuilder { self } + /// Sets the timezone rule for the builder. + /// + /// By default, the builder uses the time zone rules indicated by the `TZ` + /// environment variable, or the system default rules if `TZ` is not set. + /// This method allows overriding the time zone rules. + fn set_timezone(mut self, tz: jiff::tz::TimeZone) -> Result { + if self.timezone.is_some() { + return Err("timezone rule cannot appear more than once"); + } + + self.timezone = Some(tz); + Ok(self) + } + /// Sets a timestamp value. Timestamp values are exclusive to other date/time /// items (date, time, weekday, timezone, relative adjustments). pub(super) fn set_timestamp(mut self, ts: epoch::Timestamp) -> Result { @@ -51,7 +66,7 @@ impl DateTimeBuilder { Ok(self) } - pub(super) fn set_date(mut self, date: date::Date) -> Result { + fn set_date(mut self, date: date::Date) -> Result { if self.timestamp.is_some() { return Err("timestamp cannot be combined with other date/time items"); } else if self.date.is_some() { @@ -62,7 +77,7 @@ impl DateTimeBuilder { Ok(self) } - pub(super) fn set_time(mut self, time: time::Time) -> Result { + fn set_time(mut self, time: time::Time) -> Result { if self.timestamp.is_some() { return Err("timestamp cannot be combined with other date/time items"); } else if self.time.is_some() { @@ -75,7 +90,7 @@ impl DateTimeBuilder { Ok(self) } - pub(super) fn set_weekday(mut self, weekday: weekday::Weekday) -> Result { + fn set_weekday(mut self, weekday: weekday::Weekday) -> Result { if self.timestamp.is_some() { return Err("timestamp cannot be combined with other date/time items"); } else if self.weekday.is_some() { @@ -86,7 +101,7 @@ impl DateTimeBuilder { Ok(self) } - pub(super) fn set_offset(mut self, timezone: offset::Offset) -> Result { + fn set_offset(mut self, timezone: offset::Offset) -> Result { if self.timestamp.is_some() { return Err("timestamp cannot be combined with other date/time items"); } else if self.offset.is_some() @@ -99,10 +114,7 @@ impl DateTimeBuilder { Ok(self) } - pub(super) fn push_relative( - mut self, - relative: relative::Relative, - ) -> Result { + fn push_relative(mut self, relative: relative::Relative) -> Result { if self.timestamp.is_some() { return Err("timestamp cannot be combined with other date/time items"); } @@ -117,7 +129,7 @@ impl DateTimeBuilder { /// If a date is already set but lacks a year, the number is interpreted as /// a year. Otherwise, it's interpreted as a time in HHMM, HMM, HH, or H /// format. - pub(super) fn set_pure(mut self, pure: String) -> Result { + fn set_pure(mut self, pure: String) -> Result { if self.timestamp.is_some() { return Err("timestamp cannot be combined with other date/time items"); } @@ -149,7 +161,11 @@ impl DateTimeBuilder { } pub(super) fn build(self) -> Result { - let base = self.base.unwrap_or(Zoned::now()); + let base = self.base.unwrap_or(if let Some(tz) = &self.timezone { + jiff::Timestamp::now().to_zoned(tz.clone()) + } else { + Zoned::now() + }); // If a timestamp is set, we use it to build the `Zoned` object. if let Some(ts) = self.timestamp { @@ -158,11 +174,11 @@ impl DateTimeBuilder { // If any of the following items are set, we truncate the time portion // of the base date to zero; otherwise, we use the base date as is. - let mut dt = if self.timestamp.is_none() - && self.date.is_none() + let mut dt = if self.date.is_none() && self.time.is_none() && self.weekday.is_none() && self.offset.is_none() + && self.timezone.is_none() { base } else { @@ -264,6 +280,7 @@ impl TryFrom> for DateTimeBuilder { Item::Weekday(weekday) => builder.set_weekday(weekday)?, Item::Offset(offset) => builder.set_offset(offset)?, Item::Relative(rel) => builder.push_relative(rel)?, + Item::TimeZone(tz) => builder.set_timezone(tz)?, Item::Pure(pure) => builder.set_pure(pure)?, } } diff --git a/src/items/mod.rs b/src/items/mod.rs index d6f9cfe..deb790c 100644 --- a/src/items/mod.rs +++ b/src/items/mod.rs @@ -25,6 +25,7 @@ //! - [`pure`] //! - [`relative`] //! - [`time`] +//! - [`timezone`] //! - [`weekday`] //! - [`year`] @@ -36,6 +37,7 @@ mod offset; mod pure; mod relative; mod time; +mod timezone; mod weekday; mod year; @@ -67,14 +69,14 @@ enum Item { Weekday(weekday::Weekday), Relative(relative::Relative), Offset(offset::Offset), + TimeZone(jiff::tz::TimeZone), Pure(String), } /// Parse a date and time string and build a `Zoned` object. The parsed result /// is resolved against the given base date and time. pub(crate) fn parse_at_date + Clone>(base: Zoned, input: S) -> Result { - let input = input.as_ref().to_ascii_lowercase(); - match parse(&mut input.as_str()) { + match parse(&mut input.as_ref()) { Ok(builder) => builder.set_base(base).build(), Err(e) => Err(e.into()), } @@ -83,8 +85,7 @@ pub(crate) fn parse_at_date + Clone>(base: Zoned, input: S) -> Res /// Parse a date and time string and build a `Zoned` object. The parsed result /// is resolved against the current local date and time. pub(crate) fn parse_at_local + Clone>(input: S) -> Result { - let input = input.as_ref().to_ascii_lowercase(); - match parse(&mut input.as_str()) { + match parse(&mut input.as_ref()) { Ok(builder) => builder.build(), // the builder uses current local date and time if no base is given. Err(e) => Err(e.into()), } @@ -95,12 +96,14 @@ pub(crate) fn parse_at_local + Clone>(input: S) -> Result + Clone>(input: S) -> Result ModalResult { trace("parse", alt((parse_timestamp, parse_items))).parse_next(input) } -/// Parse a timestamp. +/// Parse a standalone epoch timestamp (e.g., `@1758724019`). +/// +/// GNU `date` specifies that a timestamp item is *complete* and *must not* be +/// combined with any other date/time item. /// -/// From the GNU docs: +/// Notes: /// -/// > (Timestamp) Such a number cannot be combined with any other date item, as -/// > it specifies a complete timestamp. +/// - If a timezone rule (`TZ="..."`) appears at the beginning of the input, it +/// has no effect on the epoch value. We intentionally parse and ignore it. +/// - Trailing input (aside from optional whitespaces) is rejected. fn parse_timestamp(input: &mut &str) -> ModalResult { + // Parse and ignore an optional leading timezone rule. + let _ = timezone::parse(input); + trace( "parse_timestamp", + // Expect exactly one timestamp and then EOF (allowing trailing spaces). terminated(epoch::parse.map(Item::Timestamp), preceded(space, eof)), ) - .verify_map(|ts: Item| { - if let Item::Timestamp(ts) = ts { - DateTimeBuilder::new().set_timestamp(ts).ok() - } else { - None - } + .verify_map(|item: Item| match item { + Item::Timestamp(ts) => DateTimeBuilder::new().set_timestamp(ts).ok(), + _ => None, }) .parse_next(input) } -/// Parse a sequence of items. +/// Parse a sequence of date/time items, honoring an optional leading TZ rule. +/// +/// Notes: +/// +/// - If a timezone rule (`TZ="..."`) appears at the beginning of the input, +/// parse it first. The timezone rule is case-sensitive. +/// - After the optional timezone rule is parsed, we convert the input to +/// lowercase to allow case-insensitive parsing of the remaining items. +/// - Trailing input (aside from optional whitespaces) is rejected. fn parse_items(input: &mut &str) -> ModalResult { - let (items, _): (Vec, _) = trace( + // Parse and consume an optional leading timezone rule. + let tz = timezone::parse(input).map(Item::TimeZone); + + // Convert input to lowercase for case-insensitive parsing. + let lower = input.to_ascii_lowercase(); + let input = &mut lower.as_str(); + + let (mut items, _): (Vec, _) = trace( "parse_items", + // Parse zero or more items until EOF (allowing trailing spaces). repeat_till(0.., parse_item, preceded(space, eof)), ) .parse_next(input)?; + if let Ok(tz) = tz { + items.push(tz); + } + items.try_into().map_err(|e| expect_error(input, e)) } @@ -251,7 +279,7 @@ fn expect_error(input: &mut &str, reason: &'static str) -> ErrMode mod tests { use jiff::{civil::DateTime, tz::TimeZone, ToSpan, Zoned}; - use super::{parse, DateTimeBuilder}; + use super::*; fn at_date(builder: DateTimeBuilder, base: Zoned) -> Zoned { builder.set_base(base).build().unwrap() @@ -527,4 +555,22 @@ mod tests { assert_eq!(result.hour(), 1); assert_eq!(result.minute(), 0); } + + #[test] + fn timezone_rule() { + let parse_build = |mut s| parse(&mut s).unwrap().build().unwrap(); + + for (input, expected) in [ + ( + r#"TZ="Europe/Paris" 2025-01-02"#, + "2025-01-02 00:00:00[Europe/Paris]".parse().unwrap(), + ), + ( + r#"TZ="Europe/Paris" 2025-01-02 03:04:05"#, + "2025-01-02 03:04:05[Europe/Paris]".parse().unwrap(), + ), + ] { + assert_eq!(parse_build(input), expected, "{input}"); + } + } } diff --git a/src/items/timezone.rs b/src/items/timezone.rs new file mode 100644 index 0000000..0414ee8 --- /dev/null +++ b/src/items/timezone.rs @@ -0,0 +1,340 @@ +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! Parse a timezone item. The timezone item must be at the beginning of the +//! input string and in the `TZ="..."` format. +//! +//! From the GNU docs: +//! +//! > Normally, dates are interpreted using the rules of the current time zone, +//! > which in turn are specified by the ‘TZ’ environment variable, or by a +//! > system default if ‘TZ’ is not set. To specify a different set of default +//! > time zone rules that apply just to one date, start the date with a string +//! > of the form ‘TZ="RULE"’. The two quote characters (‘"’) must be present in +//! > the date, and any quotes or backslashes within RULE must be escaped by a +//! > backslash. + +use jiff::tz::{Offset, TimeZone}; +use winnow::{ + combinator::{alt, delimited, opt, preceded, repeat}, + stream::AsChar, + token::{one_of, take_while}, + ModalResult, Parser, +}; + +use super::primitive::{dec_uint, plus_or_minus}; + +pub(super) fn parse(input: &mut &str) -> ModalResult { + delimited("TZ=\"", preceded(opt(':'), alt((posix, iana))), '"').parse_next(input) +} + +/// Parse a posix (proleptic) timezone string (e.g., "UTC7", "JST-9"). +/// +/// TODO: This implementation is incomplete. It currently only parses the +/// `STDOFFSET` part of the format. +/// +/// From the GNU docs: +/// +/// > The proleptic format is: +/// > +/// > STDOFFSET[DST[OFFSET][,START[/TIME],END[/TIME]]] +/// > +/// > The STD string specifies the time zone abbreviation, which must be at +/// > least three bytes long. ... +/// > +/// > The OFFSET specifies the time value you must add to the local time to +/// > get a UTC value. It has syntax like: +/// > +/// > [+|-]HH[:MM[:SS]] +/// > +/// > This is positive if the local time zone is west of the Prime Meridian +/// > and negative if it is east; this is opposite from the usual convention +/// > that positive time zone offsets are east of the Prime Meridian. The +/// > hour HH must be between 0 and 24 and may be a single digit, and the +/// > minutes MM and seconds SS, if present, must be between 0 and 59. +fn posix(input: &mut &str) -> ModalResult { + (take_while(3.., AsChar::is_alpha), posix_offset) + .verify_map(|(_, offset)| Offset::from_seconds(offset).ok().map(|o| o.to_time_zone())) + .parse_next(input) +} + +/// Parse an IANA (geographical) timezone string (e.g., "Europe/Paris"). If the +/// string is not a valid IANA timezone name, the UTC timezone is returned. +/// +/// Compatibility notes: +/// +/// - The implementation uses `jiff::tz::TimeZone::get()` to resolve time zones. +/// Only canonical/aliased IANA names are accepted. Absolute file paths are +/// not supported. +/// - GNU `date` resolves time zones from the tzdata files under +/// `/usr/share/zoneinfo` (respecting `TZDIR`) and also accepts an absolute +/// path when the string starts with `/`. +/// +/// From the GNU docs: +/// +/// > If the format's CHARACTERS begin with ‘/’ it is an absolute file +/// > name; otherwise the library looks for the file +/// > ‘/usr/share/zoneinfo/CHARACTERS’. The ‘zoneinfo’ directory contains +/// > data files describing time zone rulesets in many different parts of the +/// > world. The names represent major cities, with subdirectories for +/// > geographical areas; for example, ‘America/New_York’, ‘Europe/London’, +/// > ‘Asia/Tokyo’. These data files are installed by the system +/// > administrator, who also sets ‘/etc/localtime’ to point to the data file +/// > for the local time zone ruleset. +fn iana(input: &mut &str) -> ModalResult { + repeat( + 0.., + alt(( + preceded('\\', one_of(['\\', '"'])).map(|c: char| c.to_string()), + take_while(1, |c| c != '"' && c != '\\').map(str::to_string), + )), + ) + .map(|parts: Vec| parts.concat()) + .map(|s| TimeZone::get(&s).unwrap_or(TimeZone::UTC)) + .parse_next(input) +} + +fn posix_offset(input: &mut &str) -> ModalResult { + let uint = dec_uint::; + + ( + opt(plus_or_minus), + alt(( + (uint, preceded(':', uint), preceded(':', uint)).map(|(h, m, s)| (h, m, s)), + (uint, preceded(':', uint)).map(|(h, m)| (h, m, 0)), + uint.map(|h| (h, 0, 0)), + )), + ) + .map(|(sign, (h, m, s))| { + // The sign is opposite from the usual convention: + // - Positive offsets are west of UTC. + // - Negative offsets are east of UTC. + let sign = if sign == Some('-') { 1 } else { -1 }; + + // - If hour is greater than 24, clamp it to 24. + // - If minute is greater than 59, clamp it to 59. + // - If second is greater than 59, clamp it to 59. + let h = h.min(24) as i32; + let m = m.min(59) as i32; + let s = s.min(59) as i32; + + sign * (h * 3600 + m * 60 + s) + }) + .parse_next(input) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn tz_rule() { + // empty string + for (input, expected) in [ + (r#"TZ="""#, "UTC"), + (r#"TZ=":""#, "UTC"), + (r#"TZ=" ""#, "UTC"), + (r#"TZ=": ""#, "UTC"), + ] { + let mut s = input; + assert_eq!( + parse(&mut s).unwrap().iana_name(), + Some(expected), + "{input}" + ); + } + + // iana + for (input, expected) in [ + (r#"TZ="Etc/Zulu""#, "Etc/Zulu"), + (r#"TZ=":Etc/Zulu""#, "Etc/Zulu"), + (r#"TZ="America/New_York""#, "America/New_York"), + (r#"TZ=":America/New_York""#, "America/New_York"), + (r#"TZ="Asia/Tokyo""#, "Asia/Tokyo"), + (r#"TZ=":Asia/Tokyo""#, "Asia/Tokyo"), + (r#"TZ="Unknown/Timezone""#, "UTC"), + (r#"TZ=":Unknown/Timezone""#, "UTC"), + ] { + let mut s = input; + assert_eq!( + parse(&mut s).unwrap().iana_name(), + Some(expected), + "{input}" + ); + } + + // posix + for (input, expected) in [ + (r#"TZ="UTC0""#, 0), + (r#"TZ=":UTC0""#, 0), + (r#"TZ="UTC+5""#, -5 * 3600), + (r#"TZ=":UTC+5""#, -5 * 3600), + (r#"TZ="UTC-5""#, 5 * 3600), + (r#"TZ=":UTC-5""#, 5 * 3600), + (r#"TZ="UTC+5:20""#, -(5 * 3600 + 20 * 60)), + (r#"TZ=":UTC+5:20""#, -(5 * 3600 + 20 * 60)), + (r#"TZ="UTC-5:20""#, 5 * 3600 + 20 * 60), + (r#"TZ=":UTC-5:20""#, 5 * 3600 + 20 * 60), + (r#"TZ="UTC+5:20:15""#, -(5 * 3600 + 20 * 60 + 15)), + (r#"TZ=":UTC+5:20:15""#, -(5 * 3600 + 20 * 60 + 15)), + (r#"TZ="UTC-5:20:15""#, 5 * 3600 + 20 * 60 + 15), + (r#"TZ=":UTC-5:20:15""#, 5 * 3600 + 20 * 60 + 15), + ] { + let mut s = input; + assert_eq!( + parse(&mut s).unwrap().to_fixed_offset().unwrap().seconds(), + expected, + "{input}" + ); + } + + // invalid + for input in [ + r#"UTC"#, // missing "TZ=" + r#"tz="UTC""#, // lowercase "tz" + r#"TZ=UTC"#, // missing quotes + ] { + let mut s = input; + assert!(parse(&mut s).is_err(), "{input}"); + } + } + + #[test] + fn parse_iana() { + for (input, expected) in [ + ("UTC", "UTC"), // utc timezone + ("Etc/Zulu", "Etc/Zulu"), // etc timezone + ("America/New_York", "America/New_York"), // named timezone + ("Asia/Tokyo", "Asia/Tokyo"), // named timezone + ("Unknown/Timezone", "UTC"), // unknown timezone + ] { + let mut s = input; + assert_eq!(iana(&mut s).unwrap().iana_name(), Some(expected), "{input}"); + } + } + + #[test] + fn parse_posix() { + let to_seconds = |input: &str| { + let mut s = input; + posix(&mut s).unwrap().to_fixed_offset().unwrap().seconds() + }; + + // hour + for (input, expected) in [ + ("UTC0", 0), + ("UTC+0", 0), + ("UTC-0", 0), + ("UTC000", 0), + ("UTC+5", -5 * 3600), + ("UTC-5", 5 * 3600), + ("ABC0", 0), + ("ABC+5", -5 * 3600), + ("ABC-5", 5 * 3600), + ] { + assert_eq!(to_seconds(input), expected, "{input}"); + } + + // hour:minute + for (input, expected) in [ + ("UTC0:0", 0), + ("UTC+0:0", 0), + ("UTC-0:0", 0), + ("UTC00:00", 0), + ("UTC+5:20", -(5 * 3600 + 20 * 60)), + ("UTC-5:20", 5 * 3600 + 20 * 60), + ("ABC0:0", 0), + ("ABC+5:20", -(5 * 3600 + 20 * 60)), + ("ABC-5:20", 5 * 3600 + 20 * 60), + ] { + assert_eq!(to_seconds(input), expected, "{input}"); + } + + // hour:minute:second + for (input, expected) in [ + ("UTC0:0:0", 0), + ("UTC+0:0:0", 0), + ("UTC-0:0:0", 0), + ("UTC00:00:00", 0), + ("UTC+5:20:15", -(5 * 3600 + 20 * 60 + 15)), + ("UTC-5:20:15", 5 * 3600 + 20 * 60 + 15), + ("ABC0:0:0", 0), + ("ABC+5:20:15", -(5 * 3600 + 20 * 60 + 15)), + ("ABC-5:20:15", 5 * 3600 + 20 * 60 + 15), + ] { + assert_eq!(to_seconds(input), expected, "{input}"); + } + + // invalid + for input in [ + "AB", // too short + "A1C", // not just letters + ] { + let mut s = input; + assert!(posix(&mut s).is_err(), "{input}"); + } + } + + #[test] + fn parse_posix_offset() { + // hour + for (input, expected) in [ + ("0", 0), // zero hour + ("00", 0), // zero hour, two digits + ("000", 0), // zero hour, three digits + ("+0", 0), // zero hour, explicit plus + ("-0", 0), // zero hour, explicit minus + ("5", -5 * 3600), // positive hour + ("-5", 5 * 3600), // negative hour + ("005", -5 * 3600), // positive hour with leading zeros + ("-05", 5 * 3600), // negative hour with leading zeros + ("25", -24 * 3600), // hour > 24, clamps to 24 + ("-25", 24 * 3600), // hour > 24, clamps to 24 + ] { + let mut s = input; + assert_eq!(posix_offset(&mut s).unwrap(), expected, "{input}"); + } + + // hour:minute + for (input, expected) in [ + ("0:0", 0), // zero hour and minute + ("00:00", 0), // zero hour and minute, two digits + ("000:000", 0), // zero hour and minute, three digits + ("+0:0", 0), // zero hour and minute, explicit plus + ("-0:0", 0), // zero hour and minute, explicit minus + ("5:20", -(5 * 3600 + 20 * 60)), // positive hour and minute + ("-5:20", 5 * 3600 + 20 * 60), // negative hour and minute + ("005:020", -(5 * 3600 + 20 * 60)), // positive hour and minute with leading zeros + ("-05:20", 5 * 3600 + 20 * 60), // negative hour and minute with leading zeros + ("25:20", -(24 * 3600 + 20 * 60)), // hour > 24, clamps to 24 + ("-25:20", 24 * 3600 + 20 * 60), // hour > 24, clamps to 24 + ("5:60", -(5 * 3600 + 59 * 60)), // minute > 59, clamps to 59 + ("-5:60", 5 * 3600 + 59 * 60), // minute > 59, clamps to 59 + ] { + let mut s = input; + assert_eq!(posix_offset(&mut s).unwrap(), expected, "{input}"); + } + + // hour:minute:second + for (input, expected) in [ + ("0:0:0", 0), // zero hour, minute, and second + ("00:00:00", 0), // zero hour, minute, and second, two digits + ("000:000:000", 0), // zero hour, minute, and second, three digits + ("+0:0:0", 0), // zero hour, minute, and second, explicit plus + ("-0:0:0", 0), // zero hour, minute, and second, explicit minus + ("5:20:15", -(5 * 3600 + 20 * 60 + 15)), // positive hour, minute, and second + ("-5:20:15", 5 * 3600 + 20 * 60 + 15), // negative hour, minute, and second + ("005:020:015", -(5 * 3600 + 20 * 60 + 15)), // positive hour, minute, and second with leading zeros + ("-05:20:15", 5 * 3600 + 20 * 60 + 15), // negative hour, minute, and second with leading zeros + ("25:20:15", -(24 * 3600 + 20 * 60 + 15)), // hour > 24, clamps to 24 + ("-25:20:15", 24 * 3600 + 20 * 60 + 15), // hour > 24, clamps to 24 + ("5:60:15", -(5 * 3600 + 59 * 60 + 15)), // minute > 59, clamps to 59 + ("-5:60:15", 5 * 3600 + 59 * 60 + 15), // minute > 59, clamps to 59 + ("5:20:60", -(5 * 3600 + 20 * 60 + 59)), // second > 59, clamps to 59 + ("-5:20:60", 5 * 3600 + 20 * 60 + 59), // second > 59, clamps to 59 + ] { + let mut s = input; + assert_eq!(posix_offset(&mut s).unwrap(), expected, "{input}"); + } + } +}