From 5c686bcfdfeccfefa4c64bb43fba69e03783e693 Mon Sep 17 00:00:00 2001 From: yuankunzhang Date: Tue, 12 Aug 2025 20:18:15 +0800 Subject: [PATCH] refactor: replace floating-point seconds with precise integer representation Replace f64-based timestamp and second parsing with structured types using separate second and nanosecond fields to eliminate floating-point precision issues. --- src/items/builder.rs | 25 ++++---- src/items/combined.rs | 3 +- src/items/epoch.rs | 138 +++++++++++++++++++++++++++++++++++++---- src/items/mod.rs | 2 +- src/items/primitive.rs | 7 ++- src/items/time.rs | 109 +++++++++++++++++++++++--------- 6 files changed, 226 insertions(+), 58 deletions(-) diff --git a/src/items/builder.rs b/src/items/builder.rs index dc07d11..f11d94c 100644 --- a/src/items/builder.rs +++ b/src/items/builder.rs @@ -3,7 +3,7 @@ use chrono::{DateTime, Datelike, FixedOffset, NaiveDate, TimeZone, Timelike}; -use super::{date, relative, time, timezone, weekday, year}; +use super::{date, epoch, relative, time, timezone, weekday, year}; /// The builder is used to construct a DateTime object from various components. /// The parser creates a `DateTimeBuilder` object with the parsed components, @@ -13,7 +13,7 @@ use super::{date, relative, time, timezone, weekday, year}; #[derive(Debug, Default)] pub(crate) struct DateTimeBuilder { base: Option>, - timestamp: Option, + timestamp: Option, date: Option, time: Option, weekday: Option, @@ -35,7 +35,7 @@ impl DateTimeBuilder { /// Sets a timestamp value. Timestamp values are exclusive to other date/time /// items (date, time, weekday, timezone, relative adjustments). - pub(super) fn set_timestamp(mut self, ts: f64) -> Result { + pub(super) fn set_timestamp(mut self, ts: epoch::Timestamp) -> Result { if self.timestamp.is_some() { return Err("timestamp cannot appear more than once"); } else if self.date.is_some() @@ -148,15 +148,15 @@ impl DateTimeBuilder { self.set_time(time) } - fn build_from_timestamp(ts: f64, tz: &FixedOffset) -> Option> { - // TODO: How to make the fract -> nanosecond conversion more precise? - // Maybe considering using the - // [rust_decimal](https://crates.io/crates/rust_decimal) crate? - match chrono::Utc.timestamp_opt(ts as i64, (ts.fract() * 10f64.powi(9)).round() as u32) { + fn build_from_timestamp( + ts: epoch::Timestamp, + tz: &FixedOffset, + ) -> Option> { + match chrono::Utc.timestamp_opt(ts.second, ts.nanosecond) { chrono::MappedLocalTime::Single(t) => Some(t.with_timezone(tz)), chrono::MappedLocalTime::Ambiguous(earliest, _latest) => { - // TODO: When there is a fold in the local time, which value - // do we choose? For now, we use the earliest one. + // When there is a fold in the local time, we use the earliest + // one. Some(earliest.with_timezone(tz)) } chrono::MappedLocalTime::None => None, // Invalid timestamp @@ -210,6 +210,7 @@ impl DateTimeBuilder { hour, minute, second, + nanosecond, ref offset, }) = self.time { @@ -224,8 +225,8 @@ impl DateTimeBuilder { dt.day(), hour, minute, - second as u32, - (second.fract() * 10f64.powi(9)).round() as u32, + second, + nanosecond, offset, )?; } diff --git a/src/items/combined.rs b/src/items/combined.rs index 79bb248..cce0a1a 100644 --- a/src/items/combined.rs +++ b/src/items/combined.rs @@ -55,7 +55,8 @@ mod tests { time: Time { hour: 10, minute: 10, - second: 55.0, + second: 55, + nanosecond: 0, offset: None, }, }); diff --git a/src/items/epoch.rs b/src/items/epoch.rs index 7a44a23..ca455d3 100644 --- a/src/items/epoch.rs +++ b/src/items/epoch.rs @@ -15,35 +15,147 @@ //! > ‘@1483228800’ represents 2017-01-01 00:00:00 UTC, and there is no way to //! > represent the intervening leap second 2016-12-31 23:59:60 UTC. -use winnow::{combinator::preceded, ModalResult, Parser}; +use winnow::{ + ascii::digit1, + combinator::{opt, preceded}, + token::one_of, + ModalResult, Parser, +}; -use super::primitive::{float, s}; +use super::primitive::{dec_uint, s}; -/// Parse a timestamp in the form of `@1234567890`. -pub fn parse(input: &mut &str) -> ModalResult { - s(preceded("@", float)).parse_next(input) +/// Represents a timestamp with nanosecond accuracy. +/// +/// # Invariants +/// +/// - `nanosecond` is always in the range of `0..1_000_000_000`. +/// - Negative timestamps are represented by a negative `second` value and a +/// positive `nanosecond` value. +#[derive(Debug, PartialEq)] +pub(crate) struct Timestamp { + pub(crate) second: i64, + pub(crate) nanosecond: u32, +} + +/// Parse a timestamp in the form of `1234567890` or `-1234567890.12345` or +/// `1234567890,12345`. +pub(crate) fn parse(input: &mut &str) -> ModalResult { + (s("@"), opt(s(one_of(['-', '+']))), sec_and_nsec) + .verify_map(|(_, sign, (sec, nsec))| { + let sec = i64::try_from(sec).ok()?; + let (second, nanosecond) = match (sign, nsec) { + (Some('-'), 0) => (-sec, 0), + // Truncate towards minus infinity. + (Some('-'), _) => ((-sec).checked_sub(1)?, 1_000_000_000 - nsec), + _ => (sec, nsec), + }; + Some(Timestamp { second, nanosecond }) + }) + .parse_next(input) +} + +/// Parse a second value in the form of `1234567890` or `1234567890.12345` or +/// `1234567890,12345`. +/// +/// The first part represents whole seconds. The optional second part represents +/// fractional seconds, parsed as a nanosecond value from up to 9 digits +/// (padded with zeros on the right if fewer digits are present). If the second +/// part is omitted, it defaults to 0 nanoseconds. +pub(super) fn sec_and_nsec(input: &mut &str) -> ModalResult<(u64, u32)> { + (s(dec_uint), opt(preceded(one_of(['.', ',']), digit1))) + .verify_map(|(sec, opt_nsec_str)| match opt_nsec_str { + Some(nsec_str) if nsec_str.len() >= 9 => Some((sec, nsec_str[..9].parse().ok()?)), + Some(nsec_str) => { + let multiplier = 10_u32.pow(9 - nsec_str.len() as u32); + Some((sec, nsec_str.parse::().ok()?.checked_mul(multiplier)?)) + } + None => Some((sec, 0)), + }) + .parse_next(input) } #[cfg(test)] mod tests { - use super::parse; + use super::*; + + #[test] + fn sec_and_nsec_test() { + let mut input = "1234567890"; + assert_eq!(sec_and_nsec(&mut input).unwrap(), (1234567890, 0)); - fn float_eq(a: f64, b: f64) -> bool { - (a - b).abs() < f64::EPSILON + let mut input = "1234567890.12345"; + assert_eq!(sec_and_nsec(&mut input).unwrap(), (1234567890, 123450000)); + + let mut input = "1234567890,12345"; + assert_eq!(sec_and_nsec(&mut input).unwrap(), (1234567890, 123450000)); + + let mut input = "1234567890.1234567890123"; + assert_eq!(sec_and_nsec(&mut input).unwrap(), (1234567890, 123456789)); } #[test] - fn float() { + fn timestamp() { let mut input = "@1234567890"; - assert!(float_eq(parse(&mut input).unwrap(), 1234567890.0)); + assert_eq!( + parse(&mut input).unwrap(), + Timestamp { + second: 1234567890, + nanosecond: 0, + } + ); + + let mut input = "@ 1234567890"; + assert_eq!( + parse(&mut input).unwrap(), + Timestamp { + second: 1234567890, + nanosecond: 0, + } + ); + + let mut input = "@ -1234567890"; + assert_eq!( + parse(&mut input).unwrap(), + Timestamp { + second: -1234567890, + nanosecond: 0, + } + ); + + let mut input = "@ - 1234567890"; + assert_eq!( + parse(&mut input).unwrap(), + Timestamp { + second: -1234567890, + nanosecond: 0, + } + ); let mut input = "@1234567890.12345"; - assert!(float_eq(parse(&mut input).unwrap(), 1234567890.12345)); + assert_eq!( + parse(&mut input).unwrap(), + Timestamp { + second: 1234567890, + nanosecond: 123450000, + } + ); let mut input = "@1234567890,12345"; - assert!(float_eq(parse(&mut input).unwrap(), 1234567890.12345)); + assert_eq!( + parse(&mut input).unwrap(), + Timestamp { + second: 1234567890, + nanosecond: 123450000, + } + ); let mut input = "@-1234567890.12345"; - assert_eq!(parse(&mut input).unwrap(), -1234567890.12345); + assert_eq!( + parse(&mut input).unwrap(), + Timestamp { + second: -1234567891, + nanosecond: 876550000, + } + ); } } diff --git a/src/items/mod.rs b/src/items/mod.rs index b04100e..49eae08 100644 --- a/src/items/mod.rs +++ b/src/items/mod.rs @@ -58,7 +58,7 @@ use crate::ParseDateTimeError; #[derive(PartialEq, Debug)] pub(crate) enum Item { - Timestamp(f64), + Timestamp(epoch::Timestamp), DateTime(combined::DateTime), Date(date::Date), Time(time::Time), diff --git a/src/items/primitive.rs b/src/items/primitive.rs index 5c351db..0d03896 100644 --- a/src/items/primitive.rs +++ b/src/items/primitive.rs @@ -3,8 +3,10 @@ //! Primitive combinators. +use std::str::FromStr; + use winnow::{ - ascii::{digit1, multispace0}, + ascii::{digit1, multispace0, Uint}, combinator::{alt, delimited, not, opt, peek, preceded, repeat, separated}, error::{ContextError, ParserError, StrContext, StrContextValue}, stream::AsChar, @@ -100,8 +102,9 @@ where /// /// See the rationale for `dec_int` for why we don't use /// `winnow::ascii::dec_uint`. -pub(super) fn dec_uint<'a, E>(input: &mut &'a str) -> winnow::Result +pub(super) fn dec_uint<'a, O, E>(input: &mut &'a str) -> winnow::Result where + O: Uint + FromStr, E: ParserError<&'a str>, { digit1 diff --git a/src/items/time.rs b/src/items/time.rs index 01e0172..56328bf 100644 --- a/src/items/time.rs +++ b/src/items/time.rs @@ -40,11 +40,12 @@ use winnow::{ combinator::{alt, opt, preceded}, error::ErrMode, - seq, ModalResult, Parser, + ModalResult, Parser, }; use super::{ - primitive::{colon, ctx_err, dec_uint, float, s}, + epoch::sec_and_nsec, + primitive::{colon, ctx_err, dec_uint, s}, timezone::{timezone_num, Offset}, }; @@ -52,7 +53,8 @@ use super::{ pub(crate) struct Time { pub hour: u32, pub minute: u32, - pub second: f64, + pub second: u32, + pub nanosecond: u32, pub offset: Option, } @@ -74,16 +76,24 @@ pub(super) fn iso(input: &mut &str) -> ModalResult