Skip to content

Commit

Permalink
Datetime continued (pola-rs#33)
Browse files Browse the repository at this point in the history
* support difftime to pl_duration

* difftime to pl_duraton test + round

* bump rpolars version

* fully support Datetime Date and lazy pl_date_range

* fix test on R devel 4.3.0, Date is now assumed UTC

* bump rust polars no corrections

* improve datetime and time handling + combine

* drop Ptime fn

* fix dox + print PTime

* fix PTime example

* roll back rust polars due to macos dll error during check

* try bump to latest

* try filter errors with rules, this should fail on size note

* try fix path

* cahce on failure + fix R script lines

* fix check path

* try fix path again

* ...path again

* ...again

* yeah now it works imma nutjob. Now ignore last note

* oups failed logic

* add back dt_combine

* fix win ld.exe include extra -lntdll -lpsapi -liphlpapi -lpdh -lpowrprof -loleaut32 -lnetapi32 -lsecur32

* add tests for combine

* year iso_year

* try bump rust-polars again to latest

* month quarter

* lots of dt functions

* test more dt function

* dt offset_by

* add rust Result-like functions

* with last

* dt$epoch

* tidying

* all remaining datetime methods + tidy docs

* oups refresh roxygen2
  • Loading branch information
sorhawell committed Jan 31, 2023
1 parent 638658e commit d4e8a92
Show file tree
Hide file tree
Showing 72 changed files with 3,514 additions and 190 deletions.
10 changes: 9 additions & 1 deletion .github/workflows/check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ jobs:
uses: Swatinem/rust-cache@v2
with:
workspaces: ./src/rust/
cache-on-failure: "true"

- uses: r-lib/actions/setup-pandoc@v2

Expand Down Expand Up @@ -126,7 +127,7 @@ jobs:
- uses: r-lib/actions/check-r-package@v2
with:
upload-snapshots: false
error-on: '"warning"'
error-on: '"never"' #errors are filtered by rules below

- name: print check install log linux / windows
if: runner.os == 'Windows'
Expand All @@ -148,3 +149,10 @@ jobs:
- name: print files
run: print(list.files("..",recursive = TRUE,full.names=TRUE))
shell: Rscript {0}

- name: raise remaining rcmdcheck errors
run: |
print(getwd());
source("./inst/misc/filter_rcmdcheck.R");
shell: Rscript {0}

1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ my.csv
test.csv
docs
.Rprofile
check
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rpolars
Title: Polars ported to R
Version: 0.4.2
Version: 0.4.3
Depends: R (>= 4.1.0)
Imports: utils, codetools
Authors@R:
Expand All @@ -27,6 +27,7 @@ Collate:
'utils.R'
'extendr-wrappers.R'
'after-wrappers.R'
'PTime.R'
'csv.R'
'dataframe__frame.R'
'datatype.R'
Expand All @@ -45,6 +46,7 @@ Collate:
'parquet.R'
'pkg-nanoarrow.R'
'rlang.R'
'rust_result.R'
'series__series.R'
'translation.R'
'vctrs.R'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ S3method(print,Expr)
S3method(print,GroupBy)
S3method(print,LazyFrame)
S3method(print,LazyGroupBy)
S3method(print,PTime)
S3method(print,RPolarsDataType)
S3method(print,Series)
export("%**%")
Expand Down
141 changes: 141 additions & 0 deletions R/PTime.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# PTime vectors denote n time units since midnight.

# PTime is loosly inspirred by data.table ITime but also allow encoding in s, ms, us, ns
# for s and ms integer32 or float64 is optional. For us and ns flaot64 is mandatory.

# A PTime vector is nothing but either an integer or double vector with class PTime
# and a attribute "tu" setting time unit.


#' Store Time in R
#' @name pl_PTime
#' @include after-wrappers.R
#'
#' @param x an integer or double vector of n epochs since midnight OR a char vector of char times
#' passed to as.POSIXct converted to seconds.
#' @param tu timeunit either "s","ms","us","ns"
#' @param fmt a format string passed to as.POSIXct format via ...
#'
#' @details
#'
#' PTime should probably be replaced with package nanotime or similar.
#'
#' base R is missing encodinging of Time since midnight "s" "ms", "us" and "ns". The latter
#' "ns" is the standard for the polars Time type.
#'
#' Use PTime to convert R doubles and integers and use as input to polars functions which needs a
#' time.
#'
#' Loosely inspired by data.table::ITime which is i32 only. PTime must support polars native
#' timeunit is nanoseconds. The R double(float64) can imitate a i64 ns with full precision within
#' the full range of 24 hours.
#'
#' PTime does not have a time zone and always prints the time as is no matter local machine
#' time zone.
#'
#' An essential difference between R and polars is R prints POSIXct/lt without a timezone in local
#' time. Polars prints Datetime without a timezone label as is (GMT). For POSIXct/lt taged with a
#' timexone(tzone) and Datetime with a timezone(tz) the behavior is the same conversion is
#' intuitive.
#'
#' It appears behavior of R timezones is subject to change a bit in R 4.3.0, see rpolars unit test
#' test-expr_datetime.R/"pl$date_range Date lazy/eager".
#'
#' @return a PTime vector either double or integer, with class "PTime" and attribute "tu" being
#' either "s","ms","us" or "ns"
#' @aliases PTIME
#'
#'
#' @examples
#'
#' #make PTime in all time units
#' pl$PTime(runif(5)*3600*24*1E0, tu = "s")
#' pl$PTime(runif(5)*3600*24*1E3, tu = "ms")
#' pl$PTime(runif(5)*3600*24*1E6, tu = "us")
#' pl$PTime(runif(5)*3600*24*1E9, tu = "ns")
#' pl$PTime("23:59:59")
#'
#'
#' pl$Series(pl$PTime(runif(5)*3600*24*1E0, tu = "s"))
#' pl$lit(pl$PTime("23:59:59"))$lit_to_s()
#'
#' pl$lit(pl$PTime("23:59:59"))$to_r()
pl$PTime = function(x, tu = c("s","ms","us","ns"), fmt = "%H:%M:%S") {

if( is.character(x)) {
x = as.double(as.POSIXct(x, format = fmt)) - as.double(as.POSIXct("00:00:00", format = fmt))
tu = "s"
}

tu = tu[1]
if(!is_string(tu) || !tu %in% c("s","ms","us","ns")) {
stopf("tu must be either 's','ms','us' ,or 'ns', not [%s]",str_string(tu))
}

#type specific conciderations
type_ok = FALSE
if(typeof(x)=="double") {
x = as.double(x)
type_ok = TRUE
}
if(typeof(x)=="integer") {
if(!tu %in% c("s","ms")) {stopf(
"only 's' and 'ms' tu is supported for integer, set input x as double to use tu: [%s]", tu
)}
x = as.integer(x)
type_ok = TRUE
}

#check type
if(!type_ok) {
stopf("type of x is not double or integer, it was [%s]", typeof(x))
}

#check boundaries
if(any(x<0)) {
stopf("no element of x can be negative")
}
x = floor(x)
limits = c(
"s" = 86400,
"ms" = 86400000,
"us" = 86400000000,
"ns" = 86400000000000
)
if(any(x>limits[tu])) {
stopf("no elements can exceed 24 hours, the limit for tu '%s' is the value %s",tu,limits[tu])
}

attr(x,"tu") = tu
class(x) = "PTime"
x
}

#' print PTime
#' @param x a PTime vector
#' @param ... not used
#' @return invisible x
#' @exportS3Method
print.PTime = function(x, ...) {
tu = attr(x,"tu")
tu_exp = pcase(
tu == "s", 0,
tu == "ms", 3,
tu == "us", 6,
tu == "ns", 9,
or_else = stopf("not recognized tu")
)
val = unclass(x) / 10^tu_exp
origin = structure(0, tzone = "GMT", class = c("POSIXct", "POSIXt"))
fmt = format(as.POSIXct(val,tz="GMT",origin=origin),format="%H:%M:%S")

if(tu!="s") {
dgt = formatC((val-floor(val))*10^tu_exp, width = tu_exp, flag=0,big.mark ="_",digits = tu_exp)
fmt = paste0(fmt,":",dgt,tu)
}
cat("PTime [",typeof(x),"]: number of epochs [",tu,"] since midnight\n")
print(paste0(
fmt, " val: ",as.character(x)
))
invisible(x)
}
3 changes: 2 additions & 1 deletion R/dataframe__frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ pl$DataFrame = function(..., make_names_unique= TRUE) {
}

##step 4
#buildDataFrameone column at the time
#buildDataFrame one column at the time
self = .pr$DataFrame$new_with_capacity(length(largs))
mapply(largs,keys, FUN = function(column, key) {
if(inherits(column, "Series")) {
Expand All @@ -192,6 +192,7 @@ pl$DataFrame = function(..., make_names_unique= TRUE) {
unwrap(.pr$DataFrame$set_column_from_series(self,column))
} else {
if(length(column)==1L && isTRUE(largs_lengths_max > 1L)) column = rep(column,largs_lengths_max)
column = convert_to_fewer_types(column) #type conversions on R side
unwrap(.pr$DataFrame$set_column_from_robj(self,column,key))
}
return(NULL)
Expand Down
Loading

0 comments on commit d4e8a92

Please sign in to comment.