Data Science Fundamentals: R |
[Table of Contents](../index.ipynb)
- - - 
<!--NAVIGATION-->
Module 15. [Date Time Values](./00.ipynb) | [readr](./01.ipynb) | [dplyr](./02.ipynb) | [Exercises](./03.ipynb)

# Data Transformation with R

## [Date Time Values](https://stats.idre.ucla.edu/r/faq/how-does-r-handle-date-values/)

![images](images/slides1.png)

In [6]:
install.packages("lubridate")


The downloaded binary packages are in
	/var/folders/39/rw094bh97s1fm7lfmg_nwcxm0000gn/T//RtmpiiOH55/downloaded_packages


In [2]:
library(lubridate)


Attaching package: ‘lubridate’


The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union




In [3]:
today()

In [18]:
now(tzone = "")

[1] "2021-10-04 23:58:56 CDT"

In [19]:
now()

[1] "2021-10-04 23:58:58 CDT"

In [20]:
now("GMT")
now("")
now() == now() # would be TRUE if computer processed both at the same instant
now() < now() # TRUE
now() > now() # FALSE
today()
today("GMT")
today() == today("GMT") # not always true
today() < as.Date("2999-01-01") # TRUE  (so far)
# }

[1] "2021-10-05 04:59:10 GMT"

[1] "2021-10-04 23:59:10 CDT"

### Creating Dates From Strings

In [10]:
ymd("2017-01-31")

In [11]:
ymd(20170131)

In [12]:
ymd_hms("2017-01-31 20:11:59")

[1] "2017-01-31 20:11:59 UTC"

In [13]:
mdy("January 31st, 2017")

In [14]:
dmy("31-Jan-2017")

In [15]:
mdy_hm("01/31/2017 08:01")

[1] "2017-01-31 08:01:00 UTC"

![images](images/slides2.png)

In [26]:
install.packages('nycflights13')


The downloaded binary packages are in
	/var/folders/39/rw094bh97s1fm7lfmg_nwcxm0000gn/T//RtmpiiOH55/downloaded_packages


In [27]:
install.packages('dplyr')


The downloaded binary packages are in
	/var/folders/39/rw094bh97s1fm7lfmg_nwcxm0000gn/T//RtmpiiOH55/downloaded_packages


In [28]:
install.packages('knitr')


The downloaded binary packages are in
	/var/folders/39/rw094bh97s1fm7lfmg_nwcxm0000gn/T//RtmpiiOH55/downloaded_packages


In [9]:
library(dplyr)
library(nycflights13)
library(knitr)

"package 'dplyr' was built under R version 3.6.3"
Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

"package 'nycflights13' was built under R version 3.6.3"

In [10]:
flights

year,month,day,dep_time,sched_dep_time,dep_delay,arr_time,sched_arr_time,arr_delay,carrier,flight,tailnum,origin,dest,air_time,distance,hour,minute,time_hour
2013,1,1,517,515,2,830,819,11,UA,1545,N14228,EWR,IAH,227,1400,5,15,2013-01-01 05:00:00
2013,1,1,533,529,4,850,830,20,UA,1714,N24211,LGA,IAH,227,1416,5,29,2013-01-01 05:00:00
2013,1,1,542,540,2,923,850,33,AA,1141,N619AA,JFK,MIA,160,1089,5,40,2013-01-01 05:00:00
2013,1,1,544,545,-1,1004,1022,-18,B6,725,N804JB,JFK,BQN,183,1576,5,45,2013-01-01 05:00:00
2013,1,1,554,600,-6,812,837,-25,DL,461,N668DN,LGA,ATL,116,762,6,0,2013-01-01 06:00:00
2013,1,1,554,558,-4,740,728,12,UA,1696,N39463,EWR,ORD,150,719,5,58,2013-01-01 05:00:00
2013,1,1,555,600,-5,913,854,19,B6,507,N516JB,EWR,FLL,158,1065,6,0,2013-01-01 06:00:00
2013,1,1,557,600,-3,709,723,-14,EV,5708,N829AS,LGA,IAD,53,229,6,0,2013-01-01 06:00:00
2013,1,1,557,600,-3,838,846,-8,B6,79,N593JB,JFK,MCO,140,944,6,0,2013-01-01 06:00:00
2013,1,1,558,600,-2,753,745,8,AA,301,N3ALAA,LGA,ORD,138,733,6,0,2013-01-01 06:00:00


In [8]:
flights %>% select(year, month, day, hour, minute) %>%
  mutate(sched_dep_time=make_datetime(year, month, day, hour, minute))

year,month,day,hour,minute,sched_dep_time
<int>,<int>,<int>,<dbl>,<dbl>,<dttm>
2013,1,1,5,15,2013-01-01 05:15:00
2013,1,1,5,29,2013-01-01 05:29:00
2013,1,1,5,40,2013-01-01 05:40:00
2013,1,1,5,45,2013-01-01 05:45:00
2013,1,1,6,0,2013-01-01 06:00:00
2013,1,1,5,58,2013-01-01 05:58:00
2013,1,1,6,0,2013-01-01 06:00:00
2013,1,1,6,0,2013-01-01 06:00:00
2013,1,1,6,0,2013-01-01 06:00:00
2013,1,1,6,0,2013-01-01 06:00:00


In [32]:
flights_dt <- flights %>%
  filter(!is.na(dep_time), !is.na(arr_time)) %>% mutate(
  dep_time = make_datetime(year, month, day,
                          dep_time%/%100, dep_time%%100),
      arr_time = make_datetime(year, month, day,
                              arr_time%/%100, arr_time%%100),
      sched_dep_time = make_datetime(year, month, day,
                                    sched_dep_time%/%100,sched_dep_time%%100),
      sched_arr_time = make_datetime(year, month, day,
                                    sched_arr_time%/%100, sched_arr_time%%100))

In [33]:
flights_dt

year,month,day,dep_time,sched_dep_time,dep_delay,arr_time,sched_arr_time,arr_delay,carrier,flight,tailnum,origin,dest,air_time,distance,hour,minute,time_hour
<int>,<int>,<int>,<dttm>,<dttm>,<dbl>,<dttm>,<dttm>,<dbl>,<chr>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dttm>
2013,1,1,2013-01-01 05:17:00,2013-01-01 05:15:00,2,2013-01-01 08:30:00,2013-01-01 08:19:00,11,UA,1545,N14228,EWR,IAH,227,1400,5,15,2013-01-01 05:00:00
2013,1,1,2013-01-01 05:33:00,2013-01-01 05:29:00,4,2013-01-01 08:50:00,2013-01-01 08:30:00,20,UA,1714,N24211,LGA,IAH,227,1416,5,29,2013-01-01 05:00:00
2013,1,1,2013-01-01 05:42:00,2013-01-01 05:40:00,2,2013-01-01 09:23:00,2013-01-01 08:50:00,33,AA,1141,N619AA,JFK,MIA,160,1089,5,40,2013-01-01 05:00:00
2013,1,1,2013-01-01 05:44:00,2013-01-01 05:45:00,-1,2013-01-01 10:04:00,2013-01-01 10:22:00,-18,B6,725,N804JB,JFK,BQN,183,1576,5,45,2013-01-01 05:00:00
2013,1,1,2013-01-01 05:54:00,2013-01-01 06:00:00,-6,2013-01-01 08:12:00,2013-01-01 08:37:00,-25,DL,461,N668DN,LGA,ATL,116,762,6,0,2013-01-01 06:00:00
2013,1,1,2013-01-01 05:54:00,2013-01-01 05:58:00,-4,2013-01-01 07:40:00,2013-01-01 07:28:00,12,UA,1696,N39463,EWR,ORD,150,719,5,58,2013-01-01 05:00:00
2013,1,1,2013-01-01 05:55:00,2013-01-01 06:00:00,-5,2013-01-01 09:13:00,2013-01-01 08:54:00,19,B6,507,N516JB,EWR,FLL,158,1065,6,0,2013-01-01 06:00:00
2013,1,1,2013-01-01 05:57:00,2013-01-01 06:00:00,-3,2013-01-01 07:09:00,2013-01-01 07:23:00,-14,EV,5708,N829AS,LGA,IAD,53,229,6,0,2013-01-01 06:00:00
2013,1,1,2013-01-01 05:57:00,2013-01-01 06:00:00,-3,2013-01-01 08:38:00,2013-01-01 08:46:00,-8,B6,79,N593JB,JFK,MCO,140,944,6,0,2013-01-01 06:00:00
2013,1,1,2013-01-01 05:58:00,2013-01-01 06:00:00,-2,2013-01-01 07:53:00,2013-01-01 07:45:00,8,AA,301,N3ALAA,LGA,ORD,138,733,6,0,2013-01-01 06:00:00


### Extracting Date Time Values

In [39]:
#birthday
datetime <- ymd_hms("1973-12-17 12:34:56")

In [40]:
datetime

[1] "1973-12-17 12:34:56 UTC"

In [41]:
month(datetime)

In [42]:
wday(datetime, label = TRUE)

### Rounding Dates

In [12]:
datetime.1 <- ymd_hms("1974-08-26 12:34:56") #brenda
datetime.2 <- ymd_hms("1996-06-06 18:56:02") #gabriela
datetime.3 <- ymd_hms("1998-05-10 13:20:11") #cristian

In [13]:
floor_date(datetime.1, unit="week")

[1] "1974-08-25 UTC"

In [14]:
floor_date(datetime.2, unit="week")

[1] "1996-06-02 UTC"

In [15]:
floor_date(datetime.3, unit="week")

[1] "1998-05-10 UTC"

In [16]:
floor_date(datetime.1, unit="day")

[1] "1974-08-26 UTC"

In [17]:
floor_date(datetime.2, unit="day")

[1] "1996-06-06 UTC"

In [53]:
floor_date(datetime.3, unit="day")

[1] "1998-05-10 UTC"

### Time Periods

In [9]:
( gary_age <- today() - ymd("2057-12-17") )

Time difference of -13362 days

In [55]:
as.period(gary_age)

In [56]:
days(2)

In [57]:
weeks(3)

In [58]:
seconds(0:4)

In [59]:
years(1)