In [None]:
# This R environment comes with all of CRAN preinstalled, as well as many other helpful packages
# The environment is defined by the kaggle/rstats docker image: https://github.com/kaggle/docker-rstats
# For example, here's several helpful packages to load in 

library(ggplot2) # Data visualization
library(readr) # CSV file I/O, e.g. the read_csv function
library(lubridate)
library(magrittr)
library(dplyr)
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

# List the input files with base R instead of shelling out to `ls`:
# list.files() needs no external binary and returns the file names as a
# character vector that can be inspected or reused.
list.files("../input")

# Any results you write to the current directory are saved as output.

# Vector

In [None]:
# Build a numeric vector with c(), then show its values and its class
price <- c(10, 6, 5)
print(price)
print(class(price))

In [None]:
# A vector can hold strings as well; its class is then "character"
name <- c("apple", "banana", "orange")
print(name)
print(class(name))

# List

In [None]:
# Define a list with a single element: the numeric vector c(5, 3, 8)
a <- list(c(5, 3, 8))
print(a)
print(class(a))

In [None]:
# [[1]] extracts the vector stored in the list, [3] takes its third value
a <- list(c(1, 3, 4))
a[[1]][3]

In [None]:
# A named list behaves like a simple dictionary: values are looked up by key
a <- list(apple = 22, orange = 42)
a[["apple"]]

In [None]:
# Nested named lists used as a dictionary of dictionaries: one entry per key
# ("1", "2", "99"), each holding an `in.w` string and an `in.lags` vector
cfg.MODEL <- list(
  "1" = list("in.w" = "w", "in.lags" = c(1, 2, 7, 14, 21)),
  "2" = list("in.w" = "w", "in.lags" = c(2, 3, 7, 14, 21)),
  "99" = list("in.w" = "no.w", "in.lags" = c(3, 4, 7, 14, 21))
)

In [None]:
# Look up the entry stored under key "99", then read its individual fields
cfg.MODEL[["99"]][["in.w"]]
cfg.MODEL[["99"]][["in.lags"]]

In [None]:
# Each list element is a vector: [[i]] selects the vector, [j] a value in it
a <- list(c(3, 4, 9), c(9, 77), c("hoi", "soep"))
a[[1]][1]
a[[2]][1]
a[[3]][2]

# Dataframe

In [None]:
# Combine two equally long vectors into a data frame, one column per vector
name <- c("apple", "banana", "orange")
price <- c(10, 6, 5)
df <- data.frame(name = name, price = price)
df

In [None]:
# Retrieve the column names of a data frame
df <- data.frame(
  name = c("apple", "banana", "orange"),
  price = c(10, 6, 5)
)
colnames(df)

In [None]:
# Rename all columns at once by assigning a new vector of names
df <- data.frame(
  name = c("apple", "banana", "orange"),
  price = c(10, 6, 5)
)
names(df) <- c("name2", "price2")
df

In [None]:
# Drop a column: a negative `select` in subset() excludes the named column
df <- data.frame(
  name = c("apple", "banana", "orange"),
  price = c(10, 6, 5)
)
df <- subset(df, select = -price)
df

## Filter

The filter() function can be used similarly to subset() to select a set of rows from an original data.frame according to some conditioning statement. As with subset(), filter() returns an object that maintains a list of the original levels whether those levels exist in the new data.frame or not. Use droplevels() to restrict the levels to only those that exist in the data.frame

 Use filter() to find rows/cases where conditions are true. 
 Unlike base subsetting with [, rows where the condition evaluates to NA are dropped.

In [None]:
# Keep the rows whose price is below 10 and whose name is not "orange".
# (In RStudio, CTRL + SHIFT + M inserts the %>% pipe.)
df <- data.frame(
  name = c("apple", "banana", "orange"),
  price = c(10, 6, 5)
)
a <- df %>%
  filter(price < 10, name != "orange")
a

In [None]:
# Base-R alternative: build a logical row mask and index the data frame with it
df <- data.frame(
  name = c("apple", "banana", "orange"),
  price = c(10, 6, 5)
)
idx <- with(df, price < 10 & name != "orange")
a <- df[idx, ]
a

## Other functions

### is.na()

In [None]:
# is.na() flags every missing value in a vector, element by element
a <- c(2, NA, 4)
is.na(a)

In [None]:
# What happens with a data frame that has NAs in multiple columns?
# is.na() returns a logical matrix with one cell per data frame cell.
df <- data.frame(
  weight = c(22, 11, NA),
  price = c(10, NA, 5)
)
is.na(df)

In [None]:
# Return the data frame without any row that contains an NA
df <- data.frame(
  weight = c(22, 11, NA),
  price = c(10, NA, 5)
)
na.omit(df)

### any()

In [None]:
# any() is TRUE when at least one element of a logical vector is TRUE
a <- c(1, 3, 5)
any(a < 5)
any(a < 0)

### which()

In [None]:
# which() returns the positions of the TRUE elements of a logical vector
a <- c(1, 3, 5)
which(a < 5)

# Dates

In [None]:
# Parse a timestamp string into a POSIXct date-time in the Amsterdam time zone
mydate <- "2014-06-28 09:30"
mydate_local <- as.POSIXct(mydate, tz = "Europe/Amsterdam")
print(mydate_local)
print(class(mydate_local))

In [None]:
# Format a date-time as a compact string, once through the pipe and once with
# a direct call; both produce the same text
mydate_local <- as.POSIXct("2014-06-28 09:30", tz = "Europe/Amsterdam")
a <- mydate_local %>%
  strftime(format = "%Y%m%d%H%M", tz = "Europe/Amsterdam")
b <- strftime(mydate_local, "%Y%m%d%H%M", tz = "Europe/Amsterdam")
print(a)
print(b)

In [None]:
# Show the same instant on the UTC clock: with_tz() changes only the time zone
# used for display
mydate_local <- as.POSIXct("2014-06-28 09:30", tz = "Europe/Amsterdam")
mydate_utc <- with_tz(mydate_local, "UTC")
print(mydate_utc)

In [None]:
# Parse a compact timestamp string: strptime() yields a POSIXlt object, which
# as.POSIXct() then converts to POSIXct
a <- "201805101600"
b <- strptime(a, format = "%Y%m%d%H%M", tz = "Europe/Amsterdam")
c <- as.POSIXct(b)
b
c
print(class(b))
print(class(c))

https://www.r-bloggers.com/using-dates-and-times-in-r/

http://rstudio-pubs-static.s3.amazonaws.com/7415_a5c3c312c0204d7fbd9131bef15ce724.html

http://r4ds.had.co.nz/dates-and-times.html#time-zones

In [None]:
# Parse local Amsterdam times around the end of daylight saving time on
# 2018-10-28, when the 02:00 hour occurs twice. The result of this parse was
# noted as looking wrong.
a <- c(
  "2018-10-28 01:00", "2018-10-28 02:00",
  "2018-10-28 02:00", "2018-10-28 03:00"
)
b <- as.POSIXct(a, tz = "Europe/Amsterdam")
b

In [None]:
# Same DST check with lubridate's parse_date_time(); this result was also
# noted as looking wrong
a <- c(
  "2018-10-28 01:00", "2018-10-28 02:00",
  "2018-10-28 02:00", "2018-10-28 03:00"
)
b <- parse_date_time(a, orders = "%Y%m%d%H%M", tz = "Europe/Amsterdam")
b

In [None]:
# Check DST handling in the other direction: parse hourly timestamps as UTC,
# then display them on the Amsterdam clock
a <- c(
  "2018-10-27 23:00", "2018-10-28 00:00", "2018-10-28 01:00",
  "2018-10-28 02:00", "2018-10-28 03:00", "2018-10-28 04:00"
)
b <- as.POSIXct(a, tz = "UTC")
b

c <- with_tz(b, "Europe/Amsterdam")
c

In [None]:
# Parse New York local times on 2008-11-02 and display them in UTC
c("2008-11-02 01:30", "2008-11-02 02:00", "2008-11-02 02:30") %>%
  ymd_hm(tz = "America/New_York") %>%
  with_tz("UTC")

In [None]:
# lubridate shortcuts: the function name spells out the field order of the
# input (year-month-day hour-minute[-second])
ymd_hm("2018-05-30 16:00", tz = "Europe/Amsterdam")
ymd_hms("2018-05-30 16:00:30", tz = "Europe/Amsterdam")

In [None]:
# Convert an ISO-formatted string (YYYY-MM-DD) into a Date object
a <- as.Date("2016-02-24")
print(a)
print(class(a))

In [None]:
# Turn today's date into a compact YYYYMMDD string and print it
today() %>%
  format("%Y%m%d") %>%
  print()

# Strings

In [None]:
# sprintf() substitutes a value for the %s placeholder in a template string
numfruit <- 8
mystring <- sprintf("This store has %s types of fruit", numfruit)
print(mystring)

In [None]:
# The same placeholder technique can assemble the text of a query.
# NOTE: this is fine for the literal values used here; values that come from
# users should be passed as query parameters rather than pasted into SQL text.
name <- "Joe"
age <- 55
mystring <- sprintf(
  "SELECT * FROM employee WHERE name = '%s' AND age >= %s",
  name, age
)
print(mystring)

In [None]:
# Join several strings into one; paste() separates the pieces with a single
# space by default
a <- paste("hoi", "dit", "is")
print(a)

In [None]:
# Build a dated file name by filling today's date (YYYYMMDD) into a template
mystring <- "inputdata_%s.csv"
a <- sprintf(mystring, format(today(), "%Y%m%d"))
print(a)

# I/O

In [None]:
# Construct the path of a file in the current working directory
a <- file.path(getwd(), "test.csv")
print(a)

In [None]:
# Save a data frame as a CSV file, leaving out the row-name column
df <- data.frame(
  name = c("apple", "banana", "orange"),
  price = c(10, 6, 5)
)
write.csv(df, file = "apple.csv", row.names = FALSE)

In [None]:
# Read the CSV file from the working directory into a data frame and show it
df_in <- read.csv("apple.csv")
df_in