# Built-In Data Types

## Library Used

In [107]:
library(stringr)
# str_detect       (x, pattern)
# str_subset       (x, pattern)
# str_count        (x, pattern)
# str_replace      (x, pattern, replacement)
# str_replace_all  (x, pattern, replacement)
# str_length       (x)
# str_c            (..., sep), (...,collapse)
# str_sub          (string, start, end )
# str_split        (string, pattern)

## remark
# x      = multiple elements vector
# string = single element vector
# ...    = vectorized execution of multiple vectors
          

In [125]:
## collapse: join everything into ONE string
print(str_c(c("aaa", "bbb"), collapse = "-"))
print(str_c(c("aaa", "bbb"), c("$$$", "###"), c("111", "222"), collapse = "-"))

## sep: element-wise (vectorized) join across the input vectors
print(str_c(c("aaa", "bbb"), sep = "-"))  # only one input vector, so sep has nothing to join
print(str_c(c("aaa", "bbb"), c("111", "222"), sep = "-"))

[1] "aaa-bbb"
[1] "aaa$$$111-bbb###222"
[1] "aaa" "bbb"
[1] "aaa-111" "bbb-222"


## Number

### Class and Typeof
Integer and double have different class values

In [1]:
# class() reports "integer" for an L-suffixed whole number and "numeric" for a double.
# A literal such as 2.3L is not a valid integer literal: R warns and uses a
# double anyway, so the double is written without the L suffix.
print( class(2L) )
print( class(2.3) )

[1] "integer"
[1] "numeric"


In [2]:
print( typeof( 2L ) )
print( typeof( 2.3 ) )

[1] "integer"
[1] "double"


### Integer
Use **L** to denote integer

In [71]:
print( is.integer( 2L ) )

[1] TRUE


### Double

In [72]:
print( is.double( 2.3 ) )

[1] TRUE


### Numeric
**Both** integer and double are numeric

In [10]:
# is.numeric() is TRUE for both storage types: integer (2L) and double (2.3).
# The double is written as 2.3, not 2.3L: an L suffix on a fractional literal
# only triggers a warning and still produces a double.
print( is.numeric(2L) )
print( is.numeric(2.3) )

[1] TRUE
[1] TRUE


## Character

In [24]:
print( 'abc' )
print( typeof('abc') )
print( is.character('abc') )

[1] "abc"
[1] "character"
[1] TRUE


In [12]:
class('abc')

### Properties
#### Number of Chars

In [120]:
x = c('One','Two','Three','Four')
nchar(x)

### Splitting and Trim
#### Split with strsplit( )

In [119]:
s <- "apple , banana, durian , rambutan"
s <- strsplit(s, ",")
print(s)            # strsplit() returns a list holding one item
print(s[[1]])       # select list item 1, which is a character vector
print(s[[1]][2])    # select element 2 of list item 1

[[1]]
[1] "apple "    " banana"   " durian "  " rambutan"

[1] "apple "    " banana"   " durian "  " rambutan"
[1] " banana"


#### Trim White Space with trimws( )
Trim leading and trailing white spaces

In [161]:
print( trimws( '   ali baba ') )
print( trimws( s[[1]] ) )

[1] "ali baba"
[1] "apple"    "banana"   "durian"   "rambutan"


### Combining

#### Combine Chars

In [132]:
print( paste("file", "number", "32")  )   ## default seperator is " "
print( paste("file", "number", "32", sep = "_") )

[1] "file number 32"
[1] "file_number_32"


Use **paste0( )  if no separator required**  
**paste0 is shorthand** for paste(x, sep="")

In [153]:
print( paste0("file", "number", "32")  )

[1] "filenumber32"


#### Vectorized paste

In [151]:
# Three parallel vectors of length 5, used by the vectorized paste examples
x <- rep("file", times = 5)
y <- rep("number", times = 5)
z <- seq(1, 5, by = 1)
print(x)
print(y)
print(z)

[1] "file" "file" "file" "file" "file"
[1] "number" "number" "number" "number" "number"
[1] 1 2 3 4 5


In [157]:
print( paste( x,y,z, sep="_" ) )

[1] "file_number_1" "file_number_2" "file_number_3" "file_number_4"
[5] "file_number_5"


In [156]:
print( paste0( x,y,z ) )

[1] "filenumber1" "filenumber2" "filenumber3" "filenumber4" "filenumber5"


#### **stringr**

In [None]:
## objective of Collapse is to return single string
print( str_c(c('aaa','bbb'), collapse = '-') )
print( str_c(c('aaa','bbb'),  c('$$$','###'), c('111','222'),collapse='-') )

## objective of Sep is to return vectorized combine
print( str_c(c('aaa','bbb'), sep = '-') )   # single input vector, no effect on vectorization
print( str_c(c('aaa','bbb'), c('111','222'), sep='-') )

### Matching

In [87]:
# Sample data for the matching examples: car names with mixed letter case
cars <- c(
  "Nissan serena", "toyota camry", "Toyota Estima",
  "nissan sentra", "TOYOTA vios"
)

#### Find
**grep( pattern=, x=, value=FALSE, ignore.case=FALSE)**  
can return either indices or values

**Find (Return Indices) the Matching Pattern**

In [79]:
print( cars ) 
print( grep('toyota', cars) )                      # case sensitive
print( grep('toyota', cars, ignore.case = TRUE) )  # case insensitive

[1] "Nissan serena" "toyota camry"  "Toyota Estima" "nissan sentra"
[5] "TOYOTA vios"  
[1] 2
[1] 2 3 5


**Find The Matching Values**

In [95]:
## Return **value instead of indices**
print( cars )
print( grep('toyota', cars, value=TRUE) )
print( grep('toyota', cars, value=TRUE, ignore.case = TRUE) )

[1] "Nissan serena" "toyota camry"  "Toyota Estima" "nissan sentra"
[5] "TOYOTA vios"  
[1] "toyota camry"
[1] "toyota camry"  "Toyota Estima" "TOYOTA vios"  


**stringr**  
**str_detect** (string, pattern)  - which element contain pattern  
**str_subset** (string, pattern)  - return all elements that matches  

Default to case sensitive,
use pattern=fixed( , ignore_case=TRUE) for case insensitive.

**Detect (Return Logical Vector) The Matching Pattern**

In [70]:
print(cars)
print( str_detect(cars, 'toyota') )                           # case sensitive
print( str_detect(cars, fixed('toyota', ignore_case=TRUE)) )  # case insensitive

[1] "Nissan serena" "toyota camry"  "Toyota Estima" "nissan sentra"
[5] "TOYOTA vios"  
[1] FALSE  TRUE FALSE FALSE FALSE
[1] FALSE  TRUE  TRUE FALSE  TRUE


**Find The Matching Values**

In [88]:
print(cars)
print( str_subset(cars, 'toyota') )                           # case sensitive
print( str_subset(cars, fixed('toyota', ignore_case=TRUE)) )  # case insensitive

[1] "Nissan serena" "toyota camry"  "Toyota Estima" "nissan sentra"
[5] "TOYOTA vios"  
[1] "toyota camry"
[1] "toyota camry"  "Toyota Estima" "TOYOTA vios"  


#### Find and Replace
**gsub( pattern=, replacement=, x=, ignore.case=FALSE)**  
return a new string vector

In [92]:
print( cars )
print( gsub(pattern='toyota', replacement='Toyota', cars) ) 
print( gsub(pattern='toyota',replacement='Toyota',ignore.case = TRUE, cars) )

[1] "Nissan serena" "toyota camry"  "Toyota Estima" "nissan sentra"
[5] "TOYOTA vios"  
[1] "Nissan serena" "Toyota camry"  "Toyota Estima" "nissan sentra"
[5] "TOYOTA vios"  
[1] "Nissan serena" "Toyota camry"  "Toyota Estima" "nissan sentra"
[5] "Toyota vios"  


**stringr**  
**str_replace** (x, pattern, replacement)      # replace the first occurrence in each element  
**str_replace_all** (x, pattern, replacement)  # replace all occurrences in every element  

Default to case sensitive,  
use **pattern=fixed( , ignore_case=TRUE)** for case insensitive.

In [94]:
print( cars )
print( str_replace     (cars, 'toyota','Toyota') )
print( str_replace_all (cars, 
                        fixed('toyota', ignore_case=TRUE),  # case insensitive
                        'Toyota') )

[1] "Nissan serena" "toyota camry"  "Toyota Estima" "nissan sentra"
[5] "TOYOTA vios"  
[1] "Nissan serena" "Toyota camry"  "Toyota Estima" "nissan sentra"
[5] "TOYOTA vios"  
[1] "Nissan serena" "Toyota camry"  "Toyota Estima" "nissan sentra"
[5] "Toyota vios"  


### Case Conversion

In [97]:
x = c('One','Two','Three')
print( tolower(x)  )
print( toupper(x)  )

[1] "one"   "two"   "three"
[1] "ONE"   "TWO"   "THREE"


## Logical

### Class and Typeof

In [15]:
print( typeof(TRUE) )
print( class(TRUE) )

[1] "logical"
[1] "logical"


### Logical Comparison
Take note, R will coerce different data type before comparing

#### Equality

In [30]:
print( 3 == 3 )          # compare two doubles (a bare 3 is a double; 3L would be an integer)
print( 3L == 3.0 )       # compare integer and double
print( 'abc' == 'abc' )  # compare two characters
print( 3.0 == '3' )      # compare double and character
print( 3.0 == 'three' )      # compare double and character

[1] TRUE
[1] TRUE
[1] TRUE
[1] TRUE
[1] FALSE


#### Not Equal

In [16]:
print( 3 != 2 )

[1] TRUE


#### Negate

In [17]:
!(3L == 3.0)

### Comparing NA
Any operation against NA will **return NA**

In [60]:
# NA propagates through arithmetic and through comparison with `==`
nas <- c(1, 2, NA, 4, 5, 6)
nas <- nas + 1
print(nas)
print(nas == NA)

[1]  2  3 NA  5  6  7
[1] NA NA NA NA NA NA


Use **is.na()** to check if value is NA, NEVER use x==NA to check

In [59]:
is.na(nas)

### Comparing NULL

**Null is neither TRUE or FALSE**

In [46]:
NULL==TRUE | NULL==FALSE

- Comparing NULL with anything returns **NOTHING** (a zero-length logical vector, `logical(0)`)  
- **NOTHING is not NULL**

In [55]:
result = (NULL=='abc')
print( typeof(result)  )
print( length(result)  )
print( is.null(result) )

[1] "logical"
[1] 0
[1] FALSE
logical(0)


## Factor

### Creating
**Non Ordered Factor**
There is no need to specify levels

In [81]:
X = c('apple','banana','apple','durian','rambutan','durian')
f1 = factor(X)
print( f1 )

[1] apple    banana   apple    durian   rambutan durian  
Levels: apple banana durian rambutan


**Ordered Factor**
- Specify ordered=T and its levels in order

In [82]:
# Ordered factor: the order of `levels` defines slow < fast < turbo
y  <- c("slow", "turbo", "fast", "slow", "fast", "slow")
f2 <- factor(y, levels = c("slow", "fast", "turbo"), ordered = TRUE)
print(f2)

[1] slow  turbo fast  slow  fast  slow 
Levels: slow < fast < turbo


- Each element is comparable according to the order specified

In [84]:
print( f2[1] < f2[2] )

[1] TRUE


### Class, Typeof, is.factor( )
Observe that the underlying type (typeof) of a factor is integer

In [86]:
print( class(f1) )
print( typeof(f1) )
print( is.factor(f1) )

[1] "factor"
[1] "integer"
[1] TRUE


### Properties

In [90]:
print( levels(f2) )

[1] "slow"  "fast"  "turbo"


## Date and Time

### Date
- Date is in fact **double**
- There is **no is.date()** function

In [2]:
d = as.Date('2019-01-03')
print( d )
print( typeof(d) )
print( is.double(d) )

[1] "2019-01-03"
[1] "double"
[1] TRUE


### Time

### Date and Time

# Built-In Data Structure

## Vector
- Vector can only contain single data type (atomic) elements

### Creating Vector

#### Empty Vector
- Empty vector is vector with no elements, also known as **NULL**
- Use **is.null()** to test NULL

In [9]:
empty = c()
print   (empty)
typeof  (empty)
is.null (empty)

NULL


#### Assignment

In [74]:
x1 <- c(1, 2, 3, 4, 5)
x2 <- c("yongks", "mahathir", "limge", "annuar", "limks")
x3 <- c(1, 4:6, 10:15)  ## ranges (a:b) can be mixed with single numbers
str(x1)
str(x2)
str(x3)

 num [1:5] 1 2 3 4 5
 chr [1:5] "yongks" "mahathir" "limge" "annuar" "limks"
 num [1:10] 1 4 5 6 10 11 12 13 14 15


### Naming & Attributes
- Each element of vector can have a name

#### Creating Names

In [58]:
y        = c(7,8,9,10)
names(y) = c('seven','eight','nine')
print(y)

seven eight  nine  <NA> 
    7     8     9    10 


#### Retrieving Names
- **names()** return a vector of all names of a vector
- Elements that have not been assigned names will have **NA**

In [60]:
names(y)

#### Name as Attribute

Naming vector **creates "name" attributes**

In [62]:
attributes (y)

### Accessing Elements
- Accessing element(s) will always return as a new vector

#### Access with Index Vector
Supply a **number vector in [  ]** to select the elements.   
Index number starts at 1.

In [7]:
z <- c("aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg", "hhh", "iii", "jjj")

print(z[3])              ## one element
print(z[c(1, 3, 4)])     ## elements 1, 3 and 4
print(z[1:3])            ## the first 3 elements
print(z[c(1:3, 7:9)])    ## elements 1-3 followed by elements 7-9

[1] "ccc"
[1] "aaa" "ccc" "ddd"
[1] "aaa" "bbb" "ccc"
[1] "aaa" "bbb" "ccc" "ggg" "hhh" "iii"


Use **negate (-num)** to deselect item(s)

In [8]:
print( z[c(-1,-4)] )  ## negate specific elements
print( z[c(-1:-3)] )  ## deselect first 3 elements

[1] "bbb" "ccc" "eee" "fff" "ggg" "hhh" "iii" "jjj"
[1] "ddd" "eee" "fff" "ggg" "hhh" "iii" "jjj"


#### Access with Logical Vector
Conceptually, indexing a vector with a logical (TRUE/FALSE) vector returns the elements at the positions that are TRUE

In [11]:
z = c('aaa','bbb','ccc','ddd','eee','fff','ggg','hhh','iii','jjj')
## retrieve first 3 elements: positions marked TRUE are kept.
## TRUE/FALSE are spelled out because T and F are ordinary variables that can be reassigned.
print( z[ c(TRUE,TRUE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE)] )

[1] "aaa" "bbb" "ccc"


**This method can be used as 'filtering'**, 
by first 
- Form a logical vector based on matching criteria of the vector itself
- Index the data vector with this logical vector 
- If logical vector is shorter than data, **logical elements repeat itself**

In [12]:
## create the logical vector
criteria = z %in% c('fff','iii')
print( criteria )
print( z[criteria] )   ## apply the logical vector to vector indexing

 [1] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE
[1] "fff" "iii"


Logical vector **repeat itself** if shorter than data length.  

In [13]:
## select every alternate element (odd position):
## the 2-element logical index is recycled along the whole vector.
## TRUE/FALSE are spelled out because T and F can be reassigned.
z[c(TRUE,FALSE)]

#### Access with Name
Index with a vector of names

In [14]:
z = c(1,2,3,4,5,6,7,8,9)
names(z) = c('one','two','three','four','five','six','seven','eight','nine')

In [15]:
print( z['seven']  )            # does not require c() if single element
print( z[c('seven','nine')]  )  # choose two elements by name

seven 
    7 
seven  nine 
    7     9 


### Remove Item(s)

#### Characteristic

In [38]:
is.atomic(x2)

### Characteristic of Vector

In [3]:
# Build a named numeric vector, then list its attributes.
# attributes() needs the object as its argument; calling it with none is an error.
z = c(1,2,3,4,5,6,7,8,9)
names(z) = c('one','two','three','four','five','six','seven','eight','nine')
attributes(z)

## Data Frame
### Class and Typeof
Observe that dataframe is a type of **list**

In [24]:
print( class(iris) )
print( typeof(iris) )
print( is.data.frame(iris) )

[1] "data.frame"
[1] "list"
[1] TRUE


### Creating
- Use **data.frame()** to create data frame from vectors
- All original vectors must have same length, otherwise will have error

#### Create From Vectors
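- Character is auto converted to **Factor** (default in R < 4.0; since R 4.0.0 `stringsAsFactors` defaults to FALSE, so characters stay character)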
- **Rownames** default to 1,2,3, etc

In [70]:
x1 = c(1,2,3,4,5)
x2 = c('one','two','three','four','firve')
x3 = c('satu','dua','tiga','empat','lima')
X  = data.frame(X1=x1, x2, x3)
str(X)

'data.frame':	5 obs. of  3 variables:
 $ X1: num  1 2 3 4 5
 $ x2: Factor w/ 5 levels "firve","four",..: 3 5 4 2 1
 $ x3: Factor w/ 5 levels "dua","empat",..: 4 1 5 2 3


- Disable Auto Factor conversion

In [72]:
X = data.frame(x1,x2,x3, stringsAsFactors = FALSE)
str( X )

'data.frame':	5 obs. of  3 variables:
 $ x1: num  1 2 3 4 5
 $ x2: chr  "one" "two" "three" "four" ...
 $ x3: chr  "satu" "dua" "tiga" "empat" ...


- Specify Row Names

In [48]:
X = data.frame(x1,x2, row.names = x3)
print( X )

      x1    x2
satu   1   one
dua    2   two
tiga   3 three
empat  4  four
lima   5 firve


### Column Manipulation

In [67]:
# Data frame used by the column/row selection examples:
# character columns are kept as character and rows get explicit names
x1 <- c(1, 2, 3, 4, 5)
x2 <- c("one", "two", "three", "four", "five")
x3 <- c("satu", "dua", "tiga", "empat", "lima")
rn <- c("_1_", "_2_", "_3_", "_4_", "_5_")
X <- data.frame(x1, x2, x3, row.names = rn, stringsAsFactors = FALSE)
print(X)

    x1    x2    x3
_1_  1   one  satu
_2_  2   two   dua
_3_  3 three  tiga
_4_  4  four empat
_5_  5  five  lima


#### Selecting One Column
Single column returns a **VECTOR!!**

In [14]:
print( X[,2] )    # column number
print( X[,'x2'] ) # column name

[1] "one"   "two"   "three" "four"  "five" 
[1] "one"   "two"   "three" "four"  "five" 


#### Select Multiple Columns

In [56]:
print( X[,c(2,3)] )        # by column number

       x2    x3
_1_   one  satu
_2_   two   dua
_3_ three  tiga
_4_  four empat
_5_  five  lima


In [25]:
print( X[,c('x2','x3')] )  # by column names

       x2    x3
_1_   one  satu
_2_   two   dua
_3_ three  tiga
_4_  four empat
_5_  five  lima


### Select Rows
Single or Multi Rows selection return dataframe

#### Select Single Row

In [35]:
print( X[3,] )     # by row number
print( X['_3_',])  # by row names
print( is.data.frame(X[3, ])  )

    x1    x2   x3
_3_  3 three tiga
    x1    x2   x3
_3_  3 three tiga
[1] TRUE


In [29]:
print( typeof(X[3,])  )
print( is.data.frame(X[3,]) )

[1] "list"
[1] TRUE


#### Select Multiple Rows

In [22]:
print( X[3:5,] )                  # by row number
print( X[c('_3_','_4_','_5_'),] ) # by row names

    x1    x2    x3
_3_  3 three  tiga
_4_  4  four empat
_5_  5  five  lima
    x1    x2    x3
_3_  3 three  tiga
_4_  4  four empat
_5_  5  five  lima


Similar to single row, multi row selection is also a dataframe

In [34]:
print( typeof(X[3:5,])  )
print( is.data.frame(X[3:5,]) )

[1] "list"
[1] TRUE


### Data Frame Properties

#### Dimension
**dim(  )** returns a vector of two elements (rows, cols)

In [64]:
print( dim( iris )  )  # vector of two elements
print( dim(iris)[1] )  # get rows
print( dim(iris)[2] )  # get columns

[1] 150   5
[1] 150
[1] 5


**Number of Rows**

In [59]:
print( nrow(iris) )

[1] 150


**Number of Columns**  
It may not be intuitive, however, length() on dataframe return number of columns

In [58]:
print( ncol(iris) )
print( length(iris) )

[1] 5
[1] 5


#### Attributes
Let's see all available attributes for data frame

In [73]:
attributes(iris)

**Retrieve single attribute**

In [92]:
attr(iris,'names')

#### Names
**Column Names**  
- Observe that both **names()  and colnames()** return dataframe column names 
- However, **colnames(  )** is more intuitive

In [93]:
print( names(iris) )
print( colnames(iris) )

[1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"  "Species"     
[1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"  "Species"     


**Row Names**

In [76]:
rownames(iris)

## List
- List can hold many types of data including list
- Each data that it holds can have different length (unlike dataframe)

# Control Structure

## if..then..else

In [48]:
X = 1
if (X==1) 
    print('1')

[1] "1"


**{  } is necessary** when an **else** is present at top level: `else` must follow the closing `}` on the same line, otherwise R reports an error

In [43]:
X <- 1
# `else` sits on the same line as the closing brace of the `if` branch
if (X == 1) {
  print("1")
} else {
  print("not one")
}

[1] "1"


## For Loop

Loop through any vector, numeric or character

### Loop through Vectors
**Number Vector**

In [4]:
for(i in 1:5)
    print(i)

[1] 1
[1] 2
[1] 3
[1] 4
[1] 5


**Character Vector**  

In [13]:
## Create the Data vector
students = c(10,15,16,18,20)
names(students) = c('ali','abu','ah kow','sammy','david')
print(students)

   ali    abu ah kow  sammy  david 
    10     15     16     18     20 


In [14]:
## Loop
## Loop over the positions of the named vector.
## seq_along() yields an empty sequence for a zero-length vector, whereas
## 1:length(x) would yield c(1, 0) and run the body twice.
for (i in seq_along(students)) {
    cat('Student name: ', names(students)[i], '\tage: ', students[i],'\n')
}

Student name:  ali 	age:  10 
Student name:  abu 	age:  15 
Student name:  ah kow 	age:  16 
Student name:  sammy 	age:  18 
Student name:  david 	age:  20 


### Break and Next
**Next Loop, skip anything remaining**

In [10]:
# Print only the even numbers from 1 to 10.
for(i in 1:10) {
    if (i %% 2 != 0)  # odd number: skip to the next iteration
        next
    else
        print(i)
}

[1] 2
[1] 4
[1] 6
[1] 8
[1] 10


**Exit Loop Immediately**

In [12]:
for(i in 1:10) {
    if (i>5)  # stop the loop entirely if i>5
        break
    else
        print(i)
}

[1] 1
[1] 2
[1] 3
[1] 4
[1] 5


## While Loop

### Loop through Number Vector

In [9]:
# Count from 1 to 5; the counter is advanced at the end of each pass
i <- 1
while (i <= 5) {
  print(i)
  i <- i + 1
}

[1] 1
[1] 2
[1] 3
[1] 4
[1] 5


### Loop through Character Vector

In [1]:
students = c(10,15,16,18,20)
names(students) = c('ali','abu','ah kow','sammy','david')
print(students)

   ali    abu ah kow  sammy  david 
    10     15     16     18     20 


In [2]:
i = 1
while (i <= length(students)) {
    cat('Student name: ', names(students)[i], '\tage: ', students[i],'\n')
    i = i+1
}

Student name:  ali 	age:  10 
Student name:  abu 	age:  15 
Student name:  ah kow 	age:  16 
Student name:  sammy 	age:  18 
Student name:  david 	age:  20 


### Next and Break

**Next will continue to next loop, skip remaining statements**

In [10]:
# i is incremented BEFORE the checks, so the body sees i = 1..6
# and the even values 2, 4, 6 are printed.
i = 0
while (i <= 5) {
    i = i + 1
    if (i %% 2 != 0) {  # if odd number, skip
        next
    }
    print(i)
}

[1] 2
[1] 4
[1] 6


**Break will entirely exit the loop**

In [11]:
# i is incremented before the check, so 1 and 2 are printed and the
# loop is left as soon as i reaches 3.
i = 0
while (i <= 5) {
    i = i + 1
    if (i == 3) {  # leave the loop entirely
        break
    }
    print(i)
}

[1] 1
[1] 2


## Repeat Loop
Repeat is a forever loop with no conditional matching  
Use **break** to exit

In [21]:
# repeat has no condition of its own: `next` skips odd values and
# `break` is the only way out of the loop.
i = 0
repeat {
    i = i + 1
    if (i %% 2 != 0) {  # if odd number, skip
        next
    }
    if (i > 10) {       # if >10, stop (checked only for even i, so the loop ends at i = 12)
        break
    }
    print(i)
}

[1] 2
[1] 4
[1] 6
[1] 8
[1] 10


# Data Generation

## Numbers

### Sequential Number
#### Incremental 
**Incremental by 1 Step**

In [10]:
print( 3:12)
print( seq (3, 12) )        # integer increment, default by=1

 [1]  3  4  5  6  7  8  9 10 11 12
 [1]  3  4  5  6  7  8  9 10 11 12


In [11]:
print( 3.3:12.5 )
print( seq (3.3, 12.5) )    # double increment, default by 1

 [1]  3.3  4.3  5.3  6.3  7.3  8.3  9.3 10.3 11.3 12.3
 [1]  3.3  4.3  5.3  6.3  7.3  8.3  9.3 10.3 11.3 12.3


**Incremental by x Step**

In [18]:
print( seq (3, 12, by = 4) )           # increment of integer
print( seq (3.25, 12.50, by = 2.25))   # increment of decimal 

[1]  3  7 11
[1]  3.25  5.50  7.75 10.00 12.25


In [31]:
seq(3, 17, length.out=6)   # 6 equally spaced values running from 3 to 17

**Incremental by Equal Spreading**

In [28]:
# NOTE(review): from (15) is greater than to (3), so these 6 equally spaced
# values descend, although this cell sits under an "Incremental" heading.
seq(15,3, length.out=6)

#### Decremental

**Decrement by 1 Step**

In [36]:
print( 12:3 )   
print( seq(12,3)  )            # default by -1

 [1] 12 11 10  9  8  7  6  5  4  3
 [1] 12 11 10  9  8  7  6  5  4  3


**Decrement by X Step**

In [37]:
print( seq(12,3, by = -3)  )               # integer
print( seq(12.50, 3.25, by = -1.25)  )     # double

[1] 12  9  6  3
[1] 12.50 11.25 10.00  8.75  7.50  6.25  5.00  3.75


**Decrement by Equal Spreading**

In [43]:
print( seq(12.50, 3.25, length.out=6) )    # double

[1] 12.50 10.65  8.80  6.95  5.10  3.25


## Random Numbers

### Normal Distribution

In [46]:
print( rnorm(8, mean=3,sd=1.25) )

[1] 2.461450 1.433926 3.370971 1.940777 2.936089 1.569709 3.040864 4.871685


### Uniform Distribution

In [47]:
print (runif(8,min = 3, max=10))

[1] 9.009766 8.394454 7.408255 4.586779 7.480465 5.259408 7.688593 6.852577


## Factor

### Random Non Ordered Factor

In [118]:
rf2 = gl(n=2, k=3, length = 12, c('Aaa','Bbb','Ccc','Ddd'))
print( rf2 )


 [1] Aaa Aaa Aaa Bbb Bbb Bbb Aaa Aaa Aaa Bbb Bbb Bbb
Levels: Aaa Bbb Ccc Ddd


### Random Ordered Factor

In [97]:
rf1 = gl( 3, 5, 9, c('Aaa','Bbb','Ccc'), ordered=TRUE )
print( rf1 )

[1] Aaa Aaa Aaa Aaa Aaa Bbb Bbb Bbb Bbb
Levels: Aaa < Bbb < Ccc


## Sampling

### Without Replacement

**Sample size MUST not be larger than population**

In [60]:
X = c(1,2,3,4,5,6,7,8)
print( sample(X, size = 5) )

[1] 1 7 4 6 2


### Sampling With Replacement

In [61]:
print( sample(X,size = 8, replace=TRUE) )

[1] 5 6 1 5 6 8 3 2


# Statistics

In [119]:
summary( mtcars )

      mpg             cyl             disp             hp       
 Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
 1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
 Median :19.20   Median :6.000   Median :196.3   Median :123.0  
 Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
 3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
 Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
      drat             wt             qsec             vs        
 Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
 1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
 Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
 Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
 3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
 Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
       am              gear            carb      
 Min.   :0.0000   Min.   :3.000  