[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/yingwang-git/R_StatisticAnalysis/HEAD?filepath=0_ProductivityTips.ipynb)

**Click above button to run code online. It may take a long time to load.**

# Some Useful Usages in R

## Read multiple files

In [1]:
# read multiple csv files in one time
data_list <- list("cars.csv", "Boston.csv")  # names of data files
name_list <- list("data_cars", "data_Boston")  # names assigned to data files in R

In [33]:
# use paste0 to combine the folder and file name.
for(i in seq_along(data_list)){
  assign(name_list[[i]], read.csv(paste0("data/", data_list[[i]]), header = T))
}

In [34]:
# see all objects in the workspace
# all csv files were sucessfully imported.
ls()

In [35]:
# see more details
ls.str()

data_Bostone : 'data.frame':	506 obs. of  14 variables:
 $ crim   : num  0.00632 0.02731 0.02729 0.03237 0.06905 ...
 $ zn     : num  18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
 $ indus  : num  2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
 $ chas   : int  0 0 0 0 0 0 0 0 0 0 ...
 $ nox    : num  0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
 $ rm     : num  6.58 6.42 7.18 7 7.15 ...
 $ age    : num  65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
 $ dis    : num  4.09 4.97 4.97 6.06 6.06 ...
 $ rad    : int  1 2 2 3 3 3 5 5 5 5 ...
 $ tax    : int  296 242 242 222 222 222 311 311 311 311 ...
 $ ptratio: num  15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
 $ black  : num  397 397 393 395 397 ...
 $ lstat  : num  4.98 9.14 4.03 2.94 5.33 ...
 $ medv   : num  24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
data_cars : 'data.frame':	50 obs. of  2 variables:
 $ speed: int  4 4 7 7 8 9 10 10 10 11 ...
 $ dist : int  2 10 4 22 16 10 18 26 34 17 ...
data_li

## Run multiple models in one time

In [2]:
# create simulating data
set.seed(2022)
data <- data.frame(y1 = rnorm(n = 200, mean = 10, sd = 5),
                   y2 = rnorm(n = 200, mean = 15, sd = 10),
                   y3 = rnorm(n = 200, mean = 12, sd = 10),
                   x1 = runif(n = 200, min = 0, max = 100),
                   x2 = runif(n = 200, min = 0, max = 2100),
                   x3 = runif(n = 200, min = 50, max = 200)
)

In [4]:
# see the first columns of the simulating data
head(data)

Unnamed: 0_level_0,y1,y2,y3,x1,x2,x3
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,14.50071,18.764746,20.25341,64.64432,447.0848,135.61489
2,4.133271,13.054604,-9.50715,56.0843,177.6777,165.73911
3,5.512573,-6.15336,14.75747,44.13558,255.8928,80.8682
4,2.777493,5.869326,22.67206,71.54717,750.2817,137.52002
5,8.344932,18.497093,29.19872,47.93445,347.8567,67.66569
6,-4.503145,19.055771,14.99965,56.57396,1904.4356,162.99925


### Same Y, different Xs

In [7]:
# model 1: y1 ~ x1
# model 2: y1 ~ x2
# model 3: y1 ~ x3

for (i in 4:6){  # Xs in columns 4-6
  print(names(data[i]))
  lm <- lm(y1 ~ data[[i]], data = data)  # can add control variables. e.g. data[[i]] + control1 + control2
  result <- summary(lm)
  print(result)
}

[1] "x1"

Call:
lm(formula = y1 ~ data[[i]], data = data)

Residuals:
     Min       1Q   Median       3Q      Max 
-14.4525  -3.7203   0.2322   3.9946  14.7903 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  9.18386    0.76040  12.078   <2e-16 ***
data[[i]]    0.01353    0.01372   0.986    0.325    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 5.152 on 198 degrees of freedom
Multiple R-squared:  0.004889,	Adjusted R-squared:  -0.0001365 
F-statistic: 0.9728 on 1 and 198 DF,  p-value: 0.3252

[1] "x2"

Call:
lm(formula = y1 ~ data[[i]], data = data)

Residuals:
    Min      1Q  Median      3Q     Max 
-14.642  -4.165   0.302   4.168  14.230 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 9.4485277  0.7432691  12.712   <2e-16 ***
data[[i]]   0.0003624  0.0005961   0.608    0.544    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error

## Same X, different Ys

In [8]:
# model 1: y1 ~ x1 + x2 + x3
# model 2: y2 ~ x1 + x2 + x3
# model 3: y3 ~ x1 + x2 + x3

# Method 1
for (i in 1:3){  # Ys in columns 1-3
  print(names(data[i]))
  lm <- lm(data[[i]] ~ x1 + x2 + x3, data = data)
  result <- summary(lm)
  print(result)
}

[1] "y1"

Call:
lm(formula = data[[i]] ~ x1 + x2 + x3, data = data)

Residuals:
     Min       1Q   Median       3Q      Max 
-15.4871  -3.6213   0.3281   4.0184  14.4518 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 6.5400836  1.4812961   4.415 1.67e-05 ***
x1          0.0118962  0.0137922   0.863   0.3894    
x2          0.0004632  0.0005955   0.778   0.4376    
x3          0.0177226  0.0087930   2.016   0.0452 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 5.119 on 196 degrees of freedom
Multiple R-squared:  0.02771,	Adjusted R-squared:  0.01282 
F-statistic: 1.862 on 3 and 196 DF,  p-value: 0.1374

[1] "y2"

Call:
lm(formula = data[[i]] ~ x1 + x2 + x3, data = data)

Residuals:
     Min       1Q   Median       3Q      Max 
-24.9643  -6.5928  -0.2791   5.9426  26.9041 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) 18.2691519  2.7265385   6.700 2.15e-10 ***
x1     

In [9]:
# Method 2
lm <- lm(cbind(y1, y2, y3) ~ x1 + x2 + x3, data = data)
summary(lm)

Response y1 :

Call:
lm(formula = y1 ~ x1 + x2 + x3, data = data)

Residuals:
     Min       1Q   Median       3Q      Max 
-15.4871  -3.6213   0.3281   4.0184  14.4518 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 6.5400836  1.4812961   4.415 1.67e-05 ***
x1          0.0118962  0.0137922   0.863   0.3894    
x2          0.0004632  0.0005955   0.778   0.4376    
x3          0.0177226  0.0087930   2.016   0.0452 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 5.119 on 196 degrees of freedom
Multiple R-squared:  0.02771,	Adjusted R-squared:  0.01282 
F-statistic: 1.862 on 3 and 196 DF,  p-value: 0.1374


Response y2 :

Call:
lm(formula = y2 ~ x1 + x2 + x3, data = data)

Residuals:
     Min       1Q   Median       3Q      Max 
-24.9643  -6.5928  -0.2791   5.9426  26.9041 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) 18.2691519  2.7265385   6.700 2.15e-10 ***
x1        

## Different Ys, different Xs

In [12]:
# 9 models
# y1 ~ x1, y1 ~ x2, y1 ~ x3
# y2 ~ x1, y2 ~ x2, y2 ~ x3
# y3 ~ x1, y3 ~ x2, y3 ~ x3

for (i in 4:6){  # Xs in columns 4-6
  print(names(data[i]))
  lm <- lm(paste0("cbind(y1, y2, y3) ~ ", names(data[i])), data = data)
  # can add control variables. e.g., paste0("cbind(y1, y2, y3) ~ ", names(data[i]) "+ control1 + control2"
  result <- summary(lm)
  print(result)
}

[1] "x1"
Response y1 :

Call:
lm(formula = y1 ~ x1, data = data)

Residuals:
     Min       1Q   Median       3Q      Max 
-14.4525  -3.7203   0.2322   3.9946  14.7903 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  9.18386    0.76040  12.078   <2e-16 ***
x1           0.01353    0.01372   0.986    0.325    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 5.152 on 198 degrees of freedom
Multiple R-squared:  0.004889,	Adjusted R-squared:  -0.0001365 
F-statistic: 0.9728 on 1 and 198 DF,  p-value: 0.3252


Response y2 :

Call:
lm(formula = y2 ~ x1, data = data)

Residuals:
     Min       1Q   Median       3Q      Max 
-23.6249  -6.7293  -0.6903   5.8579  27.9805 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 15.92738    1.38714  11.482   <2e-16 ***
x1          -0.03147    0.02503  -1.257     0.21    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual s