## Multicollinearity

In [None]:
library(tidyverse)
library(caret)

In [None]:
# Load the Boston dataset shipped with the MASS package (MASS is not attached).
data(list = "Boston", package = "MASS")

In [None]:
# Full model: regress medv on every other column of Boston.
model1 <- lm(formula = medv ~ ., data = Boston)

In [None]:
# Open the help page for the Boston dataset from the MASS package
# (column descriptions and data source).
?MASS::Boston

In [None]:
# Show the regression summary for model1.
summary(model1)

In [None]:
# Variance inflation factors for the predictors of model1; car is called via
# its namespace, so it must be installed but does not need to be attached.
car::vif(model1)

In [None]:
# Reduced model: same regression as above but with the tax column excluded.
model2 <- lm(formula = medv ~ . - tax, data = Boston)

In [None]:
# Show the regression summary for model2 (the fit without tax).
summary(model2)

## Multicollinearity 2

In [None]:
# Monte Carlo experiment: repeatedly draw two correlated regressors, fit
# Y ~ X_1 + X_2 by OLS, and compare the sampling variance of the two slope
# estimates under a weakly and a strongly correlated regressor design.

# load packages
# NOTE(review): nothing in this cell calls a MASS function, and attaching MASS
# after the tidyverse masks dplyr::select(); kept in case other cells need it.
library(MASS)
library(mvtnorm)

# set number of observations per sample and number of replications
n <- 50
n_sims <- 10000

# initialize matrices of slope estimates (one row per replication)
coefs1 <- cbind("hat_beta_1" = numeric(n_sims), "hat_beta_2" = numeric(n_sims))
coefs2 <- coefs1

# regressor means and covariance matrices; both regressors have variance 10,
# so a covariance of 2.5 (8.5) corresponds to a correlation of 0.25 (0.85)
mu_x <- c(50, 100)
sigma_low <- cbind(c(10, 2.5), c(2.5, 10))
sigma_high <- cbind(c(10, 8.5), c(8.5, 10))

# set seed
set.seed(1)

# loop sampling and estimation
for (i in seq_len(n_sims)) {

  # for cor(X_1, X_2) = 0.25, i.e. cov(X_1, X_2) = 2.5
  X <- rmvnorm(n, mu_x, sigma = sigma_low)
  u <- rnorm(n, sd = 5)
  Y <- 5 + 2.5 * X[, 1] + 3 * X[, 2] + u
  # drop the intercept, keep the two slope estimates
  coefs1[i, ] <- coef(lm(Y ~ X[, 1] + X[, 2]))[-1]

  # for cor(X_1, X_2) = 0.85, i.e. cov(X_1, X_2) = 8.5
  # (the error draw u from above is reused; only the regressors are redrawn)
  X <- rmvnorm(n, mu_x, sigma = sigma_high)
  Y <- 5 + 2.5 * X[, 1] + 3 * X[, 2] + u
  coefs2[i, ] <- coef(lm(Y ~ X[, 1] + X[, 2]))[-1]

}

# obtain variance estimates
diag(var(coefs1))
#> hat_beta_1 hat_beta_2 
#> 0.05674375 0.05712459
diag(var(coefs2))
#> hat_beta_1 hat_beta_2 
#>  0.1904949  0.1909056