# Group 15 - Final Project: SVM

Fit SVMs with three different kernels, using class-weighted tuning and weighted SMOTE data.

In [3]:
# packages
library(dplyr)
library(tidyverse)
library(ggplot2)
library(ggpubr)
library(e1071)
library(ROCR)

## Support Vector Machine

As we did in the homework, we are going to use three different kernels for the support vector machine to classify the level of `staging` of a subject in this analysis. They are `Linear`, `Radial`, and `Polynomial`. We will first split the whole data set into training and test data. We will use 70% of the whole data for training models.

We will try two different approaches:

1) Weighting each class when tuning

2) Using weighted SMOTE data

In [12]:
# load data
# Read the pre-split feature tables and integer-coded labels, then bind them
# into modelling frames whose first column is the response `Category`.

# Decode the integer label column `V1` of a label CSV into a factor.
# The level set is pinned (same alphabetical order `as.factor()` would give)
# so that factors from different splits stay comparable: `==` on factors with
# different level sets is an error, and `table(truth, predict)` would stop
# being square if one split happened to contain no rows of some class.
decode_labels <- function(path) {
  read.csv(path) %>%
    transmute(Category = case_when(V1 == 0 ~ "BD",
                                   V1 == 1 ~ "H",
                                   V1 == 2 ~ "F",
                                   V1 == 3 ~ "C")) %>%
    mutate(Category = factor(Category, levels = c("BD", "C", "F", "H")))
}

train_x <- read.csv("train_x.csv")
train_labels <- decode_labels("train_labels.csv")

test_x <- read.csv("test_x.csv")
test_labels <- decode_labels("test_labels.csv")

# train & test data (response in column 1, predictors after it)
train <- data.frame(train_labels, train_x)
test <- data.frame(test_labels, test_x)

In [19]:
# class weights: n / (4 * n_class), rounded to one decimal.
# The divisor 4 matches the four label codes decoded when loading the data.
class_counts <- table(train$Category)
round(nrow(train) / (class_counts * 4), 1)


 BD   C   F   H 
0.3 5.1 7.2 6.3 

In [7]:
# initial settings: kernels to compare and the hyper-parameter search grids
kernels <- c("linear", "radial", "polynomial")
cost0 <- 10^seq(from = -2, to = 2, by = 0.1)
gamma0 <- 10^seq(from = -5, to = 5, by = 0.1)
degree0 <- as.numeric(1:5)
# per-class weights, keyed by class label
CW <- setNames(c(0.3, 5.1, 7.2, 6.3), c("BD", "C", "F", "H"))

# one slot per kernel for the tuning results
tune_result <- vector("list", 3)

In [22]:
# tuning all models with different kernels
# One class-weighted grid search per kernel, scored by 5-fold cross-validation.
# Every kernel searches over `cost`; the radial kernel also searches `gamma`
# and the polynomial kernel also searches `degree`.
for (i in seq_along(kernels)) {
  # kernel-specific hyper-parameter grid
  param_grid <- switch(kernels[i],
    linear = list(cost = cost0),
    radial = list(cost = cost0, gamma = gamma0),
    list(cost = cost0, degree = degree0)
  )
  # reproducibility: reseed before every search so each kernel's result does
  # not depend on which searches ran before it
  set.seed(0)
  # `ranges` is spelled out in full rather than relying on partial matching
  tune_result[[i]] <- tune(svm, Category ~ ., data = train,
                           kernel = kernels[i], scale = FALSE,
                           ranges = param_grid, class.weights = CW,
                           tunecontrol = tune.control(cross = 5))
}

### Best models for each kernel

In [None]:
# vector of lists to store svm results
# Refit one class-weighted SVM per kernel on the full training frame, using
# the hyper-parameters selected by the corresponding tuning run.
best.models <- vector("list", length(kernels))

for (i in seq_along(kernels)) {
  # one-row data frame whose columns are named after the tuned parameters
  hyper <- tune_result[[i]]$best.parameters
  # Parameters are looked up by name, not column position, so reordering the
  # search grid can never silently swap e.g. cost and gamma.
  best.models[[i]] <- switch(kernels[i],
    linear = svm(Category ~ ., data = train, kernel = kernels[i],
                 scale = FALSE, cost = hyper[["cost"]],
                 class.weights = CW),
    radial = svm(Category ~ ., data = train, kernel = kernels[i],
                 scale = FALSE, cost = hyper[["cost"]],
                 gamma = hyper[["gamma"]], class.weights = CW),
    svm(Category ~ ., data = train, kernel = kernels[i],
        scale = FALSE, cost = hyper[["cost"]],
        degree = hyper[["degree"]], class.weights = CW)
  )
}

In [None]:
# Prediction & Confusion Matrix: training and test datasets
# For every class-weighted model, store training metrics in slot [[1]] and
# test metrics in slot [[2]] of that model's entry in `results`.
results <- vector("list", length(kernels))
for (i in seq_along(kernels)) {
  # array to store results for each model
  a <- array(list(), 2)

  # training data
  ## prediction (column 1 of `train` is the response, so it is dropped)
  pred <- predict(best.models[[i]], train[, -1])
  ## confusion matrix
  confusion <- table(predict = pred, truth = train$Category)
  ## overall accuracy (share of correctly classified rows)
  overall <- mean(pred == train$Category)
  ## recall for each class: correct predictions / true class size
  each.class <- round(diag(confusion) / table(train$Category), 2)
  a[[1]] <- list(pred = pred, confusion = confusion, overall = overall,
                 recall.each.class = each.class)

  # test data
  ## prediction
  pred <- predict(best.models[[i]], test[, -1])
  ## confusion matrix
  confusion <- table(truth = test$Category, predict = pred)
  ## overall accuracy
  accuracy <- mean(pred == test$Category)
  ## recall for each class: correct predictions / true class size
  recall <- round(diag(confusion) / table(test$Category), 2)
  ## precision for each class: correct predictions / predicted class size
  precision <- round(diag(confusion) / table(pred), 2)
  ## F-1 score; appended as element 6 so these results have the same layout
  ## as the weighted-SMOTE results and the F-1 summary table can read them
  F_1 <- round(2 * recall * precision / (recall + precision), 2)

  a[[2]] <- list(pred = pred, confusion = confusion, accuracy = accuracy,
                 recall = recall, precision = precision, F_1 = F_1)

  # combine training & test results into 'results' list
  results[[i]] <- a
}

## Results for test

In [19]:
# Test-set accuracy and per-class recall (elements 3 and 4) for each kernel
results[[1]][[2]][3:4]
results[[2]][[2]][3:4]
results[[3]][[2]][3:4]

ERROR: Error in eval(expr, envir, enclos): object 'results' not found


ERROR: Error in eval(expr, envir, enclos): object 'results' not found


ERROR: Error in eval(expr, envir, enclos): object 'results' not found


# Weighted SMOTE 

In [4]:
# Load the weighted-SMOTE training features from "X_train_wSMOTE.csv"
# and preview the first rows
train.x.wSMOTE <- read.csv(file = "X_train_wSMOTE.csv")
head(train.x.wSMOTE)

Unnamed: 0_level_0,Age,Sex,ALB,ALP,ALT,AST,BIL,CHE,CHOL,CREA,GGT,PROT
Unnamed: 0_level_1,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,32,0,38.5,70.3,18.0,24.7,3.9,11.17,4.8,74,15.6,76.5
2,32,0,46.9,74.7,36.2,52.6,6.1,8.84,5.2,86,33.2,79.3
3,32,0,43.2,52.0,30.6,22.6,18.9,7.33,4.74,80,33.8,75.7
4,32,0,42.2,41.9,35.8,31.1,16.1,5.82,4.6,109,21.5,67.1
5,32,0,44.3,52.3,21.7,22.4,17.2,4.15,3.57,78,24.1,75.4
6,33,0,36.3,78.6,23.6,22.0,7.0,8.56,5.38,78,19.4,68.7


In [5]:
# train dataset : SMOTE
# Features and integer-coded labels are read separately; the labels are
# decoded into a factor and placed first in the combined modelling frame.
train.x.wSMOTE <- read.csv("X_train_wSMOTE.csv")
train.labels.wSMOTE <- read.csv("Y_train_wSMOTE.csv") %>%
  transmute(
    Category = as.factor(case_when(
      V1 == 0 ~ "BD",
      V1 == 1 ~ "H",
      V1 == 2 ~ "F",
      V1 == 3 ~ "C"
    ))
  )
train.wSMOTE <- data.frame(train.labels.wSMOTE, train.x.wSMOTE)

In [8]:
# vector of lists to store tuning results
tune_result.wSMOTE <- vector("list", length(kernels))

# tuning all models with different kernels
# One grid search per kernel on the weighted-SMOTE training frame, scored by
# 5-fold cross-validation. No class weights are passed here. Every kernel
# searches over `cost`; radial also searches `gamma`, polynomial `degree`.
for (i in seq_along(kernels)) {
  # kernel-specific hyper-parameter grid
  param_grid <- switch(kernels[i],
    linear = list(cost = cost0),
    radial = list(cost = cost0, gamma = gamma0),
    list(cost = cost0, degree = degree0)
  )
  # reproducibility: reseed before every search so each kernel's result does
  # not depend on which searches ran before it
  set.seed(0)
  # `ranges` is spelled out in full rather than relying on partial matching
  tune_result.wSMOTE[[i]] <- tune(svm, Category ~ ., data = train.wSMOTE,
                                  kernel = kernels[i], scale = FALSE,
                                  ranges = param_grid,
                                  tunecontrol = tune.control(cross = 5))
}

In [9]:
# vector of lists to store svm results
# Refit one SVM per kernel on the weighted-SMOTE training frame with the
# tuned hyper-parameters; `probability = TRUE` is requested on every fit.
best.models.wSMOTE <- vector("list", length(kernels))

for (i in seq_along(kernels)) {
  # one-row data frame whose columns are named after the tuned parameters
  hyper <- tune_result.wSMOTE[[i]]$best.parameters
  # Parameters are looked up by name, not column position, so reordering the
  # search grid can never silently swap e.g. cost and gamma.
  best.models.wSMOTE[[i]] <- switch(kernels[i],
    linear = svm(Category ~ ., data = train.wSMOTE, probability = TRUE,
                 kernel = kernels[i], scale = FALSE,
                 cost = hyper[["cost"]]),
    radial = svm(Category ~ ., data = train.wSMOTE, probability = TRUE,
                 kernel = kernels[i], scale = FALSE,
                 cost = hyper[["cost"]], gamma = hyper[["gamma"]]),
    svm(Category ~ ., data = train.wSMOTE, probability = TRUE,
        kernel = kernels[i], scale = FALSE,
        cost = hyper[["cost"]], degree = hyper[["degree"]])
  )
}

In [13]:
# Evaluate every weighted-SMOTE model on its training frame and on the
# held-out test frame; slot [[1]] holds training metrics, [[2]] test metrics.
results.wSMOTE <- vector("list", length(kernels))
for (i in seq_along(kernels)) {
  # two-slot container for this model's metrics
  a <- array(list(), 2)

  # ---- training data ----
  # predicted classes (response column dropped from the predictors)
  pred <- predict(best.models.wSMOTE[[i]], train.wSMOTE[, -1])
  # predicted-by-true contingency table
  confusion <- table(predict = pred, truth = train.wSMOTE$Category)
  # share of rows classified correctly
  overall <- mean(pred == train.wSMOTE$Category)
  # per-class recall: diagonal counts over true class sizes
  each.class <- round(diag(confusion) / table(train.wSMOTE$Category), 2)
  a[[1]] <- list(
    pred = pred,
    confusion = confusion,
    overall = overall,
    recall.each.class = each.class
  )

  # ---- test data ----
  pred <- predict(best.models.wSMOTE[[i]], test[, -1])
  # true-by-predicted contingency table
  confusion <- table(truth = test$Category, predict = pred)
  # share of rows classified correctly
  accuracy <- mean(pred == test$Category)
  # per-class recall: diagonal counts over true class sizes
  recall <- round(diag(confusion) / table(test$Category), 2)
  # per-class precision: diagonal counts over predicted class sizes
  precision <- round(diag(confusion) / table(pred), 2)
  # per-class F-1: harmonic mean of the rounded recall and precision
  F_1 <- round(2 * recall * precision / (recall + precision), 2)

  a[[2]] <- list(
    pred = pred,
    confusion = confusion,
    accuracy = accuracy,
    recall = recall,
    precision = precision,
    F_1 = F_1
  )

  # store both metric sets for this kernel
  results.wSMOTE[[i]] <- a
}

In [14]:
# Test-set accuracy, recall, precision and F-1 (elements 3 to 6) per kernel
print("Results for linear SVM")
results.wSMOTE[[1]][[2]][3:6]
print("Results for radial kernel SVM")
results.wSMOTE[[2]][[2]][3:6]
print("Results for polynomial kernel SVM")
results.wSMOTE[[3]][[2]][3:6]

[1] "Results for linear SVM"


$accuracy
[1] 0.9130435

$recall

  BD    C    F    H 
0.99 0.44 0.17 0.43 

$precision
pred
  BD    C    F    H 
0.98 0.80 0.14 0.38 

$F_1

  BD    C    F    H 
0.98 0.57 0.15 0.40 


[1] "Results for radial kernel SVM"


$accuracy
[1] 0.923913

$recall

  BD    C    F    H 
0.97 0.78 0.17 0.71 

$precision
pred
  BD    C    F    H 
0.99 0.54 0.25 0.62 

$F_1

  BD    C    F    H 
0.98 0.64 0.20 0.66 


[1] "Results for polynomial kernel SVM"


$accuracy
[1] 0.9130435

$recall

  BD    C    F    H 
0.99 0.44 0.17 0.43 

$precision
pred
  BD    C    F    H 
0.98 0.80 0.14 0.38 

$F_1

  BD    C    F    H 
0.98 0.57 0.15 0.40 


In [15]:
# tables: recalls
# Per-class test recall for every model: rows are models, columns are classes.
all <- matrix(0, nrow = 6, ncol = 4,
              dimnames = list(c("weighted-linear", "weighted-radial",
                                "weighted-polynomial",
                                "weightedSMOTE-linear",
                                "weightedSMOTE-radial",
                                "weightedSMOTE-polynomial"),
                              c("BD", "C", "F", "H")))
# The metric is fetched by name rather than list position, and its entries
# are matched to the columns by class label rather than by assumed order.
# recalls for SVM with weights
for (k in seq_len(3)) {
  all[k, ] <- as.numeric(results[[k]][[2]][["recall"]][colnames(all)])
}
# recalls for SVM with SMOTE
for (k in seq_len(3)) {
  all[k + 3, ] <-
    as.numeric(results.wSMOTE[[k]][[2]][["recall"]][colnames(all)])
}
all

Unnamed: 0,BD,C,F,H
weighted-linear,0.0,0.0,0.0,0.0
weighted-radial,0.0,0.0,0.0,0.0
weighted-polynomial,0.0,0.0,0.0,0.0
weightedSMOTE-linear,0.99,0.44,0.17,0.43
weightedSMOTE-radial,0.97,0.78,0.17,0.71
weightedSMOTE-plynomial,0.99,0.44,0.17,0.43


In [17]:
# tables: precision
# Per-class test precision for every model: rows are models, columns classes.
p <- matrix(0, nrow = 6, ncol = 4,
            dimnames = list(c("weighted-linear", "weighted-radial",
                              "weighted-polynomial",
                              "weightedSMOTE-linear",
                              "weightedSMOTE-radial",
                              "weightedSMOTE-polynomial"),
                            c("BD", "C", "F", "H")))
# The metric is fetched by name rather than list position, and its entries
# are matched to the columns by class label rather than by assumed order.
# precision for SVM with weights
for (k in seq_len(3)) {
  p[k, ] <- as.numeric(results[[k]][[2]][["precision"]][colnames(p)])
}
# precision for SVM with SMOTE
for (k in seq_len(3)) {
  p[k + 3, ] <-
    as.numeric(results.wSMOTE[[k]][[2]][["precision"]][colnames(p)])
}
p

Unnamed: 0,BD,C,F,H
weighted-linear,0.0,0.0,0.0,0.0
weighted-radial,0.0,0.0,0.0,0.0
weighted-polynomial,0.0,0.0,0.0,0.0
weightedSMOTE-linear,0.98,0.8,0.14,0.38
weightedSMOTE-radial,0.99,0.54,0.25,0.62
weightedSMOTE-plynomial,0.98,0.8,0.14,0.38


In [18]:
# tables: F-1
# Per-class test F-1 score for every model: rows are models, columns classes.
f <- matrix(0, nrow = 6, ncol = 4,
            dimnames = list(c("weighted-linear", "weighted-radial",
                              "weighted-polynomial",
                              "weightedSMOTE-linear",
                              "weightedSMOTE-radial",
                              "weightedSMOTE-polynomial"),
                            c("BD", "C", "F", "H")))

# Return the per-class F-1 of one model's test metrics. A stored `F_1` entry
# is used when present; otherwise the score is derived from the stored recall
# and precision. Without this fallback a result list that lacks `F_1` yields
# NULL and the row assignment fails with a zero-length replacement.
f1_of <- function(test_eval) {
  if (!is.null(test_eval[["F_1"]])) {
    return(test_eval[["F_1"]])
  }
  rec <- test_eval[["recall"]]
  prec <- test_eval[["precision"]]
  round(2 * rec * prec / (rec + prec), 2)
}

# F-1 for SVM with weights
for (k in seq_len(3)) {
  f[k, ] <- as.numeric(f1_of(results[[k]][[2]])[colnames(f)])
}
# F-1 for SVM with SMOTE
for (k in seq_len(3)) {
  f[k + 3, ] <- as.numeric(f1_of(results.wSMOTE[[k]][[2]])[colnames(f)])
}
f

Unnamed: 0,BD,C,F,H
weighted-linear,0.0,0.0,0.0,0.0
weighted-radial,0.0,0.0,0.0,0.0
weighted-polynomial,0.0,0.0,0.0,0.0
weightedSMOTE-linear,0.98,0.57,0.15,0.4
weightedSMOTE-radial,0.98,0.64,0.2,0.66
weightedSMOTE-plynomial,0.98,0.57,0.15,0.4


We want to get the best performance on class C. The best candidates for C are the SVM with polynomial kernel with weighted tuning and the SVM with polynomial kernel trained on weighted SMOTE data. For the other classes, **the SVM with weighted polynomial kernel has the best recall**.