# Group 15 - Final Project: Radial SVM

In [2]:
# packages
library(dplyr)
library(tidyverse)
library(ggplot2)
library(ggpubr)
library(e1071)
library(ROCR)

In [11]:
# Class labels, in the same (alphabetical) order as.factor() would produce.
# Pinning the levels keeps the train and test factors on an identical level
# set even if one split happens to contain no example of some class; otherwise
# `pred == test$Category` fails and the confusion matrix is not square.
category_levels <- c("BD", "C", "F", "H")

# Read a label file whose column `V1` holds the integer codes 0-3 and return a
# one-column data frame `Category` with the decoded class as a factor.
# Codes outside 0-3 become NA (case_when has no fallback branch).
read_labels <- function(path) {
  read.csv(path) %>%
    transmute(Category = case_when(V1 == 0 ~ "BD",
                                   V1 == 1 ~ "H",
                                   V1 == 2 ~ "F",
                                   V1 == 3 ~ "C")) %>%
    mutate(Category = factor(Category, levels = category_levels))
}

# train dataset: weighted SMOTE
train.x.wSMOTE <- read.csv("X_train_wSMOTE.csv")
train.labels.wSMOTE <- read_labels("Y_train_wSMOTE.csv")
# label column first, then the predictors
train.wSMOTE <- data.frame(train.labels.wSMOTE, train.x.wSMOTE)

# test dataset
test_x <- read.csv("test_x.csv")
test_labels <- read_labels("test_labels.csv")
test <- data.frame(test_labels, test_x)

head(train.wSMOTE)

Unnamed: 0_level_0,Category,Age,Sex,ALB,ALP,ALT,AST,BIL,CHE,CHOL,CREA,GGT,PROT
Unnamed: 0_level_1,<fct>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,BD,32,0,38.5,70.3,18.0,24.7,3.9,11.17,4.8,74,15.6,76.5
2,BD,32,0,46.9,74.7,36.2,52.6,6.1,8.84,5.2,86,33.2,79.3
3,BD,32,0,43.2,52.0,30.6,22.6,18.9,7.33,4.74,80,33.8,75.7
4,BD,32,0,42.2,41.9,35.8,31.1,16.1,5.82,4.6,109,21.5,67.1
5,BD,32,0,44.3,52.3,21.7,22.4,17.2,4.15,3.57,78,24.1,75.4
6,BD,33,0,36.3,78.6,23.6,22.0,7.0,8.56,5.38,78,19.4,68.7


In [8]:
# initial settings
kernels <- "radial"
# log-spaced search grids: cost in [1e-2, 1e2], gamma in [1e-5, 1e5]
cost0 <- 10^seq(-2, 2, 0.1)
gamma0 <- 10^seq(-5, 5, 0.1)
# not used by the radial-kernel search in this cell
degree0 <- c(1, 2, 3, 4, 5)

# reproducibility: the cross-validation folds are drawn at random
set.seed(0)

# Tune the radial kernel SVM with 5-fold cross-validation over the full
# cost x gamma grid. The argument is spelled `ranges` in full instead of
# relying on partial matching of `range`.
tune_result.wSMOTE <- tune(svm, Category ~ ., data = train.wSMOTE,
                           kernel = kernels, scale = FALSE,
                           ranges = list(cost = cost0, gamma = gamma0),
                           tunecontrol = tune.control(cross = 5))

In [9]:
##### Best model for radial kernel
# hyper-parameters of the best model found by the grid search
# (one-row data frame with columns `cost` and `gamma`)
hyper <- tune_result.wSMOTE$best.parameters

# Refit the radial kernel SVM on the full training set with those
# hyper-parameters and probability estimates enabled. The columns are selected
# by name so the call does not depend on the order of the tuning grid.
best.models.wSMOTE <- svm(Category ~ ., data = train.wSMOTE,
                          probability = TRUE, kernel = kernels, scale = FALSE,
                          cost = hyper$cost, gamma = hyper$gamma)

In [17]:
##### Hyper-parameters (cost, gamma) selected by the cross-validated grid search
hyper

Unnamed: 0_level_0,cost,gamma
Unnamed: 0_level_1,<dbl>,<dbl>
765,3.981072,0.0006309573


In [15]:
##### Evaluate the best model on the training and test sets:
##### predicted classes, confusion matrix, overall accuracy,
##### and per-class recall / precision / F-1 score (test set only)

# a[[1]] holds the training-set results, a[[2]] the test-set results
a <- vector("list", 2)

#### training data
## prediction (first column of train.wSMOTE is the label, so drop it)
pred <- predict(best.models.wSMOTE, train.wSMOTE[, -1])
## confusion matrix (rows = predicted class, columns = true class)
confusion <- table(predict = pred, truth = train.wSMOTE$Category)
## overall accuracy
overall <- mean(pred == train.wSMOTE$Category)
## recall for each class: correctly predicted / number of true cases
each.class <- round(diag(confusion) / table(train.wSMOTE$Category), 2)
a[[1]] <- list(pred = pred, confusion = confusion, overall = overall,
               recall.each.class = each.class)

#### test data
## prediction
pred <- predict(best.models.wSMOTE, test[, -1])
## confusion matrix (rows = true class, columns = predicted class;
## note this is transposed relative to the training-set matrix above)
confusion <- table(truth = test$Category, predict = pred)
## overall accuracy
accuracy <- mean(pred == test$Category)
## unrounded per-class recall and precision, kept so that the F-1 score is
## not computed from values that were already rounded to two decimals
recall_raw <- diag(confusion) / table(test$Category)
precision_raw <- diag(confusion) / table(pred)
## recall
recall <- round(recall_raw, 2)
## precision (NaN for a class that is never predicted)
precision <- round(precision_raw, 2)
## F-1 score: harmonic mean of precision and recall, rounded once at the end
F_1 <- round(2 * recall_raw * precision_raw / (recall_raw + precision_raw), 2)
a[[2]] <- list(pred = pred, confusion = confusion, accuracy = accuracy,
               precision = precision, recall = recall, F_1 = F_1)

# combine training & test results into the 'results' list
results.wSMOTE <- a

In [16]:
##### Display the test-set results (element 2 of results.wSMOTE; element 1 holds the training-set results)
results.wSMOTE[[2]]

$pred
  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20 
 BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD 
 21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40 
 BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD   H  BD  BD 
 41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60 
 BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD 
 61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80 
 BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD   C  BD  BD  BD  BD 
 81  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 
 BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD 
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 
 BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD  BD 
121 122 123 124 125 12