In [None]:
# Read the Wisconsin breast cancer data; character columns stay as strings.
BreastCancer <- read.csv(file = "wisc_bc_data.csv", stringsAsFactors = FALSE)

# First four cells of the summary table (linear indexing into the table).
summary(BreastCancer)[1:4]

# Column types and a preview of each column.
str(BreastCancer)

# Top-left 5 x 5 corner of the data.
BreastCancer[seq_len(5), seq_len(5)]

## Data Preparation



**Inspect the dimensions of the data (number of rows and columns)**


In [None]:
# Number of rows (observations) and columns (variables) in the data.
c(nrow(BreastCancer), ncol(BreastCancer))

In [None]:
# Count missing values across the whole data set.
# `data` is not created until a later cell, so at this point the name would
# resolve to the base `data()` function and the count would always be 0;
# the object to inspect here is `BreastCancer`.
sum(is.na(BreastCancer))

In [None]:
# Number of samples per diagnosis class.
table(BreastCancer[["diagnosis"]])

Compare the feature distributions of the benign and malignant samples



In [None]:
# Reshape the data to long format and draw one box plot per feature,
# split by diagnosis.
library(reshape2)
library(ggplot2)  # ggplot() is used below; tidyverse is only attached in a later cell

# Drop the first column before melting.
# NOTE(review): presumably a sample identifier -- confirm against the CSV.
bc <- BreastCancer[, -1]

# One row per (diagnosis, variable, value) triple.
bc.melt <- melt(bc, id.vars = "diagnosis")
head(bc.melt)

# Values are plotted as log(value + 1); facets are laid out 8 per row.
ggplot(data = bc.melt, aes(x = diagnosis, y = log(value + 1), fill = diagnosis)) +
    geom_boxplot() + theme_bw() + facet_wrap(~variable, ncol = 8)



In [None]:
# Build the modelling data set: drop the first column of the raw data and
# turn the diagnosis label into a factor.
library(tidyverse)
data <- BreastCancer %>%
    select(-1) %>%
    mutate(diagnosis = as.factor(diagnosis))
data[1:5, 1:5]

# Correlation plot of every column except the first one of `data`.
# NOTE(review): assumes `diagnosis` is that first column, so only the
# remaining (numeric) feature columns reach cor() -- confirm.
corrplot::corrplot(cor(data[, -1]))

### Data for training and testing



Split the data into training and test sets (70:30), stratified by diagnosis



In [None]:
# Stratified 70/30 split of `data` by diagnosis.
library(sampling)
set.seed(123)

# Samples per class, and the classes in the order they first appear in `data`.
class_counts <- table(data$diagnosis)
strata_order <- unique(as.character(data$diagnosis))

# 70% of each class, listed in order of appearance rather than by reversing
# the alphabetical table, so the sizes stay matched to the right class
# whichever class happens to come first in the file.
# NOTE(review): relies on strata() reading `size` in order of appearance of
# the strata in the data -- confirm against the sampling::strata documentation.
train_size <- as.vector(round(class_counts[strata_order] * 0.7))

train_id <- strata(data, "diagnosis", size = train_size)$ID_unit
train_data <- data[train_id, ]
test_data <- data[-train_id, ]



Check that the split preserved the same proportion of positive and negative samples in the training and test data


In [None]:
# Share of each diagnosis class in the training partition ...
train_class_counts <- table(train_data$diagnosis)
prop.table(train_class_counts)

# ... and in the test partition, for comparison.
test_class_counts <- table(test_data$diagnosis)
prop.table(test_class_counts)

Train the SVM Linear Kernel Function

In [None]:
# Packages providing the modelling functions used in this and the following
# kernel cells; nothing earlier in the notebook attaches them.
library(e1071)  # tune.svm(), svm()
library(caret)  # confusionMatrix(), rfe(), rfeControl(), lrFuncs

# ---- Linear kernel, all features: grid search over `cost` ----
set.seed(123)
linear.tune <- tune.svm(diagnosis ~ ., data = train_data, kernel = "linear",
    cost = c(0.001, 0.01, 0.1, 1, 5, 10))
summary(linear.tune)

# Evaluate the best tuned model on the held-out test data.
best.linear <- linear.tune$best.model
tune.test <- predict(best.linear, newdata = test_data)
table(tune.test, test_data$diagnosis)

confusionMatrix(tune.test, test_data$diagnosis, positive = "B")

# ---- Recursive feature elimination (10-fold CV), then refit on the subset ----
# NOTE(review): the control object uses `lrFuncs`, so `method = "svmLinear"`
# is only forwarded through `...`; confirm whether it has any effect on the
# model used to rank features.
set.seed(123)
rfeCNTL <- rfeControl(functions = lrFuncs, method = "cv", number = 10)
svmLinear <- rfe(train_data[, -1], train_data[, 1], sizes = c(7, 6, 5, 4),
    rfeControl = rfeCNTL, method = "svmLinear")
svmLinear

# Names of the selected predictors (first coefficient name is dropped),
# assembled into a model formula.
vec <- names(coefficients(svmLinear$fit))[-1]
var <- paste(vec, collapse = "+")
fun <- as.formula(paste("diagnosis", "~", var))

# Fit under its own name so the model object does not shadow svm().
linear_rfe_fit <- svm(fun, data = train_data, kernel = "linear")
# drop = FALSE keeps a data frame even if only one predictor is selected.
Linear.predict <- predict(linear_rfe_fit, newdata = test_data[, vec, drop = FALSE])
table(Linear.predict, test_data$diagnosis)

confusionMatrix(Linear.predict, test_data$diagnosis, positive = "B")



Train the SVM Polynomial Kernel Function

In [None]:
# ---- Polynomial kernel, all features: grid search over `degree` and `coef0` ----
set.seed(123)
poly.tune <- tune.svm(diagnosis ~ ., data = train_data, kernel = "polynomial",
    degree = c(3, 4, 5), coef0 = c(0.1, 0.5, 1, 2, 3, 4))
summary(poly.tune)

# Evaluate the best tuned model on the held-out test data.
best.poly <- poly.tune$best.model
poly.test <- predict(best.poly, newdata = test_data)
table(poly.test, test_data$diagnosis)

confusionMatrix(poly.test, test_data$diagnosis, positive = "B")

# ---- Recursive feature elimination, then refit on the selected subset ----
# Reuses the `rfeCNTL` control object created in the linear-kernel cell.
# NOTE(review): that control uses `lrFuncs`, so `method = "svmPoly"` is only
# forwarded through `...`; confirm whether it has any effect.
set.seed(123)
svmPoly <- rfe(train_data[, -1], train_data[, 1], sizes = c(7, 6, 5, 4),
    rfeControl = rfeCNTL, method = "svmPoly")
svmPoly

# Names of the selected predictors (first coefficient name is dropped),
# assembled into a model formula.
vec <- names(coefficients(svmPoly$fit))[-1]
var <- paste(vec, collapse = "+")
fun <- as.formula(paste("diagnosis", "~", var))

# Fit under its own name so the model object does not shadow svm();
# the kernel name is spelled out in full, as in the tuning call above.
poly_rfe_fit <- svm(fun, data = train_data, kernel = "polynomial")
# drop = FALSE keeps a data frame even if only one predictor is selected.
Poly.predict <- predict(poly_rfe_fit, newdata = test_data[, vec, drop = FALSE])
table(Poly.predict, test_data$diagnosis)

# Same evaluation as reported for the linear and radial kernels.
confusionMatrix(Poly.predict, test_data$diagnosis, positive = "B")



Train the SVM Radial Kernel Function

In [None]:
# ---- Radial kernel, all features: grid search over `gamma` ----
set.seed(123)
rbf.tune <- tune.svm(diagnosis ~ ., data = train_data, kernel = "radial",
    gamma = c(0.1, 0.5, 1, 2, 3, 4))
summary(rbf.tune)

# Evaluate the best tuned model on the held-out test data.
best.rbf <- rbf.tune$best.model
rbf.test <- predict(best.rbf, newdata = test_data)
table(rbf.test, test_data$diagnosis)

confusionMatrix(rbf.test, test_data$diagnosis, positive = "B")

# ---- Recursive feature elimination, then refit on the selected subset ----
# Reuses the `rfeCNTL` control object created in the linear-kernel cell.
# NOTE(review): that control uses `lrFuncs`, so `method = "svmRadial"` is only
# forwarded through `...`; confirm whether it has any effect.
set.seed(123)
svmRadial <- rfe(train_data[, -1], train_data[, 1], sizes = c(7, 6, 5, 4),
    rfeControl = rfeCNTL, method = "svmRadial")
svmRadial

# Names of the selected predictors (first coefficient name is dropped),
# assembled into a model formula.
vec <- names(coefficients(svmRadial$fit))[-1]
var <- paste(vec, collapse = "+")
fun <- as.formula(paste("diagnosis", "~", var))

# Fit under its own name so the model object does not shadow svm().
radial_rfe_fit <- svm(fun, data = train_data, kernel = "radial")
# drop = FALSE keeps a data frame even if only one predictor is selected.
Radial.predict <- predict(radial_rfe_fit, newdata = test_data[, vec, drop = FALSE])
table(Radial.predict, test_data$diagnosis)

confusionMatrix(Radial.predict, test_data$diagnosis, positive = "B")


Train the SVM Sigmoid Kernel Function

In [None]:
# ---- Sigmoid kernel, all features: grid search over `gamma` and `coef0` ----
set.seed(123)
sigmoid.tune <- tune.svm(diagnosis ~ ., data = train_data, kernel = "sigmoid",
    gamma = c(0.1, 0.5, 1, 2, 3, 4), coef0 = c(0.1, 0.5, 1, 2, 3, 4))
summary(sigmoid.tune)

# Evaluate the best tuned model on the held-out test data.
best.sigmoid <- sigmoid.tune$best.model
sigmoid.test <- predict(best.sigmoid, newdata = test_data)
table(sigmoid.test, test_data$diagnosis)

confusionMatrix(sigmoid.test, test_data$diagnosis, positive = "B")

# ---- Recursive feature elimination (10-fold CV), then refit on the subset ----
# NOTE(review): the control object uses `lrFuncs`, so `method = "svmSigmoid"`
# is only forwarded through `...`; confirm whether it has any effect.
set.seed(123)
rfeCNTL <- rfeControl(functions = lrFuncs, method = "cv", number = 10)
svmSigmoid <- rfe(train_data[, -1], train_data[, 1], sizes = c(7, 6, 5, 4),
    rfeControl = rfeCNTL, method = "svmSigmoid")
svmSigmoid

# Names of the selected predictors (first coefficient name is dropped),
# assembled into a model formula.
vec <- names(coefficients(svmSigmoid$fit))[-1]
var <- paste(vec, collapse = "+")
fun <- as.formula(paste("diagnosis", "~", var))

# Fit under its own name so the model object does not shadow svm().
sigmoid_rfe_fit <- svm(fun, data = train_data, kernel = "sigmoid")
# drop = FALSE keeps a data frame even if only one predictor is selected.
Sigmoid.predict <- predict(sigmoid_rfe_fit, newdata = test_data[, vec, drop = FALSE])
table(Sigmoid.predict, test_data$diagnosis)

# Same evaluation as reported for the linear and radial kernels.
confusionMatrix(Sigmoid.predict, test_data$diagnosis, positive = "B")

