# Cervical Cancer Risk Classification (Support Vector Classification)

In [1]:
import pandas as pd
import numpy as np

In [2]:
dataset = pd.read_csv('kag_risk_factors_cervical_cancer.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [3]:
print(X)

[[18 '4.0' '15.0' ... 0 0 0]
 [15 '1.0' '14.0' ... 0 0 0]
 [34 '1.0' '?' ... 0 0 0]
 ...
 [25 '2.0' '17.0' ... 0 0 1]
 [33 '2.0' '24.0' ... 0 0 0]
 [29 '2.0' '20.0' ... 0 0 0]]


In [4]:
print(y)

[0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0
 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0
 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 

## Handling Missing Data

In [13]:
from sklearn.impute import SimpleImputer
simple_imputer = SimpleImputer(missing_values = '?', strategy = 'constant', fill_value = '0')
X = simple_imputer.fit_transform(X)

## Converting all features to float

In [14]:
X = X.astype(float)

In [15]:
print(X)

[[18.  4. 15. ...  0.  0.  0.]
 [15.  1. 14. ...  0.  0.  0.]
 [34.  1.  0. ...  0.  0.  0.]
 ...
 [25.  2. 17. ...  0.  0.  1.]
 [33.  2. 24. ...  0.  0.  0.]
 [29.  2. 20. ...  0.  0.  0.]]


## Splitting dataset to train and test dataset

In [19]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [20]:
print(len(X_train))

686


In [15]:
print(len(X_test))

172


In [16]:
print(len(y_train))

686


In [17]:
print(len(y_test))

172


## Featuer Scaling

In [21]:
from sklearn.preprocessing import StandardScaler
std_scaler = StandardScaler()
X_train = std_scaler.fit_transform(X_train)
X_test = std_scaler.fit_transform(X_test)

In [22]:
print(X_train)

[[ 0.12568315  0.28789742  0.37838763 ... -0.22480211 -0.32352726
  -0.22120051]
 [ 0.00643192  0.85869982 -0.2433463  ... -0.22480211 -0.32352726
  -0.22120051]
 [ 0.48343686  0.28789742 -0.86508023 ... -0.22480211 -0.32352726
  -0.22120051]
 ...
 [-0.35132179 -0.85370737  0.06752067 ... -0.22480211 -0.32352726
  -0.22120051]
 [-0.82832674 -0.85370737 -0.2433463  ... -0.22480211 -0.32352726
  -0.22120051]
 [ 2.27220541 -0.85370737 -0.55421326 ... -0.22480211 -0.32352726
  -0.22120051]]


In [20]:
print(X_test)

[[ 0.41394562 -0.18883727 -0.0500641  ... -0.10846523 -0.23497813
  -0.27386128]
 [ 1.87616109  1.19329085 -0.38125739 ... -0.10846523 -0.23497813
  -0.27386128]
 [-0.37340118 -0.18883727 -1.04364398 ... -0.10846523 -0.23497813
  -0.27386128]
 ...
 [-0.82331363 -0.87990133 -1.04364398 ... -0.10846523 -0.23497813
  -0.27386128]
 [ 1.0888143   0.50222679 -0.71245069 ... -0.10846523 -0.23497813
  -0.27386128]
 [ 0.75137996  0.50222679  0.61232248 ... -0.10846523 -0.23497813
  -0.27386128]]


## Data Fitting and Model Creation

In [23]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'poly')
classifier.fit(X_train, y_train)

SVC(kernel='poly')

## Prediction

In [24]:
y_pred = classifier.predict(X_test)

In [25]:
print(y_pred)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


## Classification Metrics

In [26]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred)

array([[160,   4],
       [  6,   2]])

In [27]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred) * 100

94.18604651162791