# Reading the dataset

In [9]:
# Importing the required library
import pandas as pd

In [10]:
# Reading the dataset.
# The CSV is fetched directly from the MocksDatasets GitHub repository, so this
# cell needs network access. To work from a local copy instead, pass the path
# 'diabetes.csv' to pd.read_csv.
DATA_URL = 'https://raw.githubusercontent.com/analyticsindiamagazine/MocksDatasets/main/diabetes.csv'
data = pd.read_csv(DATA_URL)

In [11]:
# Previewing the first five records to sanity-check the load
data.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [12]:
# Checking the shape of the dataset, reported as (rows, columns)
data.shape

(768, 9)

In [13]:
# Class label count: absolute number of samples for each value of 'Outcome'
data['Outcome'].value_counts()

0    500
1    268
Name: Outcome, dtype: int64

In [14]:
# Class label proportions: normalize=True reports each class as a fraction of
# all rows, which makes the class imbalance easy to read off
data['Outcome'].value_counts(normalize=True)

0    0.651042
1    0.348958
Name: Outcome, dtype: float64

# Defining input and output features

In [15]:
# Defining input (X) and output (y) features.
# The target is selected by its column name ('Outcome') rather than by its
# position, so the split stays correct even if the column order of the CSV
# changes; every remaining column is used as an input feature.
# drop() returns a new frame, so `data` itself is left untouched.
X = data.drop(columns='Outcome').to_numpy()
y = data['Outcome'].to_numpy()

In [16]:
# Checking the shape of input-output features: X should keep every row and all
# non-target columns, y should be a 1-D array with one label per row
X.shape, y.shape

((768, 8), (768,))

In [17]:
# Creating training and test patterns.
# 15% of the samples are held out for testing; the rows are shuffled before the
# split and the seed is fixed so the partition is reproducible.
# NOTE(review): the split is not stratified even though the class labels are
# imbalanced (roughly 65% / 35%) - consider passing stratify=y.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=0,
    shuffle=True,
)

In [18]:
# Checking shape of training and test sets (rows per split, feature count)
X_train.shape, X_test.shape

((652, 8), (116, 8))

In [19]:
# Checking the input training patterns before scaling; the features are still
# in their original units at this point
print(X_train)

[[2.00e+00 1.08e+02 6.20e+01 ... 2.53e+01 8.81e-01 2.20e+01]
 [1.00e+00 1.93e+02 5.00e+01 ... 2.59e+01 6.55e-01 2.40e+01]
 [1.00e+00 1.11e+02 8.60e+01 ... 3.01e+01 1.43e-01 2.30e+01]
 ...
 [4.00e+00 9.40e+01 6.50e+01 ... 2.47e+01 1.48e-01 2.10e+01]
 [1.10e+01 8.50e+01 7.40e+01 ... 3.01e+01 3.00e-01 3.50e+01]
 [5.00e+00 1.36e+02 8.20e+01 ... 0.00e+00 6.40e-01 6.90e+01]]


In [20]:
# Feature scaling (z-standardization).
# The scaler is fitted on the training patterns only, and the statistics it
# learned there are then applied to both splits, so nothing from the test set
# influences the preprocessing.
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
sc.fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [21]:
# Checking the scaled input training patterns (X_train now holds the
# standardized values, not the raw ones)
print(X_train)

[[-0.54921808 -0.3978825  -0.35995979 ... -0.85834839  1.26393096
  -0.95875201]
 [-0.84249424  2.29516196 -0.97367202 ... -0.78239248  0.57614012
  -0.79199029]
 [-0.84249424 -0.30283387  0.86746466 ... -0.25070111 -0.9820409
  -0.87537115]
 ...
 [ 0.03733424 -0.84144276 -0.20653173 ... -0.93430431 -0.96682429
  -1.04213287]
 [ 2.09026735 -1.12658864  0.25375243 ... -0.25070111 -0.5042393
   0.12519918]
 [ 0.33061039  0.48923803  0.66289392 ... -4.06115598  0.53049029
   2.96014843]]


# Defining and training the SVM model

In [22]:
# Defining the SVM classification model with a linear kernel.
# NOTE(review): per the scikit-learn docs, random_state only affects the
# probability estimates; with probability left at its default it presumably
# has no effect here - confirm before relying on it for reproducibility.
from sklearn.svm import SVC
SVM_classifier = SVC(kernel = 'linear', random_state = 0)

In [23]:
# Training the SVM classifier on the standardized training patterns and their labels
SVM_classifier.fit(X_train, y_train)

SVC(kernel='linear', random_state=0)

# Predictions and evaluations

In [24]:
# Making predictions with the test data (X_test was scaled with the scaler
# fitted on the training set)
y_pred = SVM_classifier.predict(X_test)

In [25]:
# Side-by-side table of the predicted and the actual labels, one row per test sample
pd.DataFrame({'Predicted Labels': y_pred, 'Actual Labels': y_test})

Unnamed: 0,Predicted Labels,Actual Labels
0,1,1
1,0,0
2,0,0
3,1,1
4,0,0
...,...,...
111,0,1
112,1,1
113,0,1
114,1,1


In [26]:
# Confusion matrix for the test set.
# Rows are the actual classes and columns the predicted classes, so the
# diagonal holds the correctly classified samples.
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test, y_pred))

[[71  7]
 [13 25]]


In [27]:
# Accuracy score: share of test samples whose predicted label matches the actual label
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.8275862068965517

In [28]:
# Classification report: per-class precision, recall, F1-score and support on
# the test set, plus the overall accuracy and averaged scores
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.85      0.91      0.88        78
           1       0.78      0.66      0.71        38

    accuracy                           0.83       116
   macro avg       0.81      0.78      0.80       116
weighted avg       0.82      0.83      0.82       116

