In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import cv2
import os

In [2]:
# Root directory of the dataset. Set the LUNG_DATA_DIR environment variable to
# run this notebook on another machine; when it is unset the original
# hard-coded location is used, so existing behaviour is unchanged.
data_root = os.environ.get('LUNG_DATA_DIR', 'C:/Users/Uday/Desktop/python/Major/New-Data')

# Set the path to the image folders (one sub-folder per data split)
train_folder = data_root + '/train'
test_folder = data_root + '/test'
val_folder = data_root + '/valid'

In [3]:
# Load the training dataset.
# Each class has its own sub-folder under train_folder and the folder name is
# used as the label. Every image is converted to grayscale and resized to
# 224x224 so that all samples share one shape and can be stacked into an array.
train_data = []
train_labels = []
train_skipped = []  # paths that cv2 could not decode
folders = ['NORMAL', 'COVID19', 'PNEUMONIA','TUBERCULOSIS', 'LUNG CANCER']
for folder in folders:
    path = os.path.join(train_folder, folder)
    # os.listdir returns entries in arbitrary order; sort them so the sample
    # order (and therefore the fitted model) is reproducible between runs.
    for img in sorted(os.listdir(path)):
        img_path = os.path.join(path, img)
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None instead of raising; cvtColor would crash on it, so skip.
            train_skipped.append(img_path)
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = cv2.resize(image, (224, 224))
        train_data.append(image)
        train_labels.append(folder)

# Report skipped files explicitly rather than dropping them silently.
if train_skipped:
    print('Skipped', len(train_skipped), 'unreadable training files, e.g.', train_skipped[0])

train_data = np.array(train_data)
train_labels = np.array(train_labels)

In [4]:
# Load the testing dataset.
# Same layout and preprocessing as the training set: one sub-folder per class
# (names taken from `folders`), grayscale conversion, resize to 224x224.
test_data = []
test_labels = []
test_skipped = []  # paths that cv2 could not decode
for folder in folders:
    path = os.path.join(test_folder, folder)
    # os.listdir returns entries in arbitrary order; sort for a stable order.
    for img in sorted(os.listdir(path)):
        img_path = os.path.join(path, img)
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None instead of raising; cvtColor would crash on it, so skip.
            test_skipped.append(img_path)
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = cv2.resize(image, (224, 224))
        test_data.append(image)
        test_labels.append(folder)

# Report skipped files explicitly rather than dropping them silently.
if test_skipped:
    print('Skipped', len(test_skipped), 'unreadable test files, e.g.', test_skipped[0])

test_data = np.array(test_data)
test_labels = np.array(test_labels)

In [5]:

# Load the validation dataset.
# Same layout and preprocessing as the training set: one sub-folder per class
# (names taken from `folders`), grayscale conversion, resize to 224x224.
val_data = []
val_labels = []
val_skipped = []  # paths that cv2 could not decode
for folder in folders:
    path = os.path.join(val_folder, folder)
    # os.listdir returns entries in arbitrary order; sort for a stable order.
    for img in sorted(os.listdir(path)):
        img_path = os.path.join(path, img)
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None instead of raising; cvtColor would crash on it, so skip.
            val_skipped.append(img_path)
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = cv2.resize(image, (224, 224))
        val_data.append(image)
        val_labels.append(folder)

# Report skipped files explicitly rather than dropping them silently.
if val_skipped:
    print('Skipped', len(val_skipped), 'unreadable validation files, e.g.', val_skipped[0])

val_data = np.array(val_data)
val_labels = np.array(val_labels)

In [6]:
# Convert every split to float32 and divide by 255.
# NOTE: this rebinds the same names, so running the cell twice rescales again.
train_data = train_data.astype(np.float32) / 255.0
test_data = test_data.astype(np.float32) / 255.0
val_data = val_data.astype(np.float32) / 255.0

In [7]:
# Flatten each sample into a single feature row: one row per image,
# with all remaining axes collapsed into the second dimension.
train_data = train_data.reshape((len(train_data), -1))
test_data = test_data.reshape((len(test_data), -1))
val_data = val_data.reshape((len(val_data), -1))

In [8]:
# Fit a linear-kernel support vector classifier on the flattened training images
clf = svm.SVC(kernel='linear')
clf.fit(X=train_data, y=train_labels)

In [9]:

# Run the fitted classifier over the validation split
val_pred = clf.predict(X=val_data)

In [10]:
# Share of validation samples whose predicted label equals the true label
val_accuracy = accuracy_score(y_true=val_labels, y_pred=val_pred)
print('Validation accuracy:', val_accuracy)

Validation accuracy: 0.9509803921568627


In [11]:

# Run the fitted classifier over the held-out test split
test_pred = clf.predict(X=test_data)

In [12]:

# Share of test samples whose predicted label equals the true label
test_accuracy = accuracy_score(y_true=test_labels, y_pred=test_pred)
print('Test accuracy:', test_accuracy)

Test accuracy: 0.9099225897255454


In [13]:

# Count test predictions per (true label, predicted label) pair
cm = confusion_matrix(y_true=test_labels, y_pred=test_pred)
print('Confusion matrix:\n', cm)

Confusion matrix:
 [[120   0   0   1   0]
 [ 30 184   1   3   6]
 [  0   4 313   2   1]
 [  5   0  27 670   2]
 [ 39   0   5   2   6]]


In [16]:
# import seaborn as sns
# import pandas as pd

# classes = ['COVID19', 'LUNG CANCER', 'NORMAL', 'PNEUMONIA', 'TUBERCULOSIS']
# df_cm=pd.DataFrame(cm, columns=classes, index=classes)
# df_cm.index.name='Actual'
# df_cm.columns.name='Predicted'
# plt.figure(figsize=(8,5))
# sns.heatmap(df_cm/np.sum(df_cm), fmt='.2%', annot=True, annot_kws={'size':16})
# plt.show()

In [17]:
# Per-class precision / recall / F1 summary for the test split
cr = classification_report(y_true=test_labels, y_pred=test_pred)
print('Classification report:\n', cr)

Classification report:
               precision    recall  f1-score   support

     COVID19       0.62      0.99      0.76       121
 LUNG CANCER       0.98      0.82      0.89       224
      NORMAL       0.90      0.98      0.94       320
   PNEUMONIA       0.99      0.95      0.97       704
TUBERCULOSIS       0.40      0.12      0.18        52

    accuracy                           0.91      1421
   macro avg       0.78      0.77      0.75      1421
weighted avg       0.91      0.91      0.90      1421

