In [1]:
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.vgg19 import preprocess_input
from tensorflow.keras.models import Model

from sklearn.preprocessing import LabelEncoder
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import multilabel_confusion_matrix

import warnings
import numpy as np
import sys
import os
import glob
from pathlib import Path
from xml.etree import ElementTree

In [2]:
img_dim_x = 224
img_dim_y = 224
img_rgb = 3
img_dim = [img_dim_x, img_dim_y]
img_folder = '/Users/utkarsh/Desktop/study/iitj/sem2/ml2/assignment/VOCdevkit/VOC2012/'

In [3]:
img_dir = os.path.join(img_folder ,'JPEGImages2/')
ext = ['png', 'jpg', 'gif']
img_files = []
[img_files.extend(glob.glob(img_dir + '*.' + e)) for e in ext]

[None, None, None]

In [4]:
xml_files = []
for file in img_files:
    filename = Path(file).stem
    xml_dir = os.path.join(img_folder, 'Annotations/')
    xml_file = xml_dir + filename + ".xml"
    xml_files.append(xml_file)

In [5]:
img_dict = {'img_path': img_files, 'xml_path': xml_files}

In [6]:
warnings.filterwarnings('ignore')
vgg19 = VGG19(weights='imagenet')
vgg19.summary()

model = Model(inputs=vgg19.input, outputs=vgg19.get_layer('fc2').output)
labels = []
features = []

# useful for getting number of output classes
img_paths = img_dict['img_path']
xml_paths = img_dict['xml_path']

for index in range(0, len(img_paths)):
    img = image.load_img(img_paths[index], target_size=(img_dim_x, img_dim_y))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    block_pool_features = model.predict(x)
    flat_pool_features = block_pool_features.flatten()
    features.append(flat_pool_features)
    labels.append(ElementTree.parse(xml_paths[index]).getroot().find('.//object').find('name').text) 

Model: "vgg19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [7]:
print(len(features[0]))
print("features: {}".format(features))
print("labels: {}".format(labels))
print("unique labels: {}".format(list(set(labels))))

le_labels = LabelEncoder().fit_transform(labels)

# get the shape of training labels
print("encoded labels: {}".format(le_labels))
print("features shape: {}".format(np.array(features).shape))
print("encoded labels shape: {}".format(le_labels.shape))

4096
features: [array([-0.       , -0.       , -0.       , ..., -0.       ,  5.0703998,
       -0.       ], dtype=float32), array([ 5.3971524, -0.       ,  2.7153337, ..., -0.       , -0.       ,
       -0.       ], dtype=float32), array([ 0.71897113, -0.        , -0.        , ..., -0.        ,
       -0.        , -0.        ], dtype=float32), array([-0.       , -0.       ,  3.2457087, ..., -0.       , -0.       ,
       -0.       ], dtype=float32), array([-0.        ,  2.0118582 ,  0.41150683, ...,  0.19820923,
        1.1877387 ,  0.81167275], dtype=float32), array([-0.       , -0.       ,  1.5629687, ..., -0.       , -0.       ,
       -0.       ], dtype=float32), array([-0.       ,  3.0575397, -0.       , ...,  4.2556543, -0.       ,
       -0.       ], dtype=float32), array([-0.        ,  1.0823995 ,  1.0916412 , ...,  0.52648544,
       -0.        , -0.        ], dtype=float32), array([-0.       , -0.       , -0.       , ...,  2.3269413, -0.       ,
       -0.       ], dtype=floa

In [8]:
(X_train, X_test, y_train, y_test) = train_test_split(np.array(features),
                                                      np.array(le_labels),
                                                      test_size=0.3,
                                                      random_state=100)
print("splitted data...")
print("train data  : {}".format(X_train.shape))
print("test data   : {}".format(X_test.shape))
print("train labels: {}".format(y_train.shape))
print("test labels : {}".format(y_test.shape))

splitted data...
train data  : (50, 4096)
test data   : (22, 4096)
train labels: (50,)
test labels : (22,)


In [9]:
# Creating the SVM model
model = OneVsRestClassifier(SVC())

# Fitting the model with training data
model.fit(X_train, y_train)

# Making a prediction on the test set
prediction = model.predict(X_test)

# Evaluating the model
print("Test Set Accuracy : {}".format(accuracy_score(y_test, prediction)))
print("Classification Report : {}".format(classification_report(y_test, prediction)))
print(multilabel_confusion_matrix(y_test, prediction))

Test Set Accuracy : 0.36363636363636365
Classification Report :               precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         1
           4       0.00      0.00      0.00         2
           5       0.00      0.00      0.00         3
           6       1.00      1.00      1.00         1
           7       0.00      0.00      0.00         1
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         1
          11       0.00      0.00      0.00         1
          12       0.00      0.00      0.00         1
          13       0.56      0.83      0.67         6
          14       0.00      0.00      0.00         0
          15       0.00      0.00      0.00         2
          16       0.50      1.00      0.67         1

    accuracy                           0.36        22
   macro avg       0.22      0.24      0.21        22
weighted avg    

In [10]:
# Creating the SVM model
model = OneVsRestClassifier(LogisticRegression())

# Fitting the model with training data
model.fit(X_train, y_train)

# Making a prediction on the test set
prediction = model.predict(X_test)

# Evaluating the model
print("Test Set Accuracy : {}".format(accuracy_score(y_test, prediction)))
print("Classification Report : {}".format(classification_report(y_test, prediction)))
print(multilabel_confusion_matrix(y_test, prediction))

Test Set Accuracy : 0.3181818181818182
Classification Report :               precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         1
           4       0.00      0.00      0.00         2
           5       0.00      0.00      0.00         3
           6       0.00      0.00      0.00         1
           7       0.00      0.00      0.00         1
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         1
          11       1.00      1.00      1.00         1
          12       0.00      0.00      0.00         1
          13       0.50      0.67      0.57         6
          15       0.00      0.00      0.00         2
          16       0.50      1.00      0.67         1

    accuracy                           0.32        22
   macro avg       0.23      0.24      0.22        22
weighted avg       0.30      0.32      0.29        22

[[[20  2]
  [ 0 

In [11]:
# Creating the SVM model
model = OneVsRestClassifier(LogisticRegression())

# Fitting the model with training data
model.fit(X_train, y_train)

# Making a prediction on the test set
prediction = model.predict(X_test)

# Evaluating the model
print("Test Set Accuracy : {}".format(accuracy_score(y_test, prediction)))
print("Classification Report : {}".format(classification_report(y_test, prediction)))
print(multilabel_confusion_matrix(y_test, prediction))

Test Set Accuracy : 0.3181818181818182
Classification Report :               precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         1
           4       0.00      0.00      0.00         2
           5       0.00      0.00      0.00         3
           6       0.00      0.00      0.00         1
           7       0.00      0.00      0.00         1
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         1
          11       1.00      1.00      1.00         1
          12       0.00      0.00      0.00         1
          13       0.50      0.67      0.57         6
          15       0.00      0.00      0.00         2
          16       0.50      1.00      0.67         1

    accuracy                           0.32        22
   macro avg       0.23      0.24      0.22        22
weighted avg       0.30      0.32      0.29        22

[[[20  2]
  [ 0 