Code Implementation of - **M. Kaur and S. Gupta, “A passive blind approach for image splicing detection based on dwt and lbp histograms,” in Security in Computing and Communications, P. Mueller, S. M. Thampi, M. Z. Alam Bhuiyan, R. Ko, R. Doss, and J. M. Alcaraz Calero, Eds. Singapore: Springer Singapore, 2016, pp. 318–327.**

#### Columbia Color dataset

In [1]:
import cv2
import numpy as np
import pandas as pd
from skimage.feature import local_binary_pattern
import pywt
import os
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import random
from sklearn.preprocessing import MinMaxScaler

In [2]:
#U-LBP parameters
P = 8
R = 1.0

In [3]:
def feature_sub_channel(sub_channel):
    coeffs = pywt.dwt2(sub_channel, 'haar')
    cA, (cH, cV, cD) = coeffs
#     print("cA shape: ",cA.shape)
    lbp_cA = local_binary_pattern(cA, P, R, method='default')
#     print("ulbp_cA shape: ",lbp_cA.shape)
    lbp_cH = local_binary_pattern(cH, P, R, method='default')
#     print("lbp_cH shape: ",lbp_cH.shape)
    lbp_cV = local_binary_pattern(cV, P, R, method='default')
    lbp_cD = local_binary_pattern(cD, P, R, method='default')
    
    lbp_cA_hist, bin_edges = np.histogram(lbp_cA, bins=256, range=(0,255))
#     print("lbp_cA_hist shape is: ",lbp_cA_hist.shape)
    lbp_cH_hist, bin_edges = np.histogram(lbp_cH, bins=256, range=(0,255))
    lbp_cV_hist, bin_edges = np.histogram(lbp_cV, bins=256, range=(0,255))
    lbp_cD_hist, bin_edges = np.histogram(lbp_cD, bins=256, range=(0,255))
    
    
    feature=np.concatenate([lbp_cA_hist, lbp_cH_hist, lbp_cV_hist, lbp_cD_hist])
#     print("final_feature shape: ",feature.shape)
    
    return feature

In [4]:
def feature_extraction(path_to_folder, class_label):
    data_list=[]
    for file_name in os.listdir(path_to_folder):
        path_to_img = os.path.join(path_to_folder,file_name)
        img = cv2.imread(path_to_img)
        
        if np.shape(img) == ():
            continue
        
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) #changing to YCrCb color space.
#         img_cr = img[:,:,1] # the Cr channel only.
        
        F = feature_sub_channel(img)
        
        final_feature=list(F)
        final_feature.insert(0,file_name)
        final_feature.insert(1,class_label)
        
        data_list.append(final_feature)
                        
    return(data_list)

In [5]:
au_path="YOUR_PATH/Columbia_Color/authentic"
tp_path="YOUR_PATH/Columbia_Color/spliced"

data_list1 = feature_extraction(au_path, 0)
data_list2 = feature_extraction(tp_path, 1)

df = pd.DataFrame(data_list1)
df = df.append(pd.DataFrame(data_list2), ignore_index=True)
# #  --------------------------------------------------------------------------------------

df.rename(columns = {0: "image_names", 1: "label"}, inplace = True)

scaler_norm = MinMaxScaler() 
df.iloc[:,2:] = scaler_norm.fit_transform(df.iloc[:,2:].to_numpy()) # Normalising the values in dataframe.

In [6]:
df.head()

Unnamed: 0,image_names,label,2,3,4,5,6,7,8,9,...,1016,1017,1018,1019,1020,1021,1022,1023,1024,1025
0,AU_SS_V_006.bmp,0,0.434655,0.23301,0.184615,0.301587,0.210811,0.196429,0.255814,0.170732,...,0.230769,0.167939,0.166667,0.395833,0.1875,0.283582,0.4,0.317757,0.337209,0.119444
1,AU_TT_O_043.bmp,0,0.538913,0.281553,0.230769,0.396825,0.183784,0.178571,0.310078,0.222222,...,0.384615,0.183206,0.261905,0.25,0.4375,0.437811,0.333333,0.149533,0.453488,0.093315
2,AU_TT_V_036.bmp,0,0.490455,0.203883,0.107692,0.246032,0.348649,0.178571,0.155039,0.130081,...,0.576923,0.145038,0.261905,0.270833,0.25,0.373134,0.311111,0.308411,0.418605,0.078385
3,AU_TT_O_044.bmp,0,0.511013,0.393204,0.215385,0.396825,0.156757,0.25,0.325581,0.189702,...,0.230769,0.152672,0.357143,0.229167,0.354167,0.383085,0.244444,0.242991,0.494186,0.076349
4,AU_TS_O_132.bmp,0,0.38326,0.15534,0.246154,0.277778,0.145946,0.071429,0.341085,0.149051,...,0.269231,0.198473,0.547619,0.416667,0.208333,0.487562,0.577778,0.233645,0.5,0.044113


In [7]:
df.shape

(1798, 1026)

In [8]:
df.to_csv('Columbia_Color_baseline_features.csv', index=False)

### SVM CLASSIFICATION

In [9]:
array=df.values
x_feature=array[:,2:]
y_label=array[:,1].astype('int')
print(x_feature.shape)
print(y_label.shape)

(1798, 1024)
(1798,)


In [10]:
X_train,X_test,Y_train,Y_test=train_test_split(x_feature,y_label,test_size=0.20,random_state=7)

In [11]:
# Random check
model_SVC=SVC(kernel='rbf',C=100,gamma=0.001)

kfold=KFold(n_splits=10, shuffle=True)
cv_results=cross_val_score(model_SVC,X_train,Y_train,cv=kfold,scoring='accuracy')
msg="%s %f (%f)" % ('Training Accuracy: ',cv_results.mean(),cv_results.std())
print(msg)

Training Accuracy:  0.688500 (0.023495)


In [12]:
# SVM hypertuning using GridSeachCV
model_SVC=SVC()

kfold=KFold(n_splits=10)
param_grid = {'C': [1, 10, 100, 500, 1000], 
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf']} 

grid=GridSearchCV(estimator=model_SVC,param_grid=param_grid,scoring='accuracy',cv=kfold,verbose=3)
grid_result=grid.fit(X_train,Y_train)

print("Best: %f using %s" % (grid_result.best_score_,grid_result.best_params_))

Fitting 10 folds for each of 25 candidates, totalling 250 fits
[CV 1/10] END .........C=1, gamma=1, kernel=rbf;, score=0.576 total time=   1.3s
[CV 2/10] END .........C=1, gamma=1, kernel=rbf;, score=0.562 total time=   1.4s
[CV 3/10] END .........C=1, gamma=1, kernel=rbf;, score=0.500 total time=   1.5s
[CV 4/10] END .........C=1, gamma=1, kernel=rbf;, score=0.493 total time=   1.5s
[CV 5/10] END .........C=1, gamma=1, kernel=rbf;, score=0.486 total time=   1.4s
[CV 6/10] END .........C=1, gamma=1, kernel=rbf;, score=0.465 total time=   1.5s
[CV 7/10] END .........C=1, gamma=1, kernel=rbf;, score=0.535 total time=   1.4s
[CV 8/10] END .........C=1, gamma=1, kernel=rbf;, score=0.535 total time=   1.5s
[CV 9/10] END .........C=1, gamma=1, kernel=rbf;, score=0.490 total time=   1.3s
[CV 10/10] END ........C=1, gamma=1, kernel=rbf;, score=0.538 total time=   1.4s
[CV 1/10] END .......C=1, gamma=0.1, kernel=rbf;, score=0.757 total time=   1.6s
[CV 2/10] END .......C=1, gamma=0.1, kernel=rb

[CV 2/10] END .......C=100, gamma=1, kernel=rbf;, score=0.549 total time=   1.3s
[CV 3/10] END .......C=100, gamma=1, kernel=rbf;, score=0.507 total time=   1.3s
[CV 4/10] END .......C=100, gamma=1, kernel=rbf;, score=0.493 total time=   1.3s
[CV 5/10] END .......C=100, gamma=1, kernel=rbf;, score=0.479 total time=   1.3s
[CV 6/10] END .......C=100, gamma=1, kernel=rbf;, score=0.465 total time=   1.3s
[CV 7/10] END .......C=100, gamma=1, kernel=rbf;, score=0.542 total time=   1.3s
[CV 8/10] END .......C=100, gamma=1, kernel=rbf;, score=0.528 total time=   1.3s
[CV 9/10] END .......C=100, gamma=1, kernel=rbf;, score=0.497 total time=   1.3s
[CV 10/10] END ......C=100, gamma=1, kernel=rbf;, score=0.538 total time=   1.3s
[CV 1/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.694 total time=   1.3s
[CV 2/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.722 total time=   1.3s
[CV 3/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.681 total time=   1.4s
[CV 4/10] END .....C=100, ga

[CV 4/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.493 total time=   1.3s
[CV 5/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.479 total time=   1.3s
[CV 6/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.465 total time=   1.3s
[CV 7/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.542 total time=   1.3s
[CV 8/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.528 total time=   1.3s
[CV 9/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.497 total time=   1.3s
[CV 10/10] END .....C=1000, gamma=1, kernel=rbf;, score=0.538 total time=   1.3s
[CV 1/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.694 total time=   1.3s
[CV 2/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.722 total time=   1.3s
[CV 3/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.681 total time=   1.3s
[CV 4/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.660 total time=   1.3s
[CV 5/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.646 total time=   1.3s
[CV 6/10] END ....C=1000, ga

In [14]:
model_SVC = SVC(C=1,gamma=0.01, kernel='rbf')
model_SVC.fit(X_train,Y_train) 

predictions=model_SVC.predict(X_test)

print(accuracy_score(Y_test,predictions))
print(confusion_matrix(Y_test,predictions))
print(classification_report(Y_test,predictions))

0.7444444444444445
[[146  43]
 [ 49 122]]
              precision    recall  f1-score   support

           0       0.75      0.77      0.76       189
           1       0.74      0.71      0.73       171

    accuracy                           0.74       360
   macro avg       0.74      0.74      0.74       360
weighted avg       0.74      0.74      0.74       360

