Code Implementation of - **M. Kaur and S. Gupta, “A passive blind approach for image splicing detection based on dwt and lbp histograms,” in Security in Computing and Communications, P. Mueller, S. M. Thampi, M. Z. Alam Bhuiyan, R. Ko, R. Doss, and J. M. Alcaraz Calero, Eds. Singapore: Springer Singapore, 2016, pp. 318–327.**

#### Baseline on CASIA 1.0 and 2.0, and columbia uncompressed 

In [22]:
import cv2
import numpy as np
import pandas as pd
from skimage.feature import local_binary_pattern
import pywt
import os
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import random
from sklearn.preprocessing import MinMaxScaler

In [2]:
#U-LBP parameters
P = 8
R = 1.0

In [24]:
def feature_sub_channel(sub_channel):
    coeffs = pywt.dwt2(sub_channel, 'haar')
    cA, (cH, cV, cD) = coeffs
#     print("cA shape: ",cA.shape)
    lbp_cA = local_binary_pattern(cA, P, R, method='default')
#     print("ulbp_cA shape: ",lbp_cA.shape)
    lbp_cH = local_binary_pattern(cH, P, R, method='default')
#     print("lbp_cH shape: ",lbp_cH.shape)
    lbp_cV = local_binary_pattern(cV, P, R, method='default')
    lbp_cD = local_binary_pattern(cD, P, R, method='default')
    
    lbp_cA_hist, bin_edges = np.histogram(lbp_cA, bins=256, range=(0,255))
#     print("lbp_cA_hist shape is: ",lbp_cA_hist.shape)
    lbp_cH_hist, bin_edges = np.histogram(lbp_cH, bins=256, range=(0,255))
    lbp_cV_hist, bin_edges = np.histogram(lbp_cV, bins=256, range=(0,255))
    lbp_cD_hist, bin_edges = np.histogram(lbp_cD, bins=256, range=(0,255))
    
    
    feature=np.concatenate([lbp_cA_hist, lbp_cH_hist, lbp_cV_hist, lbp_cD_hist])
#     print("final_feature shape: ",feature.shape)
    
    return feature

In [25]:
def feature_extraction(path_to_folder, class_label):
    data_list=[]
    count=0
    for file_name in os.listdir(path_to_folder):
        if(count!=0 and count%300==0):
            print("No. of images done: ",count)
        path_to_img = os.path.join(path_to_folder,file_name)
        img = cv2.imread(path_to_img)
        
        if np.shape(img) == ():
            continue
        
        img = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb) #changing to YCrCb color space.
        img_cr = img[:,:,1] # the Cr channel only.
        
        Fcr = feature_sub_channel(img_cr)
        
        final_feature=list(Fcr)
        final_feature.insert(0,file_name)
        final_feature.insert(1,class_label)
        
        data_list.append(final_feature)
        
        count+=1
                
    return(data_list)

In [34]:
#CASIA V1.0 dataset
# au_path = "YOUR_PATH/CASIA 1.0 dataset/Au"
# tp_path1 = "YOUR_PATH/CASIA 1.0 dataset/Modified Tp/Tp/CM"
# tp_path2 = "YOUR_PATH/CASIA 1.0 dataset/Modified Tp/Tp/Sp"

# data_list1 = feature_extraction(au_path, 0)
# data_list2 = feature_extraction(tp_path1, 1)
# data_list3 = feature_extraction(tp_path2, 1)

# df = pd.DataFrame(data_list1)
# df = df.append(pd.DataFrame(data_list2), ignore_index=True)
# df = df.append(pd.DataFrame(data_list3), ignore_index=True)
#  --------------------------------------------------------------------------------------

# # CASIA V2.0 Dataset
# au_path="YOUR_PATH/CASIA2.0_revised/Au"
# tp_path="YOUR_PATH/CASIA2.0_revised/Tp"

# data_list1 = feature_extraction(au_path, 0)
# data_list2 = feature_extraction(tp_path, 1)

# df = pd.DataFrame(data_list1)
# df = df.append(pd.DataFrame(data_list2), ignore_index=True)
# #  --------------------------------------------------------------------------------------

# Columbia_Uncompressed dataset
tp_path="YOUR_PATH/Columbia_Uncompressed/4cam_splc"
au_path="YOUR_PATH/Columbia_Uncompressed/4cam_auth"

data_list1 = feature_extraction(au_path, 0)
data_list2 = feature_extraction(tp_path, 1)

df = pd.DataFrame(data_list1)
df = df.append(pd.DataFrame(data_list2), ignore_index=True)

# #  --------------------------------------------------------------------------------------

df.rename(columns = {0: "image_names", 1: "label"}, inplace = True)

scaler_norm = MinMaxScaler() 
df.iloc[:,2:] = scaler_norm.fit_transform(df.iloc[:,2:].to_numpy()) # Normalising the values in dataframe.

In [35]:
df.head()

Unnamed: 0,image_names,label,2,3,4,5,6,7,8,9,...,1016,1017,1018,1019,1020,1021,1022,1023,1024,1025
0,canong3_02_sub_05.tif,0,0.07007,0.414797,0.031614,0.445058,0.017524,0.09827,0.103535,0.272225,...,0.178112,0.422644,0.284706,0.477924,0.24356,0.3618,0.490887,0.391874,0.259149,0.025144
1,canonxt_32_sub_05.tif,0,0.454091,0.518828,0.309484,0.327195,0.692878,0.391696,0.408249,0.341548,...,0.512876,0.747465,0.585882,0.825179,0.484778,0.661233,0.831713,0.744414,0.478734,0.385878
2,canonxt_14_sub_04.tif,0,0.499461,0.364017,0.341098,0.190915,0.583914,0.604152,0.270202,0.323353,...,0.388412,0.830079,0.564706,0.744033,0.248244,0.521262,0.781896,0.826777,0.343966,0.446285
3,canong3_02_sub_06.tif,0,0.077338,0.418981,0.024958,0.440147,0.056167,0.137716,0.119529,0.322893,...,0.178112,0.451558,0.28,0.504773,0.173302,0.310418,0.516403,0.432687,0.243076,0.030481
4,canonxt_14_sub_08.tif,0,0.283005,0.28604,0.331115,0.298956,0.435408,0.366782,0.303872,0.640488,...,0.390558,0.91964,0.517647,0.810859,0.264637,0.505315,0.869988,0.944783,0.377349,0.439319


In [36]:
df.shape

(363, 1026)

In [37]:
df.to_csv('Columbia_Uncompressed_baseline_features.csv', index=False)

## SVM CLASSIFICATION CASIA 1.0

In [18]:
array=df.values
x_feature=array[:,2:]
y_label=array[:,1].astype('int')
print(x_feature.shape)
print(y_label.shape)

(1721, 1024)
(1721,)


In [19]:
X_train,X_test,Y_train,Y_test=train_test_split(x_feature,y_label,test_size=0.20,random_state=7)

In [20]:
# Random check
model_SVC=SVC(kernel='rbf',C=100,gamma=0.001)

kfold=KFold(n_splits=10, shuffle=True)
cv_results=cross_val_score(model_SVC,X_train,Y_train,cv=kfold,scoring='accuracy')
msg="%s %f (%f)" % ('Training Accuracy: ',cv_results.mean(),cv_results.std())
print(msg)

Training Accuracy:  0.930953 (0.017267)


In [23]:
# SVM hypertuning using GridSeachCV
model_SVC=SVC()

kfold=KFold(n_splits=10)
param_grid = {'C': [1, 10, 100, 500, 1000], 
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf']} 

grid=GridSearchCV(estimator=model_SVC,param_grid=param_grid,scoring='accuracy',cv=kfold,verbose=3)
grid_result=grid.fit(X_train,Y_train)

print("Best: %f using %s" % (grid_result.best_score_,grid_result.best_params_))

Fitting 10 folds for each of 25 candidates, totalling 250 fits
[CV 1/10] END .........C=1, gamma=1, kernel=rbf;, score=0.580 total time=   1.3s
[CV 2/10] END .........C=1, gamma=1, kernel=rbf;, score=0.601 total time=   1.3s
[CV 3/10] END .........C=1, gamma=1, kernel=rbf;, score=0.572 total time=   1.3s
[CV 4/10] END .........C=1, gamma=1, kernel=rbf;, score=0.638 total time=   1.3s
[CV 5/10] END .........C=1, gamma=1, kernel=rbf;, score=0.551 total time=   1.3s
[CV 6/10] END .........C=1, gamma=1, kernel=rbf;, score=0.638 total time=   1.3s
[CV 7/10] END .........C=1, gamma=1, kernel=rbf;, score=0.628 total time=   1.4s
[CV 8/10] END .........C=1, gamma=1, kernel=rbf;, score=0.620 total time=   1.3s
[CV 9/10] END .........C=1, gamma=1, kernel=rbf;, score=0.635 total time=   1.5s
[CV 10/10] END ........C=1, gamma=1, kernel=rbf;, score=0.555 total time=   1.4s
[CV 1/10] END .......C=1, gamma=0.1, kernel=rbf;, score=0.928 total time=   1.2s
[CV 2/10] END .......C=1, gamma=0.1, kernel=rb

[CV 2/10] END .......C=100, gamma=1, kernel=rbf;, score=0.623 total time=   1.3s
[CV 3/10] END .......C=100, gamma=1, kernel=rbf;, score=0.594 total time=   1.3s
[CV 4/10] END .......C=100, gamma=1, kernel=rbf;, score=0.630 total time=   1.3s
[CV 5/10] END .......C=100, gamma=1, kernel=rbf;, score=0.580 total time=   1.3s
[CV 6/10] END .......C=100, gamma=1, kernel=rbf;, score=0.652 total time=   1.3s
[CV 7/10] END .......C=100, gamma=1, kernel=rbf;, score=0.642 total time=   1.3s
[CV 8/10] END .......C=100, gamma=1, kernel=rbf;, score=0.628 total time=   1.3s
[CV 9/10] END .......C=100, gamma=1, kernel=rbf;, score=0.635 total time=   1.3s
[CV 10/10] END ......C=100, gamma=1, kernel=rbf;, score=0.569 total time=   1.3s
[CV 1/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.928 total time=   1.2s
[CV 2/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.884 total time=   1.2s
[CV 3/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.906 total time=   1.2s
[CV 4/10] END .....C=100, ga

[CV 4/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.630 total time=   1.3s
[CV 5/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.580 total time=   1.3s
[CV 6/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.652 total time=   1.3s
[CV 7/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.642 total time=   1.3s
[CV 8/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.628 total time=   1.3s
[CV 9/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.635 total time=   1.3s
[CV 10/10] END .....C=1000, gamma=1, kernel=rbf;, score=0.569 total time=   1.3s
[CV 1/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.928 total time=   1.2s
[CV 2/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.884 total time=   1.2s
[CV 3/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.906 total time=   1.2s
[CV 4/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.906 total time=   1.2s
[CV 5/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.906 total time=   1.2s
[CV 6/10] END ....C=1000, ga

## SVM CLASSIFICATION CASIA 2.0

In [30]:
array=df.values
x_feature=array[:,2:]
y_label=array[:,1].astype('int')
print(x_feature.shape)
print(y_label.shape)

(12614, 1024)
(12614,)


In [31]:
X_train,X_test,Y_train,Y_test=train_test_split(x_feature,y_label,test_size=0.20,random_state=7)

In [32]:
model_SVC=SVC(kernel='rbf',C=100,gamma=0.001)

kfold=KFold(n_splits=10, shuffle=True)
cv_results=cross_val_score(model_SVC,X_train,Y_train,cv=kfold,scoring='accuracy')
msg="%s %f (%f)" % ('Training Accuracy: ',cv_results.mean(),cv_results.std())
print(msg)

Training Accuracy:  0.971955 (0.004896)


In [33]:
# SVM hypertuning using GridSeachCV
model_SVC=SVC()

kfold=KFold(n_splits=10)
param_grid = {'C': [1, 10, 100, 500, 1000], 
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf']} 

grid=GridSearchCV(estimator=model_SVC,param_grid=param_grid,scoring='accuracy',cv=kfold,verbose=3)
grid_result=grid.fit(X_train,Y_train)

print("Best: %f using %s" % (grid_result.best_score_,grid_result.best_params_))

Fitting 10 folds for each of 25 candidates, totalling 250 fits
[CV 1/10] END .........C=1, gamma=1, kernel=rbf;, score=0.956 total time=  17.1s
[CV 2/10] END .........C=1, gamma=1, kernel=rbf;, score=0.966 total time=  17.0s
[CV 3/10] END .........C=1, gamma=1, kernel=rbf;, score=0.961 total time=  16.9s
[CV 4/10] END .........C=1, gamma=1, kernel=rbf;, score=0.945 total time=  16.8s
[CV 5/10] END .........C=1, gamma=1, kernel=rbf;, score=0.952 total time=  16.6s
[CV 6/10] END .........C=1, gamma=1, kernel=rbf;, score=0.963 total time=  16.6s
[CV 7/10] END .........C=1, gamma=1, kernel=rbf;, score=0.954 total time=  16.7s
[CV 8/10] END .........C=1, gamma=1, kernel=rbf;, score=0.952 total time=  16.7s
[CV 9/10] END .........C=1, gamma=1, kernel=rbf;, score=0.958 total time=  16.7s
[CV 10/10] END ........C=1, gamma=1, kernel=rbf;, score=0.960 total time=  16.9s
[CV 1/10] END .......C=1, gamma=0.1, kernel=rbf;, score=0.970 total time=  12.2s
[CV 2/10] END .......C=1, gamma=0.1, kernel=rb

[CV 2/10] END .......C=100, gamma=1, kernel=rbf;, score=0.963 total time=  19.0s
[CV 3/10] END .......C=100, gamma=1, kernel=rbf;, score=0.964 total time=  18.3s
[CV 4/10] END .......C=100, gamma=1, kernel=rbf;, score=0.948 total time=  18.3s
[CV 5/10] END .......C=100, gamma=1, kernel=rbf;, score=0.958 total time=  18.3s
[CV 6/10] END .......C=100, gamma=1, kernel=rbf;, score=0.966 total time=  18.9s
[CV 7/10] END .......C=100, gamma=1, kernel=rbf;, score=0.957 total time=  19.8s
[CV 8/10] END .......C=100, gamma=1, kernel=rbf;, score=0.955 total time=  19.0s
[CV 9/10] END .......C=100, gamma=1, kernel=rbf;, score=0.952 total time=  18.5s
[CV 10/10] END ......C=100, gamma=1, kernel=rbf;, score=0.963 total time=  18.5s
[CV 1/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.968 total time=   9.5s
[CV 2/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.975 total time=   9.9s
[CV 3/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.973 total time=  10.4s
[CV 4/10] END .....C=100, ga

[CV 4/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.948 total time=  18.3s
[CV 5/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.958 total time=  18.3s
[CV 6/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.966 total time=  18.2s
[CV 7/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.956 total time=  18.4s
[CV 8/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.954 total time=  17.8s
[CV 9/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.952 total time=  18.8s
[CV 10/10] END .....C=1000, gamma=1, kernel=rbf;, score=0.963 total time=  18.5s
[CV 1/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.967 total time=   8.8s
[CV 2/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.965 total time=   9.1s
[CV 3/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.965 total time=   8.9s
[CV 4/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.962 total time=   9.0s
[CV 5/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.959 total time=   8.9s
[CV 6/10] END ....C=1000, ga

## SVM CLASSIFICATION  Columbia Uncompressed

In [38]:
array=df.values
x_feature=array[:,2:]
y_label=array[:,1].astype('int')
print(x_feature.shape)
print(y_label.shape)

(363, 1024)
(363,)


In [39]:
X_train,X_test,Y_train,Y_test=train_test_split(x_feature,y_label,test_size=0.20,random_state=7)

In [40]:
model_SVC=SVC(kernel='rbf',C=100,gamma=0.001)

kfold=KFold(n_splits=10, shuffle=True)
cv_results=cross_val_score(model_SVC,X_train,Y_train,cv=kfold,scoring='accuracy')
msg="%s %f (%f)" % ('Training Accuracy: ',cv_results.mean(),cv_results.std())
print(msg)

Training Accuracy:  0.765517 (0.055172)


In [41]:
# SVM hypertuning using GridSeachCV
model_SVC=SVC()

kfold=KFold(n_splits=10)
param_grid = {'C': [1, 10, 100, 500, 1000], 
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf']} 

grid=GridSearchCV(estimator=model_SVC,param_grid=param_grid,scoring='accuracy',cv=kfold,verbose=3)
grid_result=grid.fit(X_train,Y_train)

print("Best: %f using %s" % (grid_result.best_score_,grid_result.best_params_))

Fitting 10 folds for each of 25 candidates, totalling 250 fits
[CV 1/10] END .........C=1, gamma=1, kernel=rbf;, score=0.655 total time=   0.1s
[CV 2/10] END .........C=1, gamma=1, kernel=rbf;, score=0.862 total time=   0.1s
[CV 3/10] END .........C=1, gamma=1, kernel=rbf;, score=0.690 total time=   0.1s
[CV 4/10] END .........C=1, gamma=1, kernel=rbf;, score=0.828 total time=   0.1s
[CV 5/10] END .........C=1, gamma=1, kernel=rbf;, score=0.690 total time=   0.1s
[CV 6/10] END .........C=1, gamma=1, kernel=rbf;, score=0.690 total time=   0.1s
[CV 7/10] END .........C=1, gamma=1, kernel=rbf;, score=0.552 total time=   0.1s
[CV 8/10] END .........C=1, gamma=1, kernel=rbf;, score=0.690 total time=   0.1s
[CV 9/10] END .........C=1, gamma=1, kernel=rbf;, score=0.517 total time=   0.1s
[CV 10/10] END ........C=1, gamma=1, kernel=rbf;, score=0.655 total time=   0.1s
[CV 1/10] END .......C=1, gamma=0.1, kernel=rbf;, score=0.655 total time=   0.1s
[CV 2/10] END .......C=1, gamma=0.1, kernel=rb

[CV 4/10] END .......C=100, gamma=1, kernel=rbf;, score=0.759 total time=   0.1s
[CV 5/10] END .......C=100, gamma=1, kernel=rbf;, score=0.655 total time=   0.1s
[CV 6/10] END .......C=100, gamma=1, kernel=rbf;, score=0.724 total time=   0.1s
[CV 7/10] END .......C=100, gamma=1, kernel=rbf;, score=0.586 total time=   0.1s
[CV 8/10] END .......C=100, gamma=1, kernel=rbf;, score=0.759 total time=   0.1s
[CV 9/10] END .......C=100, gamma=1, kernel=rbf;, score=0.517 total time=   0.1s
[CV 10/10] END ......C=100, gamma=1, kernel=rbf;, score=0.655 total time=   0.1s
[CV 1/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.793 total time=   0.0s
[CV 2/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.897 total time=   0.1s
[CV 3/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.897 total time=   0.0s
[CV 4/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.862 total time=   0.0s
[CV 5/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.828 total time=   0.0s
[CV 6/10] END .....C=100, ga

[CV 8/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.759 total time=   0.1s
[CV 9/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.517 total time=   0.1s
[CV 10/10] END .....C=1000, gamma=1, kernel=rbf;, score=0.655 total time=   0.1s
[CV 1/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.793 total time=   0.0s
[CV 2/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.897 total time=   0.1s
[CV 3/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.897 total time=   0.0s
[CV 4/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.862 total time=   0.0s
[CV 5/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.828 total time=   0.0s
[CV 6/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.828 total time=   0.0s
[CV 7/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.828 total time=   0.1s
[CV 8/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.828 total time=   0.0s
[CV 9/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.793 total time=   0.0s
[CV 10/10] END ...C=1000, ga