Code Implementation of - **M. M. Islam, G. Karmakar, J. Kamruzzaman, and M. Murshed, “A robust forgery detection method for copy–move and splicing attacks in images,” Electronics, vol. 9, no. 9, 2020. [Online]. Available: https://www.mdpi.com/2079-9292/9/9/1500**

#### Columbia Color dataset

In [14]:
import cv2
import numpy as np
import pandas as pd
from scipy.fftpack import dct
from skimage.feature import local_binary_pattern
import os
from skimage.util import view_as_windows
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import random
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV

In [15]:
def get_patches(image_mat):
    """
    Extract patches rom an image
    :param image_mat: The image as a matrix
    :param stride: The stride of the patch extraction process
    :returns: The patches
    """
    stride=8 #stride is same as window's breadth so that it gives non-overlapping blocks.
    window_shape = (8, 8)
    image_mat=np.array(image_mat)
    
    windows = view_as_windows(image_mat, window_shape, step=stride)
#     print('windows shape:',windows.shape)

    patches = []
    for m in range(windows.shape[0]):
        for n in range(windows.shape[1]):
#             print("window shape: ",windows[m][n].shape)
            patches += [windows[m][n]]
    return windows, patches #sending both the window array and the patches extracted from it.

In [16]:
def feature_sub_channel(sub_channel):
    img_windows, sub_image_blocks = get_patches(sub_channel) #Gives the 8x8 patches/blocks of sub_image.
    
    
#     print("img_windows shape is: ",img_windows.shape)
    img_windows_shape = img_windows.shape
    
    dct_block_windows=[]
    
    for m in range(img_windows.shape[0]):
        for n in range(img_windows.shape[1]):
#             print("Original Block", img_windows[m][n])
            dct_block = dct(img_windows[m][n], type=2, n=None, axis=-1, norm=None, overwrite_x=False)
#             print("DCT Block", dct_block)
            dct_block_windows.append(dct_block)
    
    dct_block_windows = np.asarray(dct_block_windows)
    dct_block_windows = np.reshape(dct_block_windows, newshape=img_windows_shape)
#     print("dct_block_windows shape is: ", dct_block_windows.shape)
#     print("dct_block_windows second block [0][1] is: ", dct_block_windows[0][1])
    
    reconstructed_dct_img = np.zeros(shape=(sub_channel.shape[0], sub_channel.shape[1]))

    i=0
    j=0
    for x in range(dct_block_windows.shape[0]):
        for y in range(dct_block_windows.shape[1]):
            reconstructed_dct_img[i:i + 8, j:j + 8] = dct_block_windows[x,y]
            j=j+8
        i=i+8
        j=0
    
        
    reconstructed_dct_img = abs(reconstructed_dct_img) # Taking absolute of the dct image.
#     print("reconstructed_dct_img shape is: ",reconstructed_dct_img.shape)
#     print("reconstructed_dct_img first block is: ",reconstructed_dct_img[0:8,0:8])
    
    lbp_img = local_binary_pattern(reconstructed_dct_img, 8, 2, method='default') # lbp on whole.
#     print("lbp_img shape is: ",lbp_img.shape)
    
    _, lbp_img_blocks = get_patches(lbp_img) #blocks of lbp image
    
        
    lbp_blocks_array=np.asarray(lbp_img_blocks)
#     print("shape of lbp_blocks_array",lbp_blocks_array.shape)

    _,r,c = lbp_blocks_array.shape #(1536,8,8) for CASIA
#     print("shape of lbp_blocks_array",lbp_blocks_array.shape)
    
    img_mean_list=[]
    for x in range(r):
        for y in range(c):
            pixel_depth_subarr = lbp_blocks_array[:,x,y]
            mean = np.mean(pixel_depth_subarr)
            img_mean_list.append(mean)
    
#     print("Length of img_mean_list: ",len(img_mean_list))
    return img_mean_list


In [19]:
#main function to extract the features.
def feature_extraction(path_to_folder, class_label):
    data_list=[]
    count=0
    for file_name in os.listdir(path_to_folder):
#         if(count>0):
#             break
        path_to_img = os.path.join(path_to_folder,file_name)
        img = cv2.imread(path_to_img)
        
        if np.shape(img) == ():
            continue
        
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) #changing to YCrCb color space.
#         img_cr = img[:,:,1] # the Cr channel only.
#         print("img_cr shape is: ",img_cr.shape)
        
        F = feature_sub_channel(img)
#         print("fcr shape: ",Fcr.shape)
        
        final_feature = F
        final_feature.insert(0,file_name)
        final_feature.insert(1,class_label)
        
        data_list.append(final_feature)
        
        count += 1
        
    return(data_list) 

In [20]:
au_path="YOUR_PATH/Columbia_Color/authentic"
tp_path="YOUR_PATH/Columbia_Color/spliced"

data_list1 = feature_extraction(au_path, 0)
data_list2 = feature_extraction(tp_path, 1)

df = pd.DataFrame(data_list1)
df = df.append(pd.DataFrame(data_list2), ignore_index=True)
# #  --------------------------------------------------------------------------------------


df.rename(columns = {0: "image_names", 1: "label"}, inplace = True)

scaler_norm = MinMaxScaler() 
df.iloc[:,2:] = scaler_norm.fit_transform(df.iloc[:,2:].to_numpy()) # Normalising the values in dataframe.

In [21]:
df.head()

Unnamed: 0,image_names,label,2,3,4,5,6,7,8,9,...,56,57,58,59,60,61,62,63,64,65
0,AU_SS_V_006.bmp,0,0.433296,0.218873,0.258317,0.369931,0.286641,0.259737,0.250286,0.314214,...,0.307698,0.211565,0.422274,0.20483,0.193822,0.301015,0.26101,0.191737,0.184656,0.161019
1,AU_TT_O_043.bmp,0,0.325234,0.270751,0.187686,0.223736,0.089418,0.185093,0.34133,0.463464,...,0.331517,0.361111,0.388922,0.275244,0.173433,0.271125,0.129733,0.13417,0.285817,0.428782
2,AU_TT_V_036.bmp,0,0.36859,0.308785,0.277061,0.289096,0.153382,0.269549,0.313571,0.256855,...,0.216681,0.277952,0.331472,0.345291,0.323095,0.279762,0.238654,0.291458,0.322738,0.314495
3,AU_TT_O_044.bmp,0,0.334796,0.289218,0.204103,0.243114,0.142365,0.174543,0.322024,0.412442,...,0.335918,0.347368,0.314428,0.282361,0.212357,0.273724,0.221824,0.218776,0.318168,0.338708
4,AU_TS_O_132.bmp,0,0.23289,0.154787,0.166801,0.165795,0.136731,0.344849,0.326137,0.163606,...,0.357921,0.209119,0.233911,0.132847,0.112886,0.197861,0.142679,0.276965,0.316627,0.205509


In [22]:
df.shape

(1798, 66)

In [23]:
df.to_csv('Columbia_Color_baseline_features.csv',index=False)

## SVM CLASSIFICATION

In [24]:
array=df.values
x_feature=array[:,2:]
y_label=array[:,1].astype('int')
print(x_feature.shape)
print(y_label.shape)

(1798, 64)
(1798,)


In [25]:
X_train,X_test,Y_train,Y_test=train_test_split(x_feature,y_label,test_size=0.20,random_state=9)

In [26]:
# Random check
model_SVC=SVC(kernel='rbf',C=10,gamma=1)

kfold=KFold(n_splits=10, shuffle=True)
cv_results=cross_val_score(model_SVC,X_train,Y_train,cv=kfold,scoring='accuracy')
msg="%s %f (%f)" % ('Training Accuracy: ',cv_results.mean(),cv_results.std())
print(msg)

Training Accuracy:  0.734339 (0.039793)


In [27]:
# SVM hypertuning using GridSeachCV
model_SVC=SVC()

kfold=KFold(n_splits=10)
param_grid = {'C': [1, 10, 100, 500, 1000], 
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf']} 

grid=GridSearchCV(estimator=model_SVC,param_grid=param_grid,scoring='accuracy',cv=kfold,verbose=3)
grid_result=grid.fit(X_train,Y_train)

print("Best: %f using %s" % (grid_result.best_score_,grid_result.best_params_))

Fitting 10 folds for each of 25 candidates, totalling 250 fits
[CV 1/10] END .........C=1, gamma=1, kernel=rbf;, score=0.743 total time=   0.1s
[CV 2/10] END .........C=1, gamma=1, kernel=rbf;, score=0.701 total time=   0.1s
[CV 3/10] END .........C=1, gamma=1, kernel=rbf;, score=0.812 total time=   0.1s
[CV 4/10] END .........C=1, gamma=1, kernel=rbf;, score=0.799 total time=   0.1s
[CV 5/10] END .........C=1, gamma=1, kernel=rbf;, score=0.764 total time=   0.1s
[CV 6/10] END .........C=1, gamma=1, kernel=rbf;, score=0.764 total time=   0.1s
[CV 7/10] END .........C=1, gamma=1, kernel=rbf;, score=0.778 total time=   0.1s
[CV 8/10] END .........C=1, gamma=1, kernel=rbf;, score=0.812 total time=   0.1s
[CV 9/10] END .........C=1, gamma=1, kernel=rbf;, score=0.664 total time=   0.1s
[CV 10/10] END ........C=1, gamma=1, kernel=rbf;, score=0.839 total time=   0.1s
[CV 1/10] END .......C=1, gamma=0.1, kernel=rbf;, score=0.701 total time=   0.1s
[CV 2/10] END .......C=1, gamma=0.1, kernel=rb

[CV 2/10] END .......C=100, gamma=1, kernel=rbf;, score=0.688 total time=   0.2s
[CV 3/10] END .......C=100, gamma=1, kernel=rbf;, score=0.715 total time=   0.2s
[CV 4/10] END .......C=100, gamma=1, kernel=rbf;, score=0.701 total time=   0.2s
[CV 5/10] END .......C=100, gamma=1, kernel=rbf;, score=0.674 total time=   0.2s
[CV 6/10] END .......C=100, gamma=1, kernel=rbf;, score=0.611 total time=   0.2s
[CV 7/10] END .......C=100, gamma=1, kernel=rbf;, score=0.743 total time=   0.2s
[CV 8/10] END .......C=100, gamma=1, kernel=rbf;, score=0.688 total time=   0.2s
[CV 9/10] END .......C=100, gamma=1, kernel=rbf;, score=0.650 total time=   0.2s
[CV 10/10] END ......C=100, gamma=1, kernel=rbf;, score=0.783 total time=   0.2s
[CV 1/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.736 total time=   0.1s
[CV 2/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.667 total time=   0.1s
[CV 3/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.819 total time=   0.1s
[CV 4/10] END .....C=100, ga

[CV 4/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.708 total time=   0.2s
[CV 5/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.688 total time=   0.2s
[CV 6/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.597 total time=   0.2s
[CV 7/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.736 total time=   0.2s
[CV 8/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.688 total time=   0.2s
[CV 9/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.643 total time=   0.2s
[CV 10/10] END .....C=1000, gamma=1, kernel=rbf;, score=0.769 total time=   0.2s
[CV 1/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.778 total time=   0.3s
[CV 2/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.667 total time=   0.3s
[CV 3/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.722 total time=   0.3s
[CV 4/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.715 total time=   0.3s
[CV 5/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.736 total time=   0.3s
[CV 6/10] END ....C=1000, ga

In [29]:
model_SVC = SVC(C=1,gamma=1, kernel='rbf')
model_SVC.fit(X_train,Y_train) 

predictions=model_SVC.predict(X_test)

print(accuracy_score(Y_test,predictions))
print(confusion_matrix(Y_test,predictions))
print(classification_report(Y_test,predictions))

0.7555555555555555
[[132  46]
 [ 42 140]]
              precision    recall  f1-score   support

           0       0.76      0.74      0.75       178
           1       0.75      0.77      0.76       182

    accuracy                           0.76       360
   macro avg       0.76      0.76      0.76       360
weighted avg       0.76      0.76      0.76       360

