Code implementation of - **S. Dua, J. Singh, and H. Parthasarathy, “Image forgery detection based on statistical features of block dct coefficients,” Procedia Computer Science, vol. 171, pp. 369–378, 2020, third International Conference on Computing and Network Communications (CoCoNet’19). [Online]. 
Available: https://www.sciencedirect.com/science/article/pii/S1877050920310048**

In [1]:
import cv2
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [18]:
def get_patches(image_mat):
    """
    Extract patches rom an image
    :param image_mat: The image as a matrix
    :param stride: The stride of the patch extraction process
    :returns: The patches
    """
    stride=8 #stride is same as window's breadth so that it gives non-overlapping blocks.
    window_shape = (8, 8)
    image_mat=np.array(image_mat)
    
    windows = view_as_windows(image_mat, window_shape, step=stride)
#     print('windows shape:',windows.shape)

    patches = []
    for m in range(windows.shape[0]):
        for n in range(windows.shape[1]):
#             print("window shape: ",windows[m][n].shape)
            patches += [windows[m][n]]
    return patches

In [38]:
def std_and_ones(type_of_sub_image_blocks):
    ac_dct_stack=[]
    number_of_ones=[]

    for block in type_of_sub_image_blocks:
        dct_block = dct(block, type=2, n=None, axis=-1, norm=None, overwrite_x=False)
        dct_block_row = dct_block.flatten() # 2d dct array to 1d row array.
        ac_dct = dct_block_row[1:] # only AC component, removing the first DC comp.
        ac_dct_stack.append(ac_dct)

    ac_dct_stack=np.asarray(ac_dct_stack) #1536X63
    ac_dct_stack=ac_dct_stack.T # 63X1536

#     print("AC stacked shape: ", ac_dct_stack.shape)

    ac_dct_std = np.std(ac_dct_stack, axis=1) # row wise standard-deviation.

    for i in range(ac_dct_stack.shape[0]):
        count_one=0
        for j in range(ac_dct_stack.shape[1]):
            if(ac_dct_stack[i][j]>0):   # row wise counting number of ones.
                count_one+=1
        number_of_ones.append(count_one)

    number_of_ones=np.asarray(number_of_ones)
    
    return(ac_dct_std, number_of_ones)

In [39]:
def feature_sub_image(sub_image):
    sub_image_blocks = get_patches(sub_image) #Gives the 8x8 patches/blocks of sub_image.

    sub_image_cropped = sub_image[4:,4:] #removing 4 rows and 4 cols.
    sub_image_cropped_blocks = get_patches(sub_image_cropped)

    STD_full_image, ONE_full_image = std_and_ones(sub_image_blocks)
    STD_cropped_image, ONE_cropped_image = std_and_ones(sub_image_cropped_blocks)

    #             print("STD_full image shape: ",STD_full_image.shape)
    #             print("one_full image shape: ",ONE_full_image.shape)
    #             print("STD_crop image shape: ",STD_cropped_image.shape)
    #             print("One_crop image shape: ",ONE_cropped_image.shape)
    
    #63x4 stacked F-sub-image
    F_sub_image=np.column_stack((STD_full_image, ONE_full_image, STD_cropped_image, ONE_cropped_image))
    
    F_sub_image_flat=F_sub_image.T.flatten() #column wise flattening, 63*4=252 features
    return(F_sub_image_flat)

In [82]:
#main function to extract the features.
def feature_extraction(path_to_folder, class_label):
    data_list=[]
    for file_name in os.listdir(path_to_folder):
        path_to_img = os.path.join(path_to_folder,file_name)
        img = cv2.imread(path_to_img)
        
        if np.shape(img) == ():
            continue
        
        img = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb) #changing to YCrCb color space.
        img_y = img[:,:,0] # the Y channel only.
        img_cr = img[:,:,1] # the Cr channel only.
        img_cb = img[:,:,2] # the Cb channel only.
        
        Fy = feature_sub_image(img_y)
        Fcr = feature_sub_image(img_cr)
        Fcb = feature_sub_image(img_cb)
#         print("fy shape: ",Fy.shape)
#         print("fcr shape: ",Fcr.shape)
#         print("fcb shape: ",Fcb.shape)
        
        final_feature = np.concatenate((Fy, Fcb, Fcr), axis=None) #63*4*3=756 flattened features.
#         print("final feature shape: ",final_feature.shape)
        
        final_feature=list(final_feature)
        final_feature.insert(0,file_name)
        final_feature.insert(1,class_label)
        data_list.append(final_feature)
        
        
    return(data_list)

In [93]:
# #CASIA V1.0 dataset
# au_path = "YOUR_PATH/CASIA 1.0 dataset/Au"
# tp_path1 = "YOUR_PATH/CASIA 1.0 dataset/Modified Tp/Tp/CM"
# tp_path2 = "YOUR_PATH/CASIA 1.0 dataset/Modified Tp/Tp/Sp"
# output_name='CASIA1_features.csv'

# data_list1 = feature_extraction(au_path, 0)
# data_list2 = feature_extraction(tp_path1, 1)
# data_list3 = feature_extraction(tp_path2, 1)

# df = pd.DataFrame(data_list1)
# df = df.append(pd.DataFrame(data_list2), ignore_index=True)
# df = df.append(pd.DataFrame(data_list3), ignore_index=True)
#  --------------------------------------------------------------------------------------

# CASIA V2.0 Dataset
au_path="YOUR_PATH/CASIA2.0_revised/Au"
tp_path="YOUR_PATH/CASIA2.0_revised/Tp"
output_name='CASIA2_features.csv'

data_list1 = feature_extraction(au_path, 0)
data_list2 = feature_extraction(tp_path, 1)

df = pd.DataFrame(data_list1)
df = df.append(pd.DataFrame(data_list2), ignore_index=True)

#  --------------------------------------------------------------------------------------

# Keep this as it is
df.rename(columns = {0: "image_names", 1: "label"}, inplace = True)

scaler_norm = MinMaxScaler() 
df.iloc[:,2:] = scaler_norm.fit_transform(df.iloc[:,2:].to_numpy()) # Normalising the values in dataframe.

df.to_csv(output_name, index=False)

In [94]:
df.head()

Unnamed: 0,image_names,label,2,3,4,5,6,7,8,9,...,748,749,750,751,752,753,754,755,756,757
0,Au_sec_30588.jpg,0,0.470137,0.381402,0.351574,0.323234,0.2873,0.274272,0.23942,0.546986,...,0.170738,0.196697,0.1171,0.149016,0.167678,0.162983,0.149168,0.161387,0.173836,0.188427
1,Au_ani_30725.jpg,0,0.252941,0.188587,0.173732,0.164308,0.166953,0.169824,0.153493,0.393366,...,0.100509,0.157115,0.1171,0.117119,0.10698,0.114766,0.065053,0.125717,0.101974,0.150099
2,Au_nat_10126.jpg,0,0.3743,0.297729,0.307559,0.297172,0.231406,0.237804,0.198738,0.545127,...,0.16056,0.141331,0.1171,0.14154,0.147951,0.144651,0.141301,0.143677,0.153846,0.142681
3,Au_ani_00069.jpg,0,0.190092,0.119046,0.079772,0.071714,0.059388,0.05701,0.046951,0.228982,...,0.05598,0.12069,0.1171,0.100424,0.05741,0.096183,0.045688,0.107758,0.058198,0.123887
4,Au_art_30207.jpg,0,0.358499,0.242465,0.215926,0.207624,0.196746,0.190034,0.174886,0.738948,...,0.133333,0.17897,0.1171,0.149016,0.1391,0.145153,0.10469,0.158394,0.137905,0.175074


## SVM CLASSIFICATION CASIA 1.0

In [6]:
df=pd.read_csv('YOUR_PATH/____features.csv')

In [7]:
array=df.values
x_feature=array[:,2:]
y_label=array[:,1].astype('int')
print(x_feature.shape)
print(y_label.shape)

(1721, 756)
(1721,)


In [8]:
X_train,X_test,Y_train,Y_test=train_test_split(x_feature,y_label,test_size=0.20,random_state=7)

In [90]:
model_SVC=SVC(kernel='rbf',C=100,gamma=0.001)

kfold=KFold(n_splits=10, shuffle=True)
cv_results=cross_val_score(model_SVC,X_train,Y_train,cv=kfold,scoring='accuracy')
msg="%s %f (%f)" % ('Training Accuracy: ',cv_results.mean(),cv_results.std())
print(msg)

Training Accuracy:  0.927356 (0.018820)


In [92]:
# SVM hypertuning using GridSeachCV
model_SVC=SVC()

kfold=KFold(n_splits=10)
param_grid = {'C': [1, 10, 100, 500, 1000], 
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf']} 

grid=GridSearchCV(estimator=model_SVC,param_grid=param_grid,scoring='accuracy',cv=kfold,verbose=3)
grid_result=grid.fit(X_train,Y_train)

print("Best: %f using %s" % (grid_result.best_score_,grid_result.best_params_))

Fitting 10 folds for each of 25 candidates, totalling 250 fits
[CV 1/10] END .........C=1, gamma=1, kernel=rbf;, score=0.746 total time=   1.0s
[CV 2/10] END .........C=1, gamma=1, kernel=rbf;, score=0.717 total time=   1.0s
[CV 3/10] END .........C=1, gamma=1, kernel=rbf;, score=0.725 total time=   0.9s
[CV 4/10] END .........C=1, gamma=1, kernel=rbf;, score=0.710 total time=   0.9s
[CV 5/10] END .........C=1, gamma=1, kernel=rbf;, score=0.768 total time=   1.1s
[CV 6/10] END .........C=1, gamma=1, kernel=rbf;, score=0.739 total time=   1.3s
[CV 7/10] END .........C=1, gamma=1, kernel=rbf;, score=0.693 total time=   1.1s
[CV 8/10] END .........C=1, gamma=1, kernel=rbf;, score=0.759 total time=   0.9s
[CV 9/10] END .........C=1, gamma=1, kernel=rbf;, score=0.723 total time=   0.9s
[CV 10/10] END ........C=1, gamma=1, kernel=rbf;, score=0.693 total time=   0.9s
[CV 1/10] END .......C=1, gamma=0.1, kernel=rbf;, score=0.862 total time=   0.7s
[CV 2/10] END .......C=1, gamma=0.1, kernel=rb

[CV 2/10] END .......C=100, gamma=1, kernel=rbf;, score=0.775 total time=   0.9s
[CV 3/10] END .......C=100, gamma=1, kernel=rbf;, score=0.754 total time=   0.9s
[CV 4/10] END .......C=100, gamma=1, kernel=rbf;, score=0.761 total time=   0.9s
[CV 5/10] END .......C=100, gamma=1, kernel=rbf;, score=0.797 total time=   1.0s
[CV 6/10] END .......C=100, gamma=1, kernel=rbf;, score=0.754 total time=   0.9s
[CV 7/10] END .......C=100, gamma=1, kernel=rbf;, score=0.745 total time=   1.0s
[CV 8/10] END .......C=100, gamma=1, kernel=rbf;, score=0.788 total time=   0.9s
[CV 9/10] END .......C=100, gamma=1, kernel=rbf;, score=0.766 total time=   0.9s
[CV 10/10] END ......C=100, gamma=1, kernel=rbf;, score=0.745 total time=   1.0s
[CV 1/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.928 total time=   0.5s
[CV 2/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.942 total time=   0.5s
[CV 3/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.949 total time=   0.5s
[CV 4/10] END .....C=100, ga

[CV 4/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.761 total time=   0.9s
[CV 5/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.797 total time=   0.9s
[CV 6/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.754 total time=   0.9s
[CV 7/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.745 total time=   0.9s
[CV 8/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.788 total time=   0.9s
[CV 9/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.766 total time=   0.9s
[CV 10/10] END .....C=1000, gamma=1, kernel=rbf;, score=0.745 total time=   0.9s
[CV 1/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.935 total time=   0.5s
[CV 2/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.942 total time=   0.5s
[CV 3/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.942 total time=   0.5s
[CV 4/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.913 total time=   0.4s
[CV 5/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.942 total time=   0.5s
[CV 6/10] END ....C=1000, ga

In [None]:
model_SVC = SVC(C=500,gamma=0.01, kernel='rbf')
model_SVC.fit(X_train,Y_train) 

predictions=model_SVC.predict(X_test)

print(accuracy_score(Y_test,predictions))
print(confusion_matrix(Y_test,predictions))
print(classification_report(Y_test,predictions))

## SVM CLASSIFICATION CASIA 2.0

In [2]:
# df=pd.read_csv('YOUR_PATH/___features.csv')

In [3]:
array=df.values
x_feature=array[:,2:]
y_label=array[:,1].astype('int')
print(x_feature.shape)
print(y_label.shape)

(12614, 756)
(12614,)


In [4]:
X_train,X_test,Y_train,Y_test=train_test_split(x_feature,y_label,test_size=0.20,random_state=7)

In [97]:
model_SVC=SVC(kernel='rbf',C=100,gamma=0.001)

kfold=KFold(n_splits=10, shuffle=True)
cv_results=cross_val_score(model_SVC,X_train,Y_train,cv=kfold,scoring='accuracy')
msg="%s %f (%f)" % ('Training Accuracy: ',cv_results.mean(),cv_results.std())
print(msg)

Training Accuracy:  0.967793 (0.004678)


In [98]:
# SVM hypertuning using GridSeachCV
model_SVC=SVC()

kfold=KFold(n_splits=10)
param_grid = {'C': [1, 10, 100, 500, 1000], 
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf']} 

grid=GridSearchCV(estimator=model_SVC,param_grid=param_grid,scoring='accuracy',cv=kfold,verbose=3)
grid_result=grid.fit(X_train,Y_train)

print("Best: %f using %s" % (grid_result.best_score_,grid_result.best_params_))

Fitting 10 folds for each of 25 candidates, totalling 250 fits
[CV 1/10] END .........C=1, gamma=1, kernel=rbf;, score=0.937 total time=  18.0s
[CV 2/10] END .........C=1, gamma=1, kernel=rbf;, score=0.944 total time=  17.7s
[CV 3/10] END .........C=1, gamma=1, kernel=rbf;, score=0.938 total time=  17.7s
[CV 4/10] END .........C=1, gamma=1, kernel=rbf;, score=0.925 total time=  17.8s
[CV 5/10] END .........C=1, gamma=1, kernel=rbf;, score=0.943 total time=  17.7s
[CV 6/10] END .........C=1, gamma=1, kernel=rbf;, score=0.945 total time=  17.8s
[CV 7/10] END .........C=1, gamma=1, kernel=rbf;, score=0.937 total time=  17.4s
[CV 8/10] END .........C=1, gamma=1, kernel=rbf;, score=0.927 total time=  17.5s
[CV 9/10] END .........C=1, gamma=1, kernel=rbf;, score=0.932 total time=  17.7s
[CV 10/10] END ........C=1, gamma=1, kernel=rbf;, score=0.939 total time=  17.6s
[CV 1/10] END .......C=1, gamma=0.1, kernel=rbf;, score=0.965 total time=  11.2s
[CV 2/10] END .......C=1, gamma=0.1, kernel=rb

[CV 2/10] END .......C=100, gamma=1, kernel=rbf;, score=0.944 total time=  15.2s
[CV 3/10] END .......C=100, gamma=1, kernel=rbf;, score=0.936 total time=  15.1s
[CV 4/10] END .......C=100, gamma=1, kernel=rbf;, score=0.930 total time=  15.2s
[CV 5/10] END .......C=100, gamma=1, kernel=rbf;, score=0.945 total time=  15.2s
[CV 6/10] END .......C=100, gamma=1, kernel=rbf;, score=0.946 total time=  15.4s
[CV 7/10] END .......C=100, gamma=1, kernel=rbf;, score=0.931 total time=  15.2s
[CV 8/10] END .......C=100, gamma=1, kernel=rbf;, score=0.941 total time=  15.2s
[CV 9/10] END .......C=100, gamma=1, kernel=rbf;, score=0.936 total time=  15.3s
[CV 10/10] END ......C=100, gamma=1, kernel=rbf;, score=0.940 total time=  15.0s
[CV 1/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.966 total time=   6.7s
[CV 2/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.967 total time=   6.6s
[CV 3/10] END .....C=100, gamma=0.1, kernel=rbf;, score=0.963 total time=   6.7s
[CV 4/10] END .....C=100, ga

[CV 4/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.925 total time=  14.6s
[CV 5/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.947 total time=  14.9s
[CV 6/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.947 total time=  14.7s
[CV 7/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.929 total time=  14.8s
[CV 8/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.933 total time=  14.8s
[CV 9/10] END ......C=1000, gamma=1, kernel=rbf;, score=0.940 total time=  14.7s
[CV 10/10] END .....C=1000, gamma=1, kernel=rbf;, score=0.935 total time=  14.6s
[CV 1/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.961 total time=   6.2s
[CV 2/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.966 total time=   6.2s
[CV 3/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.959 total time=   6.2s
[CV 4/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.957 total time=   6.1s
[CV 5/10] END ....C=1000, gamma=0.1, kernel=rbf;, score=0.959 total time=   6.4s
[CV 6/10] END ....C=1000, ga

In [5]:
model_SVC = SVC(C=100,gamma=0.01, kernel='rbf')
model_SVC.fit(X_train,Y_train) 

predictions=model_SVC.predict(X_test)

print(accuracy_score(Y_test,predictions))
print(confusion_matrix(Y_test,predictions))
print(classification_report(Y_test,predictions))

0.9734443123265953
[[1475   56]
 [  11  981]]
              precision    recall  f1-score   support

           0       0.99      0.96      0.98      1531
           1       0.95      0.99      0.97       992

    accuracy                           0.97      2523
   macro avg       0.97      0.98      0.97      2523
weighted avg       0.97      0.97      0.97      2523

