## Transfer Learning: First Order Model

**Objective**: Leverage the DeepFake generator model to improve the performance of DeepFake detection models

**Hypothesis**: The hypothesis behind the model is that generator models have robust feature extraction, which if leveraged can lead to a significant improvement in the DeepFake detection accuracy

**Notebook Division**:
1. Load Dataset
2. Pre-process Dataset
3. Feature Extraction with First Order Model
    1. Load Checkpoint
    2. Extract different encoders/model segments
    3. Predict Values
4. Classification Models

In [1]:
# Install the relevant code for First Order Model Animation
# !git clone https://github.com/AliaksandrSiarohin/first-order-model

In [2]:
# !python -m pip install -U scikit-image

In [3]:
cd ../first-order-model

/mnt/disks/user/project/first-order-model


In [4]:
#Data processing
from skimage import io
import os
import glob
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from sklearn.preprocessing import LabelEncoder
import random
from collections import Counter

#Feature Extraction
from demo import load_checkpoints
import torch

# Models
from sklearn.svm import SVC
from sklearn import linear_model
from sklearn.ensemble import RandomForestClassifier

#Neural Network
import keras
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras import Input
from tensorflow.keras import optimizers
from keras.callbacks import ModelCheckpoint,Callback
import matplotlib.pyplot as plt


#Evaluation
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from keras.callbacks import ModelCheckpoint,Callback
from keras.callbacks import ReduceLROnPlateau

from sklearn import metrics
from tensorflow.keras.models import load_model
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.
  from numpy.core.umath_tests import inner1d


## Load Dataset

In [5]:
# path to your dataset
DATASET_PATH = '/mnt/disks/user/project/Dataset/'
deepfake_class = ['FaceSwap/clean_frames', 'Reenactment/clean_frames','original/clean_frames']

In [6]:
'''
func: load the deepfake dataset and divide them into train and test with each having samples of the different classes
input:
    i.dataset_path: string: the main dataset folder path 
    ii.train_ratio: float: the ratio of the dataset that will be used for training the model. Eg: 0.8
    iii. fake_class: string array: the different deepfake classes
output:
    i. train_set, test_set: dictionary of image paths as key and deepfake class as value
    ii. X_train, X_test: array of image paths 
    iii. y_train, y_test: array of corresponding deepfake classes 
'''
def MultiDatasetSplit(DATASET_PATH, train_ratio,fake_cls):
    test_set = {}
    train_set = {}
    list_IDs = []
    labels = {}
    for i, cls in enumerate(fake_cls):
        paths = glob.glob(os.path.join(DATASET_PATH, cls,'*/*.jpg'))
        #balancing the dataset
        balance_paths = random.sample(paths,2000)

        brk_point = int(len(balance_paths)*train_ratio)
        for j in range(len(balance_paths)):
            if j <= brk_point:
                train_set.update({balance_paths[j]:i})
            else:
                test_set.update({balance_paths[j]:i})
    
    X_train = [X for X in train_set.keys()] 
    y_train = [y for y in train_set.values()]
    X_test = [X for X in test_set.keys()]
    y_test = [y for y in test_set.values()]
    return train_set, test_set, X_train,y_train, X_test, y_test

In [7]:
'''
func: load the deepfake dataset and divide them into train and test with each having samples of the different classes
input:
    i.dataset_path: string: the main dataset folder path 
    ii.train_ratio: float: the ratio of the dataset that will be used for training the model. Eg: 0.8
    iii. fake_class: string array: the different deepfake classes
output:
    i. train_set, test_set: dictionary of image paths as key and deepfake class as value
    ii. X_train, X_test: array of image paths 
    iii. y_train, y_test: array of corresponding deepfake classes 
'''
def BinaryDatasetSplit(DATASET_PATH, train_ratio,fake_cls):
    test_set = {}
    train_set = {}
    list_IDs = []
    labels = {}
    for i, cls in enumerate(fake_cls):
        paths = glob.glob(os.path.join(DATASET_PATH, cls,'*/*.jpg'))
        if i == 0:
            paths = random.sample(paths,2000)
        else:
            paths = random.sample(paths,1000)
        brk_point = int(len(paths)*train_ratio)
        for j in range(len(paths)):
            if j <= brk_point:
                if i == 0:
                    train_set.update({paths[j]:0})
                else: 
                    train_set.update({paths[j]:1})
            else:
                if i == 0:
                    test_set.update({paths[j]:0})
                else:
                    test_set.update({paths[j]:1})

    X_train = [X for X in train_set.keys()] 
    y_train = [y for y in train_set.values()]
    X_test = [X for X in test_set.keys()]
    y_test = [y for y in test_set.values()]
    return train_set, test_set, X_train,y_train, X_test, y_test

In [8]:
b_train_set, b_test_set, b_train_X, b_train_y, b_test_X, b_test_y = BinaryDatasetSplit(DATASET_PATH,0.7,deepfake_class)

In [7]:
m_train_set, m_test_set, m_train_X, m_train_y, m_test_X, m_test_y = MultiDatasetSplit(DATASET_PATH,0.7,deepfake_class)

In [10]:
Counter(b_train_set.values())

Counter({0: 1401, 1: 1402})

In [8]:
Counter(m_train_set.values())

Counter({0: 1401, 1: 1401, 2: 1401})

In [12]:
set(b_test_set.values())

{0, 1}

In [9]:
set(m_test_set.values())

{0, 1, 2}

In [10]:
# specify image size and channels
img_channels = 3
img_rows = 224
img_cols = 224

# number of classes
b_nb_classes = 2
m_nb_classes = 3

## Feature Extraction

Load model and model checkpoints for Vox (from the other dataset models, Vox appears to be the most relevant and closest in content to the FaceForensics++ Dataset)


In [11]:
config_path='config/vox-256.yaml' #data checkpoints
checkpoint_path='../vox-cpk.pth.tar' #pyTorch Model

In [12]:
#kp_detector model loads keypoints
generator, kp_detector = load_checkpoints(config_path='config/vox-256.yaml', 
                            checkpoint_path='../vox-cpk.pth.tar')

In [13]:
#dir(generator.module)

In [14]:
#dir(generator.module.down_blocks)
#encoder.train()

In [15]:
#get the pre-trained weights from the generator module
encoder = generator.module.bottleneck
encoder.train()

Sequential(
  (r0): ResBlock2d(
    (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (norm1): SynchronizedBatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (norm2): SynchronizedBatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (r1): ResBlock2d(
    (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (norm1): SynchronizedBatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (norm2): SynchronizedBatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (r2): ResBlock2d(
    (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (norm1

## Data Processing

Allow for batch-wise pre-processing of data

In [16]:
'''
func: process the image paths to return image values and respective labeled classes
input:
    i.batch_X: array of image paths 
    ii.batch_y: dictionary of image paths and their corresponding class
output:
    i. X: processed image data
    ii. y_value: corresponding class values
'''
def PreProcess(batch_X, batch_y, batch_size=32, dimension=(256,256), n_channels=3):
    X = np.empty((len(batch_X), *dimension, n_channels))
    y = np.empty((len(batch_X)), dtype=int)
    for i, image_path in enumerate(batch_X):
        img = image.load_img(image_path, target_size=dimension)
        img = image.img_to_array(img)
        #img = img/255
        img = preprocess_input(img)
        X[i,] = img
        y[i] = batch_y[image_path]
        
    #Extract features for batch-wise values
    output = torch.tensor(X, dtype=torch.float).to('cuda')
    output_val = encoder(output)
    feature = output_val.cpu().data.numpy().reshape((32,256,256,-1))
    
    le = LabelEncoder()
    y_value = le.fit_transform(y)
    return feature,y_value

In [17]:
'''
func: Random Forest model for DeepFake classification 
input:
    i.train_X: array list: image path for model training
    ii.train_set : dictionary: of training image paths and corresponding class
    iii. batch_size: int
    iv. model: feature extractor model
    v. epoch: int
    vi. ablation: optional int value to simple ablation testing
output:
    i. classifier: trained random forest classifier model
'''
def Feature_Extraction(train_X,train_set, batch_size, ablation=None):

    X = []
    y = []
    
    #Select Data for fit
    if ablation != None:
        train_X = random.sample(train_X, ablation)
        train_set = {image: train_set[image] for image in train_X }
    #Getting the number of batches needed
    batch_num = int(np.floor(len(train_X) / batch_size))
    #For every batch in an epoch
    for i in range(batch_num):
        #Randomly select data for the batch
        batch_X = random.sample(train_X, batch_size)
        batch_y = {image: train_set[image] for image in batch_X}
        #Preprocess batch data
        X_val,y_val = PreProcess(batch_X, batch_y)
        for i,val in enumerate(X_val):
            val = val.flatten()
            X.append(val)
            y.append(y_val[i])

    return X,y

In [None]:
X_data,y_data = Feature_Extraction(b_train_X,b_train_set,32)

In [23]:
len(X_data)

2784

In [34]:
b_classifier = RandomForestClassifier(bootstrap=True,
                                      max_depth=12,
                                      max_features=0.7,
                                      n_estimators=100,
                                      criterion='gini',
                                      class_weight='balanced',
                                      verbose=2
                                     )

In [35]:
b_classifier.fit(X_data,y_data)

building tree 1 of 100


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  4.0min remaining:    0.0s


building tree 2 of 100
building tree 3 of 100
building tree 4 of 100
building tree 5 of 100
building tree 6 of 100
building tree 7 of 100
building tree 8 of 100
building tree 9 of 100
building tree 10 of 100
building tree 11 of 100
building tree 12 of 100
building tree 13 of 100
building tree 14 of 100
building tree 15 of 100
building tree 16 of 100
building tree 17 of 100
building tree 18 of 100
building tree 19 of 100
building tree 20 of 100
building tree 21 of 100
building tree 22 of 100
building tree 23 of 100
building tree 24 of 100
building tree 25 of 100
building tree 26 of 100
building tree 27 of 100
building tree 28 of 100
building tree 29 of 100
building tree 30 of 100
building tree 31 of 100
building tree 32 of 100
building tree 33 of 100
building tree 34 of 100
building tree 35 of 100
building tree 36 of 100
building tree 37 of 100
building tree 38 of 100
building tree 39 of 100
building tree 40 of 100
building tree 41 of 100
building tree 42 of 100
building tree 43 of 100


[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed: 394.3min finished


RandomForestClassifier(bootstrap=True, class_weight='balanced',
            criterion='gini', max_depth=12, max_features=0.7,
            max_leaf_nodes=None, min_impurity_decrease=0.0,
            min_impurity_split=None, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=100, n_jobs=1, oob_score=False, random_state=None,
            verbose=2, warm_start=False)

In [36]:
b_classifier.score(X_data,y_data)

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.1s finished


0.9992816091954023

### Random Forest Binary Evaluation

In [37]:
b_test_feature, b_test_labels = Feature_Extraction(b_test_X, b_test_set, 32)

In [38]:
binary_y_pred = b_classifier.predict(b_test_feature)

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished


In [39]:
Counter(binary_y_pred)

Counter({0: 596, 1: 588})

In [40]:
binary_score_test = metrics.accuracy_score(b_test_labels, binary_y_pred)
print('RF Model Test Score ',binary_score_test)

RF Model Test Score  0.6554054054054054


In [41]:
binary_cm = confusion_matrix(b_test_labels, binary_y_pred)
print(binary_cm)

[[389 201]
 [207 387]]


In [42]:
binary_report = classification_report(b_test_labels,binary_y_pred)
print(binary_report)

             precision    recall  f1-score   support

          0       0.65      0.66      0.66       590
          1       0.66      0.65      0.65       594

avg / total       0.66      0.66      0.66      1184



In [43]:
roc_auc_score(b_test_labels, binary_y_pred)

0.6554185927067282

### Multi-Class Random Forest

In [24]:
m_X_data,m_y_data = Feature_Extraction(m_train_X,m_train_set,32)

In [25]:
len(m_X_data)

4192

In [26]:
m_classifier = RandomForestClassifier(bootstrap=True,
                                      max_depth=12,
                                      max_features=0.7,
                                      n_estimators=100,
                                      criterion='gini',
                                      class_weight='balanced',
                                      verbose=2)

In [27]:
m_classifier.fit(m_X_data,m_y_data)

building tree 1 of 100


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  6.3min remaining:    0.0s


building tree 2 of 100
building tree 3 of 100
building tree 4 of 100
building tree 5 of 100
building tree 6 of 100
building tree 7 of 100
building tree 8 of 100
building tree 9 of 100
building tree 10 of 100
building tree 11 of 100
building tree 12 of 100
building tree 13 of 100
building tree 14 of 100
building tree 15 of 100
building tree 16 of 100
building tree 17 of 100
building tree 18 of 100
building tree 19 of 100
building tree 20 of 100
building tree 21 of 100
building tree 22 of 100
building tree 23 of 100
building tree 24 of 100
building tree 25 of 100
building tree 26 of 100
building tree 27 of 100
building tree 28 of 100
building tree 29 of 100
building tree 30 of 100
building tree 31 of 100
building tree 32 of 100
building tree 33 of 100
building tree 34 of 100
building tree 35 of 100
building tree 36 of 100
building tree 37 of 100
building tree 38 of 100
building tree 39 of 100
building tree 40 of 100
building tree 41 of 100
building tree 42 of 100
building tree 43 of 100


[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed: 601.5min finished


RandomForestClassifier(bootstrap=True, class_weight='balanced',
            criterion='gini', max_depth=12, max_features=0.7,
            max_leaf_nodes=None, min_impurity_decrease=0.0,
            min_impurity_split=None, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=100, n_jobs=1, oob_score=False, random_state=None,
            verbose=2, warm_start=False)

In [28]:
m_classifier.score(m_X_data,m_y_data)

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.2s finished


1.0

### Random Forest Multi Evaluation

In [29]:
m_test_feature, m_test_labels = Feature_Extraction(m_test_X, m_test_set, 32)

In [30]:
multi_y_pred = m_classifier.predict(m_test_feature)

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.1s finished


In [31]:
Counter(multi_y_pred)

Counter({1: 896, 0: 574, 2: 322})

In [32]:
multi_score_test = metrics.accuracy_score(m_test_labels, multi_y_pred)
print('RF Model Test Score ',multi_score_test)

RF Model Test Score  0.5234375


In [33]:
multi_cm = confusion_matrix(m_test_labels, multi_y_pred)
print(multi_cm)

[[316 169  88]
 [ 58 471  83]
 [200 256 151]]


In [34]:
multi_report = classification_report(m_test_labels,multi_y_pred)
print(multi_report)

             precision    recall  f1-score   support

          0       0.55      0.55      0.55       573
          1       0.53      0.77      0.62       612
          2       0.47      0.25      0.33       607

avg / total       0.51      0.52      0.50      1792



## SVM Models


### SVM Binary

In [24]:
b_classifier = Pipeline([('scaler', StandardScaler()), ('svc', SVC(kernel='rbf', max_iter=1000, C=100, gamma=0.001, class_weight='balanced',verbose=1))])

In [25]:
b_classifier.fit(X_data,y_data)

[LibSVM]



Pipeline(memory=None,
     steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('svc', SVC(C=100, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=1000, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=1))])

In [26]:
#b_classifier.score(X_data,y_data)

### SVM Binary Evaluation

In [27]:
b_test_feature, b_test_labels = Feature_Extraction(b_test_X, b_test_set, 32)

In [28]:
binary_y_pred = b_classifier.predict(b_test_feature)

In [29]:
Counter(binary_y_pred)

Counter({0: 1184})

In [30]:
binary_score_test = metrics.accuracy_score(b_test_labels, binary_y_pred)
print('SVM Model Test Score ',binary_score_test)

SVM Model Test Score  0.49577702702702703


In [31]:
binary_cm = confusion_matrix(b_test_labels, binary_y_pred)
print(binary_cm)

[[587   0]
 [597   0]]


In [32]:
binary_report = classification_report(b_test_labels,binary_y_pred)
print(binary_report)

             precision    recall  f1-score   support

          0       0.50      1.00      0.66       587
          1       0.00      0.00      0.00       597

avg / total       0.25      0.50      0.33      1184



  'precision', 'predicted', average, warn_for)


In [33]:
roc_auc_score(b_test_labels, binary_y_pred)

0.5

### Multi-Class SVM

In [18]:
m_X_data,m_y_data = Feature_Extraction(m_train_X,m_train_set,32)

In [19]:
len(m_X_data)

4192

In [20]:
m_classifier = Pipeline([('scaler', StandardScaler()), ('svc', SVC(kernel='rbf', max_iter=1000, C=100, gamma=0.001, class_weight='balanced',verbose=2))])

In [21]:
m_classifier.fit(m_X_data,m_y_data)

[LibSVM]



Pipeline(memory=None,
     steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('svc', SVC(C=100, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=1000, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=2))])

In [22]:
#m_classifier.score(m_X_data,m_y_data)

### SVM Multi Evaluation

In [23]:
m_test_feature, m_test_labels = Feature_Extraction(m_test_X, m_test_set, 32)

In [24]:
multi_y_pred = m_classifier.predict(m_test_feature)

In [25]:
Counter(multi_y_pred)

Counter({1: 1792})

In [26]:
multi_score_test = metrics.accuracy_score(m_test_labels, multi_y_pred)
print('RF Model Test Score ',multi_score_test)

RF Model Test Score  0.32533482142857145


In [27]:
multi_cm = confusion_matrix(m_test_labels, multi_y_pred)
print(multi_cm)

[[  0 588   0]
 [  0 583   0]
 [  0 621   0]]


In [28]:
multi_report = classification_report(m_test_labels,multi_y_pred)
print(multi_report)

             precision    recall  f1-score   support

          0       0.00      0.00      0.00       588
          1       0.33      1.00      0.49       583
          2       0.00      0.00      0.00       621

avg / total       0.11      0.33      0.16      1792



  'precision', 'predicted', average, warn_for)


### Random Forest

In [None]:
def PreProcess(batch_X, batch_y, batch_size=32, dimension=(256,256), n_channels=3, n_classes=2):
    X = np.empty((batch_size, *dimension, n_channels))
    y = np.empty((batch_size), dtype=int)
    for i, image_path in enumerate(batch_X):
        img = image.load_img(image_path, target_size=dimension)
        img = image.img_to_array(img)
        #img = img/255
        img = preprocess_input(img)
        X[i,] = img
        y[i] = batch_y[image_path]
        
   #Extract features for batch-wise values
    output = torch.tensor(X, dtype=torch.float).to('cuda')
    output_val = encoder(output)
    feature = output_val.cpu().data.numpy().reshape(32,256,256,-1)
    #class encoding to 0,1,2
#         le = LabelEncoder()
#         self.y_value = le.fit_transform(y)
    return feature,keras.utils.to_categorical(y, num_classes=n_classes)

In [None]:
'''
func: Random Forest model for DeepFake classification 
input:
    i.train_X: array list: image path for model training
    ii.train_set : dictionary: of training image paths and corresponding class
    iii. batch_size: int
    iv. model: feature extractor model
    v. epoch: int
    vi. ablation: optional int value to simple ablation testing
output:
    i. classifier: trained random forest classifier model
'''
def Random_Forest_Model(train_X,train_set, batch_size,model,epoch, ablation=None):
    classifier = RandomForestClassifier(bootstrap=True,
                             max_depth=12,
                             max_features=0.7,
                             n_estimators=100,
                             criterion='gini',
                             class_weight='balanced'
                            )
    #Select Data for fit
    if ablation != None:
        train_X = random.sample(train_X, ablation)
        train_set = {image: train_set[image] for image in train_X }
    #Getting the number of batches needed
    batch_num = int(np.floor(len(train_X) / batch_size))
    #For every epoch
    for j in range(epoch):
        #For every batch in an epoch
        for i in range(batch_num):
            #Randomly select data for the batch
            batch_X = random.sample(train_X, batch_size)
            batch_y = {image: train_set[image] for image in batch_X}
            #Preprocess batch data
            X,y = PreProcess(batch_X, batch_y)
            #Extract features from model
            print(X)
            features = model(X)
            classifier.fit(features, y)
    
    pred_train = classifier.predict(features)
    score_train = metrics.accuracy_score(y, pred_train)
    print('Random Forest Trained Score ',score_train)
    return classifier

In [None]:
rf_classifier = Random_Forest_Model(b_train_X,b_train_set,32,encoder,10,50)

In [None]:
rfc = RandomForestClassifier(bootstrap=True,
                             max_depth=12,
                             max_features=0.7,
                             n_estimators=100,
                             criterion='gini',
                             class_weight='balanced'
                            )

In [None]:
# fit
rfc.fit(features,labels)

### Evaluate

In [None]:
'''
func: SVM model for DeepFake classification 
input:
    i.train_X: array list: image path for model training
    ii.train_set : dictionary: of training image paths and corresponding class
    iii. batch_size: int
    iv. model: feature extractor model
    v. epoch: int
    vi. ablation: optional int value to simple ablation testing
'''
def RandomForest(train_X, train_set,batch_size,model,epoch,ablation=None):
    #Model Initialisation
    svm = linear_model.SGDClassifier()
    
    #Select Data for fit
    if ablation != None:
        train_X = random.sample(train_X, ablation)
        train_set = {image: train_set[image] for image in train_X }
    #Getting the number of batches needed
    batch_num = int(np.floor(len(train_X) / batch_size))
    #For every epoch
    for j in range(epoch):
        #For every batch in an epoch
        for i in range(batch_num):
            #Randomly select data for the batch
            batch_X = random.sample(train_X, batch_size)
            batch_y = {image: train_set[image] for image in batch_X}
            #Preprocess batch data
            X,y = PreProcess(batch_X, batch_y)
            #Extract features from model
            output = feature_model(model,X)
            features = output.cpu().data.numpy().reshape(32,-1)
            #Partial fit a model
            #print(features,y)
            svm.partial_fit(features,y,classes=[0,1])
            print('Epoch ',j, svm.score(features,y))
    print('fitting done !!!')
    return svm

In [None]:
#Train SVM model
b_feature_val,b_label = SVM_model(b_train_X,b_train_set,32,encoder,20,ablation=100)

In [None]:
'''
func: SVM model for DeepFake classification 
input:
    i.train_X: array list: image path for model training
    ii.train_set : dictionary: of training image paths and corresponding class
    iii. batch_size: int
    iv. model: feature extractor model
    v. epoch: int
    vi. ablation: optional int value to simple ablation testing
'''
def Feature_Extractor(train_X, train_set,model,ablation=None):   
    feature_val = []
    labels = []
    #Select Data for fit
    if ablation != None:
        train_X = random.sample(train_X, ablation)
        train_set = {image: train_set[image] for image in train_X }
    #Preprocess batch data
    X,y = PreProcess(train_X, train_set)
    #Extract features from model
    print(len(X))
    output = feature_model(model,X)
    features = output.cpu().data.numpy().reshape(len(),256,256,-1)
    flat = features.flatten()
    print('flatten', len(flat))
    feature_val.append(flat)
    labels.append(y)
    print(len(feature_val))
    #Partial fit a model
    #print(features,y)
    print('fitting done !!!')
    return feature_val,labels

In [None]:
b_feature_val,b_label = Feature_Extractor(b_train_X,b_train_set,model=encoder,ablation=100)

In [None]:
print(len(b_feature_val))
print(len(b_label))

In [None]:
pipe = Pipeline([('scaler', StandardScaler()), ('svc', SVC(kernel='rbf', max_iter=1000, C=100, gamma=0.001, class_weight='balanced'))])
pipe.fit(b_feature_val,b_label)

In [None]:
#b_test_feature, b_test_labels = FeatureExtraction(b_test_X, b_test_set)
X,y = PreProcess(b_test_X, b_test_set,batch_size=len(b_test_X))
#Extract features from model
output = feature_model(encoder,X)
features = output.cpu().data.numpy().reshape(len(X),-1)

In [None]:
binary_y_pred = svm.predict(b_test_feature)

In [None]:
Counter(binary_y_pred)

In [None]:
binary_score_test = metrics.accuracy_score(b_test_labels, binary_y_pred)
print('Base Model Test Score ',binary_score_test)

In [None]:
binary_cm = confusion_matrix(b_test_labels, binary_y_pred)
print(binary_cm)

In [None]:
binary_report = classification_report(b_test_labels,binary_y_pred)
print(binary_report)

In [None]:
roc_auc_score(b_test_labels, binary_y_pred)

### Multi Class SVM

In [None]:
'''
func: SVM model for DeepFake classification 
input:
    i.train_X: array list: image path for model training
    ii.train_set : dictionary: of training image paths and corresponding class
    iii. batch_size: int
    iv. model: feature extractor model
    v. epoch: int
    vi. ablation: optional int value to simple ablation testing
'''
def SVM_model(train_X, train_set,batch_size,model,epoch,ablation=None):
    #Model Initialisation
    svm = linear_model.SGDClassifier()
    
    #Select Data for fit
    if ablation != None:
        train_X = random.sample(train_X, ablation)
        train_set = {image: train_set[image] for image in train_X }
    #Getting the number of batches needed
    batch_num = int(np.floor(len(train_X) / batch_size))
    #For every epoch
    for j in range(epoch):
        #For every batch in an epoch
        for i in range(batch_num):
            #Randomly select data for the batch
            batch_X = random.sample(train_X, batch_size)
            batch_y = {image: train_set[image] for image in batch_X}
            #Preprocess batch data
            X,y = PreProcess(batch_X, batch_y)
            #Extract features from model
            output = feature_model(model,X)
            features = output.cpu().data.numpy().reshape(32,-1)
            #Partial fit a model
            #print(features,y)
            svm.partial_fit(features,y,classes=[0,1])
            print('Epoch ',j, svm.score(features,y))
    print('fitting done !!!')
    return svm

In [None]:
#Train SVM model
m_svm = SVM_model(b_train_X,b_train_set,32,encoder,20)

## Other Things

In [None]:
cnn_model.compile(loss='sparse_categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])
cnn_model.fit(X_train, y_data,
                 batch_size=5,
                 epochs=1,
                 shuffle=True)

In [None]:
# svm = SVC(kernel='rbf')
# svm.fit(out_val,y_data)

# print('fitting done !!!')

In [None]:
# encoder.eval()
# data = torch.tensor(X, dtype=torch.float).to('cuda')
# output = encoder(data)
# #prediction = torch.argmax(output)

In [None]:
# out_val = output.cpu().data.numpy().reshape(32,-1)

In [None]:
# svm.score(out_val,y_data)

In [None]:
model[1].norm1.weight

In [None]:
dir(model[1])

In [None]:
kp_detector

In [None]:
from tensorflow.keras.preprocessing import image

img = image.load_img('/mnt/disks/user/project/Dataset/testing/frames/004_982/004_982_frame285.jpg', target_size=(255,255))
img = img.resize((256, 256), Image.NEAREST)
img = image.img_to_array(img)
img

In [None]:
import torch
import numpy as np
image_train = '/mnt/disks/user/project/Dataset/testing/frames/004_982/004_982_frame285.jpg'


In [None]:
source = torch.tensor(img[np.newaxis].astype(np.float32)).permute(0, 3, 1, 2).to('cuda')

In [None]:
kp_detector(source)

In [None]:
import torch
torch.manual_seed(1)

import torch.nn as nn

#lstm = nn.generator(3, 3)

print(generator._all_weights)

In [None]:
from keras.models import model_from_yaml

In [None]:
yaml_file = open('config/vox-256.yaml', 'r')
loaded_model_yaml = yaml_file.read()
yaml_file.close()
loaded_model = model_from_yaml(loaded_model_yaml)
# # load weights into new model
# loaded_model.load_weights("model.h5")
# print("Loaded model from disk")
 
# # evaluate loaded model on test data
# loaded_model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
# score = loaded_model.evaluate(X, Y, verbose=0)
# print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))

In [None]:
loss_fn = torch.nn.MSELoss(size_average=False)
optimizer = torch.optim.SGD(generator.parameters(), lr=1e-4)

In [None]:
y_pred = generator('/mnt/disks/user/project/Dataset/testing/frames/004_982/004_982_frame285.jpg')