In [1]:
%load_ext autoreload
%autoreload 2

In [1]:
import sys, os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import torch
import numpy as np
import random
torch.manual_seed(23336666)
np.random.seed(23336666)
random.seed(23336666)

kaggle = False
if kaggle:
    !pip install ../input/facenet-pytorch-vggface2/facenet_pytorch-2.0.1-py3-none-any.whl
    !pip install ../input/dfdc-model/codes/pkgs/efficientnet_pytorch-0.6.1/efficientnet_pytorch-0.6.1

import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
import glob
import time
import time
from PIL import Image
from torchvision.transforms import Normalize, RandomHorizontalFlip, ToTensor, ToPILImage, Compose, Resize
from sklearn.metrics import log_loss
import pathlib
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
from hashlib import sha1

if kaggle:
    sys.path += ["/kaggle/input/dfdc-model/codes/pkgs", "/kaggle/input/dfdc-model/codes/utils", "/kaggle/input/dfdc-model/codes"]
    from engine import FaceInferenceModel, FastDataset, MTCNN_extractor, MultiFrameModel
else:
    sys.path += [".", "./utils"]
    from engine import FaceInferenceModel, FastDataset, show, Cache_loader, MTCNN_extractor, MultiFrameModel

In [3]:
# torch.cuda.set_device("cuda:0")
if kaggle:
    filenames = glob.glob('/kaggle/input/deepfake-detection-challenge/test_videos/*.mp4')
else:
    filenames = glob.glob('/home/lthpc/test_videos/*.mp4')
if not kaggle:
    def accurate_sample(df, number = 400):
        np.random.seed(23336666)
        reals = df[df["label"] == "REAL"].sample(number)
        fakes = pd.concat([df[df["original"]==name].sample(1) for name in reals.index])
        return pd.concat([reals, fakes])
    split_name = "metadata_40_49_dropped"
    metadata = pd.read_json(f'/data/deepfake/metadata/{split_name}.json').T
    df = metadata[((metadata['split_kailu'] == 'validation'))] # | (metadata['split_kailu'] == 'test')
    df = accurate_sample(df)
    filenames = ["/data/deepfake/dfdc_train/"+fn for fn in df.index]
    real_labels = df.label
    real_labels = real_labels.apply(lambda x: 0 if x=="REAL" else 1).sort_index()

In [76]:
# df["original"] = df.apply(lambda x:x.original if x.original else x.name ,1)
# df["label"] = df["label"].apply(lambda x: 0 if x == "REAL" else 1)
# df["pred"] = pred_df["label"]
# df["loss"] = df.apply(lambda row: log_loss([row.label], [row.pred], labels=[0,1]), axis=1)
# df["confidence"] = df.apply(lambda row: min(row.pred, 1-row.pred), axis=1)
# df[df.confidence<0.1].loss.mean()
# df["correct"] = df["label"]==(df["pred"]>0.5)
# df.loss.sort_values().plot()
# df.groupby("original").loss.mean().sort_values().plot()
# df.correct.sort_values().plot()
# df.groupby(df.groupby("original").correct.mean()).loss.mean()
# df["pair_mean"] = df.groupby("original").pred.mean()
# df.sort_values("original")[:40]
(df[df.label==0]["pred"] < df[df.label==0]["pair_mean"]).count()

400

# pairing

In [92]:
def get_first_frame(path):
    import cv2
    capture = cv2.VideoCapture(path)
    success,frame = capture.read()
    capture.release()
    if success:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        return frame
    return None

def gen_feature(filenames):
    id2feature = {}
    for path in filenames:
        fn = path.split("/")[-1]
        frame = get_first_frame(path)
        img_shot = np.array(Image.fromarray(frame).resize((50,50))).reshape(-1)
        id2feature[fn]=img_shot
    return id2feature
        
id2feature = gen_feature(filenames)

In [94]:
def get_pairs(id2feature):
    ret = []
    fns = [i for i in id2feature.keys()]
    used = [0]* len(fns)
    X = torch.Tensor([i for i in id2feature.values()]).T.cuda()
    X = X/((X*X).sum(0).sqrt())
    arc = (torch.mm(X.T, X).cpu()-2*torch.eye(len(fns)))
    # before using blossom, try simple way first
    for i in range(len(fns)):
        if used[i] == 1:
            continue
        values, j = torch.max(arc[i], 0)
        ret.append((fns[i], fns[j]))
        arc[:,(i, j)]=-1
        used[i] = 1
        used[j] = 1
    return ret
pairs = get_pairs(id2feature)

# preprocess

* only 5G hard drive available
* 4 processor at most
* 13G memory
* 16G GPU memory

In [4]:
from multiprocessing import  Pool
import pathlib
from engine import FastDataset, MTCNN_extractor
import pdb

def get_bbox_sub(paths, sample_number = 16, new_length = 1, save = False):
    face_detector = None
    id2face = dict()
    ds = FastDataset(paths, sample_rate = sample_number, new_length=new_length)
    loader = DataLoader(ds, batch_size=1, num_workers=12 if not kaggle else 2, drop_last=False, shuffle=False)
    
    loader = tqdm(loader, position=0, leave=True)
    for batch, fp in zip(loader, paths):
        frames, iter_frame = batch
        frames = frames[0].numpy().astype(np.uint8)
        iter_frame = iter_frame[0].numpy().astype(int)
        if len(frames) == 0 or frames is None:
            continue

        # tmp/xxx.mp4/13/0.png 1.png
        pathlib.Path("tmp").mkdir(exist_ok=True)
        cache_path = pathlib.Path("tmp") / fp.split('/')[-1]
        cache_path.mkdir(exist_ok=True)

        if face_detector is None:
            face_detector =  MTCNN_extractor(batch_size = 40, my_device = torch.device('cuda', 0), same_bbox_size=True)
        face_info = face_detector._get(frames)
        face_info = [[face._replace(frame=iter_frame[face.frame]) for face in one_frame] for one_frame in face_info]
        
        if save:
            for faces in face_info:
                for face_id, face in enumerate(faces):
                    fid = face.frame
                    target_path = cache_path / str(fid)
                    target_path.mkdir(exist_ok=True)
                    save_path = target_path / (str(face_id) + '.png')
                    face = Image.fromarray(face.face)
                    face.save(save_path, compress_level=1)

        id2face[fp.split('/')[-1]] = face_info
    return id2face

# test
get_bbox_sub(filenames[:2], 16, 1, save = True)

100%|██████████| 2/2 [00:02<00:00,  1.52s/it]


{'zmdzmbfneh.mp4': [[FaceInfo(face=array([[[218, 219, 211],
           [218, 219, 211],
           [218, 219, 211],
           ...,
           [207, 209, 204],
           [207, 209, 204],
           [207, 209, 204]],
   
          [[218, 219, 211],
           [218, 219, 211],
           [218, 219, 211],
           ...,
           [207, 209, 204],
           [207, 209, 204],
           [207, 209, 204]],
   
          [[218, 219, 211],
           [218, 219, 211],
           [218, 219, 211],
           ...,
           [207, 209, 204],
           [207, 209, 204],
           [207, 209, 204]],
   
          ...,
   
          [[179, 180, 170],
           [179, 180, 170],
           [179, 180, 170],
           ...,
           [144, 143, 136],
           [144, 143, 136],
           [144, 143, 136]],
   
          [[179, 180, 170],
           [179, 180, 170],
           [179, 180, 170],
           ...,
           [144, 143, 136],
           [144, 143, 136],
           [144, 143, 136]],
   
    

# test part

In [5]:
class Model1DDP(FaceInferenceModel):
    def __init__(self, model_path):
        super().__init__()
        checkpoint = torch.load(model_path, map_location=device)
        state_dict = checkpoint['model']
        if isinstance(state_dict, torch.nn.Module):
            self.model = state_dict
        else:
            params = checkpoint['params']
            from model1.model import create_model
            self.model, _ = create_model(params['use_hidden_layer'], params['dropout'], params['backbone'], params)
            self.model.load_state_dict(state_dict)
        self.model.cuda().eval()
        
    def _tta(self, pil_img):
        return [pil_img]
    
    def predict_batch(self, batch):
        with torch.no_grad():
            batch = batch.to(device)
            y_pred = torch.sigmoid(self.model(batch))
            y = y_pred.detach().cpu().squeeze().numpy()
            return y.reshape(-1)

In [6]:
import albumentations as aug
from numpy.random import choice

def predict_on_all(id2faces, models):
    predicts = {}
    for fn, faces in id2faces.items():
        if len(faces) == 0 or faces is None:
            predicts[fn]=0.5
            continue
        
        answers = []
        for model in models:
            try:
                answers.append(model.solve_faces(faces))
            except:
                answers.append(0.5)
        predicts[fn]=models[0]._ensemble(answers)
    return pd.DataFrame.from_dict(predicts, orient="index", columns=["label"])

In [8]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
models = [Model1DDP(("/kaggle/input/dfdc-model/" if kaggle else "/home/kailu/models/")+"6ca249e8.pth")]
sample_number = 16
new_length = 1
id2faces = get_bbox_sub(filenames, sample_number, new_length)
pred_df = predict_on_all(id2faces, models)    

100%|██████████| 800/800 [03:22<00:00,  3.91it/s]


In [97]:
guess = pred_df.copy()
for p1,p2 in pairs:
    if guess.at[p1,"label"] > guess.at[p2, "label"]:
        guess.loc[p1, "label"] = 1
        guess.loc[p2, "label"] = 0
    else:
        guess.loc[p1, "label"] = 0
        guess.loc[p2, "label"] = 1
if not kaggle:
        print((guess.sort_index().label == real_labels).mean())

0.9675


# Re-train

In [18]:
from numpy.random import choice
class Simple_DFDCDataset(Dataset):
    def __init__(self, metadata, id2faces, transform):
        self.metadata = metadata
        self.transform = transform
        self.id2faces = id2faces
        self.fns = list(metadata.index)
        self.smooth = 0
        
    def __len__(self):
        return len(self.fns)

    def __getitem__(self, idx: int):
        try:
            fn = self.fns[idx]
            frame = choice(len(self.id2faces[fn]))
            face_id = choice(len(self.id2faces[fn][frame]))
            face = Image.fromarray(self.id2faces[fn][frame][face_id].face)
            image = self.transform(face)
            label = self.metadata.at[fn, "label"]
            if self.smooth and np.random.rand()<self.smooth:
                label = 1 - label
            return image, torch.Tensor([int(np.round(label))])
        except Exception as e:
            print(e)
            return torch.zeros((1, 3, 224, 224)), torch.Tensor([0])
    
from torchvision import transforms
def get_transforms(image_size=224):
    pre_trained_mean, pre_trained_std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
    
    train_transforms = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=pre_trained_mean, std=pre_trained_std),
        transforms.RandomErasing(scale=(0.4, 0.5), ratio=(0.2, 5)),
    ])
    return train_transforms

In [19]:
class Simple_Trainer(object):
    '''This class takes care of training and validation of our model'''

    def __init__(self, train_dl, model: torch.nn.Module, optimizer, criterion, epoch):
        self.train_dl = train_dl
        print("train_dl", len(train_dl))
        self.model = model
        self.optimizer = optimizer
        self.num_epochs = epoch
        self.criterion = criterion
        
        self.current_epoch = 0
        self.best_metric = 1e9
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.phase = 'train'
        self.train()
        self.history_best = {}

    def train(self):
        self.phase = 'train'
        self.model.train()
        
    def forward(self, images, targets) -> (torch.Tensor, torch.Tensor):
        images = images.cuda()
        masks = targets.cuda()

        if self.phase == 'train':
            with torch.set_grad_enabled(True):
                outputs = self.model(images)
                loss = self.criterion(outputs, masks)
        else:
            with torch.no_grad():
                outputs = self.model(images)
                loss = self.criterion(outputs, masks)
        return loss, outputs

    def step(self):
        self.train()
        self.optimizer.zero_grad()
        all_loss = []
        for inputs, labels in self.train_dl:
            with torch.set_grad_enabled(True):
                loss, outputs = self.forward(inputs, labels)
                loss.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()
                all_loss.append(loss.data.cpu().item())
                
        print(f'Epoch {self.current_epoch}: train loss={np.mean(all_loss):.4f}')
        selection_metric = np.mean(all_loss)
        if selection_metric <= self.best_metric:
            print(f'>>> Saving best model metric={selection_metric:.4f} compared to previous best {self.best_metric:.4f}')
            self.best_metric = selection_metric
            checkpoint = {'model': self.model}
            os.makedirs('checkpoints', exist_ok=True)
            torch.save(checkpoint, 'checkpoints/best_model.pth')

    def start(self):
        self.current_epoch = 0
        for epoch in range(self.num_epochs):
            self.current_epoch = epoch
            self.step()

In [20]:
from model1.model import create_model, get_trainable_params
import torch.optim as optim
import torch.nn as nn
def re_train(metadata, id2faces, model, epoch = 5):
    ds = Simple_DFDCDataset(metadata, id2faces, get_transforms())
    print("ds", len(ds))
    loader = DataLoader(ds, batch_size=32, num_workers=1, shuffle=True, drop_last=False)
    print("loader", len(loader))
    criterion = nn.BCEWithLogitsLoss()
    model.cuda()
    params_to_update = get_trainable_params(model)
    optimizer = optim.Adam(params_to_update, lr = 1e-5, weight_decay= 1e-5)
    
    trainer = Simple_Trainer(loader, model, optimizer, criterion, epoch)
    trainer.start()
    return Model1DDP('checkpoints/best_model.pth')

In [21]:
history_df = []
pseudo_pd = pred_df.copy()
model = models[0].model
for _ in range(5):
    print("start round ", _)
    metadata = pseudo_pd.sort_values("label")
    metadata = metadata.drop(metadata[len(metadata)//4:-len(metadata)//4].index)
    metadata.loc[:len(metadata)//2, "label"] = 0
    metadata.loc[len(metadata)//2:, "label"] = 1
    metadata = metadata.sort_index()
    test_model = re_train(metadata, id2faces, model, epoch = 10)
    pred = predict_on_all(id2faces, [test_model]).sort_index()
    if not kaggle:
        print("new acc  ------->", ((pred["label"]>0.5)== real_labels).mean())
        print("new loss", log_loss(real_labels, pred["label"], labels=[0,1]))
        print("new confidence", 1/(pred["label"].mean()))
    pseudo_pd["label"] = pseudo_pd["label"]*0.8 + pred["label"]*0.2
    history_df.append(pred)
    model = test_model.model
# pred_df.loc[:len(pred_df)//2, "label"] = 0
# for _ in range(3):
    
#     re_train()

start round  0
ds 400
loader 13
number layers to learn: 418
train_dl 13
Epoch 0: train loss=0.4412
>>> Saving best model metric=0.4412 compared to previous best 1000000000.0000
Epoch 1: train loss=0.4318
>>> Saving best model metric=0.4318 compared to previous best 0.4412
Epoch 2: train loss=0.4549
Epoch 3: train loss=0.5391
Epoch 4: train loss=0.4103
>>> Saving best model metric=0.4103 compared to previous best 0.4318
Epoch 5: train loss=0.3578
>>> Saving best model metric=0.3578 compared to previous best 0.4103
Epoch 6: train loss=0.4732
Epoch 7: train loss=0.3887
Epoch 8: train loss=0.3877
Epoch 9: train loss=0.4435
new acc  -------> 0.7975
new loss 0.43313711036589525
new confidence 1.9272541770230465
start round  1
ds 400
loader 13
number layers to learn: 418
train_dl 13
Epoch 0: train loss=0.4354
>>> Saving best model metric=0.4354 compared to previous best 1000000000.0000
Epoch 1: train loss=0.3733
>>> Saving best model metric=0.3733 compared to previous best 0.4354
Epoch 2: tra