In [4]:
import easydict
import os
import sys
from PIL import Image
import tqdm
import shutil

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision.transforms as transforms
import torch.utils.data as data
from torchvision import transforms
import cv2
from glob import glob
import pandas as pd

from PIL import Image
from tqdm import tqdm
import dlib
from sklearn.model_selection import train_test_split

ModuleNotFoundError: No module named 'dlib'

In [None]:
# Raw dataset roots. Training videos are globbed two levels below TRAIN_PATH.
TRAIN_PATH = '/mnt/elice/dataset/train'
TEST_PATH = '/mnt/elice/dataset/test'

# Index every raw training video; the parent directory name is used as its label.
train_files = glob(TRAIN_PATH+'/*/*')
train_files.sort()
labels = list(map(lambda video_path: video_path.split('/')[-2], train_files))
train_df = pd.DataFrame(dict(path=train_files, label=labels))

## 1) 전처리

In [None]:
# Root of the raw input dataset and root of the preprocessed output videos
RAW_PATH = "/mnt/elice/dataset"
PREPROCESSED_PATH = "/home/elicer/data"

# Preprocessing of the train data: the helper functions are defined below

def preprocess_worker(path):
    """Crop one video around its largest detected face and re-encode it at 224x224.

    Every frame is first rescaled vertically by 10/16. Frames are read until the
    Haar cascade reports at least one face; the largest detection (by area),
    enlarged by 20% around its centre, becomes a fixed crop window that is
    applied to the following frames (at most 600 of them). Frames consumed
    while searching for the face are not written to the output.

    Args:
        path: ``(source_video_path, destination_video_path)`` pair.

    Returns:
        None. Any failure is reported on stdout and the video is skipped, so
        one bad file does not abort a whole batch.
    """
    global face_classifier

    cap = None
    output = None
    try:
        # The cascade is cached in a module-level global so it is built only
        # once per interpreter instead of once per video.
        if "face_classifier" not in globals():
            face_classifier = cv2.CascadeClassifier(
                cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
            )

        cap = cv2.VideoCapture(path[0])
        output = cv2.VideoWriter(path[1], cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), cap.get(cv2.CAP_PROP_FPS), (224, 224))

        # Read frames until at least one face is detected.
        area = []
        while len(area) == 0:
            ret, frame = cap.read()
            if not ret:
                # End of stream (or unreadable file): fail with a clear message
                # instead of passing ``None`` to cv2.resize.
                raise ValueError("no face detected before the end of the video")
            frame = cv2.resize(frame, None, fx=1, fy=10/16)
            face_region = face_classifier.detectMultiScale(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
            area = [w * h for _, _, w, h in face_region]

        # Keep the largest detection only.
        face_region = face_region[area.index(max(area))]

        # Enlarge the box around its centre and clamp the top-left corner to the frame.
        padding_ratio = 1.2
        x, y, w, h = face_region
        center_x, center_y = x + w // 2, y + h // 2
        w, h = w * padding_ratio, h * padding_ratio
        x, y = max(center_x - w // 2, 0) , max(center_y - h // 2, 0)

        # Maximum number of frames written per video
        frame_limit = 600
        frame_count = 0

        while cap.isOpened() and frame_count < frame_limit:
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.resize(frame, None, fx=1, fy=10/16)
            frame = frame[int(y):int(y + h), int(x):int(x + w)]
            frame = cv2.resize(frame, (224, 224))

            output.write(frame)
            frame_count += 1
    except Exception as e:
        # Deliberate best-effort: report and move on to the next video.
        print(f"Error occurred: {e}. Skipping video: {path[0]}")
    finally:
        # Always free the decoder/encoder handles, including on the error path.
        if cap is not None:
            cap.release()
        if output is not None:
            output.release()

def preprocess(data_type):
    """Run ``preprocess_worker`` in parallel over every video of one dataset split.

    Input videos are read from ``RAW_PATH/<data_type>/<label>`` and the results
    are written under the same relative path below ``PREPROCESSED_PATH``.

    Args:
        data_type: Name of the split directory. ``"train"`` is processed per
            label folder (``fake`` and ``real``); any other split is processed
            as a single flat directory.
    """
    # Local import: the notebook's import cell does not bring `mp` into scope.
    import multiprocessing as mp

    if data_type == "train":
        candidate = ["fake", "real"]
    else:
        candidate = ["."]

    # Leave a few cores free, but never request fewer than one worker
    # (Pool raises ValueError for a non-positive process count).
    n_workers = max(1, mp.cpu_count() - 4)

    for label in candidate:
        raw_path = os.path.join(RAW_PATH, data_type, label)
        preprocessed_path = os.path.join(PREPROCESSED_PATH, data_type, label)
        os.makedirs(preprocessed_path, exist_ok=True)

        # Sorted so the processing order is deterministic across runs.
        raw_paths = [os.path.join(raw_path, name) for name in sorted(os.listdir(raw_path))]
        preprocess_paths = [os.path.join(preprocessed_path, os.path.basename(src)) for src in raw_paths]

        with mp.Pool(n_workers) as pool:
            # list(...) drains the lazy imap iterator so every video is processed.
            list(tqdm(pool.imap(preprocess_worker, zip(raw_paths, preprocess_paths)), total=len(raw_paths)))

# Run the preprocessing on the train split
preprocess("train")

In [None]:
# TRAIN_PATH is re-pointed at the preprocessed (face-cropped) training videos.
TRAIN_PATH = '/home/elicer/data/train'

# One row per preprocessed video; the parent directory name is used as its label.
train_crop_files = glob(TRAIN_PATH+'/*/*')
train_crop_files.sort()
labels_crop = list(map(lambda clip_path: clip_path.split('/')[-2], train_crop_files))
train_crop_df = pd.DataFrame(dict(path=train_crop_files, label=labels_crop))
train_crop_df

In [None]:
# Hold out 20% of the preprocessed videos as val_data; random_state fixes the shuffle so the split is reproducible.
train_data, val_data = train_test_split(train_crop_df, test_size=0.2, random_state=42)

In [None]:
import cv2
import os
from tqdm import tqdm

# train_data holds one row per preprocessed video: 'path' is the video file
# and 'label' is the class name string ('fake' or 'real').

output_folder = '/home/elicer/img_data'  # root folder for the extracted frames
FRAME_STRIDE = 10  # keep one frame out of every FRAME_STRIDE frames


def _extract_frames(video_path, label_folder, stride=FRAME_STRIDE):
    """Save every `stride`-th frame of `video_path` as a JPEG in `label_folder`.

    Images are named ``<video name up to the first dot>_<i>.jpg``.

    Args:
        video_path: Path of the video to sample.
        label_folder: Existing directory that receives the images.
        stride: Number of decoded frames per saved image.

    Returns:
        Number of images written.
    """
    video_stem = os.path.basename(video_path).split('.')[0]
    cap = cv2.VideoCapture(video_path)
    saved = 0
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        for i in range(total_frames // stride):
            # Decode `stride` frames and keep only the last one of the group.
            for _ in range(stride):
                ret, frame = cap.read()
                if not ret:
                    break
            if not ret:
                # Fewer frames could be decoded than the container reported.
                break
            cv2.imwrite(os.path.join(label_folder, f"{video_stem}_{i}.jpg"), frame)
            saved += 1
    finally:
        # Release the decoder even if reading or writing fails.
        cap.release()
    return saved


for index, row in tqdm(train_data.iterrows(), total=len(train_data), desc="Processing Videos"):
    video_path = row['path']
    label = row['label']

    # Only .mp4 files are sampled.
    if not os.path.basename(video_path).endswith('.mp4'):
        continue

    # One sub-folder per class.
    label_folder = os.path.join(output_folder, 'fake' if label == 'fake' else 'real')
    os.makedirs(label_folder, exist_ok=True)

    _extract_frames(video_path, label_folder)

In [None]:
# TRAIN_PATH is re-pointed at the extracted frame images.
TRAIN_PATH = '/home/elicer/img_data'

# One row per image; the parent directory name is used as its label.
img_file = glob(TRAIN_PATH+'/*/*')
img_file.sort()
labels_img = list(map(lambda image_path: image_path.split('/')[-2], img_file))
img_df = pd.DataFrame(dict(path=img_file, label=labels_img))
img_df

In [None]:
# Encode the 'label' column as integers: 'fake' -> 0, 'real' -> 1
mapping = dict(fake=0, real=1)
img_df['label'] = img_df['label'].replace(to_replace=mapping)
img_df

## 2) 모델 생성

In [None]:
# Run configuration: data-loading workers, optimisation settings and the
# file name used for the saved checkpoint.
args = easydict.EasyDict(dict(
    num_workers=32,

    learning_rate=0.001,
    num_epochs=1,
    batch_size=32,

    save_fn="deepfake_c0_xception_tuned.pth.tar",
))

In [None]:
"""
Author: Andreas Rössler,
Implemented in https://github.com/ondyari/FaceForensics under MIT license
"""


class SeparableConv2d(nn.Module):
    """Depthwise-separable 2-D convolution.

    A per-channel (grouped) convolution with the requested kernel geometry is
    followed by a 1x1 convolution that maps ``in_channels`` to ``out_channels``.
    """

    def __init__(self,in_channels,out_channels,kernel_size=1,stride=1,padding=0,dilation=1,bias=False):
        super().__init__()

        # Depthwise stage: groups == in_channels gives every channel its own filter.
        self.conv1 = nn.Conv2d(
            in_channels, in_channels,
            kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation,
            groups=in_channels, bias=bias,
        )
        # Pointwise stage: 1x1 convolution that mixes the channels.
        self.pointwise = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=1, stride=1, padding=0, dilation=1, groups=1, bias=bias,
        )

    def forward(self,x):
        return self.pointwise(self.conv1(x))


class Block(nn.Module):
    """Residual block built from (ReLU -> SeparableConv2d -> BatchNorm2d) units.

    Args:
        in_filters: Number of input channels.
        out_filters: Number of output channels.
        reps: Number of separable-conv units in the main path.
        strides: When != 1, a max-pool with this stride ends the main path and
            the skip path uses a strided 1x1 convolution.
        start_with_relu: If False, the leading ReLU of the main path is dropped.
        grow_first: If True the channel change (in_filters -> out_filters) is
            done by the first unit, otherwise by the last one.

    NOTE: the positions of the modules inside ``self.rep`` determine the
    state_dict keys, so the construction order below must not change if
    existing checkpoints are to remain loadable.
    """
    def __init__(self,in_filters,out_filters,reps,strides=1,start_with_relu=True,grow_first=True):
        super(Block, self).__init__()

        # The skip path needs a 1x1 projection whenever the main path changes
        # the channel count or the spatial resolution.
        if out_filters != in_filters or strides!=1:
            self.skip = nn.Conv2d(in_filters,out_filters,1,stride=strides, bias=False)
            self.skipbn = nn.BatchNorm2d(out_filters)
        else:
            self.skip=None

        # A single in-place ReLU instance is reused by every unit of the main path.
        self.relu = nn.ReLU(inplace=True)
        rep=[]

        filters=in_filters
        if grow_first:
            # First unit performs the channel change; later units keep out_filters.
            rep.append(self.relu)
            rep.append(SeparableConv2d(in_filters,out_filters,3,stride=1,padding=1,bias=False))
            rep.append(nn.BatchNorm2d(out_filters))
            filters = out_filters

        # Remaining units keep the channel count unchanged.
        for i in range(reps-1):
            rep.append(self.relu)
            rep.append(SeparableConv2d(filters,filters,3,stride=1,padding=1,bias=False))
            rep.append(nn.BatchNorm2d(filters))

        if not grow_first:
            # Channel change is done by the last unit instead of the first.
            rep.append(self.relu)
            rep.append(SeparableConv2d(in_filters,out_filters,3,stride=1,padding=1,bias=False))
            rep.append(nn.BatchNorm2d(out_filters))

        if not start_with_relu:
            # Drop the leading ReLU.
            rep = rep[1:]
        else:
            # Replace the leading in-place ReLU with a non in-place one;
            # presumably so that `inp`, which forward() also feeds to the skip
            # path, is not overwritten.
            rep[0] = nn.ReLU(inplace=False)

        if strides != 1:
            # Spatial down-sampling at the end of the main path.
            rep.append(nn.MaxPool2d(3,strides,1))
        self.rep = nn.Sequential(*rep)

    def forward(self,inp):
        """Return main_path(inp) + skip(inp)."""
        x = self.rep(inp)

        if self.skip is not None:
            skip = self.skip(inp)
            skip = self.skipbn(skip)
        else:
            skip = inp

        # In-place residual addition.
        x+=skip
        return x


class Xception(nn.Module):
    """Xception backbone: two plain convolutions, twelve ``Block`` stages and
    two separable convolutions, followed by a linear classifier.

    The classifier is created as ``self.fc``. Wrappers may rename it to
    ``self.last_linear`` (the ``xception`` wrapper in this notebook does);
    ``logits`` uses ``last_linear`` when it exists and falls back to ``fc``
    otherwise, so the class also works when used on its own.

    Args:
        num_classes: Number of outputs of the final linear layer.
    """

    def __init__(self, num_classes=1000):
        super(Xception, self).__init__()
        self.num_classes = num_classes

        # Stem
        self.conv1 = nn.Conv2d(3,32,3,2,0,bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(32,64,3,bias=False)
        self.bn2 = nn.BatchNorm2d(64)

        # Down-sampling blocks (stride 2) that widen the feature maps to 728 channels
        self.block1=Block(64,128,2,2,start_with_relu=False,grow_first=True)
        self.block2=Block(128,256,2,2,start_with_relu=True,grow_first=True)
        self.block3=Block(256,728,2,2,start_with_relu=True,grow_first=True)

        # Eight identical stride-1 blocks at 728 channels
        self.block4=Block(728,728,3,1,start_with_relu=True,grow_first=True)
        self.block5=Block(728,728,3,1,start_with_relu=True,grow_first=True)
        self.block6=Block(728,728,3,1,start_with_relu=True,grow_first=True)
        self.block7=Block(728,728,3,1,start_with_relu=True,grow_first=True)

        self.block8=Block(728,728,3,1,start_with_relu=True,grow_first=True)
        self.block9=Block(728,728,3,1,start_with_relu=True,grow_first=True)
        self.block10=Block(728,728,3,1,start_with_relu=True,grow_first=True)
        self.block11=Block(728,728,3,1,start_with_relu=True,grow_first=True)

        # Final down-sampling block; the channel change happens in its last unit
        self.block12=Block(728,1024,2,2,start_with_relu=True,grow_first=False)

        self.conv3 = SeparableConv2d(1024,1536,3,1,1)
        self.bn3 = nn.BatchNorm2d(1536)

        self.conv4 = SeparableConv2d(1536,2048,3,1,1)
        self.bn4 = nn.BatchNorm2d(2048)

        self.fc = nn.Linear(2048, num_classes)

    def features(self, input):
        """Return the 2048-channel feature map (before the final ReLU and pooling)."""
        x = self.conv1(input)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)

        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)
        x = self.block6(x)
        x = self.block7(x)
        x = self.block8(x)
        x = self.block9(x)
        x = self.block10(x)
        x = self.block11(x)
        x = self.block12(x)

        x = self.conv3(x)
        x = self.bn3(x)
        x = self.relu(x)

        x = self.conv4(x)
        x = self.bn4(x)
        return x

    def logits(self, features):
        """Apply ReLU, global average pooling and the classifier to `features`.

        Note that the ReLU is in-place, so `features` itself is modified.
        """
        x = self.relu(features)

        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = x.view(x.size(0), -1)
        # `last_linear` only exists once a wrapper has renamed `fc`; fall back
        # to `fc` so a bare Xception instance does not raise AttributeError.
        head = self.last_linear if hasattr(self, 'last_linear') else self.fc
        x = head(x)
        return x

    def forward(self, input):
        """Return class scores for a batch of images."""
        x = self.features(input)
        x = self.logits(x)
        return x


## Same Xception backbone; the only addition is a Dropout layer before the classifier
class xception(nn.Module):
    """Xception backbone with a freshly initialised classification head.

    Args:
        num_out_classes: Number of output classes.
        dropout: Dropout probability applied before the final linear layer;
            a falsy value (``0``/``None``) yields a plain linear head.
    """

    def __init__(self, num_out_classes=2, dropout=0.5):
        super().__init__()

        backbone = Xception(num_classes=num_out_classes)

        # Rename the classifier attribute from `fc` to `last_linear`.
        backbone.last_linear = backbone.fc
        del backbone.fc

        # Replace it with a new head of the same input width.
        num_ftrs = backbone.last_linear.in_features
        head = nn.Linear(num_ftrs, num_out_classes)
        if dropout:
            head = nn.Sequential(nn.Dropout(p=dropout), head)
        backbone.last_linear = head

        self.model = backbone

    def forward(self, x):
        return self.model(x)

In [None]:
def _xception_pipeline(augment=False):
    """Build ToTensor -> Resize(224x224) [-> RandomHorizontalFlip] -> Normalize.

    The random horizontal flip is inserted only when `augment` is True.
    """
    steps = [transforms.ToTensor(), transforms.Resize((224, 224))]
    if augment:
        steps.append(transforms.RandomHorizontalFlip())
    steps.append(transforms.Normalize([0.5]*3, [0.5]*3))
    return transforms.Compose(steps)


# Per-split preprocessing; only the 'train' pipeline applies the random flip.
xception_default = {
    'train': _xception_pipeline(augment=True),
    'valid': _xception_pipeline(),
    'test': _xception_pipeline(),
}

In [None]:
# util

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar', best_filename='model_best.pth.tar'):
    """Serialise `state` to `filename` and optionally keep a copy as the best model.

    Args:
        state: Object to serialise with ``torch.save``.
        is_best: When true, the saved file is also copied to `best_filename`.
        filename: Destination of the checkpoint.
        best_filename: Destination of the "best model" copy. Defaults to
            ``'model_best.pth.tar'`` in the current working directory.
    """
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, best_filename)

def adjust_learning_rate(optimizer, epoch, args):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs

    Args:
        optimizer: Object exposing ``param_groups`` (a list of dicts with an
            ``'lr'`` entry), e.g. a torch optimizer.
        epoch: Current epoch index.
        args: Configuration object. The initial learning rate is read from
            ``args.lr`` when present, otherwise from ``args.learning_rate``
            (the name used by this notebook's ``args``).
    """
    base_lr = args.lr if hasattr(args, 'lr') else args.learning_rate
    lr = base_lr * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [None]:
class DFDCDatatset(data.Dataset):
    """Image dataset backed by a dataframe with 'path' and 'label' columns.

    Args:
        dataframe: Table with one row per image; 'path' is the image file and
            'label' must be convertible to ``int``.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def _load_image(self, image_path):
        """Load `image_path` as an RGB PIL image."""
        # The context manager closes the file handle once the pixels are converted.
        with Image.open(image_path) as img:
            return img.convert('RGB')

    def __getitem__(self, index):
        """Return ``(image, label)`` for the row at position `index`."""
        # Read both fields from this dataset's own dataframe, not from a global,
        # so the image always matches the label.
        row = self.dataframe.iloc[index]
        label = int(row['label'])
        image = self._load_image(row['path'])

        if self.transform is not None:
            image = self.transform(image)

        return image, label

    def __len__(self):
        return len(self.dataframe)

In [None]:
# train / validate

def train(train_loader, model, criterion, optimizer, epoch, scheduler=None):
    """Run one training epoch and step the learning-rate scheduler once.

    Args:
        train_loader: Sized iterable yielding ``(images, target)`` batches.
        model: Network to optimise; batches are moved to the device of its
            parameters.
        criterion: Loss called as ``criterion(outputs, target)``.
        optimizer: Optimizer updating ``model``'s parameters.
        epoch: Epoch index (not used by the function; kept for the existing
            call signature).
        scheduler: Optional LR scheduler stepped after the epoch. When omitted,
            a notebook-level ``scheduler`` variable is used if one exists.

    Returns:
        ``(epoch_loss, epoch_acc)`` averaged over all seen samples, or
        ``(0.0, 0.0)`` when the loader is empty.
    """
    if scheduler is None:
        # Backward compatibility: earlier versions relied on a global `scheduler`.
        scheduler = globals().get("scheduler")

    device = next(model.parameters()).device

    n = 0
    running_loss = 0.0
    running_corrects = 0

    model.train()

    with tqdm(train_loader, total=len(train_loader), desc="Train", file=sys.stdout) as iterator:
        for images, target in iterator:
            images = images.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)

            outputs = model(images)
            loss = criterion(outputs, target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Predictions are only used for the running accuracy, so detach them.
            pred = outputs.detach().argmax(dim=1)

            batch_size = images.size(0)
            n += batch_size
            running_loss += loss.item() * batch_size
            # .item() keeps the counter a plain Python int instead of a tensor.
            running_corrects += (pred == target).sum().item()

            log = 'loss - {:.4f}, acc - {:.3f}'.format(running_loss / n, running_corrects / n)
            iterator.set_postfix_str(log)

    if scheduler is not None:
        scheduler.step()

    if n == 0:
        return 0.0, 0.0
    return running_loss / n, running_corrects / n

In [None]:
model = xception(num_out_classes=2, dropout=0.5).cuda()
print("=> creating model '{}'".format('xception'))

# Pretrained weights used as the starting point for fine-tuning
fn = 'deepfake_c0_xception.pkl'
if not os.path.isfile(fn):
    raise FileNotFoundError("pretrained weights not found: '{}'".format(fn))

# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
# The tensors are loaded on the CPU first; load_state_dict then copies them
# into the parameters of the (CUDA) model.
model.load_state_dict(torch.load(fn, map_location='cpu'))
print("=> model weight '{}' is loaded".format(fn))

criterion = nn.CrossEntropyLoss().cuda()
# The learning rate comes from the shared configuration instead of a duplicated literal.
optimizer = optim.Adam(model.parameters(), lr=args.learning_rate, betas=(0.9, 0.999), eps=1e-08)
# Halve the learning rate every 5 scheduler steps.
scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

In [None]:
# Training dataset over img_df (image path + integer label), using the 'train' transform pipeline
train_dataset = DFDCDatatset(img_df, transform=xception_default["train"])

In [None]:
# Shuffled mini-batch loader over the training dataset; batch size and
# worker count come from `args`.
_train_loader_options = dict(
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=args.num_workers,
    pin_memory=True,
)
train_loader = torch.utils.data.DataLoader(train_dataset, **_train_loader_options)

In [None]:
## Train for a single epoch only, to limit overfitting
print('-' * 50)
print('Epoch {}/{}'.format(1, args.num_epochs))
train(train_loader, model, criterion, optimizer, 0)

# Validation only runs when a `validate` function and a `valid_loader` exist in
# the notebook namespace. Without this guard a NameError raised after training
# would prevent the checkpoint below from ever being written.
if 'validate' in globals() and 'valid_loader' in globals():
    acc = validate(valid_loader, model, criterion)
else:
    print("=> validate()/valid_loader are not defined; skipping validation")
    acc = None

save_checkpoint(state={'epoch': args.num_epochs + 1,
                       'state_dict': model.state_dict(),
                       'best_acc1': acc,
                       'optimizer': optimizer.state_dict(),},
                is_best=False,
                filename=args.save_fn,
                )