In [None]:
import os
import cv2
import torch
import numpy as np
from torchvision import models, transforms
from torch.utils.data import DataLoader, ConcatDataset, Subset
import torch.nn as nn

In [32]:
# Pre-trained ResNet model for feature extraction
def setup_resnet():
    """Build an ImageNet-pretrained ResNet-18 that outputs pooled features.

    The final fully connected layer is replaced with ``nn.Identity`` so a
    forward pass returns the vector that used to feed the classifier. The
    network is moved to the module-level ``device`` and switched to eval
    mode before being returned.

    Returns:
        tuple: ``(resnet, in_features)`` where ``in_features`` is the input
        width of the removed ``fc`` layer, i.e. the length of each feature
        vector the returned network produces.
    """
    # Newer torchvision releases deprecate ``pretrained=True`` in favour of
    # an explicit weights enum; IMAGENET1K_V1 is the checkpoint the legacy
    # flag loads. Older releases have no enum, so fall back to the flag.
    weights_enum = getattr(models, "ResNet18_Weights", None)
    if weights_enum is not None:
        resnet = models.resnet18(weights=weights_enum.IMAGENET1K_V1)
    else:
        resnet = models.resnet18(pretrained=True)

    in_features = resnet.fc.in_features  # Save in_features before replacing fc
    resnet.fc = torch.nn.Identity()  # Remove the classification layer
    resnet = resnet.to(device)
    resnet.eval()
    return resnet, in_features

def extract_video_features(video_path, frame_rate=5, frames_count=10):
    """Extract a fixed-length sequence of ResNet features from a video.

    Frames are read sequentially and every ``interval``-th frame is kept,
    where ``interval`` is derived from the container's reported FPS so that
    roughly ``frame_rate`` frames per second are sampled. Each kept frame is
    converted BGR -> RGB, passed through the module-level ``transform`` and
    ``resnet``, and stored as a NumPy vector.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        frame_rate: Target number of sampled frames per second of video.
        frames_count: Number of feature vectors to return.

    Returns:
        numpy.ndarray: Array of ``frames_count`` stacked feature vectors.
        If the video yields fewer sampled frames (including when it cannot
        be read at all), the tail is padded with zero vectors of length
        ``in_features``.
    """
    capture = cv2.VideoCapture(video_path)
    frame_features = []
    try:
        video_fps = capture.get(cv2.CAP_PROP_FPS)
        # Clamp to at least 1: when the video's FPS is lower than
        # ``frame_rate`` the integer division truncates to 0, and
        # ``frame_index % 0`` would raise ZeroDivisionError.
        interval = max(1, int(video_fps / frame_rate)) if video_fps > 0 else 1
        frame_index = 0

        while len(frame_features) < frames_count:
            ret, frame = capture.read()
            if not ret:
                break
            if frame_index % interval == 0:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame = transform(frame).unsqueeze(0).to(device)
                with torch.no_grad():
                    feature = resnet(frame).squeeze(0).cpu().numpy()
                frame_features.append(feature)
            frame_index += 1
    finally:
        # Release the decoder even if preprocessing or inference raises.
        capture.release()

    # Pad with zeros in the same dtype as the extracted features so that
    # np.stack does not silently upcast the whole array.
    pad_dtype = frame_features[0].dtype if frame_features else np.float32
    while len(frame_features) < frames_count:
        frame_features.append(np.zeros(in_features, dtype=pad_dtype))

    return np.stack(frame_features)

# Training clip directories, one per class
truth_video_path = '/usr3/graduate/uyhuang/.cache/kagglehub/datasets/kambingbersayaphitam/truthorlie/versions/3/TruthOrLie/train/truth'
lie_video_path = '/usr3/graduate/uyhuang/.cache/kagglehub/datasets/kambingbersayaphitam/truthorlie/versions/3/TruthOrLie/train/lie'

# Run on the GPU when one is visible to PyTorch, otherwise on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Frozen backbone plus the length of the feature vectors it emits
resnet, in_features = setup_resnet()

# Per-frame preprocessing: ndarray -> PIL -> 224x224 -> tensor -> normalised.
# The mean/std are the per-channel statistics conventionally used with
# torchvision's ImageNet-pretrained models.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]
preprocessing_steps = [
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
transform = transforms.Compose(preprocessing_steps)

# Feature dataset for training
class FeatureDataset(torch.utils.data.Dataset):
    """Dataset of per-video feature sequences sharing a single label.

    Every ``.mp4`` file directly inside ``video_path`` is one sample. A
    sample is the output of ``extract_video_features`` converted to a
    float32 tensor, paired with the ``label`` given at construction.

    Args:
        video_path: Directory that is scanned (non-recursively) for videos.
        label: Label returned with every sample of this dataset.
    """

    def __init__(self, video_path, label):
        self.video_path = video_path
        self.label = label
        # os.listdir returns entries in arbitrary order; sorting makes
        # index-based splits (e.g. Subset(range(...))) identical on every
        # run and machine.
        self.video_files = [
            os.path.join(video_path, name)
            for name in sorted(os.listdir(video_path))
            if name.endswith('.mp4')
        ]
        # Features depend only on the file, so compute them once instead of
        # decoding the video and running the backbone again on every epoch.
        # The cache is per-process: DataLoader workers each keep their own.
        self._feature_cache = {}

    def __len__(self):
        return len(self.video_files)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for the video at position ``idx``.

        The returned tensor is shared with the cache; treat it as read-only.
        """
        video_file = self.video_files[idx]
        features = self._feature_cache.get(video_file)
        if features is None:
            features = torch.tensor(
                extract_video_features(video_file), dtype=torch.float32
            )
            self._feature_cache[video_file] = features
        return features, self.label

# One dataset per class: truth clips are labelled 1, lie clips 0
truth_dataset = FeatureDataset(truth_video_path, label=1)
lie_dataset = FeatureDataset(lie_video_path, label=0)

# Within each class the first 70% of clips (by index) are used for training
# and the remainder for validation
truth_total = len(truth_dataset)
lie_total = len(lie_dataset)
truth_train_split = int(0.7 * truth_total)
lie_train_split = int(0.7 * lie_total)

truth_training_data = Subset(truth_dataset, range(0, truth_train_split))
lie_training_data = Subset(lie_dataset, range(0, lie_train_split))
truth_validation_data = Subset(truth_dataset, range(truth_train_split, truth_total))
lie_validation_data = Subset(lie_dataset, range(lie_train_split, lie_total))

# Merge the per-class pieces into a single training and validation set
training_data = ConcatDataset([truth_training_data, lie_training_data])
validation_data = ConcatDataset([truth_validation_data, lie_validation_data])

# Only the training loader shuffles; validation order stays fixed
batch_size = 16
train_loader = DataLoader(training_data, shuffle=True, batch_size=batch_size)
validation_loader = DataLoader(validation_data, shuffle=False, batch_size=batch_size)

# Define the model
class VideoClassifier(nn.Module):
    """Linear classifier over the mean of a video's per-frame features.

    Args:
        feature_dim: Length of each per-frame feature vector. When ``None``
            (the default) the module-level ``in_features`` is used.
        num_classes: Number of output logits.
        dropout: Dropout probability applied before the linear layer.
    """

    def __init__(self, feature_dim=None, num_classes=2, dropout=0.4):
        super().__init__()
        if feature_dim is None:
            feature_dim = in_features  # Use in_features from ResNet
        # Kept as an attribute for callers that reference it; it is not
        # applied in forward().
        self.feature_extractor = nn.Identity()
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(feature_dim, num_classes)
        )

    def forward(self, inputs):
        """Average features over the frame axis and classify the result.

        Args:
            inputs: Tensor of shape ``(batch, num_frames, feature_size)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding the logits.

        Raises:
            ValueError: If ``inputs`` is not 3-dimensional.
        """
        if inputs.dim() != 3:
            raise ValueError(
                "expected inputs of shape (batch, num_frames, feature_size), "
                f"got {tuple(inputs.shape)}"
            )
        features = inputs.mean(dim=1)
        return self.classifier(features)

# Training configuration
# Seed PyTorch before the model is created so weight initialisation, dropout
# masks and DataLoader shuffling repeat across runs (GPU kernels may still
# introduce small nondeterminism).
SEED = 42
torch.manual_seed(SEED)

model = VideoClassifier().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=5e-3)
loss_fn = nn.CrossEntropyLoss()

# Training loop
num_epochs = 20
patience = 5  # epochs without validation-loss improvement before stopping
best_val_loss = float('inf')
epochs_no_improve = 0
best_model_state = None  # snapshot of the weights at the best validation loss

for epoch in range(num_epochs):
    model.train()
    epoch_train_loss = 0.0
    for frames_batch, labels_batch in train_loader:
        frames_batch, labels_batch = frames_batch.to(device), labels_batch.to(device)
        optimizer.zero_grad()
        outputs = model(frames_batch)
        loss = loss_fn(outputs, labels_batch)
        loss.backward()
        optimizer.step()
        epoch_train_loss += loss.item()

    # Reported loss is the mean of the per-batch mean losses
    print(f"Epoch {epoch + 1}/{num_epochs}, Training Loss: {epoch_train_loss / len(train_loader):.4f}")

    # Validation step
    model.eval()
    val_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    with torch.no_grad():
        for frames_batch, labels_batch in validation_loader:
            frames_batch, labels_batch = frames_batch.to(device), labels_batch.to(device)
            outputs = model(frames_batch)
            loss = loss_fn(outputs, labels_batch)
            val_loss += loss.item()
            _, predicted_labels = torch.max(outputs, dim=1)
            correct_predictions += (predicted_labels == labels_batch).sum().item()
            total_samples += labels_batch.size(0)

    val_accuracy = correct_predictions / total_samples * 100
    val_loss_avg = val_loss / len(validation_loader)
    print(f"Validation Loss: {val_loss_avg:.4f}, Accuracy: {val_accuracy:.2f}%")

    # Early stopping
    if val_loss_avg < best_val_loss:
        best_val_loss = val_loss_avg
        epochs_no_improve = 0
        # state_dict() returns references to the live parameters, so clone
        # them; otherwise later optimizer steps would overwrite the snapshot.
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
    else:
        epochs_no_improve += 1

    if epochs_no_improve >= patience:
        print("Early stopping")
        break

# Leave `model` holding the weights that achieved the best validation loss
# rather than whatever the last epoch produced.
if best_model_state is not None:
    model.load_state_dict(best_model_state)


Epoch 1/20, Training Loss: 0.7402
Validation Loss: 0.6669, Accuracy: 54.25%
Epoch 2/20, Training Loss: 0.6855
Validation Loss: 0.6362, Accuracy: 61.13%
Epoch 3/20, Training Loss: 0.6503
Validation Loss: 0.6104, Accuracy: 65.18%
Epoch 4/20, Training Loss: 0.6260
Validation Loss: 0.5878, Accuracy: 68.42%
Epoch 5/20, Training Loss: 0.6153
Validation Loss: 0.5670, Accuracy: 69.23%
Epoch 6/20, Training Loss: 0.5999
Validation Loss: 0.5488, Accuracy: 72.87%
Epoch 7/20, Training Loss: 0.5819
Validation Loss: 0.5329, Accuracy: 75.71%
Epoch 8/20, Training Loss: 0.5603
Validation Loss: 0.5162, Accuracy: 77.33%
Epoch 9/20, Training Loss: 0.5410
Validation Loss: 0.5019, Accuracy: 79.35%
Epoch 10/20, Training Loss: 0.5266
Validation Loss: 0.4885, Accuracy: 80.57%
Epoch 11/20, Training Loss: 0.4934
Validation Loss: 0.4766, Accuracy: 80.97%
Epoch 12/20, Training Loss: 0.5213
Validation Loss: 0.4685, Accuracy: 80.16%
Epoch 13/20, Training Loss: 0.5054
Validation Loss: 0.4597, Accuracy: 82.19%
Epoch 14

In [39]:
# Held-out test clips, one directory per class
test_truth_video_path = '/usr3/graduate/uyhuang/.cache/kagglehub/datasets/kambingbersayaphitam/truthorlie/versions/3/TruthOrLie/test/truth'
test_lie_video_path = '/usr3/graduate/uyhuang/.cache/kagglehub/datasets/kambingbersayaphitam/truthorlie/versions/3/TruthOrLie/test/lie'

# Same labelling as for training: truth -> 1, lie -> 0
test_truth_dataset = FeatureDataset(test_truth_video_path, label=1)
test_lie_dataset = FeatureDataset(test_lie_video_path, label=0)

# A single unshuffled loader over both classes, truth clips first
test_parts = [test_truth_dataset, test_lie_dataset]
test_dataset = ConcatDataset(test_parts)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

def evaluate_model(model, dataloader):
    """Print the classification accuracy of ``model`` over ``dataloader``.

    The model is switched to eval mode (and left there) and gradients are
    disabled while iterating. A prediction is the index of the largest
    output along dimension 1.

    Args:
        model: Module called on each batch of inputs.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor pairs;
            both are moved to the module-level ``device``.

    Returns:
        None. The accuracy is printed as a percentage, or as ``N/A`` when
        the dataloader yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for frames_batch, labels_batch in dataloader:
            frames_batch, labels_batch = frames_batch.to(device), labels_batch.to(device)
            outputs = model(frames_batch)
            _, predicted = torch.max(outputs, 1)
            total += labels_batch.size(0)
            correct += (predicted == labels_batch).sum().item()

    # An empty loader would otherwise divide by zero below.
    if total == 0:
        print("Accuracy: N/A (no samples)")
        return
    print(f"Accuracy: {100 * correct / total:.2f}%")
    
# Print accuracy for each split, in order: training, validation, test
for split_loader in (train_loader, validation_loader, test_loader):
    evaluate_model(model, split_loader)

Accuracy: 87.28%
Accuracy: 86.64%
Accuracy: 81.25%
