In [3]:
import os
import torch
from torchvision import transforms
from PIL import Image
from sklearn.model_selection import train_test_split

# Configuration
image_dir = '/Users/dr.ake/Documents/GitHub/Deepfake-AI-SUTD/processed_dataset_frame'  # Update this path
batch_size = 8

# Define the image transformations
im_size = 112
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.Resize((im_size, im_size)),  # Resize all images to a fixed size
    transforms.ToTensor(),                  # Convert images to PyTorch tensors
    transforms.Normalize(mean, std)         # Normalize the tensor images
])

def load_image(filepath):
    with Image.open(filepath) as img:
        return transform(img)

def prepare_data(root_dir):
    data, labels = [], []
    categories = {'real': 1, 'fake': 0}

    for category in ['real', 'fake']:
        dir_path = os.path.join(root_dir, category)
        video_dict = {}

        # Collect files by the first four digits of their names
        for file in os.listdir(dir_path):
            if file.endswith('.jpg'):
                video_id = file[:4]  # The first four digits of the filename
                if video_id not in video_dict:
                    video_dict[video_id] = []
                video_dict[video_id].append(os.path.join(dir_path, file))

        # Sort files, load images, and store sequences
        for video_id, video_files in video_dict.items():
            try:
                sorted_files = sorted(
                    video_files,
                    key=lambda x: int(os.path.basename(x).split('_frame')[1].split('.jpg')[0])
                )
            except ValueError as e:
                print(f"Error parsing file name from: {x}")
                continue  # Skip this file or handle it according to your policy

            loaded_images = [load_image(fp) for fp in sorted_files]
            data.append(torch.stack(loaded_images))
            labels.append(categories[category])

            # Print example sequence details
            print(f"Sequence ID: {video_id}")
            print(f"Category: {category}")
            print(f"Number of Frames: {len(sorted_files)}")
            print(f"Sample Frames: {sorted_files[:5]}")  # Print first 5 frame filenames for checking

    # Convert labels to tensor
    Y = torch.tensor(labels)
    
    # Split into train and test sets
    return train_test_split(data, Y, test_size=0.2)

# Usage
X_train, X_test, y_train, y_test = prepare_data(image_dir)


Sequence ID: 4159
Category: real
Number of Frames: 6
Sample Frames: ['/Users/dr.ake/Documents/GitHub/Deepfake-AI-SUTD/processed_dataset_frame/real/4159.mp4_frame0.jpg', '/Users/dr.ake/Documents/GitHub/Deepfake-AI-SUTD/processed_dataset_frame/real/4159.mp4_frame60.jpg', '/Users/dr.ake/Documents/GitHub/Deepfake-AI-SUTD/processed_dataset_frame/real/4159.mp4_frame120.jpg', '/Users/dr.ake/Documents/GitHub/Deepfake-AI-SUTD/processed_dataset_frame/real/4159.mp4_frame180.jpg', '/Users/dr.ake/Documents/GitHub/Deepfake-AI-SUTD/processed_dataset_frame/real/4159.mp4_frame240.jpg']
Sequence ID: 4994
Category: real
Number of Frames: 5
Sample Frames: ['/Users/dr.ake/Documents/GitHub/Deepfake-AI-SUTD/processed_dataset_frame/real/4994.mp4_frame0.jpg', '/Users/dr.ake/Documents/GitHub/Deepfake-AI-SUTD/processed_dataset_frame/real/4994.mp4_frame60.jpg', '/Users/dr.ake/Documents/GitHub/Deepfake-AI-SUTD/processed_dataset_frame/real/4994.mp4_frame120.jpg', '/Users/dr.ake/Documents/GitHub/Deepfake-AI-SUTD/pro

In [13]:
import csv
import os
import torch
from torchvision import transforms
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset

# Define image transformations
im_size = 112
transform = transforms.Compose([
    transforms.Resize((im_size, im_size)),  # Resize all images to a fixed size
    transforms.ToTensor(),                  # Convert images to PyTorch tensors
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

def load_image(filepath):
    with Image.open(filepath) as img:
        return transform(img)

def prepare_data(root_dir, csv_path):
    sequences, labels, metadata = [], [], []
    categories = {'real': 1, 'fake': 0}

    # Open a CSV file to save the metadata
    with open(csv_path, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['sequence_id', 'category', 'frame_count', 'sample_frames'])

        for category in ['real', 'fake']:
            dir_path = os.path.join(root_dir, category)
            video_dict = {}

            # Collect files by the first four digits of their names
            for file in os.listdir(dir_path):
                if file.endswith('.jpg'):
                    video_id = file[:4]  # The first four digits of the filename
                    if video_id not in video_dict:
                        video_dict[video_id] = []
                    video_dict[video_id].append(os.path.join(dir_path, file))

            # Sort files, load images, and store sequences
            for video_id, video_files in video_dict.items():
                try:
                    sorted_files = sorted(
                        video_files,
                        key=lambda x: int(os.path.basename(x).split('_frame')[1].split('.jpg')[0])
                    )
                except ValueError as e:
                    print(f"Error parsing file name from: {x}")
                    continue  # Skip this file or handle it according to your policy

                loaded_images = [load_image(fp) for fp in sorted_files]
                sequences.append(torch.stack(loaded_images))
                labels.append(categories[category])
                metadata_entry = {
                    'sequence_id': video_id,
                    'category': category,
                    'frame_count': len(sorted_files),
                    'sample_frames': '; '.join(sorted_files[:5])
                }
                metadata.append(metadata_entry)

                # Write to CSV
                writer.writerow([video_id, category, len(sorted_files), '; '.join(sorted_files[:5])])

                # Optionally print the metadata for verification
                print(metadata_entry)

    return sequences, labels, metadata

# Usage: Specify the path where you want to save the CSV
sequences, labels, metadata = prepare_data('/Users/dr.ake/Documents/GitHub/Deepfake-AI-SUTD/processed_dataset_frame', 'metadata.csv')


{'sequence_id': '4159', 'category': 'real', 'frame_count': 6, 'sample_frames': '/Users/dr.ake/Documents/GitHub/Deepfake-AI-SUTD/processed_dataset_frame/real/4159.mp4_frame0.jpg; /Users/dr.ake/Documents/GitHub/Deepfake-AI-SUTD/processed_dataset_frame/real/4159.mp4_frame60.jpg; /Users/dr.ake/Documents/GitHub/Deepfake-AI-SUTD/processed_dataset_frame/real/4159.mp4_frame120.jpg; /Users/dr.ake/Documents/GitHub/Deepfake-AI-SUTD/processed_dataset_frame/real/4159.mp4_frame180.jpg; /Users/dr.ake/Documents/GitHub/Deepfake-AI-SUTD/processed_dataset_frame/real/4159.mp4_frame240.jpg'}
{'sequence_id': '4994', 'category': 'real', 'frame_count': 5, 'sample_frames': '/Users/dr.ake/Documents/GitHub/Deepfake-AI-SUTD/processed_dataset_frame/real/4994.mp4_frame0.jpg; /Users/dr.ake/Documents/GitHub/Deepfake-AI-SUTD/processed_dataset_frame/real/4994.mp4_frame60.jpg; /Users/dr.ake/Documents/GitHub/Deepfake-AI-SUTD/processed_dataset_frame/real/4994.mp4_frame120.jpg; /Users/dr.ake/Documents/GitHub/Deepfake-AI-SU

In [23]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import pandas as pd
from PIL import Image
import os
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torchvision.models import resnext50_32x4d, ResNeXt50_32X4D_Weights

import torch
import torch.nn as nn

class CNN_for_DeepFake(nn.Module):
    def __init__(self, dropout_rate=0.5, fc_units=1024):
        super(CNN_for_DeepFake, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.act1 = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout1 = nn.Dropout(dropout_rate)
        
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.act2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout_rate)

        # Adjust this based on your input image size post pooling
        self.fc1 = nn.Linear(in_features=64 * 56 * 56, out_features=fc_units)
        self.fc2 = nn.Linear(fc_units, 1)
        self.dropout3 = nn.Dropout(dropout_rate)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.act1(x)
        x = self.pool(x)
        x = self.dropout1(x)
        
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.act2(x)
        x = self.pool(x)
        x = self.dropout2(x)

        x = x.view(x.size(0), -1)  # Flatten the features for the fully connected layer
        x = self.fc1(x)
        x = self.dropout3(x)
        x = self.fc2(x)
        return x

class SequenceModel(nn.Module):
    def __init__(self, num_classes, dropout_rate=0.5, fc_units=1024):
        super(SequenceModel, self).__init__()
        self.cnn = CNN_for_DeepFake(dropout_rate, fc_units)
        self.lstm = nn.LSTM(fc_units, 512, num_layers=1, batch_first=True)
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x, lengths):
        batch_size, timesteps, C, H, W = x.size()
        c_in = x.view(batch_size * timesteps, C, H, W)
        c_out = self.cnn(c_in)
        
        r_in = c_out.view(batch_size, timesteps, -1)

        # Pack the sequence, process through LSTM, and then unpack
        packed_input = pack_padded_sequence(r_in, lengths, batch_first=True, enforce_sorted=False)
        packed_output, _ = self.lstm(packed_input)
        r_out, _ = pad_packed_sequence(packed_output, batch_first=True)
        r_out = r_out[:, -1, :]  # Get the last timestep outputs

        output = self.fc(r_out)
        return output


from torch.nn.utils.rnn import pad_sequence

def collate_fn(batch):
    sequences, labels, lengths = zip(*batch)
    # Convert list of sequences where each sequence is a list of tensors to a list of tensor sequences
    sequences = [torch.stack(seq) for seq in sequences]  # Stack each sequence to make 3D tensor
    sequences_padded = pad_sequence(sequences, batch_first=True, padding_value=0)
    labels = torch.tensor(labels)
    lengths = torch.tensor(lengths)
    return sequences_padded, labels, lengths



import numpy as np

class FrameSequenceDataset(Dataset):
    def __init__(self, csv_file, transform=None, indices=None):
        self.data = pd.read_csv(csv_file)
        if indices is not None:
            self.data = self.data.iloc[indices]
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        row = self.data.iloc[index]
        frames = row['sample_frames'].split('; ')
        sequence = [self.load_frame(frame) for frame in frames]
        label = int(row['category'] == 'real')
        if self.transform:
            sequence = [self.transform(Image.open(frame).convert('RGB')) for frame in frames]
        return sequence, label, len(sequence)

    def load_frame(self, frame_path):
        img = Image.open(frame_path).convert('RGB')  # Ensure it's always RGB
        return img




# Initialize the Model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SequenceModel(num_classes=2).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device, checkpoint_path='best_model.pth'):
    best_val_loss = float('inf')  # Initialize the best validation loss to infinity

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        correct = 0
        total = 0

        for inputs, labels, lengths in train_loader:
            inputs, labels, lengths = inputs.to(device), labels.to(device), lengths.to(device)
            optimizer.zero_grad()
            outputs = model(inputs, lengths)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_accuracy = 100 * correct / total

        # Validate after every epoch
        val_loss, val_accuracy = validate_model(model, val_loader, criterion, device)
        
        # Print training and validation results
        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss/len(train_loader):.4f}, Train Acc: {train_accuracy:.2f}%, Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy*100:.2f}%')

        # Check if the current validation loss is the best we've seen so far
        if val_loss < best_val_loss:
            print(f'Validation loss decreased ({best_val_loss:.6f} --> {val_loss:.6f}). Saving model ...')
            best_val_loss = val_loss
            # Save model state dictionary
            torch.save(model.state_dict(), checkpoint_path)

def validate_model(model, loader, criterion, device):
    model.eval()
    val_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels, lengths in loader:
            inputs, labels, lengths = inputs.to(device), labels.to(device), lengths.to(device)
            outputs = model(inputs, lengths)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_accuracy = correct / total
    return val_loss / len(loader), val_accuracy

# Example usage:
data_indices = np.arange(len(pd.read_csv('metadata.csv')))
np.random.shuffle(data_indices)
split = int(0.8 * len(data_indices))  # 80% for training, 20% for validation

train_indices = data_indices[:split]
val_indices = data_indices[split:]


# Prepare Dataset and DataLoader
train_dataset = FrameSequenceDataset('metadata.csv', transform=transform, indices=train_indices)
val_dataset = FrameSequenceDataset('metadata.csv', transform=transform, indices=val_indices)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)

# Initialize the Model and Set Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SequenceModel(num_classes=2).to(device)

# Setup Loss Function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Path to save the best model
checkpoint_path = 'best_model.pth'

# Start Training
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10, device=device, checkpoint_path=checkpoint_path)




KeyboardInterrupt: 

In [4]:
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import numpy as np
import torch

class VideoDataset(Dataset):
    def __init__(self, data, labels, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # Each element in data is a sequence of tensors
        video_frames = self.data[idx]
        label = self.labels[idx]

        # Transform each frame
        if self.transform:
            video_frames = torch.stack([self.transform(frame) for frame in video_frames])

        return video_frames, label

# Assuming data is already loaded and split into X_train, X_test, y_train, y_test
train_dataset = VideoDataset(X_train, y_train)
valid_dataset = VideoDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=4, shuffle=False)


In [7]:
from torchvision.models import resnext50_32x4d, ResNeXt50_32X4D_Weights

class Model(nn.Module):
    def __init__(self, num_classes, latent_dim=2048, lstm_layers=1, hidden_dim=2048, bidirectional=False):
        super(Model, self).__init__()
        # Using the updated API to load pretrained weights
        self.resnext = resnext50_32x4d(weights=ResNeXt50_32X4D_Weights.IMAGENET1K_V1)
        self.cnn = nn.Sequential(*list(self.resnext.children())[:-2])  # Remove the last fully connected layer
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.lstm = nn.LSTM(latent_dim, hidden_dim, lstm_layers, batch_first=True, bidirectional=bidirectional)
        self.fc = nn.Linear(hidden_dim * (2 if bidirectional else 1), num_classes)

    def forward(self, x):
        batch_size, seq_len, c, h, w = x.size()
        x = x.view(batch_size * seq_len, c, h, w)  # Flatten the batch and sequence into a single batch dimension
        x = self.cnn(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)  # Flatten feature maps
        x = x.view(batch_size, seq_len, -1)  # Reshape into (batch_size, seq_len, feature_size)
        x, _ = self.lstm(x)
        x = x[:, -1, :]  # Get the output of the last sequence step
        x = self.fc(x)
        return x


In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
from torchvision.models import resnext50_32x4d, ResNeXt50_32X4D_Weights
import os
from PIL import Image

# Configuration
image_dir = '/Users/dr.ake/Documents/GitHub/Deepfake-AI-SUTD/processed_dataset_frame'  # Path to your dataset
num_classes = 2
im_size = 112
batch_size = 4
num_epochs = 10
learning_rate = 0.001

# Define transformations
transform = transforms.Compose([
    transforms.Resize((im_size, im_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Define the dataset
class VideoDataset(Dataset):
    def __init__(self, data, labels, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        video_frames = self.data[idx]
        label = self.labels[idx]

        if self.transform:
            video_frames = torch.stack([self.transform(frame) for frame in video_frames])

        return video_frames, label

# Model definition
class Model(nn.Module):
    def __init__(self, num_classes, latent_dim=2048, lstm_layers=1, hidden_dim=2048, bidirectional=False):
        super(Model, self).__init__()
        self.resnext = resnext50_32x4d(weights=ResNeXt50_32X4D_Weights.IMAGENET1K_V1)
        self.cnn = nn.Sequential(*list(self.resnext.children())[:-2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.lstm = nn.LSTM(latent_dim, hidden_dim, lstm_layers, batch_first=True, bidirectional=bidirectional)
        self.fc = nn.Linear(hidden_dim * (2 if bidirectional else 1), num_classes)

    def forward(self, x):
        batch_size, seq_len, c, h, w = x.size()
        x = x.view(batch_size * seq_len, c, h, w)
        x = self.cnn(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = x.view(batch_size, seq_len, -1)
        x, _ = self.lstm(x)
        x = x[:, -1, :]
        x = self.fc(x)
        return x

# Initialize model, loss criterion, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Model(num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training function
def train_model(model, train_loader, criterion, optimizer, num_epochs, device):
    model.train()
    for epoch in range(num_epochs):
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            if (i + 1) % 10 == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}], Loss: {loss.item():.4f}')

# Define your data loader
# Assuming X_train and y_train are prepared lists of tensors and labels
# For demonstration, replace these with your actual data loading logic
train_dataset = VideoDataset(X_train, y_train, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Start training
train_model(model, train_loader, criterion, optimizer, num_epochs, device)


TypeError: pic should be PIL Image or ndarray. Got <class 'torch.Tensor'>

In [42]:
# #Model with feature visualization
from torch import nn
from torchvision import models
# class Model(nn.Module):
#     def __init__(self, num_classes,latent_dim= 2048, lstm_layers=1 , hidden_dim = 2048, bidirectional = False):
#         super(Model, self).__init__()

#         model = models.resnext50_32x4d(pretrained = True) #Residual Network CNN
#         self.model = nn.Sequential(*list(model.children())[:-2])
#         self.lstm = nn.LSTM(latent_dim,hidden_dim, lstm_layers,  bidirectional)
#         self.relu = nn.LeakyReLU()
#         self.dp = nn.Dropout(0.4)
#         self.linear1 = nn.Linear(2048,num_classes)
#         self.avgpool = nn.AdaptiveAvgPool2d(1)
#     def forward(self, x):
#         batch_size,seq_length, c, h, w = x.shape
#         x = x.view(batch_size * seq_length, c, h, w)
#         fmap = self.model(x)
#         x = self.avgpool(fmap)
#         x = x.view(batch_size,seq_length,2048)
#         x_lstm,_ = self.lstm(x,None)
#         return fmap,self.dp(self.linear1(torch.mean(x_lstm,dim = 1)))
class Model(nn.Module):
    def __init__(self, num_classes, latent_dim=2048, lstm_layers=1, hidden_dim=2048, bidirectional=False):
        super(Model, self).__init__()

        model = models.resnext50_32x4d(pretrained=True)
        self.model = nn.Sequential(*list(model.children())[:-2])
        self.lstm = nn.LSTM(latent_dim, hidden_dim, lstm_layers, bidirectional=bidirectional)
        self.relu = nn.LeakyReLU()
        self.dp = nn.Dropout(0.4)
        self.linear1 = nn.Linear(hidden_dim, num_classes)
        self.avgpool = nn.AdaptiveAvgPool2d(1)

    def forward(self, x):
        # x expected to be in shape [batch_size, channels, height, width]
        fmap = self.model(x)
        x = self.avgpool(fmap)
        x = x.view(x.size(0), -1)  # Flatten the features out
        x_lstm, _ = self.lstm(x.unsqueeze(1))  # Add a sequence dimension
        return fmap, self.dp(self.linear1(torch.mean(x_lstm, dim=1)))


In [8]:
# import torch
# import numpy as np
import torchvision.models as models
from torchvision.models import ResNeXt50_32X4D_Weights

model = models.resnext50_32x4d(weights=ResNeXt50_32X4D_Weights.IMAGENET1K_V1)



In [44]:
import torch
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=0)
valid_loader = DataLoader(valid_dataset, batch_size=4, shuffle=False, num_workers=0)

print(torch.cuda.is_available())
for data, target in train_loader:
    print(data.shape, target.shape)
    break
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

False
torch.Size([4, 3, 112, 112]) torch.Size([4])
Using device: cpu


In [45]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [46]:
import torch

def train_model(model, train_loader, valid_loader, criterion, optimizer, num_epochs=10, device='cpu'):
    model.to(device)  # Ensure model is on the correct device
    for epoch in range(num_epochs):
        model.train()  # Set the model to training mode
        running_loss = 0.0
        correct_predictions = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            # Forward pass
            try:
                feature_map, outputs = model(inputs)
            except Exception as e:
                print(f"Error during model forward pass: {e}")
                continue

            loss = criterion(outputs, labels)

            # Backward and optimize
            loss.backward()
            optimizer.step()

            # Statistics
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            correct_predictions += (predicted == labels).sum().item()

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = correct_predictions / len(train_loader.dataset)

        # Validation
        valid_loss, valid_acc = validate_model(model, valid_loader, criterion, device)

        print(f'Epoch [{epoch+1}/{num_epochs}], '
              f'Train Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}, '
              f'Validation Loss: {valid_loss:.4f}, Validation Accuracy: {valid_acc:.4f}')

def validate_model(model, valid_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct_predictions = 0

    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            try:
                feature_map, outputs = model(inputs)
            except Exception as e:
                print(f"Error during model forward pass in validation: {e}")
                continue

            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            correct_predictions += (predicted == labels).sum().item()

    total_loss = running_loss / len(valid_loader.dataset)
    total_acc = correct_predictions / len(valid_loader.dataset)
    return total_loss, total_acc

# Ensure your device setting is correct
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Initialize your model, criterion, and optimizer as before
model = Model(num_classes=len(dataset.classes))
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Call the training function
num_epochs = 10
train_model(model, train_loader, valid_loader, criterion, optimizer, num_epochs, device)


Using device: cpu
Epoch [1/10], Train Loss: 0.6994, Accuracy: 0.5109, Validation Loss: 0.6925, Validation Accuracy: 0.5207
Epoch [2/10], Train Loss: 0.6945, Accuracy: 0.5173, Validation Loss: 0.6920, Validation Accuracy: 0.4793
Epoch [3/10], Train Loss: 0.6949, Accuracy: 0.5029, Validation Loss: 0.6936, Validation Accuracy: 0.4793


KeyboardInterrupt: 