In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
import numpy as np
import os
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import KFold

class SimplifiedPANN(nn.Module):
    def __init__(self):
        super(SimplifiedPANN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool = nn.AdaptiveAvgPool2d((4, 4))
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.dropout(torch.relu(self.bn1(self.conv1(x))))
        x = self.dropout(torch.relu(self.bn2(self.conv2(x))))
        x = self.pool(x)
        x = self.flatten(x)
        return x

class TwoLayerCNN(nn.Module):
    def __init__(self, input_channels):
        super(TwoLayerCNN, self).__init__()
        self.conv1 = nn.Conv2d(input_channels, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 16, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(16)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(16, 1)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.dropout(torch.relu(self.bn1(self.conv1(x))))
        x = self.dropout(torch.relu(self.bn2(self.conv2(x))))
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

class CustomPANNwithCNN(nn.Module):
    def __init__(self):
        super(CustomPANNwithCNN, self).__init__()
        self.pann_extractor = SimplifiedPANN()
        self.cnn = TwoLayerCNN(input_channels=1024)  # 64 * 4 * 4 = 1024

    def forward(self, x):
        x = self.pann_extractor(x)
        x = x.view(x.size(0), 1024, 1, 1)
        return self.cnn(x)

class AugmentedDCASEDataset(Dataset):
    def __init__(self, data_path, is_training=True):
        self.data_path = data_path
        self.is_training = is_training
        self.files = []
        self.labels = []
        
        if not os.path.exists(data_path):
            raise FileNotFoundError(f"The directory {data_path} does not exist.")
        
        for root, _, files in os.walk(data_path):
            for file in files:
                if file.endswith('.npy'):
                    self.files.append(os.path.join(root, file))
                    self.labels.append(os.path.basename(root))
        
        if len(self.files) == 0:
            raise ValueError(f"No .npy files found in {data_path}. Please ensure that you have run the preprocessing step or you have valid files")
        
        print(f"Loaded files files from {data_path}")

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        if idx >= len(self.files):
            #check for out of range
            raise IndexError(f"index {idx} is out of range, Dataset contains {len(self.files)} files")
        
        try:
            file_path = self.files[idx]
            data = np.load(file_path)
            label = self.labels[idx]
            
            if self.is_training:
                data = self.augment(data)
            
            if len(data.shape) == 1:
                data = np.expand_dims(data, axis=1)
            elif len(data.shape) > 2:
                data = data.reshape(data.shape[0], -1)
            
            data = np.expand_dims(data, axis=0)
            
            return torch.FloatTensor(data), torch.FloatTensor([1.0])  
        except Exception as e:
            print(f"Error loading file {file_path}: {str(e)}")
            return None
            

    def augment(self, data):
        shift = np.random.randint(-5, 5)
        data = np.roll(data, shift, axis=1)
        #time shifting frequency masking and randrom noise
        if data.shape[0] > 1:
            mask_size = np.random.randint(1, min(5, data.shape[0] - 1))
            mask_freq = np.random.randint(0, data.shape[0] - mask_size)
            data[mask_freq:mask_freq+mask_size, :] = 0
        
        if data.shape[1] > 1:
            mask_size = np.random.randint(1, min(5, data.shape[1] - 1))
            mask_time = np.random.randint(0, data.shape[1] - mask_size)
            data[:, mask_time:mask_time+mask_size] = 0
        
        noise = np.random.normal(0, 0.01, data.shape)
        data = data + noise
        
        return data
def collate_fn(batch):
    batch = [item for item in batch if item is not None]
    if len(batch) == 0:
        return torch.Tensor(), torch.Tensor()
    
    inputs, labels = zip(*batch)
    
    max_channels = max(x.size(0) for x in inputs)
    max_height = max(x.size(1) for x in inputs)
    max_width = max(x.size(2) for x in inputs)
    
    padded_inputs = []
    for x in inputs:
        padded = torch.nn.functional.pad(x, (0, max_width - x.size(2), 0, max_height - x.size(1), 0, max_channels - x.size(0)))
        padded_inputs.append(padded)
    
    return torch.stack(padded_inputs), torch.stack(labels)

def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, device, num_epochs, patience=5):
    best_loss = float('inf')
    epochs_without_improvement = 0
    
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, targets in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Training"):
            if inputs.numel() == 0:
                continue
            inputs, targets = inputs.to(device), targets.to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        train_loss = running_loss / len(train_loader)
        print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {train_loss:.4f}")
        #epoch for validation
        val_loss, accuracy, precision, recall, f1 = evaluate(model, val_loader, criterion, device)
        print(f"Validation - Loss: {val_loss:.4f}, Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}")
        
        scheduler.step(val_loss)
        
    #if the results do not improve then do early stop after two epochs
        if val_loss < best_loss:
            best_loss = val_loss
            epochs_without_improvement = 0
            torch.save(model.state_dict(), 'best_pann_model.pth')
            print("New best model saved.")
        else:
            epochs_without_improvement += 2
            if epochs_without_improvement >= patience:
                print(f"Early stopping triggered after {epoch+1} epochs")
                model.load_state_dict(torch.load('best_pann_model.pth'))
                break
    
    return model

def evaluate(model, val_loader, criterion, device):
    model.eval()
    running_loss = 0.0
    all_preds = []
    all_targets = []
    
    with torch.no_grad():
        for inputs, targets in tqdm(val_loader, desc="Evaluating"):
            if inputs.numel() == 0:
                continue
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            running_loss += loss.item()
            
            preds = (outputs > 0.5).float()
            all_preds.extend(preds.cpu().numpy())
            all_targets.extend(targets.cpu().numpy())
    
    avg_loss = running_loss / len(val_loader)
    accuracy = accuracy_score(all_targets, all_preds)
    precision = precision_score(all_targets, all_preds, zero_division=0)
    recall = recall_score(all_targets, all_preds, zero_division=0)
    f1 = f1_score(all_targets, all_preds, zero_division=0)
    
    return avg_loss, accuracy, precision, recall, f1
def main():
    PROCESSED_TRAIN_PATH = "/Users/zaher/Desktop/Project/Processed__PANN_Training_Set"
    PROCESSED_VAL_PATH = "/Users/zaher/Desktop/Project/Processed__PANN_Validation_Set"

    print(f"Path for Training data: {PROCESSED_TRAIN_PATH}")
    print(f"Path for Validation data : {PROCESSED_VAL_PATH}")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    try:
        train_dataset = AugmentedDCASEDataset(PROCESSED_TRAIN_PATH, is_training=True)
        val_dataset = AugmentedDCASEDataset(PROCESSED_VAL_PATH, is_training=False)
    except FileNotFoundError as e:
        print(f"Error: {e}")
        print("Data doesn't exist please make sure that you have run the preprocessing step and check the path of the directories.")
        return
    except ValueError as e:
        print(f"Error: {e}")
        print("Data doesn't exist please make sure that you have run the preprocessing step and check the path of the directories.")
        return

    print(f"Number of training samples: {len(train_dataset)}")
    print(f"Number of validation samples: {len(val_dataset)}")

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

    model = CustomPANNwithCNN().to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, verbose=True)

    num_epochs = 10
    print("Starting training")
    try:
        model = train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, device, num_epochs)
    except Exception as e:
        print(f"Error encountered during training: {str(e)}")

    print("Final evaluation")
    try:
        final_loss, accuracy, precision, recall, f1 = evaluate(model, val_loader, criterion, device)
        print(f"Loss: {final_loss:.4f}")
        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")
        print(f"F1 Score: {f1:.4f}")
#save the final model
        torch.save(model.state_dict(), 'pann_model_final.pth')
        print("Final model saved")
    except Exception as e:
        print(f"Error during final evaluation: {str(e)}")

    print("Training and evaluation steps completed sucessfully")

if __name__ == "__main__":
    main()

    #this one

Path for Training data: /Users/zaher/Desktop/Project/Processed__PANN_Training_Set
Path for Validation data : /Users/zaher/Desktop/Project/Processed__PANN_Validation_Set
Using device: cpu
Loaded files files from /Users/zaher/Desktop/Project/Processed__PANN_Training_Set
Loaded files files from /Users/zaher/Desktop/Project/Processed__PANN_Validation_Set
Number of training samples: 7573
Number of validation samples: 3406
Starting training


Epoch 1/10 - Training: 100%|██████████████████| 237/237 [13:31<00:00,  3.43s/it]


Epoch 1/10, Training Loss: 0.3441


Evaluating: 100%|█████████████████████████████| 107/107 [01:35<00:00,  1.12it/s]


Validation - Loss: 0.0999, Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1: 1.0000
New best model saved.


Epoch 2/10 - Training: 100%|██████████████████| 237/237 [13:14<00:00,  3.35s/it]


Epoch 2/10, Training Loss: 0.0880


Evaluating: 100%|█████████████████████████████| 107/107 [01:35<00:00,  1.12it/s]


Validation - Loss: 0.0344, Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1: 1.0000
New best model saved.


Epoch 3/10 - Training: 100%|██████████████████| 237/237 [13:21<00:00,  3.38s/it]


Epoch 3/10, Training Loss: 0.0393


Evaluating: 100%|█████████████████████████████| 107/107 [01:40<00:00,  1.06it/s]


Validation - Loss: 0.0155, Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1: 1.0000
New best model saved.


Epoch 4/10 - Training: 100%|██████████████████| 237/237 [13:35<00:00,  3.44s/it]


Epoch 4/10, Training Loss: 0.0231


Evaluating: 100%|█████████████████████████████| 107/107 [01:39<00:00,  1.07it/s]


Validation - Loss: 0.0064, Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1: 1.0000
New best model saved.


Epoch 5/10 - Training: 100%|██████████████████| 237/237 [14:11<00:00,  3.59s/it]


Epoch 5/10, Training Loss: 0.0156


Evaluating: 100%|█████████████████████████████| 107/107 [01:51<00:00,  1.04s/it]


Validation - Loss: 0.0032, Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1: 1.0000
New best model saved.


Epoch 6/10 - Training: 100%|██████████████████| 237/237 [13:22<00:00,  3.39s/it]


Epoch 6/10, Training Loss: 0.0109


Evaluating: 100%|█████████████████████████████| 107/107 [01:43<00:00,  1.03it/s]


Validation - Loss: 0.0024, Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1: 1.0000
New best model saved.


Epoch 7/10 - Training: 100%|██████████████████| 237/237 [13:59<00:00,  3.54s/it]


Epoch 7/10, Training Loss: 0.0087


Evaluating: 100%|█████████████████████████████| 107/107 [01:40<00:00,  1.07it/s]


Validation - Loss: 0.0016, Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1: 1.0000
New best model saved.


Epoch 8/10 - Training: 100%|██████████████████| 237/237 [13:49<00:00,  3.50s/it]


Epoch 8/10, Training Loss: 0.0072


Evaluating: 100%|█████████████████████████████| 107/107 [01:50<00:00,  1.03s/it]


Validation - Loss: 0.0011, Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1: 1.0000
New best model saved.


Epoch 9/10 - Training: 100%|██████████████████| 237/237 [13:56<00:00,  3.53s/it]


Epoch 9/10, Training Loss: 0.0052


Evaluating: 100%|█████████████████████████████| 107/107 [01:41<00:00,  1.05it/s]


Validation - Loss: 0.0008, Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1: 1.0000
New best model saved.


Epoch 10/10 - Training: 100%|█████████████████| 237/237 [15:26<00:00,  3.91s/it]


Epoch 10/10, Training Loss: 0.0044


Evaluating: 100%|█████████████████████████████| 107/107 [01:45<00:00,  1.02it/s]


Validation - Loss: 0.0005, Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1: 1.0000
New best model saved.
Final evaluation


Evaluating: 100%|█████████████████████████████| 107/107 [01:37<00:00,  1.09it/s]

Loss: 0.0005
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000
Final model saved
Training and evaluation steps completed sucessfully





In [5]:
import torch
import torch.nn as nn
import numpy as np
import os
import csv
import gc
import traceback
import psutil
from tqdm import tqdm
from sklearn.metrics import f1_score, precision_score, recall_score
from collections import defaultdict
import random

class SimplifiedPANN(nn.Module):
    def __init__(self):
        super(SimplifiedPANN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool = nn.AdaptiveAvgPool2d((4, 4))
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.dropout(torch.relu(self.bn1(self.conv1(x))))
        x = self.dropout(torch.relu(self.bn2(self.conv2(x))))
        x = self.pool(x)
        x = self.flatten(x)
        return x

class TwoLayerCNN(nn.Module):
    def __init__(self, input_channels):
        super(TwoLayerCNN, self).__init__()
        self.conv1 = nn.Conv2d(input_channels, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 16, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(16)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(16, 1)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.dropout(torch.relu(self.bn1(self.conv1(x))))
        x = self.dropout(torch.relu(self.bn2(self.conv2(x))))
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

class CustomPANNwithCNN(nn.Module):
    def __init__(self):
        super(CustomPANNwithCNN, self).__init__()
        self.pann_extractor = SimplifiedPANN()
        self.cnn = TwoLayerCNN(input_channels=1024)

    def forward(self, x):
        x = self.pann_extractor(x)
        x = x.view(x.size(0), 1024, 1, 1)
        return self.cnn(x)

def print_memory_usage():
    process = psutil.Process(os.getpid())
    #to check ow much ram we used to tackle the errors
    print(f"Memory usage data: {process.memory_info().rss / 1024 / 1024:.2f} MB")

def load_and_preprocess_file(file_path):
    try:
        data = np.load(file_path)
        #ensuring that the shape of our data is 2d
        if len(data.shape) == 1:
            data = np.expand_dims(data, axis=0)
        elif len(data.shape) > 2:
            data = data.reshape(data.shape[0], -1)
        
        # add dimension if more
        data = np.expand_dims(data, axis=0)
        
        return data
    except Exception as e:
        print(f"Error loading file {file_path}: {str(e)}")
        return None

def augment_data(data):
    # for time shifting
    shift = np.random.randint(-5, 5)
    data = np.roll(data, shift, axis=2)
    
    # for frequency masking
    if data.shape[1] > 1:
        mask_size = np.random.randint(1, min(5, data.shape[1] - 1))
        mask_freq = np.random.randint(0, data.shape[1] - mask_size)
        data[:, mask_freq:mask_freq+mask_size, :] = 0
    
    # for time masking
    if data.shape[2] > 1:
        mask_size = np.random.randint(1, min(5, data.shape[2] - 1))
        mask_time = np.random.randint(0, data.shape[2] - mask_size)
        data[:, :, mask_time:mask_time+mask_size] = 0
    
    # adding a random noise
    noise = np.random.normal(0, 0.01, data.shape)
    data = data + noise
    
    return data

def create_support_and_query_sets(data_path, n_support=5):
    class_files = defaultdict(list)
    for root, _, files in os.walk(data_path):
        for file in files:
            if file.endswith('.npy'):
                class_name = os.path.basename(root)
                class_files[class_name].append(os.path.join(root, file))
    support_set = {}
    query_set = {}
    
    for class_name, files in class_files.items():
        if len(files) >= n_support:
            support_files = random.sample(files, n_support)
            query_files = [f for f in files if f not in support_files]
            
            support_set[class_name] = support_files
            query_set[class_name] = query_files
    
    return support_set, query_set

def evaluate_model(model, data_path, device, output_file, n_support=5):
    model.eval()
    support_set, query_set = create_support_and_query_sets(data_path, n_support)
    
    all_predictions = []
    all_labels = []
    all_file_names = []
    
    try:
        with torch.no_grad():
            for class_name, support_files in tqdm(support_set.items(), desc="Evaluating classes"):
#create support and query sets
                support_features = []
                for file in support_files:
                    data = load_and_preprocess_file(file)
                    if data is not None:
                        data = augment_data(data)
                        data = torch.FloatTensor(data).unsqueeze(0).to(device)  
                        features = model.pann_extractor(data)
                        support_features.append(features)
                
                support_prototype = torch.mean(torch.stack(support_features), dim=0)
                
#for the query set
                for file in query_set[class_name]:
                    data = load_and_preprocess_file(file)
                    if data is not None:
                        data = torch.FloatTensor(data).unsqueeze(0).to(device)  # Add batch dimension
                        query_features = model.pann_extractor(data)
                        
                        distance = torch.norm(query_features - support_prototype)
                        prediction = torch.sigmoid(-distance).item()  # Convert distance to probability
                        
                        all_predictions.append(prediction)
                        all_labels.append(1)  
                        all_file_names.append(os.path.basename(file))
                
                del support_features, support_prototype, query_features
                torch.cuda.empty_cache()
                gc.collect()
                
        
     
        
        threshold = np.median(all_predictions)
        
   #applying threshpld to get binary classification
        binary_predictions = [1 if pred > threshold else 0 for pred in all_predictions]
        #calculating our metrics
        f1 = f1_score(all_labels, binary_predictions, zero_division=0)
        precision = precision_score(all_labels, binary_predictions, zero_division=0)
        recall = recall_score(all_labels, binary_predictions, zero_division=0)
        
        print(f"F1 Score: {f1:.4f}")
        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")
        
#save results to csv
        with open(output_file, 'w', newline='') as csvfile:
            fieldnames = ['file_name', 'raw_output', 'binary_prediction']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            
            writer.writeheader()
            for file_name, raw_output, binary_pred in zip(all_file_names, all_predictions, binary_predictions):
                writer.writerow({
                    'file_name': file_name, 
                    'raw_output': raw_output,
                    'binary_prediction': binary_pred
                })
        
        print(f"predictions saved to {output_file}")
    except Exception as e:
        print(f"Error encountered during evaluation: {str(e)}")
        print(traceback.format_exc())

def main():
    PROCESSED_EVAL_PATH = "/Users/zaher/Desktop/Project/Processed__PANN_Evaluation_Set"
    MODEL_PATH = "best_pann_model.pth"  
    OUTPUT_FILE = "pann_predictions.csv"

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    if not os.path.exists(PROCESSED_EVAL_PATH):
        print(f"Error:  directory {PROCESSED_EVAL_PATH} doesn't  exist.")
        return

    model = CustomPANNwithCNN().to(device)
    model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
    
    print("Starting evaluation process")
    print_memory_usage()
    evaluate_model(model, PROCESSED_EVAL_PATH, device, OUTPUT_FILE, n_support=5)
    print("Evaluation completed")
    print_memory_usage()

if __name__ == "__main__":
    main()

Using device: cpu
Starting evaluation process
Memory usage data: 1321.28 MB


  model.load_state_dict(torch.load(MODEL_PATH, map_location=device))


F1 Score: 0.6593
Precision: 1.0000
Recall: 0.4918
predictions saved to pann_predictions.csv
Evaluation completed
Memory usage data: 1288.08 MB
