In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import os
import h5py
import numpy as np
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torch.optim.lr_scheduler import ReduceLROnPlateau
import csv

#paths to our training/validation 
train_path = "/Users/zaher/Desktop/Project/Training_Set"
val_path = "/Users/zaher/Desktop/Project/Validation_Set"

class AudioResNet(nn.Module):
    def __init__(self, num_classes=1):
        super(AudioResNet, self).__init__()
        #set number of input channels to 128
        self.in_channels = 128
        #first conv network
        self.conv1 = nn.Conv2d(1, 128, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(128)
        self.layer1 = self.make_layer(128, 2, stride=1)
        self.layer2 = self.make_layer(256, 2, stride=2)
        self.layer3 = self.make_layer(512, 2, stride=2)
        self.layer4 = self.make_layer(1024, 2, stride=2)
        #reduce the pooling sieze on the feature map to 1 on 1
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(1024, num_classes)
        self.sigmoid = nn.Sigmoid()
#create a ResNet Layer with multiple Blocks
    def make_layer(self, out_channels, blocks, stride):
        layers = []
        layers.append(self.make_resNET_block(out_channels, stride))
        for _ in range(1, blocks):
            layers.append(self.make_resNET_block(out_channels, stride=1))
        return nn.Sequential(*layers)

    def make_resNET_block(self, out_channels, stride):
        downsample = None
        #check if we need to do downsample by checking if output channels
        if stride != 1 or self.in_channels != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
            #create a ResBlock update number of input channels
        block = ResBlock(self.in_channels, out_channels, stride, downsample)
        self.in_channels = out_channels
        return block
        #in this function we pass it to the next layers

    def forward(self, x):
        #pass the input between our convo layers we have 4
        x = torch.relu(self.bn1(self.conv1(x)))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        #apply pooling
        x = self.avg_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
    #then pass it through Fully conected lauer amd apply sigmoid activation    
        x = self.sigmoid(x)
        #then remove any extra dimensions by squeezing it
        return x.squeeze()

    def extract_features(self, x):
        x = torch.relu(self.bn1(self.conv1(x)))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avg_pool(x)
        #flatten our outpot to get feature vector
        return x.view(x.size(0), -1)

class ResBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResBlock, self).__init__()
        #pass it to the first convo layer
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        #batch normalisation 
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        identity = x
        out = torch.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        return torch.relu(out)

def load_Training_data(file_path):
    all_segments = []
    #to store all features and labels that we extracted in preproccessing
    all_labels = []
    with h5py.File(file_path, 'r') as hf:
        #iterate over each file group in the HDF5 file
        for file_group in hf.values():
            if isinstance(file_group, h5py.Group):
                #load the segments from the current group and append them to thet array
                segments = file_group['segments'][:]
                labels = file_group['labels'][:]
                all_segments.append(segments)
                all_labels.append(labels)
    #concatenate them into single numpy array
    x = np.concatenate(all_segments, axis=0)
    y = np.concatenate(all_labels, axis=0)
    
    print(f"Proccessed data shape: x: {x.shape}, y: {y.shape}.")
    print(f"x dtype: {x.dtype}, y dtype: {y.dtype}")
    print(f"x range: ({x.min()}, {x.max()}), y range: ({y.min()}, {y.max()})")
    #convert numpy into pytorch
    return torch.FloatTensor(x), torch.FloatTensor(y)

def load_validation_data(val_path):
    val_data = []
    print(f"Loading the validation data from: {val_path}")
    
    h5_file = os.path.join(val_path, 'val_all.h5')
    #check if file exists
    if os.path.exists(h5_file):
        #open and iterate over the h5 file
        with h5py.File(h5_file, 'r') as hf:
            for key in hf.keys():
                file_data = {'file': hf[key]['file'][()],
                    'segments': torch.FloatTensor(hf[key]['segments'][:]),
                    'labels': np.array(hf[key]['labels'][:]),
                    'start_times': np.array(hf[key]['start_times'][:]),
                    'end_times': np.array(hf[key]['end_times'][:])}
                #APPEND EACH CREATED DICTIONARY TO VAL_DATA
                val_data.append(file_data)
        
        print(f"Loaded data from {h5_file}")
        print(f"Number of validation files: {len(val_data)}")
    else:
        print(f"Couldn't find : {h5_file} file")
    
    return val_data

def train(model, train_loader, criterion, optimizer, scheduler, device, num_epochs=3, patience=5):
    model.train()
    # set the model to training mode
    #generate a maximum value for best loss we willl decrease it later
    best_loss = float('inf')
    #patience counter helps us to stop early incase our model's performance doesn't improve
    patience_counter = 0
    for epoch in range(num_epochs):
#calculate and save the loss in each epoch loop by number of epochs
        running_loss = 0.0
        #loop over batches from training loader
        for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            #move to GPU to improve performance and to avoid KERNEL failing
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            
            outputs = model(inputs)
            loss = criterion(outputs, labels.float())
            #compute loss and update model parameters
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        epoch_loss = running_loss / len(train_loader)
        print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")
        
        scheduler.step(epoch_loss)
        
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            patience_counter = 0
            torch.save(model.state_dict(), 'best_model.pth')
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping")
                break
#after training the model we validate it using FEW SHOT LEARNING
def validate_few_shot(model, val_data, device, thresholds=np.linspace(0, 1, 100), support_size=5, batch_size=16):
    model.eval()
    all_results = []
    #lists to store results and predictions
    all_predictions = []

    for file_data in val_data:
        #loop over validation file
        #extract segments,labels start and end time
        segments = file_data['segments']
        labels = file_data['labels']
        start_times = file_data['start_times']
        end_times = file_data['end_times']
        #find indices of all positive samples
        pos_indices = np.where(labels == 1)[0]
        #we need at least 5 samples for few shot learning
        if len(pos_indices) < support_size:
            print(f"Not enough samples Skipping file: {file_data['file']}.")
            continue
        #we select the first 5 positive samples as support set
        support_indices = pos_indices[:support_size]
        #for the rest of the samples we use them as query
        query_indices = np.arange(len(segments))
        query_indices = query_indices[query_indices > max(support_indices)]
#if we don't have any query samples left for the file then skip it 
        if len(query_indices) == 0:
            print(f" No query samples left after the first 5 Positive in {file_data['file']} Skipping this file.")
            continue
#extract support and query set
        support_set = segments[support_indices]
        query_set = segments[query_indices]
        query_labels = labels[query_indices]

        with torch.no_grad():
            #extract features for support set
            support_features = model.extract_features(support_set.to(device))
            prototype = support_features.mean(dim=0)
            
            query_features = []
            #extract the features for query set in batches 
            for i in range(0, len(query_set), batch_size):
                batch = query_set[i:i+batch_size].to(device)
                batch_features = model.extract_features(batch)
                #move to cpu for distance calculation
                query_features.append(batch_features.cpu())
            query_features = torch.cat(query_features, dim=0)
            #calculate distances between query and prototype

            distances=torch.cdist(query_features, prototype.unsqueeze(0).cpu()).squeeze().numpy()
            #then we normalise distances between 0 and 1
            distances = (distances - distances.min()) / (distances.max() - distances.min())
            
            median_threshold = np.median(distances)
            
            best_threshold = 0
            best_f1 = 0
            #evaluation F metrics
            for threshold in thresholds:
                predictions = (distances < threshold).astype(int)
                f1 = f1_score(query_labels, predictions, average='binary')
                if f1 > best_f1:
                    best_f1 = f1
                    best_threshold = threshold
            
            predictions = (distances < best_threshold).astype(int)

        precision = precision_score(query_labels, predictions, average='binary', zero_division=1)
        recall = recall_score(query_labels, predictions, average='binary', zero_division=1)
        f1 = f1_score(query_labels, predictions, average='binary')
#store results for each file
        all_results.append({
            'file': file_data['file'],
            'precision': precision,
            'recall': recall,
            'f1': f1,})
#store predictions in CSV files
        for i, pred in enumerate(predictions):
            if pred == 1:
                all_predictions.append({
                    'file': file_data['file'],
                    'start_time': start_times[query_indices[i]],
                    'end_time': end_times[query_indices[i]]
                })

    return all_results, all_predictions

def generate_csv_output(predictions, output_file):
    with open(output_file, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        #our header /cols in csv
        writer.writerow(['Audiofilename', 'Starttime', 'Endtime'])
        for pred in predictions:
            #write each prediciton as rows in the csv file
            writer.writerow([pred['file'], pred['start_time'], pred['end_time']])

if __name__ == "__main__":
    print("hello")
    #use gpu if not available use cpu
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#train based on train_all.h5 file created in preproccessing
    train_x, train_y = load_Training_data(os.path.join(train_path, 'train_all.h5'))
    #after training call validation data
    val_data = load_validation_data(val_path)
    
    print(f"Training  path: {train_path}.")
    print(f"Validation  path: {val_path}.")
    print(f"Training shape: {train_x.shape}.")

    model = AudioResNet().to(device)
    for param in model.parameters():
        param.requires_grad = True
#define loss function binary cross entropy
    criterion = nn.BCELoss()
    #optimiser with learning rate
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    #reduce the learning rate if loss doesn't keep decreasing after three epochs
    scheduler = ReduceLROnPlateau(optimizer, 'min', patience=3)
    #create a pytorch dataset and loader for trainnig the data
    train_dataset = TensorDataset(train_x, train_y)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    
    train(model, train_loader, criterion, optimizer, scheduler, device, num_epochs=1, patience=2)
#train the model and load the best model wuth lowest loss for evaluation
    model.load_state_dict(torch.load('best_model.pth'))

    if torch.cuda.is_available():
        torch.cuda.empty_cache()
#validate using few shot learning
    results, predictions = validate_few_shot(model, val_data, device)

    
    if results:
        print("Validation Results:")
        avg_precision = avg_recall = avg_f1 = 0
        for file_result in results:
            print(f"File: {file_result['file']}")
            for metric, value in file_result.items():
                if metric != 'file' and metric != 'best_threshold':
                    print(f"{metric}: {value:.4f}")
                elif metric == 'best_threshold':
                    print(f"{metric}: {value:.6f}")
            print()
            avg_precision += file_result['precision']
            avg_recall += file_result['recall']
            avg_f1 += file_result['f1']
        
        num_files = len(results)
        print("Average Results:")
        print(f"Precision: {avg_precision / num_files:.4f}")
        print(f"Recall: {avg_recall / num_files:.4f}")
        print(f"F1 Score: {avg_f1 / num_files:.4f}")

        generate_csv_output(predictions, 'CNN_ResNet.csv')
        torch.save(model.state_dict(), 'resnet_model.pth')

    else:
        print("No Validation results  found")

hello
Proccessed data shape: x: (18213, 1, 128, 86), y: (18213,).
x dtype: float32, y dtype: int64
x range: (0.0, 1.0), y range: (0, 1)
Loading the validation data from: /Users/zaher/Desktop/Project/Validation_Set
Loaded data from /Users/zaher/Desktop/Project/Validation_Set/val_all.h5
Number of validation files: 41
Training  path: /Users/zaher/Desktop/Project/Training_Set.
Validation  path: /Users/zaher/Desktop/Project/Validation_Set.
Training shape: torch.Size([18213, 1, 128, 86]).


Epoch 1/1: 100%|██████████████████████████████| 570/570 [35:52<00:00,  3.78s/it]
  model.load_state_dict(torch.load('best_model.pth'))


Epoch 1, Loss: 0.3983
Not enough samples Skipping file: b'BUK1_20181013_023504.wav'.
Validation Results:
File: b'BUK5_20180921_015906a.wav'
precision: 1.0000
recall: 0.7500
f1: 0.8571

File: b'BUK5_20161101_002104a.wav'
precision: 1.0000
recall: 0.9881
f1: 0.9940

File: b'pw5.wav'
precision: 1.0000
recall: 0.9900
f1: 0.9950

File: b'pw4.wav'
precision: 1.0000
recall: 0.9947
f1: 0.9973

File: b'pw9.wav'
precision: 1.0000
recall: 0.9944
f1: 0.9972

File: b'pw10.wav'
precision: 1.0000
recall: 0.9952
f1: 0.9976

File: b'pw11.wav'
precision: 1.0000
recall: 0.9933
f1: 0.9967

File: b'pw8.wav'
precision: 1.0000
recall: 0.9928
f1: 0.9964

File: b'pw13.wav'
precision: 1.0000
recall: 0.9956
f1: 0.9978

File: b'pw12.wav'
precision: 1.0000
recall: 0.9877
f1: 0.9938

File: b'pw15.wav'
precision: 1.0000
recall: 0.9937
f1: 0.9968

File: b'pw14.wav'
precision: 1.0000
recall: 0.9933
f1: 0.9966

File: b'BUK1_20181011_001004.wav'
precision: 1.0000
recall: 0.9600
f1: 0.9796

File: b'BUK5_20180921_015906a.

In [87]:
import torch
import torch.nn as nn
import h5py
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import f1_score, precision_score, recall_sc2q121121qaore
from tqdm import tqdm
import os
import csv

class AudioResNet(nn.Module):
    def __init__(self, num_classes=1):
        super(AudioResNet, self).__init__()
        self.in_channels = 128
        self.conv1 = nn.Conv2d(1, 128, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(128)
        self.layer1 = self.make_layer(128, 2, stride=1)
        self.layer2 = self.make_layer(256, 2, stride=2)
        self.layer3 = self.make_layer(512, 2, stride=2)
        self.layer4 = self.make_layer(1024, 2, stride=2)
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(1024, num_classes)
        self.sigmoid = nn.Sigmoid()

    def make_layer(self, out_channels, blocks, stride):
        layers = []
        layers.append(self.make_resNET_block(out_channels, stride))
        for _ in range(1, blocks):
            layers.append(self.make_resNET_block(out_channels, stride=1))
        return nn.Sequential(*layers)

    def make_resNET_block(self, out_channels, stride):
        downsample = None
        if stride != 1 or self.in_channels != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        block = ResBlock(self.in_channels, out_channels, stride, downsample)
        self.in_channels = out_channels
        return block

    def forward(self, x):
        x = torch.relu(self.bn1(self.conv1(x)))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avg_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        x = self.sigmoid(x)
        return x.squeeze()

    def extract_features(self, x):
        x = torch.relu(self.bn1(self.conv1(x)))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avg_pool(x)
        return x.view(x.size(0), -1)

class ResBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        identity = x
        out = torch.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        return torch.relu(out)

# AudioDataset class for loading the data
class AudioDataset(Dataset):
    def __init__(self, hdf_path):
        self.hdf_path = hdf_path
        with h5py.File(self.hdf_path, 'r') as hf:
            self.keys = list(hf.keys())

    def __len__(self):
        return len(self.keys)

    def __getitem__(self, idx):
        with h5py.File(self.hdf_path, 'r') as hf:
            group = hf[self.keys[idx]]
            segments = torch.from_numpy(group['segments'][:]).float()
            labels = torch.from_numpy(group['labels'][:]).float()
            start_times = torch.from_numpy(group['start_times'][:]).float()
            end_times = torch.from_numpy(group['end_times'][:]).float()
            file_key = group['file'][()]

        if segments.ndim == 5:
            segments = segments.squeeze(2)  # Remove the extra dimension if needed

        return segments, labels, start_times, end_times, file_key


def evaluate_file(model, segments, labels, device):
    model.eval()
    with torch.no_grad():
        print(f"Segments shape: {segments.shape}")
        print(f"Labels shape: {labels.shape}")
        
        if segments.dim() == 3:
            segments = segments.unsqueeze(1)
        elif segments.dim() == 2:
            segments = segments.unsqueeze(0).unsqueeze(0)
        
        # Check if there are at least 5 positive samples
        pos_indices = (labels == 1).nonzero(as_tuple=True)[0]
        print(f"Number of positive samples: {len(pos_indices)}")
        
        if len(pos_indices) < 5:
            print("Not enough positive samples (less than 5). Skipping this file.")
            return None, None  # Skip evaluation if there are less than 5 positive samples

        # Split into support and query sets
        support_indices = pos_indices[:5]
        query_indices = np.setdiff1d(np.arange(len(labels)), support_indices)

        if len(query_indices) == 0:
            print("No query samples left after selecting support set, Skipping this file")
            return None, None

        support_set = segments[support_indices]
        query_set = segments[query_indices]
        query_labels = labels[query_indices]

        # Extract features for support and query sets
        support_features = model.extract_features(support_set.to(device))
        query_features = model.extract_features(query_set.to(device))
        
        # Compute prototype from the support set
        prototype = support_features.mean(dim=0)
        print(f"Prototype shape: {prototype.shape}")
        
        # Compute distances and predictions for the query set
        distances = torch.cdist(query_features, prototype.unsqueeze(0)).squeeze()
        print(f"Distances shape: {distances.shape}")
        
        predictions = (distances < distances.median()).float()
        print(f"Predictions shape: {predictions.shape}")
    
    return predictions.cpu().numpy(), query_labels.cpu().numpy()

def calculate_metrics(labels, predictions):
    metrics = {}
    metrics['f1'] = f1_score(labels, predictions, average='binary', zero_division=0)
    metrics['precision'] = precision_score(labels, predictions, average='binary', zero_division=0)
    metrics['recall'] = recall_score(labels, predictions, average='binary', zero_division=0)
    metrics['false_positive_rate'] = np.sum((predictions == 1) & (labels == 0)) / max(np.sum(labels == 0), 1)
    metrics['false_negative_rate'] = np.sum((predictions == 0) & (labels == 1)) / max(np.sum(labels == 1), 1)
    return metrics

def evaluate(model, data_loader, device, output_file):
    model.eval()
    all_metrics = []
    results = []
    total_segments = 0
    total_positive_segments = 0

    with torch.no_grad():
        for segments, labels, start_times, end_times, file_key in tqdm(data_loader, desc="Evaluating"):
            print(f"\nProcessing file: {file_key}")
            segments = segments.squeeze(0)  
            labels = labels.squeeze(0)
            start_times = start_times.squeeze(0)
            end_times = end_times.squeeze(0)

            total_segments += segments.shape[0]
            total_positive_segments += torch.sum(labels).item()

            try:
                predictions, query_labels = evaluate_file(model, segments, labels, device)
                
                if predictions is None:
                    continue  # Skip to the next file if evaluation was skipped
                
                # Compute metrics
                file_metrics = calculate_metrics(query_labels, predictions)
                all_metrics.append(file_metrics)
                print(f"Metrics for file {file_key}:")
                for metric, value in file_metrics.items():
                    print(f"{metric}: {value:.4f}")
                
                for i, pred in enumerate(predictions):
                    if pred == 1:
                        results.append({
                            'file': file_key[0],
                            'start_time': start_times[i].item(),
                            'end_time': end_times[i].item()
                        })
            except Exception as e:
                print(f"Error processing file {file_key}: {str(e)}")
                continue

    with open(output_file, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['file', 'start_time', 'end_time'])
        writer.writeheader()
        for result in results:
            writer.writerow(result)



    if all_metrics:
        mean_metrics = {metric: np.mean([file_metric[metric] for file_metric in all_metrics]) 
                        for metric in all_metrics[0].keys()}
        return mean_metrics
    else:
        return {metric: float('nan') for metric in ['f1', 'precision', 'recall', 'false_positive_rate', 'false_negative_rate']}


def main():
    hdf_eval = "/Users/zaher/Desktop/Project/eval_2/eval_all.h5"
    output_file = "evaluation_results.csv"

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    eval_dataset = AudioDataset(hdf_eval)
    eval_loader = DataLoader(eval_dataset, batch_size=1, shuffle=False)

    model = AudioResNet().to(device)
    model_path = 'resnet_model.pth'
    if os.path.exists(model_path):
        model.load_state_dict(torch.load(model_path, map_location=device))
        print(f"Model loaded sucessfully from '{model_path}'.")
    else:
        print(f"Error: Model file '{model_path}' not found.")
        return

    print("Starting Evaluation Process")
    try:
        mean_metrics = evaluate(model, eval_loader, device, output_file)
        print("Evaluation Process completed. Mean Metrics:")
        for metric, value in mean_metrics.items():
            print(f"{metric}: {value:.4f}")
        print(f"Results saved to {output_file}")
    except Exception as e:
        print(f"An error occurred during evaluation process: {str(e)}")

if __name__ == "__main__":
    main()

  model.load_state_dict(torch.load(model_path, map_location=device))


Using device: cpu
Model loaded from 'resnet_model.pth'.
Starting evaluation...


Evaluating:   0%|                                        | 0/57 [00:00<?, ?it/s]


Processing file: (b'85MGE.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of positive samples: 5
No query samples left after selecting support set. Skipping this file.

Processing file: (b'89MGE.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of positive samples: 5
No query samples left after selecting support set. Skipping this file.

Processing file: (b'CHE_04.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of positive samples: 5
No query samples left after selecting support set. Skipping this file.

Processing file: (b'CHE_05.wav',)
Segments shape: torch.Size([18, 1, 128, 86])
Labels shape: torch.Size([18])
Number of positive samples: 18


Evaluating:   7%|██▏                             | 4/57 [00:00<00:06,  8.52it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([13])
Predictions shape: torch.Size([13])
Metrics for file (b'CHE_05.wav',):
f1: 0.6316
precision: 1.0000
recall: 0.4615
false_positive_rate: 0.0000
false_negative_rate: 0.5385

Processing file: (b'CHE_11.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of positive samples: 5
No query samples left after selecting support set. Skipping this file.

Processing file: (b'CHE_07.wav',)
Segments shape: torch.Size([21, 1, 128, 86])
Labels shape: torch.Size([21])
Number of positive samples: 21


Evaluating:  11%|███▎                            | 6/57 [00:01<00:12,  4.21it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([16])
Predictions shape: torch.Size([16])
Metrics for file (b'CHE_07.wav',):
f1: 0.6087
precision: 1.0000
recall: 0.4375
false_positive_rate: 0.0000
false_negative_rate: 0.5625

Processing file: (b'CHE_13.wav',)
Segments shape: torch.Size([17, 1, 128, 86])
Labels shape: torch.Size([17])
Number of positive samples: 17


Evaluating:  12%|███▉                            | 7/57 [00:01<00:14,  3.55it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([12])
Predictions shape: torch.Size([12])
Metrics for file (b'CHE_13.wav',):
f1: 0.5882
precision: 1.0000
recall: 0.4167
false_positive_rate: 0.0000
false_negative_rate: 0.5833

Processing file: (b'CHE_12.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of positive samples: 5
No query samples left after selecting support set. Skipping this file.

Processing file: (b'CHE_06.wav',)
Segments shape: torch.Size([18, 1, 128, 86])
Labels shape: torch.Size([18])
Number of positive samples: 18


Evaluating:  16%|█████                           | 9/57 [00:02<00:12,  3.79it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([13])
Predictions shape: torch.Size([13])
Metrics for file (b'CHE_06.wav',):
f1: 0.6316
precision: 1.0000
recall: 0.4615
false_positive_rate: 0.0000
false_negative_rate: 0.5385

Processing file: (b'CHE_02.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of positive samples: 5
No query samples left after selecting support set. Skipping this file.

Processing file: (b'CHE_16.wav',)
Segments shape: torch.Size([16, 1, 128, 86])
Labels shape: torch.Size([16])
Number of positive samples: 16


Evaluating:  19%|█████▉                         | 11/57 [00:02<00:11,  4.04it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([11])
Predictions shape: torch.Size([11])
Metrics for file (b'CHE_16.wav',):
f1: 0.6250
precision: 1.0000
recall: 0.4545
false_positive_rate: 0.0000
false_negative_rate: 0.5455

Processing file: (b'CHE_17.wav',)
Segments shape: torch.Size([30, 1, 128, 86])
Labels shape: torch.Size([30])
Number of positive samples: 30


Evaluating:  21%|██████▌                        | 12/57 [00:03<00:19,  2.30it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([25])
Predictions shape: torch.Size([25])
Metrics for file (b'CHE_17.wav',):
f1: 0.6486
precision: 1.0000
recall: 0.4800
false_positive_rate: 0.0000
false_negative_rate: 0.5200

Processing file: (b'91MGE.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of positive samples: 5
No query samples left after selecting support set. Skipping this file.

Processing file: (b'CHE_03.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of positive samples: 5
No query samples left after selecting support set. Skipping this file.

Processing file: (b'CHE_15.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of positive samples: 5
No query samples left after selecting support set. Skipping this file.

Processing file: (b'CHE_01.wav',)
Segments shape: torch.Size([18, 1, 128, 86])
Labels shape: torch.Size([18])
Number of positive 

Evaluating:  28%|████████▋                      | 16/57 [00:04<00:10,  3.79it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([13])
Predictions shape: torch.Size([13])
Metrics for file (b'CHE_01.wav',):
f1: 0.6316
precision: 1.0000
recall: 0.4615
false_positive_rate: 0.0000
false_negative_rate: 0.5385

Processing file: (b'CHE_14.wav',)
Segments shape: torch.Size([14, 1, 128, 86])
Labels shape: torch.Size([14])
Number of positive samples: 14


Evaluating:  30%|█████████▏                     | 17/57 [00:04<00:11,  3.57it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([9])
Predictions shape: torch.Size([9])
Metrics for file (b'CHE_14.wav',):
f1: 0.6154
precision: 1.0000
recall: 0.4444
false_positive_rate: 0.0000
false_negative_rate: 0.5556

Processing file: (b'CHE_19.wav',)
Segments shape: torch.Size([13, 1, 128, 86])
Labels shape: torch.Size([13])
Number of positive samples: 13


Evaluating:  32%|█████████▊                     | 18/57 [00:04<00:11,  3.47it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([8])
Predictions shape: torch.Size([8])
Metrics for file (b'CHE_19.wav',):
f1: 0.5455
precision: 1.0000
recall: 0.3750
false_positive_rate: 0.0000
false_negative_rate: 0.6250

Processing file: (b'CHE_18.wav',)
Segments shape: torch.Size([20, 1, 128, 86])
Labels shape: torch.Size([20])
Number of positive samples: 20


Evaluating:  33%|██████████▎                    | 19/57 [00:05<00:12,  3.02it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([15])
Predictions shape: torch.Size([15])
Metrics for file (b'CHE_18.wav',):
f1: 0.6364
precision: 1.0000
recall: 0.4667
false_positive_rate: 0.0000
false_negative_rate: 0.5333

Processing file: (b'CHE_09.wav',)
Segments shape: torch.Size([21, 1, 128, 86])
Labels shape: torch.Size([21])
Number of positive samples: 21


Evaluating:  35%|██████████▉                    | 20/57 [00:06<00:16,  2.28it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([16])
Predictions shape: torch.Size([16])
Metrics for file (b'CHE_09.wav',):
f1: 0.6087
precision: 1.0000
recall: 0.4375
false_positive_rate: 0.0000
false_negative_rate: 0.5625

Processing file: (b'DC01.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of positive samples: 5
No query samples left after selecting support set. Skipping this file.

Processing file: (b'DC02.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of positive samples: 5
No query samples left after selecting support set. Skipping this file.

Processing file: (b'DC12.wav',)
Segments shape: torch.Size([32, 1, 128, 86])
Labels shape: torch.Size([32])
Number of positive samples: 32


Evaluating:  40%|████████████▌                  | 23/57 [00:07<00:13,  2.46it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([27])
Predictions shape: torch.Size([27])
Metrics for file (b'DC12.wav',):
f1: 0.6500
precision: 1.0000
recall: 0.4815
false_positive_rate: 0.0000
false_negative_rate: 0.5185

Processing file: (b'E2_208_20190712_0150.wav',)
Segments shape: torch.Size([2, 1, 128, 86])
Labels shape: torch.Size([2])
Number of positive samples: 2
Not enough positive samples (less than 5). Skipping this file.

Processing file: (b'DC06.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of positive samples: 5
No query samples left after selecting support set. Skipping this file.

Processing file: (b'DC07.wav',)
Segments shape: torch.Size([18, 1, 128, 86])
Labels shape: torch.Size([18])
Number of positive samples: 18


Evaluating:  46%|██████████████▏                | 26/57 [00:07<00:09,  3.32it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([13])
Predictions shape: torch.Size([13])
Metrics for file (b'DC07.wav',):
f1: 0.6316
precision: 1.0000
recall: 0.4615
false_positive_rate: 0.0000
false_negative_rate: 0.5385

Processing file: (b'DC05.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of positive samples: 5
No query samples left after selecting support set. Skipping this file.

Processing file: (b'DC11.wav',)
Segments shape: torch.Size([27, 1, 128, 86])
Labels shape: torch.Size([27])
Number of positive samples: 27


Evaluating:  49%|███████████████▏               | 28/57 [00:08<00:10,  2.69it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([22])
Predictions shape: torch.Size([22])
Metrics for file (b'DC11.wav',):
f1: 0.6250
precision: 1.0000
recall: 0.4545
false_positive_rate: 0.0000
false_negative_rate: 0.5455

Processing file: (b'DC10.wav',)
Segments shape: torch.Size([35, 1, 128, 86])
Labels shape: torch.Size([35])
Number of positive samples: 35


Evaluating:  51%|███████████████▊               | 29/57 [00:10<00:15,  1.82it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([30])
Predictions shape: torch.Size([30])
Metrics for file (b'DC10.wav',):
f1: 0.6364
precision: 1.0000
recall: 0.4667
false_positive_rate: 0.0000
false_negative_rate: 0.5333

Processing file: (b'DC04.wav',)
Segments shape: torch.Size([2, 1, 128, 86])
Labels shape: torch.Size([2])
Number of positive samples: 2
Not enough positive samples (less than 5). Skipping this file.

Processing file: (b'DC08.wav',)
Segments shape: torch.Size([22, 1, 128, 86])
Labels shape: torch.Size([22])
Number of positive samples: 22


Evaluating:  54%|████████████████▊              | 31/57 [00:11<00:13,  1.98it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([17])
Predictions shape: torch.Size([17])
Metrics for file (b'DC08.wav',):
f1: 0.6400
precision: 1.0000
recall: 0.4706
false_positive_rate: 0.0000
false_negative_rate: 0.5294

Processing file: (b'cw1300_DCASE.wav',)
Segments shape: torch.Size([13, 1, 128, 86])
Labels shape: torch.Size([13])
Number of positive samples: 13


Evaluating:  56%|█████████████████▍             | 32/57 [00:11<00:11,  2.12it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([8])
Predictions shape: torch.Size([8])
Metrics for file (b'cw1300_DCASE.wav',):
f1: 0.5455
precision: 1.0000
recall: 0.3750
false_positive_rate: 0.0000
false_negative_rate: 0.6250

Processing file: (b'cw1345_DCASE.wav',)
Segments shape: torch.Size([13, 1, 128, 86])
Labels shape: torch.Size([13])
Number of positive samples: 13


Evaluating:  58%|█████████████████▉             | 33/57 [00:11<00:10,  2.25it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([8])
Predictions shape: torch.Size([8])
Metrics for file (b'cw1345_DCASE.wav',):
f1: 0.5455
precision: 1.0000
recall: 0.3750
false_positive_rate: 0.0000
false_negative_rate: 0.6250

Processing file: (b'cw1315_DCASE.wav',)
Segments shape: torch.Size([23, 1, 128, 86])
Labels shape: torch.Size([23])
Number of positive samples: 23


Evaluating:  60%|██████████████████▍            | 34/57 [00:12<00:12,  1.88it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([18])
Predictions shape: torch.Size([18])
Metrics for file (b'cw1315_DCASE.wav',):
f1: 0.6154
precision: 1.0000
recall: 0.4444
false_positive_rate: 0.0000
false_negative_rate: 0.5556

Processing file: (b'E4_49_20190804_0150.wav',)
Segments shape: torch.Size([1, 1, 128, 86])
Labels shape: torch.Size([1])
Number of positive samples: 1
Not enough positive samples (less than 5). Skipping this file.

Processing file: (b'cw1330_DCASE.wav',)
Segments shape: torch.Size([13, 1, 128, 86])
Labels shape: torch.Size([13])
Number of positive samples: 13


Evaluating:  63%|███████████████████▌           | 36/57 [00:12<00:08,  2.60it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([8])
Predictions shape: torch.Size([8])
Metrics for file (b'cw1330_DCASE.wav',):
f1: 0.5455
precision: 1.0000
recall: 0.3750
false_positive_rate: 0.0000
false_negative_rate: 0.6250

Processing file: (b'CHE_F09.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of positive samples: 5
No query samples left after selecting support set. Skipping this file.

Processing file: (b'CHE_F08.wav',)
Segments shape: torch.Size([16, 1, 128, 86])
Labels shape: torch.Size([16])
Number of positive samples: 16


Evaluating:  67%|████████████████████▋          | 38/57 [00:13<00:06,  3.11it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([11])
Predictions shape: torch.Size([11])
Metrics for file (b'CHE_F08.wav',):
f1: 0.6250
precision: 1.0000
recall: 0.4545
false_positive_rate: 0.0000
false_negative_rate: 0.5455

Processing file: (b'CHE_F18.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of positive samples: 5
No query samples left after selecting support set. Skipping this file.

Processing file: (b'CHE_F19.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of positive samples: 5
No query samples left after selecting support set. Skipping this file.

Processing file: (b'CHE_F14.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of positive samples: 5
No query samples left after selecting support set. Skipping this file.

Processing file: (b'CHE_F15.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of posit

Evaluating: 100%|███████████████████████████████| 57/57 [00:13<00:00,  4.18it/s]

Prototype shape: torch.Size([1024])
Distances shape: torch.Size([])
Predictions shape: torch.Size([])
Error processing file (b'CHE_F17.wav',): Singleton array array(0., dtype=float32) cannot be considered a valid collection.

Processing file: (b'CHE_F03.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of positive samples: 5
No query samples left after selecting support set. Skipping this file.

Processing file: (b'CHE_F02.wav',)
Segments shape: torch.Size([4, 1, 128, 86])
Labels shape: torch.Size([4])
Number of positive samples: 4
Not enough positive samples (less than 5). Skipping this file.

Processing file: (b'E1_208_20190712_0150.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of positive samples: 5
No query samples left after selecting support set. Skipping this file.

Processing file: (b'CHE_F12.wav',)
Segments shape: torch.Size([5, 1, 128, 86])
Labels shape: torch.Size([5])
Number of positive samples:


