In [78]:
import os, cv2, random, argparse
import numpy as np
import pandas as pd
from helpers import corresponding_label_to_video, get_rtf_text
from collections import deque


In [79]:

class_to_idx = {'No event': 0, 'Fire': 1, 'Smoke': 2}
CONTROLLER_FRAMES = 1
TYPE  = 'baseline_aug_diff'  # Change to 'train' or 'test' as needed
OUTPUT_DIR = f'E:/2025_ICIAP_FIRE/output/{TYPE}/results'

PATH_TO_CSV = os.path.join(OUTPUT_DIR, f'results_{CONTROLLER_FRAMES}.csv')
path_to_videos = 'E:/2025_ICIAP_FIRE/dataset'
path_to_labels = 'E:/2025_ICIAP_FIRE/GT'
#header = ['video_name', 'current_second', 'No_event', 'Fire', 'Smoke']
df = pd.read_csv(PATH_TO_CSV)
print(df.head())


                                  video_name  current_second  No_event  Fire  \
0  E:/2025_ICIAP_FIRE/dataset\1\Video145.mp4              15         1     0   
1  E:/2025_ICIAP_FIRE/dataset\1\Video145.mp4              31         1     0   
2  E:/2025_ICIAP_FIRE/dataset\1\Video145.mp4              47         1     0   
3  E:/2025_ICIAP_FIRE/dataset\1\Video145.mp4              63         1     0   
4  E:/2025_ICIAP_FIRE/dataset\1\Video145.mp4              79         1     0   

   Smoke  
0      0  
1      0  
2      0  
3      0  
4      0  


In [80]:
import numpy as np

def get_tp_fp_fn(g_list, p_list, delta_t=5, t_max=60):
    '''
    Returns sets/lists of true positives (tp_idx), false positives (fp_idx), and false negatives (fn_idx) indices.
    '''
    tp_idx = []
    fp_idx = []
    fn_idx = []

    for idx, (g, p) in enumerate(zip(g_list, p_list)):
        # Fire video
        if g is not None:
            if p is not None:
                dt_min = max(0, g - delta_t)
                dt_max = g + t_max
                if dt_min <= p <= dt_max:
                    tp_idx.append(idx)
                elif p < dt_min:
                    fp_idx.append(idx)
            else:
                fn_idx.append(idx)
        # Non-fire video
        else:
            if p is not None:
                fp_idx.append(idx)
    return tp_idx, fp_idx, fn_idx

def metric_precision(tp, fp):
    return len(tp) / (len(tp) + len(fp)) if (len(tp) + len(fp)) > 0 else 0.0

def metric_recall(tp, fn):
    return len(tp) / (len(tp) + len(fn)) if (len(tp) + len(fn)) > 0 else 0.0

def average_notification_delay(tp_idx, g_list, p_list):
    if not tp_idx:
        return 0.0
    delays = [abs(p_list[i] - g_list[i]) for i in tp_idx]
    return np.mean(delays)

def normalized_delay(D):
    # Dn = max(0, 60 - D) / 60
    return max(0, 60 - D) / 60.0

def pfr(processing_times):
    # processing_times: list of per-video processing time in seconds
    # PFR = 1 / (sum t_i / N)
    if not processing_times:
        return 0.0
    return 1.0 / (np.mean(processing_times))

def pfr_delta(pfr, pfr_target):
    # PFR_delta = max(0, pfr_target/pfr - 1)
    return max(0.0, (pfr_target / pfr) - 1) if pfr > 0 else float('inf')

def mem_delta(mem, mem_target):
    # mem in GB
    return max(0.0, (mem / mem_target) - 1) if mem_target > 0 else float('inf')

def fds(P, R, Dn, pfr_delta_val, mem_delta_val):
    return (P * R * Dn) / ((1 + pfr_delta_val) * (1 + mem_delta_val))

In [81]:
def load_and_split(videos_path, labels_path, seed, split_ratio=0.8):
    vids, lbls = corresponding_label_to_video(videos_path, labels_path)
    combined = list(zip(vids, lbls))
    random.seed(seed); random.shuffle(combined)
    vids, lbls = zip(*combined)
    idx = int(len(vids) * split_ratio)
    return vids[:idx], lbls[:idx], vids[idx:], lbls[idx:]

train_v, train_l, val_v, val_l = load_and_split(path_to_videos, path_to_labels, 42)
#for each label
g_list = []
p_list = []
for vid, lbl in zip(val_v, val_l):
    timestart, cls_event = get_rtf_text(lbl)
    if "No event" in cls_event:
        g_list.append(None)
    else:
        g_list.append(int(timestart))

    #get all predictions for this video
    df_vid = df[df['video_name'] == vid]

    DIM_KERNEL = 3  # you can change this as needed
    # Smooth 'no_event' to help detect transitions more robustly
    no_event_smoothed = pd.Series(df_vid['No_event']).rolling(window=DIM_KERNEL, center=True, min_periods=1).mean()
    
    fire = df_vid['Fire'].values
    smoke = df_vid['Smoke'].values
    current_second = df_vid['current_second'].values
    no_event_smoothed = no_event_smoothed.values

    #get first local minimum of no_event_smoothed
    first_local_min = np.argmin(no_event_smoothed)
    #print(f"First local minimum for video {vid}: {first_local_min}, value: {no_event_smoothed[first_local_min]}")
    if first_local_min == 0 and no_event_smoothed[first_local_min] > 0.5:
        # If the first local minimum is at the start and has a high value, we assume no event
        p_list.append(None)
    else:
        p_list.append(int(current_second[first_local_min])-1)
    #print(f"Predicted time for video {vid} local minimum: {first_local_min}, value: {no_event_smoothed[first_local_min]}, predicted second: {p_list[-1]}")
        

                
        
                


Found 353 video files and 353 label files.


In [82]:
print("Ground truth list:", g_list)
print("Prediction list:", p_list)

print("length of ground truth list:", len(g_list))
print("length of prediction list:", len(p_list))

Ground truth list: [8, 4, 0, 0, None, 0, 1, 0, 4, None, 0, 0, 0, 0, None, None, 4, 1, 0, 0, 0, None, 0, None, 1, 240, None, 0, 1, 0, 0, None, 0, 38, 3, 0, 70, None, 0, 0, 0, None, None, 0, 0, 0, 0, 0, 1, 570, 1, None, 24, 0, 0, 0, None, None, None, 0, 0, None, 1, None, None, 0, 0, 0, None, None, 0]
Prediction list: [None, 14, None, 46, None, 62, 14, 158, 14, 14, 15, 15, 14, 15, None, 14, 14, 14, 62, 15, 14, None, 14, 10, 14, None, 15, 14, 14, 174, 14, None, 94, 14, 14, 190, 174, 14, 46, 14, 14, None, None, 7, 14, 46, 14, 15, 14, None, 13, None, 46, 9, 15, 14, None, None, None, 15, 15, None, 10, None, 6, 14, 7, 14, 14, 15, 11]
length of ground truth list: 71
length of prediction list: 71


In [83]:



tp, fp, fn = get_tp_fp_fn(g_list, p_list)

P = metric_precision(tp, fp)
R = metric_recall(tp, fn)
D = average_notification_delay(tp, g_list, p_list)
Dn = normalized_delay(D)

#PFR is 80 if CONTROLLER_FRAMES == 1 20 if CONTROLLER_FRAMES == 4 or 5 if CONTROLLER_FRAMES == 16
PFR = 65.0 if CONTROLLER_FRAMES == 1 else 13.5 if CONTROLLER_FRAMES == 4  else 3.2

PFR_TARGET = PFR / 2.3
MEM = 4.0
MEM_TARGET = 4.0

PFR_DELTA = pfr_delta(PFR, PFR_TARGET)
MEM_DELTA = mem_delta(MEM, MEM_TARGET)

FDS = fds(P, R, Dn, PFR_DELTA, MEM_DELTA)


In [84]:

print('Precision:', P)
print('Recall:', R)
print('Average delay:', D)
print('Normalized delay:', Dn)
print('FDS:', FDS)

Precision: 0.8125
Recall: 0.9069767441860465
Average delay: 15.615384615384615
Normalized delay: 0.7397435897435898
FDS: 0.5451308139534884
