<a href="https://colab.research.google.com/github/victorvalente/HarmonicOscillatorPSO/blob/main/HarmonicOscillatorPSO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import seaborn as sns
from enum import Enum, auto
import os
import glob
import json
from collections import defaultdict, deque
import time
from scipy.fft import rfft, rfftfreq # For frequency domain features
import traceback # For detailed error printing

# --- Configuration & Enums ---

class SensorType(Enum):
    """Kinds of sensor stream handled in this file."""

    ACCELEROMETER = 1
    PIR = 2
    RSSI = 3
    VIDEO = 4

class ActivityType(Enum):
    """Coarse category assigned to each activity label in SPHERE_ACTIVITIES."""

    AMBULATION = 1
    POSTURE = 2
    TRANSITION = 3

class Location(Enum):
    """Location labels; UNKNOWN is the catch-all member."""

    BATH = 1
    BED1 = 2
    BED2 = 3
    HALL = 4
    KITCHEN = 5
    LIVING = 6
    STAIRS = 7
    STUDY = 8
    TOILET = 9
    UNKNOWN = 10

# Activity label -> coarse category. Labels are grouped per category; the
# resulting insertion order (ambulation, posture, transition) is what
# SPHERE_ACTIVITY_NAMES exposes below.
SPHERE_ACTIVITIES = {
    **dict.fromkeys(
        ('a_ascend', 'a_descend', 'a_jump', 'a_loadwalk', 'a_walk'),
        ActivityType.AMBULATION,
    ),
    **dict.fromkeys(
        ('p_bent', 'p_kneel', 'p_lie', 'p_sit', 'p_squat', 'p_stand'),
        ActivityType.POSTURE,
    ),
    **dict.fromkeys(
        (
            't_bend', 't_kneel_stand', 't_lie_sit', 't_sit_lie', 't_sit_stand',
            't_stand_kneel', 't_stand_sit', 't_straighten', 't_turn',
        ),
        ActivityType.TRANSITION,
    ),
}
# Ordered list of every activity label.
SPHERE_ACTIVITY_NAMES = [*SPHERE_ACTIVITIES]

# Lower-case location names; each has an upper-case counterpart in Location.
SPHERE_LOCATIONS = [
    'bath', 'bed1', 'bed2', 'hall', 'kitchen',
    'living', 'stairs', 'study', 'toilet',
]
# Name -> Location member. Note the extra 'UNKNOWN' key is upper-case, unlike
# the location names.
SPHERE_LOCATION_ENUM_MAP = {
    **{room: Location[room.upper()] for room in SPHERE_LOCATIONS},
    'UNKNOWN': Location.UNKNOWN,
}

# --- Data Handling Utilities (Revised v4 - Step 1) ---

def _locate_subject_dir(data_path, subject_id):
    """Return the first existing subject folder (train/, test/, then base path), else None."""
    for candidate in (
        os.path.join(data_path, 'train', subject_id),
        os.path.join(data_path, 'test', subject_id),
        os.path.join(data_path, subject_id),
    ):
        if os.path.isdir(candidate):
            print(f"  Using subject data folder: {candidate}")
            return candidate
    return None


def _load_annotation_frame(data_path, subject_id, subject_dir):
    """Find, read and concatenate annotation CSVs.

    Returns ``(frame, first_path)`` on success, or ``None`` when no file was
    found or none could be read.
    """
    csv_paths = []
    if subject_dir:
        # Preferred layout: annotations_*.csv inside the subject's own folder.
        csv_paths = sorted(glob.glob(os.path.join(subject_dir, 'annotations_*.csv')))

    if not csv_paths:
        print(f"  Annotations not found in subject folder, checking other locations...")
        fallbacks = [
            os.path.join(data_path, 'train', f'{subject_id}_annotations.csv'),
            os.path.join(data_path, 'train', 'annotations.csv'),
            os.path.join(data_path, 'annotations', f'{subject_id}.csv'),
            os.path.join(data_path, 'annotations.csv'),
        ]
        # Only the first existing fallback is used.
        first_hit = next((p for p in fallbacks if os.path.exists(p)), None)
        if first_hit is not None:
            csv_paths = [first_hit]
            print(f"  Found potential annotation file at: {first_hit}")

    if not csv_paths:
        print(f"Error: No annotation files found for subject {subject_id} in subject folder or common locations.")
        return None

    print(f"  Found annotation files: { [os.path.basename(f) for f in csv_paths] }")

    parts = []
    for csv_path in csv_paths:
        try:
            part = pd.read_csv(csv_path)
            parts.append(part)
            print(f"    Successfully loaded {os.path.basename(csv_path)}")
        except Exception as e:
            # An unreadable file is skipped; the remaining ones are still used.
            print(f"Warning: Could not load or read annotation file {csv_path}: {e}")

    if not parts:
        print(f"Error: Failed to load any valid data from found annotation files.")
        return None

    merged = pd.concat(parts, ignore_index=True)
    print(f"  Concatenated {len(csv_paths)} annotation files into one DataFrame.")
    return merged, csv_paths[0]


def _load_sensor_frame(subject_dir, kind):
    """Pick and read one sensor CSV for ``kind`` from ``subject_dir``.

    Returns ``(frame, path)`` on success, or ``None`` for an unknown kind, no
    matching file, or a read failure.
    """
    patterns = {
        'accelerometer': ['acceleration.csv', 'acc*.csv'],
        'pir': ['pir.csv'],
        'rssi': ['rssi*.csv'],
        'video_features': ['video_features*.csv'],
    }.get(kind)
    if not patterns:
        print(f"Error: Unknown data_type '{kind}' for sensor loading.")
        return None

    matches = [
        hit
        for pattern in patterns
        for hit in glob.glob(os.path.join(subject_dir, pattern))
    ]
    if not matches:
        print(f"Warning: No file found matching patterns '{patterns}' for {kind} in {subject_dir}")
        return None

    # Prefer the exact first pattern; otherwise take the alphabetically first match.
    preferred = os.path.join(subject_dir, patterns[0])
    if preferred in matches:
        chosen = preferred
    else:
        chosen = min(matches)
        print(f"  Note: Using first matched file: {os.path.basename(chosen)}")

    print(f"  Loading data from: {chosen}")
    try:
        return pd.read_csv(chosen), chosen
    except Exception as e:
        print(f"Error loading sensor data file {chosen}: {e}")
        traceback.print_exc()
        return None


def _index_frame_by_time(df, kind, file_path):
    """Convert the time column(s) of ``df`` to datetimes and index by time.

    Returns the time-indexed, time-sorted frame, or ``None`` when no usable
    time column exists. Numeric times are interpreted as seconds.
    """
    timestamp_cols_to_try = ['t', 'timestamp', 'time']
    time_col = next((c for c in timestamp_cols_to_try if c in df.columns), None)

    if time_col is not None:
        print(f"  Found timestamp column: '{time_col}'")
    elif kind in ('annotations', 'pir'):
        # Interval-style files: index by 'start' and keep 'end' as a column.
        if not ('start' in df.columns and 'end' in df.columns):
            print(f"Error: {kind.capitalize()} file lacks 'start'/'end' and standard timestamp columns.")
            return None
        print(f"  Using 'start' column as timestamp index for {kind}.")
        df['start'] = pd.to_datetime(df['start'], unit='s')  # <<< VERIFY UNIT
        df['end'] = pd.to_datetime(df['end'], unit='s')      # <<< VERIFY UNIT
        df = df.set_index('start').sort_index()
        print(f"  ...Processed {len(df)} {kind} rows.")
        return df
    else:
        print(f"Error: Could not find a suitable timestamp column {timestamp_cols_to_try} in {file_path}")
        return None

    # Normalise the column name so downstream code can rely on 'timestamp'.
    if time_col != 'timestamp':
        df = df.rename(columns={time_col: 'timestamp'})

    print(f"  Converting timestamp column (assuming units are seconds)...")  # <<< VERIFY UNIT
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
    df = df.set_index('timestamp').sort_index()

    print(f"  ...Loaded {len(df)} rows.")
    return df


def load_sphere_data(data_path, subject_id, data_type):
    """Load one sensor stream or the annotations for a subject.

    The subject folder is searched under ``<data_path>/train``,
    ``<data_path>/test`` and ``<data_path>`` itself, in that order.

    Args:
        data_path: Base directory of the dataset.
        subject_id: Name of the subject's folder.
        data_type: ``'annotations'`` or one of ``'accelerometer'``, ``'pir'``,
            ``'rssi'``, ``'video_features'`` (case-insensitive).

    Returns:
        A DataFrame indexed and sorted by time (``'timestamp'``, or ``'start'``
        for interval-style annotation/PIR files), or ``None`` on any failure.
        Failures are reported with ``print`` rather than raised.
    """
    print(f"Attempting to load {data_type} for subject {subject_id} from base path: {data_path}")
    kind = data_type.lower()
    wants_annotations = kind == 'annotations'

    subject_dir = _locate_subject_dir(data_path, subject_id)
    if subject_dir is None:
        if not wants_annotations:
            # Sensor files are only looked up inside the subject folder.
            print(f"Error: Could not find subject folder for {subject_id} in train/, test/, or base directory relative to {data_path}.")
            return None
        # Annotations may still be found in the fallback locations.
        print("  Subject folder not found, checking for annotations in base/train paths...")

    if wants_annotations:
        loaded = _load_annotation_frame(data_path, subject_id, subject_dir)
    else:
        loaded = _load_sensor_frame(subject_dir, kind)
    if loaded is None:
        return None
    df, file_path = loaded

    if df is None:
        print(f"Error: DataFrame is None after loading attempt for {kind}.")
        return None

    try:
        return _index_frame_by_time(df, kind, file_path)
    except Exception as e:
        print(f"Error processing DataFrame for {file_path if file_path else data_type}: {e}")
        traceback.print_exc()
        return None


def _pir_intervals_to_presence(df, resample_freq):
    """Expand PIR activation intervals into a 0/1 presence grid.

    ``df`` is indexed by interval start and has an ``'end'`` column. When it
    also has a ``'name'`` column whose values are entries of
    ``SPHERE_LOCATIONS``, each interval ``[start, end)`` sets that location's
    column to 1 on the grid. Rows with an unknown name or a missing bound are
    ignored.

    The original non-``'end'`` columns are kept (zero-filled) so the column
    set stays a superset of what earlier versions produced; one column per
    known location is added when missing.
    """
    grid = pd.date_range(start=df.index.min(), end=df['end'].max(), freq=resample_freq)
    columns = list(df.columns.drop('end', errors='ignore'))
    has_names = 'name' in df.columns
    if has_names:
        columns += [loc for loc in SPHERE_LOCATIONS if loc not in columns]

    presence = np.zeros((len(grid), len(columns)), dtype='int64')
    if has_names:
        column_of = {loc: columns.index(loc) for loc in SPHERE_LOCATIONS}
        for start, end, name in zip(df.index, df['end'], df['name']):
            position = column_of.get(name)
            if position is None or pd.isna(start) or pd.isna(end):
                continue
            # Grid is sorted, so the half-open interval [start, end) maps to a
            # contiguous slice found by binary search.
            first = grid.searchsorted(start, side='left')
            stop = grid.searchsorted(end, side='left')
            presence[first:stop, position] = 1

    return pd.DataFrame(presence, index=grid, columns=columns)


def synchronize_data(data_streams, resample_freq='50ms'):
    """Resample several sensor streams to one frequency and merge them.

    Args:
        data_streams: Mapping of ``SensorType`` member -> DataFrame with a
            ``DatetimeIndex``. ``None``/empty frames and frames without a
            ``DatetimeIndex`` are skipped with a message.
        resample_freq: Pandas offset string for the common grid.

    Returns:
        One DataFrame holding every stream's columns, each suffixed with
        ``_<SENSOR NAME>``, outer-joined on time and gap-filled (forward fill,
        then backward fill, then 0). Returns ``None`` when no stream could be
        processed.

    PIR streams (interval rows with an ``'end'`` column) become 0/1 presence
    columns per location; every other stream is mean-resampled over its
    numeric/boolean columns.
    """
    print(f"Synchronizing data streams to {resample_freq} frequency...")
    processed_streams = {}

    for sensor_type, df in data_streams.items():
        if df is None or df.empty:
            print(f"  Skipping empty stream: {sensor_type.name}")
            continue

        print(f"  Processing {sensor_type.name}...")
        if not isinstance(df.index, pd.DatetimeIndex):
            print(f"Error: DataFrame for {sensor_type.name} does not have a DatetimeIndex.")
            continue

        try:
            if sensor_type == SensorType.PIR:
                if 'end' not in df.columns:
                    print(f"Warning: Cannot resample PIR data for {sensor_type.name} without 'end' column. Skipping.")
                    continue
                resampled_df = _pir_intervals_to_presence(df, resample_freq)
            else:
                # Averaging is only defined for numeric data; restricting the
                # columns up front keeps a stray text column from failing the
                # whole stream on recent pandas versions.
                numeric_df = df.select_dtypes(include=['number', 'bool'])
                resampled_df = numeric_df.resample(resample_freq).mean()

            # Suffix columns so streams stay distinguishable after merging.
            resampled_df.columns = [f"{col}_{sensor_type.name}" for col in resampled_df.columns]
            processed_streams[sensor_type] = resampled_df
            print(f"    Resampled {sensor_type.name} to {len(resampled_df)} rows.")

        except Exception as e:
            # Best effort: one failing stream must not prevent merging the rest.
            print(f"Error resampling {sensor_type.name}: {e}")
            traceback.print_exc()

    print("  Merging streams...")
    combined_df = None
    for sensor_type, resampled_df in processed_streams.items():
        if combined_df is None:
            combined_df = resampled_df
        else:
            combined_df = pd.merge(combined_df, resampled_df, left_index=True, right_index=True, how='outer')
            print(f"    Merged {sensor_type.name}, shape now: {combined_df.shape}")

    if combined_df is None:
        print("Error: No data streams could be processed and merged.")
        return None

    if combined_df.empty:
        print("Warning: Combined DataFrame is empty after merging.")
    else:
        print("  Forward-filling missing values...")
        # ffill covers gaps, bfill covers leading NaNs, 0 covers all-NaN columns.
        combined_df = combined_df.ffill()
        combined_df = combined_df.bfill()
        combined_df = combined_df.fillna(0)
        print(f"  Final synchronized shape: {combined_df.shape}")

    return combined_df

def create_time_windows(sync_data, window_size_sec, overlap_sec):
    """Yield sliding time windows over a synchronized DataFrame.

    Args:
        sync_data: DataFrame with a datetime index whose columns carry a
            ``_<SENSOR NAME>`` suffix (as produced by ``synchronize_data``).
        window_size_sec: Window length in seconds.
        overlap_sec: Overlap between consecutive windows in seconds; must be
            smaller than ``window_size_sec``.

    Yields:
        ``{'timestamp': window_end, 'data': {SensorType: DataFrame}}`` for each
        non-empty window ``[start, start + window)``. Per-sensor frames have
        the suffix removed from their column names; the PIR frame is reduced
        to exactly the ``SPHERE_LOCATIONS`` columns (missing ones filled with
        0).

    Invalid input is reported with ``print`` and produces no windows.
    """
    if sync_data is None or sync_data.empty:
        print("Error: Cannot create windows from empty synchronized data.")
        return

    print(f"Creating time windows (size: {window_size_sec}s, overlap: {overlap_sec}s)...")
    window_delta = pd.Timedelta(seconds=window_size_sec)
    step_delta = pd.Timedelta(seconds=window_size_sec - overlap_sec)

    if step_delta.total_seconds() <= 0:
        print("Error: Overlap cannot be >= window size.")
        return

    # The column -> sensor grouping is the same for every window, so resolve it
    # once. Only the trailing suffix is stripped: str.replace would also remove
    # the same text inside the original name (e.g. 'device_RSSI_RSSI').
    columns_by_sensor = {}
    for sensor_type in SensorType:
        suffix = f"_{sensor_type.name}"
        suffixed = [col for col in sync_data.columns if col.endswith(suffix)]
        if suffixed:
            columns_by_sensor[sensor_type] = (suffixed, [col[:-len(suffix)] for col in suffixed])

    index = sync_data.index
    end_time = index.max()
    current_time = index.min()
    window_count = 0

    while current_time + window_delta <= end_time:
        window_end = current_time + window_delta
        window_df = sync_data[(index >= current_time) & (index < window_end)]

        if not window_df.empty:
            window_chunks = {}
            for sensor_type, (suffixed, plain) in columns_by_sensor.items():
                sensor_chunk = window_df[suffixed].copy()
                sensor_chunk.columns = plain
                if sensor_type == SensorType.PIR:
                    # PIR consumers expect one column per known location.
                    for loc in SPHERE_LOCATIONS:
                        if loc not in sensor_chunk.columns:
                            sensor_chunk[loc] = 0
                    sensor_chunk = sensor_chunk[SPHERE_LOCATIONS]
                window_chunks[sensor_type] = sensor_chunk

            yield {'timestamp': window_end, 'data': window_chunks}
            window_count += 1

        current_time += step_delta

    print(f"Generated {window_count} windows.")


# --- Agent Class (Steps 2, 3, 4, 9) ---

class Agent:
    """An agent with unique sensory capabilities analyzing sensor data."""

    def __init__(self, agent_id, sensor_type, feature_extractors=None, learning_rate=0.1, history_len=10):
        """Set up an agent for one sensor type.

        Args:
            agent_id: Identifier stored as ``self.id``.
            sensor_type: Sensor this agent interprets; selects the default
                feature extractors.
            feature_extractors: Optional ``{name: callable}`` table; when
                falsy the defaults for ``sensor_type`` are used.
            learning_rate: Stored as ``self.learning_rate``.
            history_len: Maximum number of past readings kept in ``self.history``.
        """
        labels = SPHERE_ACTIVITY_NAMES

        # Identity and configuration.
        self.id = agent_id
        self.sensor_type = sensor_type
        self.learning_rate = learning_rate
        self.history = deque(maxlen=history_len)

        # Per-activity state: every label starts at 0.5, knowledge is uniform.
        self.confidence = 0.5
        self.activity_accuracy = dict.fromkeys(labels, 0.5)
        self.knowledge = {label: 1.0 / len(labels) for label in labels}
        self.activity_specialization = dict.fromkeys(labels, 0.5)
        self.transition_model = {}

        if feature_extractors:
            self.feature_extractors = feature_extractors
        else:
            self.feature_extractors = self._default_feature_extractors()
        self.last_timestamp = None

    def _default_feature_extractors(self):
        """Build the default ``{feature_name: callable}`` table for this sensor type (Step 3).

        Each callable takes one window of data and returns a NumPy array (or a
        scalar for the video ``y_center_mean``). Accelerometer and RSSI
        extractors receive a 2-D array (rows = samples); PIR and VIDEO
        extractors receive a DataFrame. An unknown sensor type gets a single
        pass-through ``'identity'`` extractor.

        The sampling rate used by the FFT features is derived from the
        module-level ``RESAMPLE_FREQ`` when it is defined, otherwise 20 Hz is
        assumed and a warning is printed.
        """
        global RESAMPLE_FREQ
        try:
            sampling_rate = 1.0 / pd.Timedelta(RESAMPLE_FREQ).total_seconds()
        except NameError:
            sampling_rate = 20.0
            print(f"Warning: Assuming sampling rate {sampling_rate} Hz.")

        def complete_rows(x):
            """Rows of ``x`` that contain no NaN in any column."""
            return x[~np.isnan(x).any(axis=1)]

        def accel_range(x):
            # Guard on the rows that survive NaN filtering: np.ptp raises on an
            # empty array, which happens when every row is partly NaN.
            rows = complete_rows(x)
            return np.ptp(rows, axis=0) if rows.shape[0] > 0 else np.zeros(3)

        def accel_iqr(x):
            # Same guard as accel_range; percentiles of zero rows are undefined.
            rows = complete_rows(x)
            if rows.shape[0] == 0:
                return np.zeros(3)
            return np.subtract(*np.nanpercentile(rows, [75, 25], axis=0))

        if self.sensor_type == SensorType.ACCELEROMETER:
            return {
                'mean': lambda x: np.nanmean(x, axis=0) if x.shape[0] > 0 else np.zeros(3),
                'std': lambda x: np.nanstd(x, axis=0) if x.shape[0] > 0 else np.zeros(3),
                'max': lambda x: np.nanmax(x, axis=0) if x.shape[0] > 0 else np.zeros(3),
                'min': lambda x: np.nanmin(x, axis=0) if x.shape[0] > 0 else np.zeros(3),
                'range': accel_range,
                'median': lambda x: np.nanmedian(x, axis=0) if x.shape[0] > 0 else np.zeros(3),
                # Sum of squares over complete rows, normalised by the total row count.
                'energy': lambda x: np.sum(np.square(complete_rows(x)), axis=0) / x.shape[0] if x.shape[0] > 0 else np.zeros(3),
                'iqr': accel_iqr,
                'fft_energy': lambda x: self._calculate_fft_energy(x, sampling_rate) if x.shape[0] > 1 else np.zeros(3),
                'fft_peak_freq': lambda x: self._calculate_fft_peak_freq(x, sampling_rate) if x.shape[0] > 1 else np.zeros(3),
            }
        elif self.sensor_type == SensorType.PIR:
            # Input is expected to have one column per SPHERE_LOCATIONS entry
            # (see create_time_windows).
            n_locations = len(SPHERE_LOCATIONS)
            return {
                'sum': lambda x: np.nansum(x.values, axis=0) if x.shape[0] > 0 else np.zeros(n_locations),
                'any': lambda x: np.nanmax(x.values, axis=0).astype(int) if x.shape[0] > 0 else np.zeros(n_locations),
                'count': lambda x: np.nansum(x.values > 0, axis=0) if x.shape[0] > 0 else np.zeros(n_locations),
            }
        elif self.sensor_type == SensorType.RSSI:
            # Initial access-point count; only used for the empty-input fallbacks.
            num_aps_init = 4
            return {
                'mean': lambda x: np.nanmean(x, axis=0) if x.shape[0] > 0 else np.full(num_aps_init, -100.0),
                'std': lambda x: np.nanstd(x, axis=0) if x.shape[0] > 0 else np.zeros(num_aps_init),
                'max': lambda x: np.nanmax(x, axis=0) if x.shape[0] > 0 and np.sum(~np.isnan(x)) > 0 else np.full(num_aps_init, -100.0),
                'min': lambda x: np.nanmin(x, axis=0) if x.shape[0] > 0 and np.sum(~np.isnan(x)) > 0 else np.full(num_aps_init, -100.0),
                'diff': lambda x: np.nanmax(x, axis=0) - np.nanmin(x, axis=0) if x.shape[0] > 0 and np.sum(~np.isnan(x)) > 1 else np.zeros(num_aps_init),
                'stability': lambda x: 1.0 / (1.0 + np.nanstd(x, axis=0)) if x.shape[0] > 0 else np.zeros(num_aps_init)
            }
        elif self.sensor_type == SensorType.VIDEO:
            return {
                'center_mean': lambda x: np.nanmean(x[['centre_2d_x', 'centre_2d_y']].values, axis=0) if x.shape[0] > 0 else np.zeros(2),
                'center_std': lambda x: np.nanstd(x[['centre_2d_x', 'centre_2d_y']].values, axis=0) if x.shape[0] > 0 else np.zeros(2),
                'area_mean': lambda x: self._calculate_area(x, 'mean') if x.shape[0] > 0 else np.zeros(1),
                'area_std': lambda x: self._calculate_area(x, 'std') if x.shape[0] > 0 else np.zeros(1),
                'aspect_ratio_mean': lambda x: self._calculate_aspect_ratio(x, 'mean') if x.shape[0] > 0 else np.zeros(1),
                'movement_mean': lambda x: self._calculate_movement(x, 'mean') if x.shape[0] > 1 else np.zeros(1),
                'height_mean': lambda x: self._calculate_height(x, 'mean') if x.shape[0] > 0 else np.zeros(1),
                'y_center_mean': lambda x: np.nanmean(x['centre_2d_y'].values) if x.shape[0] > 0 else np.zeros(1),
            }
        else:
            return {'identity': lambda x: x}

    # --- Feature Calculation Helpers ---
    # The helpers below (_calculate_fft_energy, _calculate_fft_peak_freq, _calculate_area,
    # _calculate_aspect_ratio, _calculate_movement, _calculate_height) back the default feature extractors.
    def _calculate_fft_energy(self, data, sampling_rate):
        energies = []
        n_samples = data.shape[0]
        if n_samples == 0: return np.zeros(data.shape[1])
        for i in range(data.shape[1]):
            col_data = data[:, i]; valid_data = col_data[~np.isnan(col_data)]; n_valid = len(valid_data)
            if n_valid < 2: energies.append(0); continue
            fft_values = rfft(valid_data); fft_power = np.abs(fft_values)**2 / n_valid; energies.append(np.sum(fft_power))
        return np.array(energies)

    def _calculate_fft_peak_freq(self, data, sampling_rate):
        peak_freqs = []
        n_samples = data.shape[0]
        if n_samples == 0: return np.zeros(data.shape[1])
        for i in range(data.shape[1]):
            col_data = data[:, i]; valid_data = col_data[~np.isnan(col_data)]; n_valid = len(valid_data)
            if n_valid < 2 or sampling_rate <= 0: peak_freqs.append(0); continue
            freqs = rfftfreq(n_valid, 1.0 / sampling_rate); fft_values = rfft(valid_data); fft_power = np.abs(fft_values)**2
            valid_freq_indices = np.arange(len(freqs))
            if len(fft_power) > 1 :
                 peak_index_rel = np.argmax(fft_power[1:]); peak_index_abs = valid_freq_indices[1:][peak_index_rel]
                 peak_freqs.append(freqs[peak_index_abs])
            else: peak_freqs.append(0)
        return np.array(peak_freqs)

    def _calculate_area(self, x, stat='mean'):
        required_cols = ['bb_2d_br_x', 'bb_2d_br_y', 'bb_2d_tl_x', 'bb_2d_tl_y']
        if not all(col in x.columns for col in required_cols): return np.zeros(1)
        width = x['bb_2d_br_x'] - x['bb_2d_tl_x']; height = x['bb_2d_br_y'] - x['bb_2d_tl_y']; area = width * height
        if area.isnull().all(): return np.zeros(1)
        if stat == 'mean': return np.array([np.nanmean(area)])
        if stat == 'std': return np.array([np.nanstd(area)])
        return np.zeros(1)

    def _calculate_aspect_ratio(self, x, stat='mean'):
        required_cols = ['bb_2d_br_x', 'bb_2d_br_y', 'bb_2d_tl_x', 'bb_2d_tl_y']
        if not all(col in x.columns for col in required_cols): return np.zeros(1)
        width = x['bb_2d_br_x'] - x['bb_2d_tl_x']; height = x['bb_2d_br_y'] - x['bb_2d_tl_y']
        valid_mask = (height != 0) & (~height.isnull()) & (~width.isnull())
        if not valid_mask.any(): return np.zeros(1)
        ratio = pd.Series(np.nan, index=x.index); ratio.loc[valid_mask] = width.loc[valid_mask] / height.loc[valid_mask]
        if ratio.isnull().all(): return np.zeros(1)
        if stat == 'mean': return np.array([np.nanmean(ratio)])
        if stat == 'std': return np.array([np.nanstd(ratio)])
        return np.zeros(1)

    def _calculate_movement(self, x, stat='mean'):
        required_cols = ['centre_2d_x', 'centre_2d_y']
        if not all(col in x.columns for col in required_cols) or x.shape[0] < 2: return np.zeros(1)
        center_x = x['centre_2d_x']; center_y = x['centre_2d_y']
        diff_x = center_x - center_x.shift(1); diff_y = center_y - center_y.shift(1)
        movement = np.sqrt(diff_x**2 + diff_y**2)
        if movement.isnull().all(): return np.zeros(1)
        if stat == 'mean': return np.array([np.nanmean(movement)])
        if stat == 'std': return np.array([np.nanstd(movement)])
        return np.zeros(1)

    def _calculate_height(self, x, stat='mean'):
        required_cols = ['bb_2d_br_y', 'bb_2d_tl_y']
        if not all(col in x.columns for col in required_cols): return np.zeros(1)
        height = x['bb_2d_br_y'] - x['bb_2d_tl_y']
        if height.isnull().all(): return np.zeros(1)
        if stat == 'mean': return np.array([np.nanmean(height)])
        if stat == 'std': return np.array([np.nanstd(height)])
        return np.zeros(1)

    # --- Core Processing Logic ---
    def process_reading(self, timestamp, data_chunk, ground_truth_activity=None):
        """Interpret one window of sensor data and optionally learn from its label.

        Args:
            timestamp: Time associated with the window; stored as
                ``self.last_timestamp``.
            data_chunk: Sensor data for the window (``None`` or an empty
                DataFrame counts as missing).
            ground_truth_activity: When given, ``update_knowledge`` is called
                with the extracted features and the prediction.

        Returns:
            A dict with the agent id, timestamp, activity scores, confidence,
            prediction and features. When data is missing or no features could
            be extracted, the scores are uniform, confidence is 0.1, the
            prediction is ``None`` and an ``'error'`` entry names the reason.
        """
        self.last_timestamp = timestamp

        def fallback(features, reason):
            # Uniform scores signal "no information" to whoever combines agents.
            uniform = {act: 1.0 / len(SPHERE_ACTIVITY_NAMES) for act in SPHERE_ACTIVITY_NAMES}
            return {'agent_id': self.id, 'timestamp': timestamp, 'activity_scores': uniform, 'confidence': 0.1, 'prediction': None, 'features': features, 'error': reason}

        chunk_missing = data_chunk is None or (isinstance(data_chunk, pd.DataFrame) and data_chunk.empty)
        if chunk_missing:
            return fallback(None, 'Missing data')

        features = self._extract_features(data_chunk)
        if not features:
            return fallback(features, 'Feature extraction failed')

        activity_scores = self._determine_activity_scores(features)

        predicted_activity = None
        if activity_scores:
            # Tiny random jitter breaks ties between equal scores.
            jittered = {k: v + np.random.rand()*1e-9 for k, v in activity_scores.items()}
            predicted_activity = max(jittered, key=jittered.get)

        if ground_truth_activity is not None:
            self.update_knowledge(features, activity_scores, predicted_activity, ground_truth_activity)

        self.history.append((timestamp, features, activity_scores, predicted_activity))

        return {
            'agent_id': self.id,
            'timestamp': timestamp,
            'sensor_type': self.sensor_type,
            'activity_scores': activity_scores,
            'confidence': self.confidence,
            'activity_specialization': self.activity_specialization,
            'prediction': predicted_activity,
            'features': features,
        }

    def _extract_features(self, data_chunk):
        """Run this agent's feature extractors on one window and flatten the result.

        The columns required depend on the sensor type: ``x``/``y``/``z`` for
        the accelerometer, every ``SPHERE_LOCATIONS`` name for PIR, any column
        starting with ``AP`` or containing ``rssi`` for RSSI, and the 2-D
        centre and bounding-box columns for video.

        Args:
            data_chunk: DataFrame holding the window for this sensor.

        Returns:
            Flat ``{feature_name: value}`` dict. Array-valued features of
            length > 1 are expanded to ``name_0``, ``name_1``, ... with NaN
            entries replaced by 0; features that are entirely NaN are dropped.
            An empty dict is returned when required columns are missing, the
            sensor type is not handled, or any extractor raises.
        """
        try:
            kind = self.sensor_type

            # Select the extractor input for this sensor, bailing out early
            # when the window lacks the columns it needs.
            if kind == SensorType.ACCELEROMETER:
                wanted = ['x', 'y', 'z']
                if not all(col in data_chunk.columns for col in wanted):
                    return {}
                payload = data_chunk[wanted].values
            elif kind == SensorType.PIR:
                wanted = SPHERE_LOCATIONS
                if not all(col in data_chunk.columns for col in wanted):
                    return {}
                payload = data_chunk[wanted]  # extractors expect a DataFrame here
            elif kind == SensorType.RSSI:
                rssi_columns = [col for col in data_chunk.columns if col.startswith('AP') or 'rssi' in col.lower()]
                if not rssi_columns:
                    return {}
                payload = data_chunk[rssi_columns].apply(pd.to_numeric, errors='coerce').values
                # The number of access points is only known once data arrives.
                self.feature_extractors = self._update_num_aps(payload.shape[1])
            elif kind == SensorType.VIDEO:
                wanted = ['centre_2d_x', 'centre_2d_y', 'bb_2d_br_x', 'bb_2d_br_y', 'bb_2d_tl_x', 'bb_2d_tl_y']
                if not all(col in data_chunk.columns for col in wanted):
                    return {}
                payload = data_chunk[wanted].apply(pd.to_numeric, errors='coerce')
            else:
                return {}

            raw = {name: extractor(payload) for name, extractor in self.feature_extractors.items()}

            flat_features = {}
            for name, value in raw.items():
                if not isinstance(value, (np.ndarray, list)):
                    if pd.notna(value):
                        flat_features[name] = value
                    continue

                if isinstance(value, np.ndarray):
                    valid_values = value[~np.isnan(value)]
                else:
                    valid_values = [v for v in value if pd.notna(v)]
                if len(valid_values) == 0:
                    continue  # nothing usable in this feature

                if len(value) == 1:
                    flat_features[name] = value[0] if pd.notna(value[0]) else 0
                elif len(value) > 1:
                    for i, v in enumerate(value):
                        flat_features[f"{name}_{i}"] = v if pd.notna(v) else 0
            return flat_features

        except Exception as e:
            print(f"Error extracting features for Agent {self.id} ({self.sensor_type}): {e}")
            traceback.print_exc()
            return {}

    def _update_num_aps(self, num_found):
        """Return RSSI feature extractors sized for ``num_found`` access points.

        The existing ``self.feature_extractors`` mapping is returned unchanged
        for non-RSSI agents, for ``num_found <= 0``, and when the current
        extractors already fall back to ``num_found``-long vectors. Otherwise a
        fresh mapping is built. ``self`` is not modified here; the caller is
        expected to assign the returned mapping.

        Args:
            num_found: Number of RSSI columns found in the current data chunk.

        Returns:
            dict mapping a feature name to a callable that takes a 2-D array
            and reduces it along axis 0 (one value per column).
        """
        if self.sensor_type != SensorType.RSSI or num_found <= 0:
            return self.feature_extractors

        num_aps = num_found
        # Probe with a zero-row array so the extractor takes its empty-input
        # fallback, whose length is the AP count it was built for. A (1, 0)
        # probe would take the non-empty branch and always reduce to a
        # zero-length vector, forcing a rebuild (and a log line) on every call.
        try:
            current_lambda_dim = len(self.feature_extractors['mean'](np.empty((0, 0))))
        except Exception:
            # Missing 'mean' entry or an extractor that cannot handle the
            # probe: treat the width as unknown and rebuild.
            current_lambda_dim = -1

        if num_aps == current_lambda_dim:
            return self.feature_extractors

        print(f"  Agent {self.id}: Updating RSSI feature extractors for {num_aps} APs.")
        return {
            'mean': lambda x: (
                np.nanmean(x, axis=0) if x.shape[0] > 0 else np.full(num_aps, -100.0)
            ),
            'std': lambda x: (
                np.nanstd(x, axis=0) if x.shape[0] > 0 else np.zeros(num_aps)
            ),
            'max': lambda x: (
                np.nanmax(x, axis=0)
                if x.shape[0] > 0 and np.sum(~np.isnan(x)) > 0
                else np.full(num_aps, -100.0)
            ),
            'min': lambda x: (
                np.nanmin(x, axis=0)
                if x.shape[0] > 0 and np.sum(~np.isnan(x)) > 0
                else np.full(num_aps, -100.0)
            ),
            'diff': lambda x: (
                np.nanmax(x, axis=0) - np.nanmin(x, axis=0)
                if x.shape[0] > 0 and np.sum(~np.isnan(x)) > 1
                else np.zeros(num_aps)
            ),
            'stability': lambda x: (
                1.0 / (1.0 + np.nanstd(x, axis=0)) if x.shape[0] > 0 else np.zeros(num_aps)
            ),
        }

    # --- CORRECTED INDENTATION & REFINED SCORING LOGIC ---
    def _determine_activity_scores(self, features):
        """Turn a flat feature dict into a normalised score per activity.

        Starts from a copy of ``self.knowledge``, multiplies entries by
        sensor-specific heuristic factors, boosts the activity stored at index
        3 of the latest ``self.history`` record by 10%, and rescales so the
        scores sum to 1. A uniform distribution over ``SPHERE_ACTIVITY_NAMES``
        is returned when ``features`` is empty or the scores cannot be
        normalised. Errors raised while applying the heuristics are printed
        and scoring continues with whatever was applied so far.
        """
        def uniform_scores():
            return {act: 1.0 / len(SPHERE_ACTIVITY_NAMES) for act in SPHERE_ACTIVITY_NAMES}

        if not features:
            return uniform_scores()

        scores = self.knowledge.copy()
        base_multiplier = 1.0

        def scale_by_kind(factors):
            # Multiply every activity whose ActivityType has an entry in `factors`;
            # activities of other types are left untouched.
            for act, kind in SPHERE_ACTIVITIES.items():
                if kind in factors:
                    scores[act] *= (base_multiplier * factors[kind])

        try:
            if self.sensor_type == SensorType.ACCELEROMETER:
                std_sum = features.get('std_0', 0) + features.get('std_1', 0) + features.get('std_2', 0)
                energy_sum = (
                    features.get('energy_0', 0) + features.get('energy_1', 0) + features.get('energy_2', 0)
                )
                if std_sum > 1.5 or energy_sum > 1.0:
                    # Strong motion: favour ambulation, then transitions.
                    scale_by_kind({
                        ActivityType.AMBULATION: 1.5,
                        ActivityType.TRANSITION: 1.2,
                        ActivityType.POSTURE: 0.5,
                    })
                    if features.get('fft_peak_freq_2', 0) > 1.5 and features.get('range_2', 0) > 5.0:
                        scores['a_jump'] *= 1.5
                    elif features.get('std_0', 0) > 1.0 or features.get('std_1', 0) > 1.0:
                        scores['a_walk'] *= 1.2
                        scores['a_loadwalk'] *= 1.2
                elif std_sum < 0.3 and energy_sum < 0.2:
                    # Near-still: favour postures and pick one from the mean direction.
                    scale_by_kind({
                        ActivityType.POSTURE: 1.5,
                        ActivityType.AMBULATION: 0.5,
                    })
                    mean_z = features.get('mean_2', 0)
                    mean_y = features.get('mean_1', 0)
                    mean_x = features.get('mean_0', 0)
                    mag = np.sqrt(mean_x**2 + mean_y**2 + mean_z**2)
                    if mag > 1e-6:
                        norm_x, norm_y, norm_z = mean_x / mag, mean_y / mag, mean_z / mag
                    else:
                        norm_x, norm_y, norm_z = 0, 0, 0
                    if norm_z > 0.8:
                        scores['p_stand'] *= 1.4
                    elif abs(norm_x) > 0.7 or abs(norm_y) > 0.7:
                        scores['p_lie'] *= 1.4
                    else:
                        scores['p_sit'] *= 1.3
                else:
                    # In-between motion level: favour transitions.
                    scale_by_kind({
                        ActivityType.TRANSITION: 1.4,
                        ActivityType.POSTURE: 0.8,
                        ActivityType.AMBULATION: 0.8,
                    })
            elif self.sensor_type == SensorType.PIR:
                zone_count = len(SPHERE_LOCATIONS)
                total_count = sum(features.get(f'count_{i}', 0) for i in range(zone_count))
                num_active_zones = sum(features.get(f'any_{i}', 0) for i in range(zone_count))
                if total_count > 3 or num_active_zones > 1:
                    scale_by_kind({
                        ActivityType.AMBULATION: 1.4,
                        ActivityType.POSTURE: 0.6,
                    })
                elif total_count == 0:
                    scale_by_kind({
                        ActivityType.POSTURE: 1.2,
                        ActivityType.AMBULATION: 0.8,
                    })
            elif self.sensor_type == SensorType.RSSI:
                num_aps = len([key for key in features if key.startswith('std_')])
                if num_aps > 0:
                    avg_std = np.mean([features.get(f'std_{i}', 100) for i in range(num_aps)])
                else:
                    avg_std = 100
                if avg_std > 3.0:
                    scale_by_kind({
                        ActivityType.AMBULATION: 1.3,
                        ActivityType.POSTURE: 0.7,
                    })
                elif avg_std < 1.0:
                    scale_by_kind({
                        ActivityType.POSTURE: 1.3,
                        ActivityType.AMBULATION: 0.7,
                    })
            elif self.sensor_type == SensorType.VIDEO:
                aspect = features.get('aspect_ratio_mean', 1.0)
                move = features.get('movement_mean', 0.0)
                height = features.get('height_mean', 0.0)
                y_center = features.get('y_center_mean', 0.0)
                # Heuristic thresholds for the bounding-box features.
                MV_HI = 15.0
                MV_LO = 3.0
                ASP_TALL = 0.6
                ASP_WIDE = 1.3
                H_TALL = 150
                H_SHORT = 70
                Y_LOW = 300
                if move > MV_HI:
                    scale_by_kind({
                        ActivityType.AMBULATION: 1.4,
                        ActivityType.TRANSITION: 1.2,
                        ActivityType.POSTURE: 0.6,
                    })
                elif move < MV_LO:
                    scale_by_kind({
                        ActivityType.POSTURE: 1.4,
                        ActivityType.AMBULATION: 0.6,
                    })
                    if aspect < ASP_TALL and height > H_TALL:
                        scores['p_stand'] *= 1.5
                    elif aspect > ASP_WIDE and y_center > Y_LOW:
                        scores['p_lie'] *= 1.5
                    elif height < H_SHORT and aspect > 0.8:
                        scores['p_squat'] *= 1.2
                        scores['p_kneel'] *= 1.2
                    else:
                        scores['p_sit'] *= 1.3
                        scores['p_bent'] *= 1.2
        except KeyError as e:
            print(f"Warning: Agent {self.id} missing feature key during scoring: {e}")
        except Exception as e:
            print(f"Error during scoring for Agent {self.id}: {e}")
            traceback.print_exc()

        # Temporal inertia: nudge the score of the activity recorded last.
        if self.history:
            last_pred = self.history[-1][3]
            if last_pred is not None and last_pred in scores:
                scores[last_pred] *= (1.0 + 0.1)

        total = sum(scores.values())
        if not total > 1e-9:
            return uniform_scores()
        # Clip negatives to zero, then renormalise what is left.
        clipped = {act: max(0, s / total) for act, s in scores.items()}
        final_total = sum(clipped.values())
        if not final_total > 1e-9:
            return uniform_scores()
        return {act: s / final_total for act, s in clipped.items()}

    def update_knowledge(self, features, activity_scores, prediction, ground_truth):
        """Blend the latest outcome into confidence, accuracy, specialization and knowledge.

        Every statistic is an exponential moving average with rate
        ``self.learning_rate``. A correct prediction pulls confidence and the
        predicted activity's accuracy/specialization towards 1. A wrong one
        pulls confidence and the predicted activity's accuracy towards 0, and
        moves specialization towards 0.3 (predicted) and 0.4 (true), floored at
        0.01, with the true activity's accuracy moving towards 0.3.
        ``self.knowledge`` is then blended with ``activity_scores`` at half the
        learning rate and renormalised to sum to 1. ``features`` is not used.
        """
        alpha = self.learning_rate

        def blend(old, target):
            # One EMA step towards `target`.
            return (1 - alpha) * old + alpha * target

        accuracy = self.activity_accuracy
        specialization = self.activity_specialization

        if prediction == ground_truth:
            self.confidence = blend(self.confidence, 1.0)
            accuracy[prediction] = blend(accuracy[prediction], 1.0)
            specialization[prediction] = blend(specialization[prediction], 1.0)
        else:
            self.confidence = blend(self.confidence, 0.0)
            if prediction is not None:
                accuracy[prediction] = blend(accuracy[prediction], 0.0)
                specialization[prediction] = max(0.01, blend(specialization[prediction], 0.3))
            accuracy[ground_truth] = blend(accuracy[ground_truth], 0.3)
            specialization[ground_truth] = max(0.01, blend(specialization[ground_truth], 0.4))

        beta = alpha * 0.5
        # Snapshot the mass before the loop so every entry is scaled by the same sum.
        current_knowledge_sum = sum(self.knowledge.values())
        for act, score in activity_scores.items():
            self.knowledge[act] = (1 - beta) * self.knowledge[act] + beta * score * current_knowledge_sum

        total_knowledge = sum(self.knowledge.values())
        if total_knowledge > 1e-9:
            self.knowledge = {act: k / total_knowledge for act, k in self.knowledge.items()}

# --- SwarmQueen Integration System ---
class SwarmQueen:
    """Coordinates a set of sensor agents and fuses their activity scores."""

    def __init__(self, agents, activity_names, location_names):
        """Index the agents by id and set up priors, history and the confusion matrix.

        Args:
            agents: Iterable of agent objects; each must expose ``id``.
            activity_names: Activity labels used as score keys and matrix axes.
            location_names: List of location labels (concatenated with a list
                when the prior table is built).
        """
        self.agents = {agent.id: agent for agent in agents}
        self.activity_names = activity_names
        self.location_names = location_names
        self.num_activities = len(activity_names)
        # Only the 20 most recent system outputs are retained.
        self.system_history = deque(maxlen=20)
        self.location_activity_prob = self._initialize_location_activity_map()
        empty_counts = np.zeros((self.num_activities, self.num_activities))
        self.confusion_matrix = pd.DataFrame(
            empty_counts, index=activity_names, columns=activity_names
        )
        self.fusion_confidence_weight = 1.0
        self.fusion_specialization_weight = 1.0
        self.temporal_inertia_boost = 1.1

    def _initialize_location_activity_map(self):
        """Build ``{location: {activity: prior}}`` with hand-tuned boosts.

        Every location starts uniform; listed activities are multiplied by
        their factor and the row is renormalised. Locations or activities that
        are not present are ignored. 'UNKNOWN' is always uniform.
        """
        loc_act_map = {
            loc: {act: 1.0 / self.num_activities for act in self.activity_names}
            for loc in self.location_names + ['UNKNOWN']
        }
        base_prob = 1.0 / self.num_activities

        def update_probs(location, activity_factors):
            if location not in loc_act_map:
                return
            probs = loc_act_map[location]
            for act, factor in activity_factors.items():
                if act in probs:
                    probs[act] *= factor
            total_prob = sum(probs.values())
            if total_prob > 1e-9:
                loc_act_map[location] = {act: p / total_prob for act, p in probs.items()}
            else:
                loc_act_map[location] = {act: base_prob for act in self.activity_names}

        location_boosts = {
            'living': {'p_sit': 4, 'p_lie': 2, 'a_walk': 1.5, 't_sit_stand': 1.5, 't_stand_sit': 1.5},
            'bed1': {'p_lie': 5, 'p_sit': 2, 't_lie_sit': 2, 't_sit_lie': 2},
            'bed2': {'p_lie': 5, 'p_sit': 2, 't_lie_sit': 2, 't_sit_lie': 2},
            'kitchen': {'p_stand': 3, 'a_walk': 2, 'p_bent': 1.5},
            'bath': {'p_stand': 3, 'a_walk': 1.5, 'p_bent': 1.5},
            'toilet': {'p_sit': 5, 'p_stand': 1.5, 't_sit_stand': 1.5, 't_stand_sit': 1.5},
            'stairs': {'a_ascend': 6, 'a_descend': 6, 'p_stand': 1.5, 'a_walk': 1.2},
            'hall': {'a_walk': 4, 'p_stand': 2},
            'study': {'p_sit': 5, 'p_stand': 1.5, 't_sit_stand': 1.2, 't_stand_sit': 1.2},
        }
        for location, activity_factors in location_boosts.items():
            update_probs(location, activity_factors)

        loc_act_map['UNKNOWN'] = {act: base_prob for act in self.activity_names}
        return loc_act_map

    def process_system_reading(self, timestamp, data_chunks_by_sensortype, ground_truth_activity=None, ground_truth_location=None):
        """Run one window through every agent and fuse the results.

        Returns a dict with keys 'timestamp', 'final_prediction',
        'final_scores', 'inferred_location' and 'agent_outputs'. The outcome is
        appended to ``self.system_history`` and, when a ground-truth activity
        is given, counted in the confusion matrix.
        """
        agent_outputs = {}
        agent_locations = {}
        for agent_id, agent in self.agents.items():
            data_chunk = data_chunks_by_sensortype.get(agent.sensor_type, None)
            output = agent.process_reading(
                timestamp, data_chunk, ground_truth_activity=ground_truth_activity
            )
            agent_outputs[agent_id] = output
            if not output or 'error' in output:
                continue
            if agent.sensor_type != SensorType.PIR or not output.get('features'):
                continue
            pir_any = [
                output['features'].get(f'any_{i}', 0)
                for i in range(len(self.location_names))
            ]
            # A PIR agent only votes for a location when exactly one zone fired.
            if sum(pir_any) == 1:
                loc_idx = np.argmax(pir_any)
                agent_locations[agent_id] = {
                    'location': self.location_names[loc_idx],
                    'confidence': agent.confidence,
                }

        fused_scores = self._weighted_fusion(agent_outputs)
        inferred_location = self._determine_location(agent_locations, ground_truth_location)
        contextual_scores = self._apply_location_context(fused_scores, inferred_location)
        final_scores = self._apply_temporal_consistency(contextual_scores)

        if final_scores:
            # Tiny random jitter breaks exact ties between activities.
            scores_with_noise = {
                k: v + np.random.rand() * 1e-9 for k, v in final_scores.items()
            }
            final_prediction = max(scores_with_noise, key=scores_with_noise.get)
        else:
            final_prediction = None

        self.system_history.append((timestamp, final_prediction, inferred_location, final_scores))
        if ground_truth_activity is not None and final_prediction is not None:
            self.update_confusion_matrix(ground_truth_activity, final_prediction)
        return {
            'timestamp': timestamp,
            'final_prediction': final_prediction,
            'final_scores': final_scores,
            'inferred_location': inferred_location,
            'agent_outputs': agent_outputs,
        }

    def _determine_location(self, agent_locations, ground_truth_location=None):
        """Pick the location with the highest summed agent confidence.

        Returns "UNKNOWN" when no agent reported a location. When the votes are
        empty or their total confidence is below 0.1, the location stored in
        the latest history record is reused if there is one.
        ``ground_truth_location`` is accepted but not used.
        """
        if not agent_locations:
            return "UNKNOWN"

        location_votes = defaultdict(float)
        total_confidence = 0.0
        for loc_info in agent_locations.values():
            conf = loc_info.get('confidence', 0.1)
            loc = loc_info.get('location', 'UNKNOWN')
            if loc != 'UNKNOWN':
                location_votes[loc] += conf
                total_confidence += conf

        if location_votes and not total_confidence < 0.1:
            return max(location_votes, key=location_votes.get)

        if self.system_history:
            last_loc = self.system_history[-1][2]
            if last_loc != "UNKNOWN":
                return last_loc
        return "UNKNOWN"

    def _weighted_fusion(self, agent_outputs):
        """Average agent scores per activity, weighted by confidence x specialization.

        Outputs that are empty, carry an 'error' key, or have no
        'activity_scores' are skipped. The result is normalised to sum to 1, or
        uniform when nothing contributed.
        """
        fused_scores = {act: 0.0 for act in self.activity_names}
        # The 1e-9 seed keeps the later division safe for untouched activities.
        total_weight = {act: 1e-9 for act in self.activity_names}

        for output in agent_outputs.values():
            if not output or 'error' in output or not output.get('activity_scores'):
                continue
            agent_conf = output.get('confidence', 0.1)
            agent_spec = output.get(
                'activity_specialization', {act: 0.5 for act in self.activity_names}
            )
            for activity, score in output['activity_scores'].items():
                if activity not in fused_scores:
                    continue
                conf_term = max(0.01, agent_conf) ** self.fusion_confidence_weight
                spec_term = max(0.01, agent_spec.get(activity, 0.5)) ** self.fusion_specialization_weight
                weight = conf_term * spec_term
                fused_scores[activity] += score * weight
                total_weight[activity] += weight

        for activity in fused_scores:
            fused_scores[activity] /= total_weight[activity]

        final_total = sum(fused_scores.values())
        if final_total > 1e-9:
            return {act: s / final_total for act, s in fused_scores.items()}
        return {act: 1.0 / self.num_activities for act in self.activity_names}

    def _apply_location_context(self, scores, location):
        """Multiply scores by the location's activity prior and renormalise.

        Locations without a prior row are treated as "UNKNOWN".
        """
        if location not in self.location_activity_prob:
            location = "UNKNOWN"
        loc_probs = self.location_activity_prob[location]

        contextual_scores = {}
        for activity, score in scores.items():
            prior_prob = loc_probs.get(activity, 1.0 / self.num_activities)
            contextual_scores[activity] = score * prior_prob

        total_score = sum(contextual_scores.values())
        if total_score > 1e-9:
            return {act: s / total_score for act, s in contextual_scores.items()}
        return {act: 1.0 / self.num_activities for act in self.activity_names}

    def _apply_temporal_consistency(self, current_scores):
        """Boost the previously predicted activity and renormalise.

        With an empty history the input mapping is returned as-is.
        """
        if not self.system_history:
            return current_scores

        last_prediction = self.system_history[-1][1]
        smoothed_scores = current_scores.copy()
        if last_prediction is not None and last_prediction in smoothed_scores:
            smoothed_scores[last_prediction] *= self.temporal_inertia_boost

        total_score = sum(smoothed_scores.values())
        if total_score > 1e-9:
            return {act: s / total_score for act, s in smoothed_scores.items()}
        return {act: 1.0 / self.num_activities for act in self.activity_names}

    def update_confusion_matrix(self, ground_truth, prediction):
        """Count one (true, predicted) pair; labels outside the matrix are ignored."""
        known_truth = ground_truth in self.confusion_matrix.index
        if known_truth and prediction in self.confusion_matrix.columns:
            self.confusion_matrix.loc[ground_truth, prediction] += 1

    def get_confusion_matrix(self):
        """Return the running confusion matrix (rows: truth, columns: prediction)."""
        return self.confusion_matrix

# --- Main Execution Logic ---
if __name__ == "__main__":
    # Script entry point: load one subject's recordings, push every sliding
    # window through the swarm, then report accuracy and a confusion matrix.
    # Every failure path prints a message and falls through to the final
    # status line instead of terminating the interpreter.
    print("Initializing SPHERE Swarm Intelligence System...")
    # time.strftime without an explicit time tuple formats the current local time.
    print(f"Current Time: {time.strftime('%Y-%m-%d %H:%M:%S')}")

    # --- Configuration ---
    DATA_PATH = "/content/SPHERE_unzipped" # <<< --- VERIFY YOUR UNZIPPED DATA PATH --- <<<
    TARGET_SUBJECT = "00001" # <<<--- USE CORRECT SUBJECT ID (e.g., "00001") ---<<<
    WINDOW_SIZE_SEC = 2.0
    OVERLAP_SEC = 1.0
    RESAMPLE_FREQ = '50ms' # Corresponds to 20 Hz

    # --- Initialize Agents ---
    agents_to_use = [
        Agent(agent_id="Accel_01", sensor_type=SensorType.ACCELEROMETER, learning_rate=0.05, history_len=10),
        Agent(agent_id="PIR_01", sensor_type=SensorType.PIR, learning_rate=0.1, history_len=5),
    ]
    print(f"Initialized {len(agents_to_use)} agents.")

    # --- Initialize SwarmQueen ---
    queen = SwarmQueen(agents_to_use, SPHERE_ACTIVITY_NAMES, SPHERE_LOCATIONS)
    print("Initialized SwarmQueen.")

    # --- Data Loading ---
    print("\n--- Starting Data Loading and Processing ---")
    print(f"Target Subject: {TARGET_SUBJECT}")
    print(f"Attempting to load data from: {DATA_PATH}")
    accel_data = load_sphere_data(DATA_PATH, TARGET_SUBJECT, 'accelerometer')
    pir_data = load_sphere_data(DATA_PATH, TARGET_SUBJECT, 'pir')
    annotations = load_sphere_data(DATA_PATH, TARGET_SUBJECT, 'annotations')

    # --- Robustness Check ---
    if accel_data is None or annotations is None:
        print("\n--- ERROR: Failed to load essential Accelerometer or Annotation data. Stopping. ---")
    else:
        print("\n--- Essential data (Accel, Annotations) loaded successfully. Proceeding... ---")
        if pir_data is None:
            print("--- Warning: Failed to load PIR data correctly. Proceeding without it. ---")
        else:
            print("--- PIR data loaded successfully. ---")

        # NOTE(review): pir_data may be None here; this relies on
        # synchronize_data tolerating a missing stream - confirm.
        all_data_streams = {SensorType.ACCELEROMETER: accel_data, SensorType.PIR: pir_data}
        sync_data = synchronize_data(all_data_streams, resample_freq=RESAMPLE_FREQ)

        if sync_data is None or sync_data.empty:
            print("\n--- ERROR: Data synchronization failed. Stopping. ---")
        elif 'end' not in annotations.columns:
            # The ground-truth lookup below needs the interval end; report and
            # fall through like the other error paths rather than calling the
            # interactive-only exit() helper (which kills a notebook kernel).
            print("Error: Annotations missing 'end' column.")
        else:
            window_generator = create_time_windows(sync_data, WINDOW_SIZE_SEC, OVERLAP_SEC)
            print("Sorting annotations index...")
            annotations = annotations.sort_index()
            # The label column does not change between windows, so resolve it once.
            activity_col = next(
                (col for col in ['activity', 'annotation', 'label'] if col in annotations.columns),
                None,
            )

            # --- Processing Loop ---
            print("\n--- Processing Windows ---")
            all_predictions = []
            all_ground_truths = []
            window_count = 0
            start_processing_time = time.time()
            for window in window_generator:
                window_count += 1
                window_end_time = window['timestamp']
                window_data_chunks = window['data']
                ground_truth = None
                try: # Ground Truth Lookup
                    # Label the window with the annotation covering its midpoint.
                    window_start_time = window_end_time - pd.Timedelta(seconds=WINDOW_SIZE_SEC)
                    window_mid_point = window_start_time + (window_end_time - window_start_time) / 2
                    matching_annotations = annotations.loc[
                        (annotations.index <= window_mid_point) & (annotations['end'] > window_mid_point)
                    ]
                    if not matching_annotations.empty:
                        matched_annotation = matching_annotations.iloc[0]
                        if activity_col:
                            ground_truth = matched_annotation[activity_col]
                except Exception as e:
                    print(f"Error looking up annotation: {e}")
                    traceback.print_exc()

                system_output = queen.process_system_reading(
                    window_end_time, window_data_chunks, ground_truth_activity=ground_truth
                )
                # Only windows with a recognised label take part in the evaluation.
                if ground_truth is not None and ground_truth in SPHERE_ACTIVITY_NAMES:
                    all_ground_truths.append(ground_truth)
                    all_predictions.append(system_output['final_prediction'])
                if window_count % 100 == 0:
                    print(
                        f"  Processed {window_count} windows..."
                        f" Time: {window_end_time.strftime('%H:%M:%S.%f')[:-3]}"
                        f" Pred: {system_output['final_prediction']}"
                        f" (GT: {ground_truth if ground_truth else 'N/A'})"
                        f" Loc: {system_output['inferred_location']}"
                    )
            end_processing_time = time.time()
            print(f"\n--- Finished processing {window_count} windows in {end_processing_time-start_processing_time:.2f} seconds ---")

            # --- Evaluation ---
            print("\n--- Evaluation Results ---")
            final_confusion_matrix = queen.get_confusion_matrix()
            print(f"Collected {len(all_ground_truths)} predictions with valid ground truth.")
            if len(all_ground_truths) > 0 and len(all_predictions) == len(all_ground_truths):
                accuracy = accuracy_score(all_ground_truths, all_predictions)
                print(f"\nOverall Accuracy: {accuracy:.4f}")
                # Filter before sorting so a None prediction is never compared with str.
                seen_labels = set(all_ground_truths) | set(all_predictions)
                unique_labels = sorted(
                    label for label in seen_labels
                    if label is not None and label in SPHERE_ACTIVITY_NAMES
                )
                print("\nClassification Report:")
                try:
                    report = classification_report(
                        all_ground_truths, all_predictions, labels=unique_labels, zero_division=0
                    )
                    print(report)
                except ValueError as e:
                    print(f"Could not generate classification report: {e}")
                print("\nFinal Confusion Matrix:")
                cm_labels = sorted(
                    l for l in seen_labels
                    if l is not None
                    and l in final_confusion_matrix.index
                    and l in final_confusion_matrix.columns
                )
                if cm_labels:
                    cm_display = final_confusion_matrix.loc[cm_labels, cm_labels]
                    print(cm_display)
                    try:
                        plt.figure(figsize=(12, 10))
                        sns.heatmap(
                            cm_display, annot=True, fmt=".0f", cmap="Blues",
                            xticklabels=cm_labels, yticklabels=cm_labels,
                        )
                        plt.title(f'System Confusion Matrix (Subject: {TARGET_SUBJECT})')
                        plt.ylabel('True Label')
                        plt.xlabel('Predicted Label')
                        plt.xticks(rotation=45, ha='right')
                        plt.yticks(rotation=0)
                        plt.tight_layout()
                        plot_filename = f"confusion_matrix_{TARGET_SUBJECT}.png"
                        plt.savefig(plot_filename)
                        print(f"\nConfusion matrix plot saved as {plot_filename}")
                        plt.close()
                    except Exception as plot_err:
                        print(f"Error plotting confusion matrix: {plot_err}")
                else:
                    print("\nCould not generate confusion matrix (no common valid labels found).")
            else:
                if len(all_ground_truths) == 0:
                    print("\nEvaluation failed: No valid ground truth labels were found.")
                    print("Check annotation loading and GT lookup logic.")
                else:
                    print("\nEvaluation failed: Length mismatch between predictions and ground truth lists.")

    print("\nSystem processing finished (or terminated due to loading/sync error).")



Initializing SPHERE Swarm Intelligence System...
Current Time: 2025-03-31 11:18:50
Initialized 2 agents.
Initialized SwarmQueen.

--- Starting Data Loading and Processing ---
Target Subject: 00001
Attempting to load data from: /content/SPHERE_unzipped
Attempting to load accelerometer for subject 00001 from base path: /content/SPHERE_unzipped
  Using subject data folder: /content/SPHERE_unzipped/train/00001
  Loading data from: /content/SPHERE_unzipped/train/00001/acceleration.csv
  Found timestamp column: 't'
  Converting timestamp column (assuming units are seconds)...
  ...Loaded 35710 rows.
Attempting to load pir for subject 00001 from base path: /content/SPHERE_unzipped
  Using subject data folder: /content/SPHERE_unzipped/train/00001
  Loading data from: /content/SPHERE_unzipped/train/00001/pir.csv
  Using 'start' column as timestamp index for pir.
  ...Processed 115 pir rows.
Attempting to load annotations for subject 00001 from base path: /content/SPHERE_unzipped
  Using subject