In [6]:
from experiments.block_cat.helpers import load_data
import numpy as np
import pandas as pd

def get_trial_category_labels(
    category_cues: np.ndarray,
    pres_categoryids: np.ndarray,
) -> list[str]:
    """
    Classify every trial by how often its cued category occurs among the
    categories presented on that trial.

    A trial is "control" when the cued category never appears, "isolate"
    when it appears exactly once, and "block" when it appears more than once.

    Args:
        category_cues (np.ndarray): shape [n_trials, 1], category ID cue for each trial
        pres_categoryids (np.ndarray): shape [n_trials, n_items], category ID for each study item

    Returns:
        list[str]: one label per trial, in trial order
    """
    # Counts of 0 and 1 have dedicated labels; every larger count is a block.
    label_by_count = {0: "control", 1: "isolate"}

    return [
        label_by_count.get(int(np.sum(trial_cats == cued_cat)), "block")
        for cued_cat, trial_cats in zip(category_cues[:, 0], pres_categoryids)
    ]

def get_block_success_group(
    target_success: np.ndarray,
    cue_type: list[str],
    recalls: np.ndarray,
    early_positions: tuple[int, ...] = (4, 5, 6),
    late_positions: tuple[int, ...] = (10, 11, 12),
) -> list[int]:
    """
    Returns a trial-level feature describing where the first recall of a
    successful block-cue trial was studied:
    - 0 if not a successful block-cue targeting trial, or if the first valid
      recall falls outside both position windows (including no valid recall)
    - 1 if first recalled item was studied in `early_positions` (default 4-6)
    - 2 if first recalled item was studied in `late_positions` (default 10-12)

    Args:
        target_success (np.ndarray): 2-D array; column 0 is the truthy/falsy
            success flag for each trial
        cue_type (list[str]): per-trial cue label; only "block" trials qualify
        recalls (np.ndarray): per-trial rows of recalled study positions;
            entries <= 0 are ignored when locating the first recall
        early_positions (tuple[int, ...]): study positions mapped to group 1
        late_positions (tuple[int, ...]): study positions mapped to group 2

    Returns:
        list[int]: one group code (0, 1, or 2) per trial
    """

    labels = []
    for success, cue, recall_positions in zip(target_success[:, 0], cue_type, recalls):
        if not (success and cue == "block"):
            labels.append(0)
            continue

        # First entry that refers to a study position; 0 means "none found"
        # and will not match either window unless a caller lists 0 explicitly.
        first_pos = next((pos for pos in recall_positions if pos > 0), 0)
        if first_pos in early_positions:
            labels.append(1)
        elif first_pos in late_positions:
            labels.append(2)
        else:
            labels.append(0)

    return labels

In [2]:
# Path to the .h5 data file for the block_cat experiment.
embam_data_path = "experiments/block_cat/2025_04_10_block_cat.h5"
# `load_data` is a project helper; later cells index its result by field name.
data = load_data(embam_data_path)

# Show which fields are available in the loaded dataset.
print(list(data.keys()))

['block', 'category_cues', 'condition', 'listLength', 'pres_categoryids', 'pres_itemids', 'pres_itemnos', 'rec_categoryids', 'rec_itemids', 'recalls', 'subject', 'target_success']


In [3]:
# Derive the per-trial cue labels and block-success groups once so that
# later cells can reuse them for both study and recall events.
trial_labels = get_trial_category_labels(
    category_cues=data["category_cues"],
    pres_categoryids=data["pres_categoryids"],
)
block_success_group = get_block_success_group(
    target_success=data["target_success"],
    cue_type=trial_labels,
    recalls=data["recalls"],
)

# number of rows in `recalls`, used as the trial count downstream
trial_count = data["recalls"].shape[0]

print(block_success_group)

[1, 1, 2, 0, 1, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 1, 0, 2, 2, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 2, 0, 2, 2, 2, 0, 0, 2, 2, 0, 2, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 2, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 2, 1, 2, 0, 0, 2, 0, 0, 1, 0, 1, 1, 0, 0, 1, 2, 1, 0, 2, 2, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 2, 2, 0, 2, 2, 0, 1, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 2, 2, 2, 1, 0, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 2, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 1, 1, 0, 0, 2, 0, 1, 0, 2, 2, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 2, 1, 1, 0, 2, 0, 1, 0, 0, 0, 2, 1, 0, 0, 1, 0, 2, 1, 0, 2, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0, 1, 2, 2, 1, 0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 2, 0, 1, 0, 2, 1, 0, 2, 2, 2, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 1, 2, 1, 2, 2, 2, 0, 0, 0, 0, 0, 1, 2, 0, 0]


In [4]:
# Sanity check: largest value in the first trial's `pres_itemnos` row.
max(data['pres_itemnos'][0])

np.int64(15)

In [11]:
# Build one row per (trial, candidate position) describing which study
# position was recalled immediately after the first recall on successful
# block-cue trials.
subject_trial = 0
old_subject = 0
events = []
for trial_index in range(trial_count):

    # recalled study positions, in output order
    recalls = data['recalls'][trial_index]

    # study positions
    studied = data['pres_itemnos'][trial_index]
    list_length = len(studied)

    # category ids of study positions
    studied_categoryids = data['pres_categoryids'][trial_index]

    # track trial index within each subject; this runs before any `continue`
    # below, so skipped trials still advance the per-subject counter
    subject = data['subject'][trial_index].item() + 1
    if subject != old_subject:
        subject_trial = 0
    subject_trial += 1
    old_subject = subject

    # only trials in block-success group 1 or 2 contribute events
    if block_success_group[trial_index] == 0:
        continue

    # Take the first two valid recalls (entries <= 0 are not study positions).
    # Searching `recalls[1:]` for the second recall is wrong whenever the
    # first valid recall is not at index 0: it would find that same entry
    # again and leave the trial with no chosen candidate.
    valid_recalls = [pos for pos in recalls if pos > 0]
    if len(valid_recalls) < 2:
        continue
    first_pos, second_pos = valid_recalls[:2]
    # NOTE(review): if the second recall repeats `first_pos`, no candidate
    # row is marked Chosen for this trial - confirm whether such trials
    # should be dropped.

    # Add one row to `events` for each studied position other than first_pos:
    #   - Subject: participant ID (factor).
    #   - Trial: within-subject index of the recall trial (factor).
    #   - JustPos: study position of the first recalled item (first_pos).
    #   - CandPos: study position of the candidate next item in the list.
    #   - Chosen: True if CandPos was actually recalled next, False otherwise.
    #   - AbsLag: abs(CandPos - JustPos).
    #   - SameCat: True if CandPos is in the same category as JustPos.
    # Planned but not emitted yet: signed Lag (CandPos - JustPos) and
    # DistFromEnd ((N + 1) - CandPos, e.g. 16 - CandPos when N = 15).
    # Category lookup assumes study positions are 1-based indices into
    # `studied_categoryids` - TODO confirm.
    events.extend(
        {
            "Subject": subject,
            "Trial": subject_trial,
            "JustPos": first_pos,
            "CandPos": cand_pos,
            "Chosen": cand_pos == second_pos,
            "AbsLag": abs(cand_pos - first_pos),
            "SameCat": studied_categoryids[cand_pos - 1]
            == studied_categoryids[first_pos - 1],
        }
        for cand_pos in studied
        if cand_pos != first_pos
    )

In [12]:
# `events` is a list of row dicts, so use the DataFrame constructor directly;
# `from_dict` is documented for dict input.
pd.DataFrame(events)

Unnamed: 0,Subject,Trial,JustPos,CandPos,Chosen,AbsLag,SameCat
0,1,1,6,1,False,5,False
1,1,1,6,2,False,4,False
2,1,1,6,3,False,3,False
3,1,1,6,4,True,2,True
4,1,1,6,5,False,1,True
...,...,...,...,...,...,...,...
1731,20,13,11,10,False,1,True
1732,20,13,11,12,False,1,True
1733,20,13,11,13,False,2,False
1734,20,13,11,14,True,3,False
