## This code generates the electrode subset used in the Neuroprobe Lite benchmark.

NOTE: You do not need to run this notebook. The results are already saved in `neuroprobe/config.py`, in the variable `NEUROPROBE_LITE_ELECTRODES`. This notebook is provided so that the code used to generate the subsets can be reviewed.

The idea behind this subset selection is to preserve as many full probes (or contiguous runs of electrodes on a probe) as possible. This leaves the most flexibility for preprocessing that depends on data from neighboring electrodes, such as Laplacian re-referencing.

In [13]:
from neuroprobe.braintreebank_subject import Subject
# Per-subject cap on the number of electrodes in the Neuroprobe Lite subset.
# The selection loop below only adds a streak if the running total stays at or
# under this value, so a subject may end up with fewer electrodes than this.
NEUROPROBE_LITE_N_ELECTRODES = 120
def stem_electrode_name(name):
    """Split an electrode label into its stem and its trailing number.

    The stem is everything up to and including the last alphabetic character
    of ``name``; whatever follows it is converted with ``int()``.

    Examples: ``'O1aIb4'`` -> ``('O1aIb', 4)``, ``'T1b2'`` -> ``('T1b', 2)``.

    Returns:
        A ``(stem, number)`` tuple of ``(str, int)``.

    Raises:
        ValueError: if the text after the last letter is not a valid integer
            (for example when the label ends with a letter).
    """
    # Walk backwards to locate the last alphabetic character; the split point
    # sits immediately after it (0 when the label contains no letters at all).
    split_at = 0
    for idx in range(len(name) - 1, -1, -1):
        if name[idx].isalpha():
            split_at = idx + 1
            break
    return name[:split_at], int(name[split_at:])
def find_streaks(nums):
    """Group numbers into runs of consecutive values.

    ``nums`` is sorted first, then split wherever a value is not exactly one
    greater than its predecessor (so a repeated value also starts a new run).

    Returns:
        A list of runs in ascending order, each run a list of numbers; an
        empty list when ``nums`` is empty.
    """
    if not nums:
        return []

    ordered = sorted(nums)
    runs = [[ordered[0]]]
    # Compare each value with the one before it: extend the current run when
    # they are adjacent integers, otherwise open a new run.
    for previous, current in zip(ordered, ordered[1:]):
        if current == previous + 1:
            runs[-1].append(current)
        else:
            runs.append([current])
    return runs

# Maps each subject's identifier (subject.subject_identifier) to the list of
# electrode labels selected for it.
electrode_subsets = {}
# Subjects are numbered 1 through 10.
for subject_id in range(1, 11):
    print("\n\n\n", "=== Generating electrode subsets for subject", subject_id)

    subject = Subject(subject_id, cache=False)
    electrode_labels = subject.electrode_labels

    # Split every label into (probe stem, trailing number).
    stem_nums = [stem_electrode_name(e) for e in electrode_labels]
    stems = [x[0] for x in stem_nums]
    # NOTE(review): this `nums` list is never read; the name is rebound by the
    # `for probe, nums in probes.items()` loop below.
    nums = [x[1] for x in stem_nums]

    # Group the electrode numbers by probe stem.
    # NOTE(review): the key order comes from iterating `set(stems)`. With
    # Python's string hash randomization that order can differ between
    # interpreter runs, and it decides the order of equal-length streaks below
    # (both sorts are stable) -- so which of several tied streaks gets selected
    # may vary from run to run unless PYTHONHASHSEED is fixed. Confirm before
    # regenerating the saved subsets.
    probes = {
        stem: [num for (s, num) in stem_nums if s == stem]
        for stem in set(stems)
    }
    print("Found", len(probes), "probes:")
    print(probes)

    # Find every streak of consecutive electrode numbers on each probe
    # (all streaks are kept, not only the longest one).
    probe_streaks = {}
    for probe, nums in probes.items():
        streaks = find_streaks(nums)
        # Sort streaks by length (descending) and take all streaks
        probe_streaks[probe] = sorted(streaks, key=len, reverse=True)
    # Create list of all streaks with their probe names
    all_streaks = []
    for probe, streaks in probe_streaks.items():
        for streak in streaks:
            all_streaks.append((probe, streak))
    # Sort by streak length in descending order; ties keep the order in which
    # they were appended above.
    all_streaks.sort(key=lambda x: len(x[1]), reverse=True)
    print("All streaks sorted by length (descending):")
    for probe, streak in all_streaks:
        print(f"{probe}: {streak} (length: {len(streak)})")

    # Initialize variables to track selected electrodes
    selected_electrodes = []
    total_electrodes = 0

    # Greedy selection: iterate through streaks in descending order of length,
    # taking a streak only if it fits entirely. A streak that does not fit is
    # skipped and shorter streaks are still considered, so streaks are never
    # split and the total never exceeds the target.
    for probe, streak in all_streaks:
        # If adding this streak won't exceed the target number
        if total_electrodes + len(streak) <= NEUROPROBE_LITE_N_ELECTRODES:
            # Add all electrodes from this streak. The label is rebuilt from
            # the stem and the parsed integer.
            # NOTE(review): this assumes the original labels carry no leading
            # zeros in their number (e.g. 'T1b02' would come back as 'T1b2')
            # -- confirm against subject.electrode_labels.
            for num in streak:
                selected_electrodes.append(f"{probe}{num}")
            total_electrodes += len(streak)
        
        # Break if we've reached our target
        if total_electrodes >= NEUROPROBE_LITE_N_ELECTRODES:
            break

    print(f"\nSelected {len(selected_electrodes)} electrodes:")
    print(selected_electrodes)

    electrode_subsets[subject.subject_identifier] = selected_electrodes





 === Generating electrode subsets for subject 1
Found 13 probes:
{'F3dIe': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 14], 'T1cIf': [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16], 'T3aHb': [6, 9, 10, 12], 'T2bHa': [1, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14], 'T2c': [4, 5, 6, 7, 8], 'T3bOT': [1, 2, 3, 4, 5, 6, 8, 9, 10, 12], 'T2aA': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 'F3aOFa': [2, 3, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 'T1aIb': [1, 2, 3, 4, 5, 6, 7, 8], 'F3bIaOFb': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 'T1bIc': [1, 2, 3, 4, 5, 6, 7, 8], 'F3cId': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 'T2d': [1, 2, 3, 4, 5, 6]}
All streaks sorted by length (descending):
F3bIaOFb: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] (length: 16)
T2aA: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] (length: 12)
F3dIe: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] (length: 10)
F3aOFa: [7, 8, 9, 10, 11, 12, 13, 14, 15, 16] (length: 10)
F3cId: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] (length: 10)
T1cIf: [1, 2, 3, 4,

In [17]:
# `json` is not imported in any cell shown in this notebook; import it here so
# this cell runs on its own (re-importing is harmless if it is already loaded).
import json

# Emit all per-subject subsets as a single JSON object keyed by subject identifier.
print(json.dumps(electrode_subsets))


{"btbank1": ["F3bIaOFb1", "F3bIaOFb2", "F3bIaOFb3", "F3bIaOFb4", "F3bIaOFb5", "F3bIaOFb6", "F3bIaOFb7", "F3bIaOFb8", "F3bIaOFb9", "F3bIaOFb10", "F3bIaOFb11", "F3bIaOFb12", "F3bIaOFb13", "F3bIaOFb14", "F3bIaOFb15", "F3bIaOFb16", "T2aA1", "T2aA2", "T2aA3", "T2aA4", "T2aA5", "T2aA6", "T2aA7", "T2aA8", "T2aA9", "T2aA10", "T2aA11", "T2aA12", "F3dIe1", "F3dIe2", "F3dIe3", "F3dIe4", "F3dIe5", "F3dIe6", "F3dIe7", "F3dIe8", "F3dIe9", "F3dIe10", "F3aOFa7", "F3aOFa8", "F3aOFa9", "F3aOFa10", "F3aOFa11", "F3aOFa12", "F3aOFa13", "F3aOFa14", "F3aOFa15", "F3aOFa16", "F3cId1", "F3cId2", "F3cId3", "F3cId4", "F3cId5", "F3cId6", "F3cId7", "F3cId8", "F3cId9", "F3cId10", "T1cIf1", "T1cIf2", "T1cIf3", "T1cIf4", "T1cIf5", "T1cIf6", "T1cIf7", "T1cIf8", "T2bHa7", "T2bHa8", "T2bHa9", "T2bHa10", "T2bHa11", "T2bHa12", "T2bHa13", "T2bHa14", "T1aIb1", "T1aIb2", "T1aIb3", "T1aIb4", "T1aIb5", "T1aIb6", "T1aIb7", "T1aIb8", "T1bIc1", "T1bIc2", "T1bIc3", "T1bIc4", "T1bIc5", "T1bIc6", "T1bIc7", "T1bIc8", "T1cIf10", "T1cIf