# Determine if we can infer full physio timing from scan timing and _partial_ physio timing

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Simulation parameters
N_RUNS = 10  # true number of runs
N_SEGMENTS = 6  # number of runs detected by phys
CLOCK_MISMATCH = 3.5  # seconds
SEED = 0  # fixed seed so a fresh "Restart & Run All" regenerates the same tables

# Private, seeded generator: reproducible draws without touching NumPy's
# global random state.
rng = np.random.RandomState(SEED)

# Scan detects all runs. Onsets are the cumulative sum of normally
# distributed gaps (mean 450, sd 50); every run has the same duration.
true_onsets = np.cumsum(rng.normal(loc=450, scale=50, size=N_RUNS))
true_durations = np.ones(N_RUNS) * 100
labels_scan = ['scan_run-{}'.format(i) for i in range(N_RUNS)]
scan_df = pd.DataFrame(
    columns=['label'],
    data=labels_scan
)
scan_df['onset'] = true_onsets
scan_df['duration'] = true_durations

# Phys misses some runs, so grab a random subset of the true onsets
# (without replacement) and keep them in chronological order.
onsets_phys = np.sort(rng.choice(true_onsets, size=N_SEGMENTS, replace=False))
# Clocks are also mismatched between computers: the physio clock reads
# CLOCK_MISMATCH later than the scan clock for the same event.
onsets_phys = onsets_phys + CLOCK_MISMATCH
labels_phys = ['phys_run-{}'.format(i) for i in range(N_SEGMENTS)]
phys_df = pd.DataFrame(
    columns=['label'],
    data=labels_phys
)
phys_df['onset'] = onsets_phys

In [3]:
# Simulated scan timing: one row per run, with its label, onset and duration
scan_df

Unnamed: 0,label,onset,duration
0,scan_run-0,460.931244,100.0
1,scan_run-1,913.267155,100.0
2,scan_run-2,1371.072873,100.0
3,scan_run-3,1750.175999,100.0
4,scan_run-4,2153.091367,100.0
5,scan_run-5,2650.151258,100.0
6,scan_run-6,3141.982471,100.0
7,scan_run-7,3630.187169,100.0
8,scan_run-8,4174.528344,100.0
9,scan_run-9,4657.298624,100.0


In [4]:
# Simulated physio timing: only the subset of runs the physio recording
# detected, with onsets offset by the clock mismatch
phys_df

Unnamed: 0,label,onset
0,phys_run-0,464.431244
1,phys_run-1,1374.572873
2,phys_run-2,2156.591367
3,phys_run-3,2653.651258
4,phys_run-4,3145.482471
5,phys_run-5,4660.798624


In [5]:
# Get difference between each scan onset and each physio trigger onset.
# diffs[i, j] = (onset of scan row i) - (onset of physio row j), rounded to a
# whole number. Broadcasting a column of scan onsets against a row of physio
# onsets builds the full table positionally, so it does not depend on the
# DataFrames' index labels being 0..n-1.
scan_onsets = scan_df['onset'].values
phys_onsets = phys_df['onset'].values
diffs = np.round(scan_onsets[:, np.newaxis] - phys_onsets[np.newaxis, :], 0)

print(diffs)

[[-4.000e+00 -9.140e+02 -1.696e+03 -2.193e+03 -2.685e+03 -4.200e+03]
 [ 4.490e+02 -4.610e+02 -1.243e+03 -1.740e+03 -2.232e+03 -3.748e+03]
 [ 9.070e+02 -4.000e+00 -7.860e+02 -1.283e+03 -1.774e+03 -3.290e+03]
 [ 1.286e+03  3.760e+02 -4.060e+02 -9.030e+02 -1.395e+03 -2.911e+03]
 [ 1.689e+03  7.790e+02 -4.000e+00 -5.010e+02 -9.920e+02 -2.508e+03]
 [ 2.186e+03  1.276e+03  4.940e+02 -4.000e+00 -4.950e+02 -2.011e+03]
 [ 2.678e+03  1.767e+03  9.850e+02  4.880e+02 -4.000e+00 -1.519e+03]
 [ 3.166e+03  2.256e+03  1.474e+03  9.770e+02  4.850e+02 -1.031e+03]
 [ 3.710e+03  2.800e+03  2.018e+03  1.521e+03  1.029e+03 -4.860e+02]
 [ 4.193e+03  3.283e+03  2.501e+03  2.004e+03  1.512e+03 -4.000e+00]]


In [6]:
# Find a scan onset for each physio onset where the time difference
# matches up across *all* physio onsets.
#
# For each scan row, take its difference to the first physio onset as the
# candidate offset, then look for that offset everywhere in diffs. The row
# is accepted only if the offset occurs exactly once in every physio column.
#
# The entries of diffs were rounded independently, so two (nearly) identical
# differences that sit on a .5 boundary can land on neighbouring integers.
# Compare with a one-unit tolerance instead of exact equality.
ROUNDING_TOLERANCE = 1.0
n_phys = diffs.shape[1]

sel_rows = []
for i_row in range(diffs.shape[0]):
    val = diffs[i_row, 0]
    idx = np.where(np.isclose(diffs, val, rtol=0, atol=ROUNDING_TOLERANCE))
    # np.where reports hits row by row, so a consistent alignment yields the
    # column indices 0..n_phys-1 in order, each exactly once.
    if np.array_equal(idx[1], np.arange(n_phys)):
        print('GOT IT: {} (row {})'.format(val, i_row))
        sel_rows.append(i_row)

# Zero matches means no consistent alignment exists; more than one means the
# alignment is ambiguous. Either way the offset cannot be trusted.
if len(sel_rows) != 1:
    raise ValueError(
        'Bad sel_rows: expected exactly 1 scan row aligning with all physio '
        'onsets, found {}'.format(len(sel_rows))
    )
sel_row = sel_rows[0]

# Exact (unrounded) offset, scan clock minus physio clock. sel_row is a
# position in diffs, so index the onsets positionally as well.
clock_diff = scan_df['onset'].iloc[sel_row] - phys_df['onset'].iloc[0]
print('Physio time series must be shifted {}s to match scans'.format(clock_diff))

GOT IT: -4.0 (0)
Physio time series must be shifted -3.5s to match scans


In [7]:
# clock_diff is (scan onset - physio onset), so removing it from a scan-clock
# onset gives the same event's onset on the physio clock, for every run.
scan_df['phys_onset'] = scan_df['onset'].sub(clock_diff)

In [8]:
# Scan table with the added phys_onset column (scan onset minus clock_diff)
scan_df

Unnamed: 0,label,onset,duration,phys_onset
0,scan_run-0,460.931244,100.0,464.431244
1,scan_run-1,913.267155,100.0,916.767155
2,scan_run-2,1371.072873,100.0,1374.572873
3,scan_run-3,1750.175999,100.0,1753.675999
4,scan_run-4,2153.091367,100.0,2156.591367
5,scan_run-5,2650.151258,100.0,2653.651258
6,scan_run-6,3141.982471,100.0,3145.482471
7,scan_run-7,3630.187169,100.0,3633.687169
8,scan_run-8,4174.528344,100.0,4178.028344
9,scan_run-9,4657.298624,100.0,4660.798624
