In [1]:
import numpy as np
import pandas as pd

In [2]:
# --- Simulation parameters ---
N_RUNS = 10  # how many runs actually took place
N_SEGMENTS = 6  # how many of those runs the physio recording picked up
CLOCK_MISMATCH = 3.5  # offset (seconds) added to the physio clock

# The scanner log contains every run. Onsets are the running sum of normally
# distributed gaps (mean 450, SD 50); every run gets a duration of 100.
# NOTE: no random seed is set, so the simulated values change on every run.
true_onsets = np.random.normal(loc=450, scale=50, size=N_RUNS).cumsum()
true_durations = np.full(N_RUNS, 100.0)
labels_scan = list(map('scan_run-{}'.format, range(N_RUNS)))
scan_df = pd.DataFrame({
    'label': labels_scan,
    'onset': true_onsets,
    'duration': true_durations,
})

# The physio log only holds a random subset of the runs (drawn without
# replacement, then sorted in time), and its clock is offset from the
# scanner clock by CLOCK_MISMATCH.
onsets_phys = np.sort(
    np.random.choice(true_onsets, size=N_SEGMENTS, replace=False)
) + CLOCK_MISMATCH
labels_phys = list(map('phys_run-{}'.format, range(N_SEGMENTS)))
phys_df = pd.DataFrame({'label': labels_phys, 'onset': onsets_phys})

In [3]:
# Display the simulated scanner-side table (label, onset and duration per run).
scan_df

Unnamed: 0,label,onset,duration
0,scan_run-0,534.045323,100.0
1,scan_run-1,912.312804,100.0
2,scan_run-2,1344.618031,100.0
3,scan_run-3,1862.620347,100.0
4,scan_run-4,2386.632384,100.0
5,scan_run-5,2966.408975,100.0
6,scan_run-6,3516.87145,100.0
7,scan_run-7,4034.547829,100.0
8,scan_run-8,4466.369409,100.0
9,scan_run-9,4839.901908,100.0


In [4]:
# Display the simulated physio-side table (label and onset per detected run).
phys_df

Unnamed: 0,label,onset
0,phys_run-0,915.812804
1,phys_run-1,1866.120347
2,phys_run-2,2390.132384
3,phys_run-3,2969.908975
4,phys_run-4,3520.37145
5,phys_run-5,4469.869409


In [5]:
# For each physio onset (rows), take the absolute gap to every *later* onset
# (columns). Column k holds the gap to onset k+1; entries below the diagonal
# are zeroed, so the last row is all zeros.
onsets = phys_df['onset'].values
diffs = np.triu(np.abs(onsets[np.newaxis, 1:] - onsets[:, np.newaxis]))
diff_cols = list(map('time before {}'.format, range(1, diffs.shape[1] + 1)))
diff_df_phys = pd.DataFrame(data=diffs, columns=diff_cols)

In [6]:
# Put the physio table and its onset-gap columns side by side.
comb_df_phys = pd.concat([phys_df, diff_df_phys], axis='columns')

In [7]:
# Display the physio table together with its "time before N" gap columns.
comb_df_phys

Unnamed: 0,label,onset,time before 1,time before 2,time before 3,time before 4,time before 5
0,phys_run-0,915.812804,950.307543,1474.31958,2054.096171,2604.558646,3554.056605
1,phys_run-1,1866.120347,0.0,524.012037,1103.788628,1654.251103,2603.749062
2,phys_run-2,2390.132384,0.0,0.0,579.776591,1130.239066,2079.737025
3,phys_run-3,2969.908975,0.0,0.0,0.0,550.462475,1499.960434
4,phys_run-4,3520.37145,0.0,0.0,0.0,0.0,949.497959
5,phys_run-5,4469.869409,0.0,0.0,0.0,0.0,0.0


In [8]:
# For each scan onset (rows), take the absolute gap to every *later* onset
# (columns). Column k holds the gap to onset k+1; entries below the diagonal
# are zeroed, so the last row is all zeros.
onsets = scan_df['onset'].values
diffs = np.triu(np.abs(onsets[np.newaxis, 1:] - onsets[:, np.newaxis]))
diff_cols = list(map('time before {}'.format, range(1, diffs.shape[1] + 1)))
diff_df_scan = pd.DataFrame(data=diffs, columns=diff_cols)

In [9]:
# Put the scan table and its onset-gap columns side by side.
comb_df_scan = pd.concat([scan_df, diff_df_scan], axis='columns')

In [10]:
# Display the scan table together with its "time before N" gap columns.
comb_df_scan       

Unnamed: 0,label,onset,duration,time before 1,time before 2,time before 3,time before 4,time before 5,time before 6,time before 7,time before 8,time before 9
0,scan_run-0,534.045323,100.0,378.267481,810.572708,1328.575024,1852.587061,2432.363652,2982.826127,3500.502507,3932.324086,4305.856585
1,scan_run-1,912.312804,100.0,0.0,432.305227,950.307543,1474.31958,2054.096171,2604.558646,3122.235026,3554.056605,3927.589104
2,scan_run-2,1344.618031,100.0,0.0,0.0,518.002315,1042.014352,1621.790944,2172.253419,2689.929798,3121.751378,3495.283877
3,scan_run-3,1862.620347,100.0,0.0,0.0,0.0,524.012037,1103.788628,1654.251103,2171.927483,2603.749062,2977.281561
4,scan_run-4,2386.632384,100.0,0.0,0.0,0.0,0.0,579.776591,1130.239066,1647.915446,2079.737025,2453.269524
5,scan_run-5,2966.408975,100.0,0.0,0.0,0.0,0.0,0.0,550.462475,1068.138855,1499.960434,1873.492933
6,scan_run-6,3516.87145,100.0,0.0,0.0,0.0,0.0,0.0,0.0,517.67638,949.497959,1323.030458
7,scan_run-7,4034.547829,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,431.821579,805.354078
8,scan_run-8,4466.369409,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,373.532499
9,scan_run-9,4839.901908,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
# Signed onset difference (scan - phys), rounded to the nearest whole number,
# for every (scan run, phys segment) pair. Rows follow the row order of
# scan_df, columns follow the row order of phys_df.
#
# Broadcasting replaces the nested iterrows() loops: those were slow and used
# the DataFrame index *labels* as array positions, which is only valid when
# both frames carry a default 0..n-1 index.
scan_onsets = np.asarray(scan_df['onset'].values, dtype=float)
phys_onsets = np.asarray(phys_df['onset'].values, dtype=float)
diffs = np.round(scan_onsets[:, np.newaxis] - phys_onsets[np.newaxis, :], 0)

In [23]:
# Find the scan run that corresponds to the first physio segment.
#
# For each scan row, take its rounded offset to physio segment 0 as a
# candidate clock offset. The candidate is accepted when that exact value
# occurs once in every physio column, at strictly increasing scan rows:
# np.where lists matches in row-major order, so the column indices then read
# 0, 1, ..., n_phys - 1. Exactly one scan row must qualify.
sel_rows = []
phys_cols = np.arange(diffs.shape[1])
for i_row in range(diffs.shape[0]):
    val = diffs[i_row, 0]
    idx = np.where(diffs == val)
    if np.array_equal(idx[1], phys_cols):
        print('GOT IT: {} ({})'.format(val, i_row))
        # Record the matching row itself. The original appended `sel_row`,
        # which is not assigned until after this loop (NameError on a fresh
        # kernel, or a stale value left over from a previous execution).
        sel_rows.append(i_row)
if len(sel_rows) != 1:
    # Zero matches or an ambiguous match: the offset cannot be determined.
    raise Exception('Bad sel_rows')
sel_row = sel_rows[0]
# sel_row is a position in `diffs`, so look the onsets up positionally and
# use the unrounded values to get the exact offset.
clock_diff = scan_df['onset'].iloc[sel_row] - phys_df['onset'].iloc[0]
print('Physio time series must be shifted {}s to match scans'.format(clock_diff))

GOT IT: -4.0 (1)
Physio time series must be shifted -3.5s to match scans


In [27]:
# Express every scan onset on the physio clock by removing the clock offset
# (scan minus phys) computed as clock_diff.
scan_df['phys_onset'] = scan_df['onset'].sub(clock_diff)

In [28]:
# Display the scan table, which now includes the phys_onset column.
scan_df

Unnamed: 0,label,onset,duration,phys_onset
0,scan_run-0,534.045323,100.0,537.545323
1,scan_run-1,912.312804,100.0,915.812804
2,scan_run-2,1344.618031,100.0,1348.118031
3,scan_run-3,1862.620347,100.0,1866.120347
4,scan_run-4,2386.632384,100.0,2390.132384
5,scan_run-5,2966.408975,100.0,2969.908975
6,scan_run-6,3516.87145,100.0,3520.37145
7,scan_run-7,4034.547829,100.0,4038.047829
8,scan_run-8,4466.369409,100.0,4469.869409
9,scan_run-9,4839.901908,100.0,4843.401908


In [29]:
# Display the physio table again (presumably so its onsets can be compared
# by eye with the phys_onset column of scan_df).
phys_df

Unnamed: 0,label,onset
0,phys_run-0,915.812804
1,phys_run-1,1866.120347
2,phys_run-2,2390.132384
3,phys_run-3,2969.908975
4,phys_run-4,3520.37145
5,phys_run-5,4469.869409
