In [9]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

# Info:
`quality` is the area of each detection in pixel units (1 pixel = 0.65 microns).

X, Y, and R are in microns; area is in microns^2.

The cytoplasm channel means are recalculated with the nuclear area excluded: (cyto sum - nucleus sum) / (cyto quality - nucleus quality).

In [10]:
def cyto_in(n_info, c_info):
    """Return the four channel means of the cytoplasm with the nucleus removed.

    Only the first row of each frame is read. For channels 1-4 the nucleus
    ``CH<i>_Sum`` is subtracted from the cytoplasm ``CH<i>_Sum`` and the
    result is divided by the difference of the two ``quality`` values.

    Parameters
    ----------
    n_info : pandas.DataFrame
        Nucleus detection; must provide ``quality`` and ``CH1_Sum``..``CH4_Sum``.
    c_info : pandas.DataFrame
        Cytoplasm detection with the same columns.

    Returns
    -------
    list
        Four values, one per channel, in channel order.
    """
    def first(frame, column):
        # Value of `column` in the first row of `frame`.
        return frame[column].values[0]

    pixel_count = first(c_info, 'quality') - first(n_info, 'quality')

    means = []
    for channel in (1, 2, 3, 4):
        column = 'CH{}_Sum'.format(channel)
        cyto_only_sum = first(c_info, column) - first(n_info, column)
        means.append(cyto_only_sum / pixel_count)
    return means

def match(cyto_path, nu_path):
    """Pair each nucleus detection with its nearest cytoplasm detection, per frame.

    For every frame found in the nucleus table, each nucleus is paired with
    the cytoplasm detection whose (X, Y) centre is closest. The pairing is
    accepted only when the centre distance does not exceed the nucleus
    ``radius`` and the cytoplasm ``area`` is strictly larger than the nucleus
    ``area``; otherwise ``cyto_track_id`` is NaN. ``cyto_X``/``cyto_Y`` always
    report the nearest cytoplasm centre, accepted or not.

    For accepted pairs the cytoplasm-only channel means are computed from the
    matched rows as ``(cyto CHi_Sum - nu CHi_Sum) / (cyto quality - nu quality)``
    (the same formula as ``cyto_in``). Rows without a usable cytoplasm track id
    get NaN cytoplasm means. Nucleus means are copied from ``CHi_mean``.

    Parameters
    ----------
    cyto_path : str or file-like
        CSV of cytoplasm detections (anything ``pandas.read_csv`` accepts).
        Columns read: ``frame``, ``track_id``, ``X``, ``Y``, ``area``,
        ``quality``, ``CH1_Sum``..``CH4_Sum``.
    nu_path : str or file-like
        CSV of nucleus detections. Columns read: the cytoplasm columns plus
        ``radius`` and ``CH1_mean``..``CH4_mean``.

    Returns
    -------
    pandas.DataFrame
        One row per nucleus detection with columns ``nu_track_id``,
        ``cyto_track_id``, ``frame``, ``cyto_X``, ``cyto_Y``, ``nu_X``,
        ``nu_Y``, ``cyto_CH1_mean``..``cyto_CH4_mean`` and
        ``nu_CH1_mean``..``nu_CH4_mean``. Empty (columns only) when the
        nucleus table has no rows.
    """
    datac = pd.read_csv(cyto_path)
    datan = pd.read_csv(nu_path)

    sum_cols = ['CH{}_Sum'.format(i) for i in range(1, 5)]
    mean_cols = ['CH{}_mean'.format(i) for i in range(1, 5)]
    intensity_cols = ['cyto_' + c for c in mean_cols] + ['nu_' + c for c in mean_cols]
    id_cols = ['nu_track_id', 'cyto_track_id', 'frame', 'cyto_X', 'cyto_Y', 'nu_X', 'nu_Y']

    match_frames = []

    for t in datan['frame'].unique():
        cyto = datac[datac['frame'] == t]
        nu = datan[datan['frame'] == t]
        n_nu = len(nu)

        # Defaults describe "no cytoplasm available"; overwritten below when
        # the frame actually contains cytoplasm detections.
        cyto_ids = np.full(n_nu, np.nan)
        cyto_x = np.full(n_nu, np.nan)
        cyto_y = np.full(n_nu, np.nan)
        cyto_means = np.full((n_nu, 4), np.nan)

        # argmin/min over an empty axis raises, so a frame without cytoplasm
        # detections must skip the distance search entirely.
        if n_nu > 0 and len(cyto) > 0:
            # Pairwise nucleus-to-cytoplasm centre distances, shape (n_nu, n_cyto).
            dists = np.sqrt((nu['X'].values[:, None] - cyto['X'].values) ** 2 +
                            (nu['Y'].values[:, None] - cyto['Y'].values) ** 2)
            min_dists = dists.min(axis=1)
            nearest = cyto.iloc[dists.argmin(axis=1)]

            accepted = ((nu['radius'].values >= min_dists) &
                        (nearest['area'].values > nu['area'].values))

            # astype returns a copy, so blanking rejected ids is safe.
            cyto_ids = nearest['track_id'].values.astype(float)
            cyto_ids[~accepted] = np.nan
            cyto_x = nearest['X'].values
            cyto_y = nearest['Y'].values

            # Intensities come straight from the matched rows instead of
            # re-querying the full tables by (frame, track_id) for every row.
            has_cyto = ~np.isnan(cyto_ids)
            pixel_diff = (nearest['quality'].values - nu['quality'].values)[has_cyto]
            sum_diff = (nearest[sum_cols].values - nu[sum_cols].values)[has_cyto]
            cyto_means[has_cyto] = sum_diff / pixel_diff[:, None]

        nu_means = nu[mean_cols].values.astype(float)

        frame_df = pd.DataFrame({
            'nu_track_id': nu['track_id'].values,
            'cyto_track_id': cyto_ids,
            'frame': [t] * n_nu,
            'cyto_X': cyto_x,
            'cyto_Y': cyto_y,
            'nu_X': nu['X'].values,
            'nu_Y': nu['Y'].values,
        })
        intensity_df = pd.DataFrame(np.hstack([cyto_means, nu_means]),
                                    columns=intensity_cols)
        match_frames.append(pd.concat([frame_df, intensity_df], axis=1))

    if not match_frames:
        # pd.concat refuses an empty list; return the expected schema instead.
        return pd.DataFrame(columns=id_cols + intensity_cols)

    return pd.concat(match_frames, ignore_index=True)

def inten_array(match_df):
    """Arrange the per-detection intensities into a (track, frame, feature) array.

    Parameters
    ----------
    match_df : pandas.DataFrame
        Table with ``nu_track_id``, ``frame`` and the eight intensity columns
        ``cyto_CH1_mean``..``cyto_CH4_mean``, ``nu_CH1_mean``..``nu_CH4_mean``.
        Each (``nu_track_id``, ``frame``) pair must be unique;
        ``DataFrame.pivot`` raises ``ValueError`` otherwise.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(max track id + 1, max frame + 1, 8)``.
        ``out[i, t, k]`` holds feature ``k`` of nucleus track ``i`` at frame
        ``t``; entries with no detection are NaN. An empty table yields an
        array of shape ``(0, 0, 8)``.
    """
    # Features to be collected, in the order of the last array axis
    features = ['cyto_CH1_mean', 'cyto_CH2_mean', 'cyto_CH3_mean', 'cyto_CH4_mean',
                'nu_CH1_mean', 'nu_CH2_mean', 'nu_CH3_mean', 'nu_CH4_mean']

    if match_df.empty:
        # max() of an empty column is NaN and cannot be turned into a size.
        return np.full((0, 0, len(features)), np.nan)

    n_frames = int(match_df['frame'].max()) + 1
    n_tracks = int(match_df['nu_track_id'].max()) + 1

    # Initial array with NaNs (np.nan: the np.NaN alias is gone in NumPy 2.0)
    its = np.full((n_tracks, n_frames, len(features)), np.nan)

    all_tracks = np.arange(n_tracks)
    all_frames = np.arange(n_frames)

    # Loop over features to fill the array
    for idx, feature in enumerate(features):
        pivot_table = match_df.pivot(index='nu_track_id', columns='frame', values=feature)

        # The pivot only has rows/columns for ids and frames that occur in
        # the data; pad it to the full grid so gaps become NaN instead of a
        # shape mismatch on assignment.
        pivot_table = pivot_table.reindex(index=all_tracks, columns=all_frames)

        its[:, :, idx] = pivot_table.values

    return its


# Save match result as csv
# Input tables for the first dataset: cytoplasm and nucleus detections.
cyto_path = r"C:\Users\yuxin\Desktop\tc_data\cyto_xy1.csv"
nu_path = r"C:\Users\yuxin\Desktop\tc_data\nu_xy1.csv"

df = match(cyto_path,nu_path) 
# Store frame numbers as integers before writing the table out.
df['frame'] = df['frame'].astype(int)
# Output path without extension; '.csv' is appended when saving.
csv_path = r'C:\Users\yuxin\Desktop\tc_data\228df1'
df.to_csv(csv_path+'.csv', index=False)  

# Usage
# NOTE: this rebinds cyto_path, nu_path and df, replacing the result above.
cyto_path = r"C:\Users\yuxin\Desktop\tc_data\cyto.csv"
nu_path = r"C:\Users\yuxin\Desktop\tc_data\nu.csv"

df = match(cyto_path,nu_path) 