In [None]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('/home/jupyter/checkmate-histo/consolidated_workflow/immunoprofile_analysis/')

from imports import *
import xarray
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neighbors import KNeighborsClassifier
# import plotly.express as px

set_rc()

In [None]:
from skimage.io import imread
from deepcell.applications import Mesmer

In [None]:
# Marker channel names. Later cells use `qptiff_channels[:-1]` to get every
# marker except the trailing autofluorescence channel, so keep it last.
qptiff_channels = ['dapi', 'foxp3', 'tumor_specific', 'cd8', 'pd1', 'pdl1','autofluorescence']

# Per-cell morphology feature columns.
morph_fts = [
    'centroid_dif', # asymmetry ??
    'num_concavities', # "concavities"
    'convex_hull_resid', # I think this is "Fill"
    'major_axis_equiv_diam_ratio', # "aspect ratio"
    'perim_square_over_area', # "perimeter to area ratio"
    'arcsinh_area',
]

# Gate columns. This cell used to assign `gating_cols` twice with different
# orderings; only the last assignment was ever in effect, so it is the one kept.
gating_cols = ['tumor_gate','cd8_gate']

# Full feature set = marker intensities followed by morphology features.
feature_cols = qptiff_channels + morph_fts

In [None]:
# Candidate table, indexed by the first CSV column; later cells iterate over
# `candidates.index` and use each value as a case id.
candidates = pd.read_csv('./immunoprofile_hne_rag_features__passing_14_subset.csv', index_col=0)

In [None]:
# Exclude this case from all downstream loops. errors='ignore' makes the cell
# idempotent: re-running it after the row is already gone no longer raises KeyError.
candidates = candidates.drop('IP_19_E00218', errors='ignore')

In [None]:
# # 20220906 NB generation
# f = '/mnt/disks/image_data/immunoprofile/ccrcc_subset_processing/ccrcc_ip14_arcsinh_quant_agg_with_basic_cutoffs__rerun_correct_grade_seg__hybrid_manual_global_louvain__residual_markers__morph_flag.pkl'

# data = pd.read_pickle(f)

### Cell-wise morph flagging

In [None]:
# Indicator/proportion columns derived from the coarse `cell_label` call
# (presumably one-hot columns aggregated per cluster -- confirm upstream).
base_dummy_cols = [
    "cell_label_cd8+", "cell_label_other",
    "cell_label_tumor+", "cell_label_ungated/omit",
]

# Same idea for the detailed `manual_cell_call` categories.
detailed_dummy_cols = [
    "manual_cell_call_CD8+ PD1+", "manual_cell_call_CD8+ PD1-",
    "manual_cell_call_Omit",
    "manual_cell_call_Other FOXP3+", "manual_cell_call_Other FOXP3-",
    "manual_cell_call_Tumor+ PDL1+", "manual_cell_call_Tumor+ PDL1-",
    "manual_cell_call_Ungated",
]

# Every value that `filter_clusters_post_morph_flagging` can write into
# `filtered_cell_type`. The order matters: later cells enumerate this list to
# build the integer -> label mask key used for colouring.
filtered_cats = [
    "filtered_cell_label_cd8+", "filtered_cell_label_tumor+", "filtered_meta_other",
    "cd8-af-ratio_omit", "morphology_omit", "dapi_omit",
    "unclear_intermediate",
]

def filter_clusters_post_morph_flagging(df, dapi_cutoff=7.0, omit_thresh=0.2, lower_thresh=0.3, upper_thresh = 0.6):
    """
    Write a ``filtered_cell_type`` label for every row of ``df`` (in place).

    Rules are applied in priority order and a row keeps the FIRST label it gets:

    1. ``dapi < dapi_cutoff``                       -> 'dapi_omit'
    2. ``morph_flag_flagged == 1``                  -> 'morphology_omit'
    3. ``manual_cell_call_Omit >= omit_thresh``     -> 'cd8-af-ratio_omit'
    4. for 'cell_label_tumor+' then 'cell_label_cd8+' (order matters):
         value >  upper_thresh                      -> 'filtered_<column>'
         lower_thresh < value <= upper_thresh       -> 'unclear_intermediate'
    5. anything still unlabelled                    -> 'filtered_meta_other'

    Changes relative to the earlier version of this function:
    * labels are built in an object array instead of writing strings into a
      float NaN column (an incompatible-dtype write that pandas deprecates);
    * a value exactly equal to ``upper_thresh`` now counts as intermediate;
      previously it matched neither branch and fell through to the next rule.

    NOTE: a later cell in this notebook redefines this function with
    ``morph_flag_col`` / ``morph_flag_cutoff`` parameters; when cells run top to
    bottom that later definition is the one in effect.

    Returns None; ``df`` is modified in place.
    """
    # order matters here: earlier columns claim rows before later ones are considered
    cell_cols = [
        'cell_label_tumor+',
        'cell_label_cd8+',
    ]

    n_rows = len(df)
    labels = np.full(n_rows, None, dtype=object)
    unassigned = np.ones(n_rows, dtype=bool)

    def _claim(mask, label):
        # Label only rows that no higher-priority rule has taken yet.
        hit = unassigned & np.asarray(mask, dtype=bool)
        labels[hit] = label
        unassigned[hit] = False

    _claim(df['dapi'] < dapi_cutoff, 'dapi_omit')
    _claim(df['morph_flag_flagged'] == 1., 'morphology_omit')
    _claim(df['manual_cell_call_Omit'] >= omit_thresh, 'cd8-af-ratio_omit')

    for col in cell_cols:
        _claim(df[col] > upper_thresh, f'filtered_{col}')
        _claim((df[col] > lower_thresh) & (df[col] <= upper_thresh), 'unclear_intermediate')

    labels[unassigned] = 'filtered_meta_other'
    df['filtered_cell_type'] = labels
    

In [None]:
def visualize_cluster_distributions(df, cluster_col, ordering_col='cd8_af_ratio', title=None):
    """
    Plot, per cluster, the composition of coarse and detailed manual cell calls.

    Two side-by-side filled histograms are drawn with ``cluster_col`` on the
    x axis: one coloured by ``cell_label`` and one by ``manual_cell_call``.
    Clusters are ordered by the median of ``ordering_col``.

    Parameters
    ----------
    df : DataFrame with ``cluster_col``, ``ordering_col``, ``cell_label`` and
        ``manual_cell_call`` columns. NOTE: ``df[cluster_col]`` is converted to
        an ordered-by-median categorical IN PLACE.
    cluster_col : name of the cluster assignment column.
    ordering_col : numeric column whose per-cluster median sets the x order.
    title : figure title. When omitted, the notebook-level ``case_id`` is used
        if it exists. The previous version always read that global and raised
        NameError when it was undefined.
    """
    if title is None:
        # Backward-compatible fallback to the global the old code relied on.
        title = globals().get('case_id', '')

    _, axes = plt.subplots(1,2)

    # Order clusters by median of the ordering column (ascending).
    order = df.groupby(cluster_col)[ordering_col].median().sort_values().index.values
    df[cluster_col] = df[cluster_col].astype('category')
    df[cluster_col] = df[cluster_col].cat.reorder_categories(list(order))

    panels = [
        ('cell_label', ['ungated/omit', 'other', 'cd8+',  'tumor+', ]),
        ('manual_cell_call', ['Ungated','Omit','CD8+ PD1+', 'CD8+ PD1-',  'Other FOXP3+', 'Other FOXP3-','Tumor+ PDL1+', 'Tumor+ PDL1-', ]),
    ]
    for ax, (hue_col, hue_order) in zip(axes, panels):
        g = sns.histplot(x=cluster_col, hue=hue_col, hue_order=hue_order, multiple='fill', data=df.reset_index(), ax=ax)
        sns.move_legend(g, loc='center', bbox_to_anchor=(0.5, -0.5))

    # As before, the title lands on the current (last drawn) axes.
    plt.title(title)
    plt.show()

def visualize_fov_samples_postcutoff(case_id, cell_df, tumor_parses, n_samples=5, width=7, height=15, channels=[2, 3, 0], 
                          lower_clip_percentile=25, upper_clip_percentile=99, max_cell_area=2000,
                          cell_label_col='cell_label',
                          mask_key = {0: 'cd8+', 1: 'other', 2: 'tumor+', 3: 'ungated/omit'},
                          full_color_map= {idx:color for idx,color in enumerate(['black','green','grey','purple','yellow','orange',])},
                          return_samples=False
                         ):
    """
    Show ``n_samples`` randomly sampled FOVs of one case as a 2-row figure:
    row 0 is the normalised image with segmentation outlines, row 1 is the
    segmentation coloured by each cell's value in ``cell_label_col``.

    Parameters
    ----------
    case_id : first index level used to select rows of ``tumor_parses`` / ``cell_df``.
    cell_df : per-cell table; ``cell_df.loc[(case_id, fov)]`` must give that
        FOV's cells with at least ``area``, ``label`` and ``cell_label_col``.
    tumor_parses : FOV table indexed by (case_id, fov); passed to ``load_fov_data``.
    n_samples, width, height : number of FOVs and per-panel figure size.
    channels, lower_clip_percentile, upper_clip_percentile : forwarded to
        ``quantile_normalize_fov_examples`` (project helper).
    max_cell_area : cells with ``area`` >= this are excluded from the calls.
    mask_key : {k: value}; cells whose ``cell_label_col`` equals ``value`` are
        painted with ``full_color_map[k + 1]``. Keys are assumed to be 0..len-1.
    full_color_map : {int: colour}; 0 is background, ``len(mask_key) + 1`` is
        used for every segmented cell that did not match any ``mask_key`` value.
    return_samples : when True, return
        ``(parse_samples, seg, data, fill_mask, fov_calls, store)`` where
        ``fill_mask`` / ``fov_calls`` belong to the LAST FOV drawn.

    The mutable defaults are never modified here, so they are kept for
    signature compatibility.

    Fixes relative to the previous version:
    * the colour mask used to start as a copy of the raw segmentation ids, so an
      unmatched cell whose id happened to be in 1..len(mask_key) was painted as
      if it were that cell type; unmatched cells now always get the
      "unmatched" index;
    * ``squeeze=False`` keeps ``axes`` 2-D so ``n_samples == 1`` works;
    * the colour list no longer assumes a background pixel is present.
    """
    parse_samples = tumor_parses.loc[case_id].sample(n_samples)

    print('Loading FOV data')
    seg, data = load_fov_data(parse_samples)
    print('Normalizing for vis')
    store = quantile_normalize_fov_examples(data, channels=channels, lower_clip_percentile=lower_clip_percentile, upper_clip_percentile=upper_clip_percentile)

    set_rc(int(n_samples*width), height)
    fig, axes = plt.subplots(2, n_samples, squeeze=False)

    unmatched_idx = len(mask_key) + 1
    fill_mask = fov_calls = None

    for i, fov_idx in enumerate(parse_samples.index):
        tile = np.copy(seg[i,...,0])
        axes[0,i].imshow(mark_boundaries(store[i], tile.astype(int), color=(0.5, 0.5, 0.5)))

        fov_calls = cell_df.loc[(case_id, fov_idx)]
        # Drop very large objects; they end up in the "unmatched" colour.
        fov_calls = fov_calls.loc[fov_calls['area'] < max_cell_area]

        tile_img = ((tile > 0)*255).astype(np.uint8)

        # Start every segmented cell as "unmatched", then overwrite matched ones.
        fill_mask = np.zeros_like(tile)
        fill_mask[tile > 0] = unmatched_idx
        for cell_type_idx, cell_type in mask_key.items():
            temp_mask = np.isin(tile, fov_calls.loc[fov_calls[cell_label_col] == cell_type,'label'].values)
            fill_mask[temp_mask] = cell_type_idx+1

        # One colour per non-background index actually present, in ascending order.
        present = np.unique(fill_mask.astype(int))
        plot_colors = [full_color_map[k] for k in present if k != 0]
        colormapped = label2rgb(fill_mask.astype(int), tile_img, alpha=1., colors=plot_colors)
        axes[1,i].imshow(mark_boundaries(colormapped,tile.astype(int), color=(0,0,0)))
    plt.show()

    if return_samples:
        return parse_samples, seg, data, fill_mask, fov_calls, store

def visualize_fov_samples_clusters(case_id, cell_df, tumor_parses, n_samples=5, width=7, height=15, channels=[2, 3, 0], 
                          lower_clip_percentile=25, upper_clip_percentile=99, max_cell_area=2000,
                          cell_label_col='cell_label',
                          mask_key = {0: 'cd8+', 1: 'other', 2: 'tumor+', 3: 'ungated/omit'},
                          full_color_map= {idx:color for idx,color in enumerate(['black','green','grey','purple','yellow','orange',])},
                          return_samples=False
                         ):
    """
    Show ``n_samples`` randomly sampled FOVs of one case as a 2-row figure:
    row 0 is the normalised image with segmentation outlines, row 1 is the
    segmentation recoloured by ``cell_label_col`` using label2rgb's default
    colour cycle.

    Parameters mirror ``visualize_fov_samples_postcutoff``. ``full_color_map``
    is accepted for signature compatibility but is not used by this function.

    When ``return_samples`` is True, returns
    ``(parse_samples, seg, data, fill_mask, fov_calls)`` where ``fill_mask`` /
    ``fov_calls`` belong to the LAST FOV drawn.

    Fix relative to the previous version: ``squeeze=False`` keeps ``axes`` 2-D,
    so ``axes[row, col]`` no longer fails when ``n_samples == 1``.

    NOTE(review): cells that match no ``mask_key`` value keep their raw
    segmentation id in ``fill_mask``; ids in 1..len(mask_key) are therefore
    indistinguishable from a cell-type index. Left as is because the intended
    colouring for unmatched cells is not evident here.
    """
    parse_samples = tumor_parses.loc[case_id].sample(n_samples)

    print('Loading FOV data')
    seg, data = load_fov_data(parse_samples)
    print('Normalizing for vis')
    store = quantile_normalize_fov_examples(data, channels=channels, lower_clip_percentile=lower_clip_percentile, upper_clip_percentile=upper_clip_percentile)

    set_rc(int(n_samples*width), height)
    fig, axes = plt.subplots(2, n_samples, squeeze=False)

    fill_mask = fov_calls = None

    for i, fov_idx in enumerate(parse_samples.index):
        print(i, fov_idx)
        tile = np.copy(seg[i,...,0])
        axes[0,i].imshow(mark_boundaries(store[i], tile.astype(int), color=(0.5, 0.5, 0.5)))

        fill_mask = np.copy(tile)

        fov_calls = cell_df.loc[(case_id, fov_idx)]
        # Drop very large objects before looking up labels.
        fov_calls = fov_calls.loc[fov_calls['area'] < max_cell_area]

        tile_img = ((tile > 0)*255).astype(np.uint8)

        for cell_type_idx, cell_type in mask_key.items():
            temp_mask = np.isin(tile, fov_calls.loc[fov_calls[cell_label_col] == cell_type,'label'].values)
            fill_mask[temp_mask] = cell_type_idx+1

        colormapped = label2rgb(fill_mask.astype(int), tile_img, alpha=1.,)
        axes[1,i].imshow(mark_boundaries(colormapped,tile.astype(int), color=(0,0,0)))
    plt.show()

    if return_samples:
        return parse_samples, seg, data, fill_mask, fov_calls

In [None]:
def filter_clusters_post_morph_flagging(df, morph_flag_col='individual_morph_flag', morph_flag_cutoff=0.5,
                                        dapi_cutoff=7.0, omit_thresh=0.2, lower_thresh=0.3, upper_thresh = 0.6):
    """
    Write a ``filtered_cell_type`` label for every row of ``df`` (in place).

    Rules are applied in priority order and a row keeps the FIRST label it gets:

    1. ``dapi < dapi_cutoff``                       -> 'dapi_omit'
    2. ``df[morph_flag_col] > morph_flag_cutoff``   -> 'morphology_omit'
    3. ``manual_cell_call_Omit >= omit_thresh``     -> 'cd8-af-ratio_omit'
    4. for 'cell_label_tumor+' then 'cell_label_cd8+' (order matters):
         value >  upper_thresh                      -> 'filtered_<column>'
         lower_thresh < value <= upper_thresh       -> 'unclear_intermediate'
    5. anything still unlabelled                    -> 'filtered_meta_other'

    Changes relative to the previous version of this function:
    * labels are built in an object array instead of writing strings into a
      float NaN column (an incompatible-dtype write that pandas deprecates);
    * a value exactly equal to ``upper_thresh`` now counts as intermediate;
      previously it matched neither branch and fell through to the next rule.

    Returns None; ``df`` is modified in place.
    """
    # order matters here: earlier columns claim rows before later ones are considered
    cell_cols = [
        'cell_label_tumor+',
        'cell_label_cd8+',
    ]

    n_rows = len(df)
    labels = np.full(n_rows, None, dtype=object)
    unassigned = np.ones(n_rows, dtype=bool)

    def _claim(mask, label):
        # Label only rows that no higher-priority rule has taken yet.
        hit = unassigned & np.asarray(mask, dtype=bool)
        labels[hit] = label
        unassigned[hit] = False

    _claim(df['dapi'] < dapi_cutoff, 'dapi_omit')
    _claim(df[morph_flag_col] > morph_flag_cutoff, 'morphology_omit')
    _claim(df['manual_cell_call_Omit'] >= omit_thresh, 'cd8-af-ratio_omit')

    for col in cell_cols:
        _claim(df[col] > upper_thresh, f'filtered_{col}')
        _claim((df[col] > lower_thresh) & (df[col] <= upper_thresh), 'unclear_intermediate')

    labels[unassigned] = 'filtered_meta_other'
    df['filtered_cell_type'] = labels
    

In [None]:
from sklearn.linear_model import LinearRegression

def regress_out(df, x, y, fit_intercept=True):
    """
    Fit ``y ~ x`` by ordinary least squares on the rows of ``df`` and store the
    fit in place.

    Adds two columns to ``df``:
      * ``pred_{y}_cond_{x}``     -- the fitted value of ``y`` given ``x``
      * ``residual_{y}_cond_{x}`` -- ``y`` minus that fitted value

    Returns None.
    """
    predictors = df[[x]]
    model = LinearRegression(fit_intercept=fit_intercept)
    model.fit(predictors, y=df[[y]])

    pred_col = f'pred_{y}_cond_{x}'
    df[pred_col] = model.predict(predictors)
    df[f'residual_{y}_cond_{x}'] = df[y] - df[pred_col]

In [None]:
def run_af_regression(df, marker_channels=['dapi', 'foxp3', 'tumor_specific', 'cd8', 'pd1', 'pdl1']):
    """
    Regress every marker in ``marker_channels`` on the ``autofluorescence``
    column via ``regress_out`` (with intercept), adding the prediction and
    residual columns to ``df`` in place.

    Prints ``df.shape`` and returns the same ``df`` object so calls can be
    collected, e.g. one call per group followed by ``pd.concat``.
    """
    print(df.shape)
    af_col = 'autofluorescence'
    for marker_name in marker_channels:
        regress_out(df, af_col, marker_name, fit_intercept=True)

    return df

In [None]:
# `20220824_scratch_pd1_foxp3_checking` NB source

# The manual cutoff CSVs are stored with one column per case; flip them so that
# each row is a case and name the index accordingly.
cutoff_agg = (
    pd.read_csv('./20220822_manual_cutoffs_temp_all_cells.csv', index_col=0)
    .transpose()
    .rename_axis('case_id')
)

updated_cutoffs = (
    pd.read_csv('./20220823_manual_cutoffs_temp_v3.csv', index_col=0)
    .transpose()
    .rename_axis('case_id')
)

# First data column of the cutoff-set comparison results, as a Series.
evaluation = pd.read_csv('cutoff_set_comparison_results.csv', index_col=0).iloc[:, 0]

In [None]:
# Per-cell table with cell-type calls.
# NOTE(review): absolute machine-specific path; pd.read_pickle can execute
# arbitrary code, so only point this at trusted files.
f_update = '/mnt/disks/image_data/immunoprofile/ccrcc_subset_processing/ccrcc_ip14_arcsinh_quant_agg_with_basic_cutoffs__rerun_correct_grade_seg__hybrid_manual_global_louvain__morph_flag__celltypes.pkl'
labeled_data_updated = pd.read_pickle(f_update)

# `meta` is 'Tumor (H&E)' unless collapsed_grade_seg is adj_grade_seg_label_0 or
# adj_grade_seg_label_1, in which case it is 'Stroma/Omitted (H&E)'.
meta_map = {False:'Stroma/Omitted (H&E)',True:'Tumor (H&E)'}
labeled_data_updated['meta'] = (~labeled_data_updated['collapsed_grade_seg'].isin(['adj_grade_seg_label_0','adj_grade_seg_label_1'])).map(meta_map)
# merged_labels = trailing integer of the collapsed_grade_seg string, minus one.
labeled_data_updated['merged_labels'] = labeled_data_updated['collapsed_grade_seg'].apply(lambda x: int(x.split('_')[-1]) - 1)

# Index the FOV tile files on disk: one row per (case_id, x, y), one column of
# file paths per `format` value. `format_parses` is a project helper that
# presumably extracts the template fields from each path -- confirm.
fov_out_dir = '/mnt/disks/image_data/immunoprofile/ccrcc_subset_processing/fov_tiles/filtered'
template = '/mnt/disks/image_data/immunoprofile/ccrcc_subset_processing/fov_tiles/filtered/{case_id}_fov{x}_{y}_{format}.nc'
parses = glob(fov_out_dir+'/*')
parses = format_parses(template, parses)
parses = parses.pivot(index=['case_id','x','y'], values='filepath', columns='format')
# FOV key is "<x>_<y>" (requires the x / y index levels to be strings).
parses['fov'] = ['_'.join(x[1:]) for x in parses.index]

# colors = [to_rgb('grey'), to_rgb('tab:purple'), to_rgb('tab:green'), ]
# NOTE: `full_color_map` and `mask_key` are reassigned in the next cell.
full_color_map = {idx:color for idx,color in enumerate(['black','green','grey','purple','yellow','orange',])}
colors = [to_rgb('grey'), to_rgb('tab:green'), to_rgb('tab:purple'), ]


mask_key = {0: 'cd8+', 1: 'other', 2: 'tumor+', 3: 'ungated/omit'}

# Distinct (case_id, fov, meta) combinations seen in the cell table.
# NOTE(review): if one FOV contains cells with both meta values it appears twice
# here and the join below duplicates that FOV's row -- confirm this cannot happen.
fov_meta_map = labeled_data_updated.reset_index()[['case_id','fov','meta']].drop_duplicates().set_index(['case_id','fov'])

parses_meta = parses.reset_index().set_index(['case_id','fov']).join(fov_meta_map)
tumor_parses = parses_meta.loc[parses_meta['meta'] == 'Tumor (H&E)']
# `!=` is also True for missing values, so FOVs without any meta land here too.
nontumor_parses = parses_meta.loc[parses_meta['meta'] != 'Tumor (H&E)']

In [None]:
# Integer -> filtered label, in `filtered_cats` order. The plotting helper paints
# mask_key entry k with full_color_map[k + 1]; index 0 ('black') is background.
mask_key = dict(enumerate(filtered_cats))
full_color_map = dict(enumerate(['black','green','pink','grey','yellow','white','red','cyan','orange']))
# Label -> colour for legends. zip stops at the shorter input, so any colours
# beyond len(filtered_cats) (the trailing 'orange') are not mapped to a label.
named_color_map = {
    label: color
    for label, color in zip(filtered_cats, ['green','pink','grey','yellow','white','red','cyan','orange'])
}

# # for case_id in ['IP_18_A00093']:
# for case_id in candidates.sample(5).index.values:
#     print(case_id)
#     df = labeled_data_updated.loc[[case_id]]
#     print(df.shape)
    
#     print(named_color_map)
#     cell_samples = df.loc[(case_id, tumor_parses.loc[case_id].index.values),:]
#     tumor_parses_subset = tumor_parses.loc[(case_id, get_indices(cell_samples.value_counts(['fov','filtered_cell_type',],sort=False).loc[(slice(None),'filtered_cell_label_cd8+')] > 5)),:]
#     parse_samples, seg, data, fill_mask, fov_calls, store = visualize_fov_samples_postcutoff(case_id, df, tumor_parses_subset, cell_label_col='filtered_cell_type', n_samples=5,
#                                      mask_key=mask_key, full_color_map=full_color_map, return_samples=True)

In [None]:

# pos_samples = pd.Series(get_indices(alt_crit)).sample(10)
# tumor_parses_subset = tumor_parses.loc[(case_id,list(pos_samples)),:]    

# parse_samples, seg, data, fill_mask, fov_calls, store = visualize_fov_samples_postcutoff(case_id, df, tumor_parses_subset,
#                                                                                          channels=[5,2,3], lower_clip_percentile=5, upper_clip_percentile=95,
#                                                                                          cell_label_col='res_pdl1_pos_tumor', n_samples=10,
#                                  mask_key={0:0, 1:1, 2:2}, full_color_map=full_color_map, return_samples=True,
#                                                                                     )


In [None]:
# case_id = 'IP_19_R00053'
# 'IP_19_T00810', 'IP_19_R00053'

# Visual check of PD-L1 residual positivity on tumor cells for selected cases.
# NOTE: this loop leaves `case_id`, `df`, `crit`, `alt_crit` in the notebook
# namespace; several later cells read them.
for case_id in ['IP_19_R00053']:
    print('\n\n\n', case_id)
    df = labeled_data_updated.loc[[case_id]]
    print(df.shape)

    # Re-fit the autofluorescence regression separately within each
    # filtered_cell_type group of this case, then stitch the groups together.
    rerun =  pd.concat([run_af_regression(subdf) for cell_type, subdf in df.groupby('filtered_cell_type')])

    # Short aliases res_<marker> for every marker except autofluorescence.
    for marker in qptiff_channels[:-1]:
        rerun[f'res_{marker}'] = rerun[f'residual_{marker}_cond_autofluorescence']

    df = rerun
    
    # Integer code per cell: 0 = not a tumor+ cell, 2 = tumor+ with residual <= 0,
    # 3 = tumor+ with residual > 0.
    df['res_pdl1_pos'] = df['res_pdl1'] > 0.
    df['res_pdl1_pos_tumor'] = (df['filtered_cell_type'] == 'filtered_cell_label_tumor+') & df['res_pdl1_pos']
    df['res_pdl1_pos_tumor'] = df['res_pdl1_pos_tumor'].astype(int)+2
    df.loc[df['filtered_cell_type'] != 'filtered_cell_label_tumor+', 'res_pdl1_pos_tumor'] = 0
    
    set_rc(5,5)
    sns.histplot(x='res_pdl1', stat='density', data=df.loc[df['filtered_cell_type'] == 'filtered_cell_label_tumor+'].reset_index(), alpha=0.5)
    plt.show()
    
    print(named_color_map)
    cell_samples = df.loc[(case_id, tumor_parses.loc[case_id].index.values),:]

    # crit = cell_samples.value_counts(['fov','filtered_cell_type',],sort=False).loc[(slice(None),'filtered_cell_label_tumor+')] > 0

    # Per-FOV count of code-3 cells. NOTE(review): these lines sit outside the
    # try block and raise KeyError if no cell has code 3. FOVs with zero code-3
    # cells presumably do not appear in the counts at all, so `crit` would select
    # FOVs with 1-4 positives rather than none -- confirm.
    crit = cell_samples.value_counts(['fov','res_pdl1_pos_tumor',],sort=False).loc[(slice(None),3)] < 5
    alt_crit = cell_samples.value_counts(['fov','res_pdl1_pos_tumor',],sort=False).loc[(slice(None),3)] > 15
    # alt_crit = crit & alt_crit

    # NOTE(review): broad `except Exception` reports every failure in here as
    # "not enough examples", including unrelated errors.
    try:
        neg_samples = pd.Series(get_indices(crit)).sample(5)
        pos_samples = pd.Series(get_indices(alt_crit)).sample(5)


        tumor_parses_subset = tumor_parses.loc[(case_id, list(neg_samples) + list(pos_samples)),:]

        # Up to 10 FOVs are selected above but only n_samples=6 are drawn at random.
        # mask_key has no entry for code 3, so positive tumor cells are painted via
        # the helper's fallback index (len(mask_key) + 1).
        parse_samples, seg, data, fill_mask, fov_calls, store = visualize_fov_samples_postcutoff(case_id, df, tumor_parses_subset,
                                                                                                 channels=[5,2,3], lower_clip_percentile=25, upper_clip_percentile=95,
                                                                                                 cell_label_col='res_pdl1_pos_tumor', n_samples=6,
                                         mask_key={0:0, 1:1, 2:2}, full_color_map=full_color_map, return_samples=True,
                                                                                            )
        print(parse_samples)
    except Exception as e:
        print(e)
        print('Probably not enough examples!')

In [None]:
# Tumor+ cells of the `df` left over from the loop cell above. Take an explicit
# copy: a later cell adds a column to `subset`, and writing into a slice of `df`
# triggers pandas' SettingWithCopyWarning.
subset = df.loc[df['filtered_cell_type'] == 'filtered_cell_label_tumor+'].copy()

In [None]:
# 2-D histogram of tumor_specific vs pdl1 for the current `subset`.
set_rc(5,5)
sns.histplot(x='tumor_specific', y='pdl1', data=subset.reset_index())

In [None]:
# Joint distribution of tumor_specific and the PD-L1 residual for the current `subset`.
set_rc(5,5)
sns.jointplot(x='tumor_specific', y='res_pdl1', kind='hist', data=subset.reset_index())

In [None]:
# Per-cell ratio pdl1 / tumor_specific; a zero denominator gives inf or NaN.
subset['pt'] = subset['pdl1']/subset['tumor_specific']

In [None]:
# Compare the PD-L1 residual with the pdl1 / tumor_specific ratio (`pt`).
set_rc(5,5)
sns.jointplot(x='res_pdl1', y='pt', kind='hist', data=subset.reset_index())

In [None]:
# Same ratio on every cell of the current case frame `df`; a zero denominator gives inf or NaN.
df['pt'] = df['pdl1']/df['tumor_specific']

In [None]:
# Residual distribution on a random 10% of `df`.
# NOTE(review): no random_state, so the plotted sample differs on every run.
set_rc(5,5)
sns.histplot(x='res_pdl1', data=df.sample(frac=0.1).reset_index())

In [None]:
# Rebind `subset` to the CD8+ cells of the current case frame `df`.
subset = df.loc[df['filtered_cell_type'] == 'filtered_cell_label_cd8+']

In [None]:
# Autofluorescence vs PD-1 residual (after regression) for the current `subset`.
set_rc(5,5)
sns.jointplot(x='autofluorescence', y='res_pd1', kind='hist', data=subset.reset_index())

In [None]:
# Autofluorescence vs raw PD-1 for the same `subset`, for comparison with the residual plot.
set_rc(5,5)
sns.jointplot(x='autofluorescence', y='pd1', kind='hist', data=subset.reset_index())

In [None]:
# NOTE(review): `x` is not assigned anywhere in the visible cells; unless the star
# import provides it, this raises NameError on Restart & Run All -- likely a
# leftover to delete.
x

In [None]:
# Fraction of cells in the current `df` whose `meta` is 'Tumor (H&E)'.
(df['meta'] == 'Tumor (H&E)').mean()

In [None]:
# Manual annotation pass: for each case (in random order) show PD-L1 scatter
# panels for tumor+ cells, then ask the user to type an integer that is stored
# per case in `pdl1_mode_check`.
# NOTE(review): results depend on interactive input and are not reproducible from
# code; int(...) raises ValueError on non-integer input, and re-running the cell
# resets the dict. The 'res_pdl1' column must already exist in labeled_data_updated.
pdl1_mode_check = {}

for case_id in candidates.sample(frac=1).index.values:
    # print('\n\n\n', case_id)
    df = labeled_data_updated.loc[[case_id]]
    print(df.shape)

    subset = df.loc[df['filtered_cell_type'] == 'filtered_cell_label_tumor+'] 
    set_rc(15,5)
    fig,axes = plt.subplots(1,3)
    
    sns.histplot(x='tumor_specific', y='pdl1', data=subset.reset_index(), ax=axes[0])
    sns.histplot(x='autofluorescence', y='pdl1', data=subset.reset_index(), ax=axes[1])
    sns.histplot(x='pdl1', y='res_pdl1', data=subset.reset_index(), ax=axes[2])
    plt.show()

    
    # Same three panels, restricted to cells whose meta is 'Tumor (H&E)'.
    print('Limiting to H&E based tumor area')
    subset = subset.loc[subset['meta'] == 'Tumor (H&E)']
    print(subset.shape)
    set_rc(15,5)
    fig,axes = plt.subplots(1,3)
    
    sns.histplot(x='tumor_specific', y='pdl1', data=subset.reset_index(), ax=axes[0])
    sns.histplot(x='autofluorescence', y='pdl1', data=subset.reset_index(), ax=axes[1])
    sns.histplot(x='pdl1', y='res_pdl1', data=subset.reset_index(), ax=axes[2])
    plt.show()

    pdl1_mode_check[case_id] = int(input('Modes?'))

In [None]:
pdl1_mode_check

In [None]:

# Same PD-L1 panels as the annotation loop above, for one hand-picked case and
# without the input prompt. NOTE(review): this body duplicates that loop; a
# shared plotting function would avoid the copy.
for case_id in ['IP_19_G00660']:
    # print('\n\n\n', case_id)
    df = labeled_data_updated.loc[[case_id]]
    print(df.shape)

    subset = df.loc[df['filtered_cell_type'] == 'filtered_cell_label_tumor+'] 
    set_rc(15,5)
    fig,axes = plt.subplots(1,3)
    
    sns.histplot(x='tumor_specific', y='pdl1', data=subset.reset_index(), ax=axes[0])
    sns.histplot(x='autofluorescence', y='pdl1', data=subset.reset_index(), ax=axes[1])
    sns.histplot(x='pdl1', y='res_pdl1', data=subset.reset_index(), ax=axes[2])
    plt.show()

    
    # Same three panels, restricted to cells whose meta is 'Tumor (H&E)'.
    print('Limiting to H&E based tumor area')
    subset = subset.loc[subset['meta'] == 'Tumor (H&E)']
    print(subset.shape)
    set_rc(15,5)
    fig,axes = plt.subplots(1,3)
    
    sns.histplot(x='tumor_specific', y='pdl1', data=subset.reset_index(), ax=axes[0])
    sns.histplot(x='autofluorescence', y='pdl1', data=subset.reset_index(), ax=axes[1])
    sns.histplot(x='pdl1', y='res_pdl1', data=subset.reset_index(), ax=axes[2])
    plt.show()

    # pdl1_mode_check[case_id] = int(input('Modes?'))

In [None]:
# PD-1 panels for CD8+ cells in 5 randomly chosen cases: raw pd1 vs cd8 and
# autofluorescence, and the pd1 residual vs autofluorescence and raw pd1.
# NOTE(review): sample(5) has no random_state, so the cases differ on every run.
for case_id in candidates.sample(5).index.values:
    print('\n\n\n', case_id)
    df = labeled_data_updated.loc[[case_id]]
    print(df.shape)

    subset = df.loc[df['filtered_cell_type'] == 'filtered_cell_label_cd8+'] 
    set_rc(20,5)
    fig,axes = plt.subplots(1,4)
    
    sns.histplot(x='cd8', y='pd1', data=subset.reset_index(), ax=axes[0])
    sns.histplot(x='autofluorescence', y='pd1', data=subset.reset_index(), ax=axes[1])
    sns.histplot(x='autofluorescence', y='res_pd1', data=subset.reset_index(), ax=axes[2])
    sns.histplot(x='pd1', y='res_pd1', data=subset.reset_index(), ax=axes[3])
    plt.show()

    
    # Same four panels, restricted to cells whose meta is 'Tumor (H&E)'.
    print('Limiting to H&E based tumor area')
    subset = subset.loc[subset['meta'] == 'Tumor (H&E)']
    print(subset.shape)
    set_rc(20,5)
    fig,axes = plt.subplots(1,4)
    
    sns.histplot(x='cd8', y='pd1', data=subset.reset_index(), ax=axes[0])
    sns.histplot(x='autofluorescence', y='pd1', data=subset.reset_index(), ax=axes[1])
    sns.histplot(x='autofluorescence', y='res_pd1', data=subset.reset_index(), ax=axes[2])
    sns.histplot(x='pd1', y='res_pd1', data=subset.reset_index(), ax=axes[3])
    plt.show()


In [None]:
def get_cd8(df, label_col='filtered_cell_type', label_key='filtered_cell_label_cd8+'):
    """Return the rows of ``df`` whose ``label_col`` equals ``label_key`` (CD8+ label by default)."""
    is_match = df[label_col] == label_key
    return df.loc[is_match]

def get_tumor(df, label_col='filtered_cell_type', label_key='filtered_cell_label_tumor+'):
    """Return the rows of ``df`` whose ``label_col`` equals ``label_key`` (tumor+ label by default)."""
    is_match = df[label_col] == label_key
    return df.loc[is_match]

In [None]:
# All CD8+ cells across every case (rebinds the notebook-level `subset`).
subset = get_cd8(labeled_data_updated)

In [None]:
# Distribution of the PD-1 residual over the current `subset`.
set_rc(5,5)
sns.histplot(x='res_pd1', data=subset.reset_index())

In [None]:
# Project helper; the next cell groups by 'gmm_res_pd1', so this call presumably
# adds that column to `subset` in place (third argument presumably the number of
# mixture components) -- confirm against the helper's source.
run_gating_flex(subset, 'res_pd1', 2)

In [None]:
# Smallest PD-1 residual within each 'gmm_res_pd1' group, i.e. each group's lower edge.
subset.groupby('gmm_res_pd1')['res_pd1'].min()

### Check Visual Relationship Between PD1 Residual Score and Appearance

In [None]:
full_color_map

In [None]:
# Integer index -> colour name for the residual-positivity overlays; 0 is background.
color_map = dict(enumerate(['black','grey','white','cyan','orange','yellow','green',]))

In [None]:
color_map

In [None]:
# case_id = 'IP_19_R00053'
# 'IP_19_T00810', 'IP_19_R00053'

# Settings for this visual check: which marker residual is thresholded, within
# which cell type, and which image channel indices are rendered.
n_samples = 5
marker = 'pd1'
cell_type = 'filtered_cell_label_cd8+'
min_residual = 0.463  # NOTE(review): hard-coded cutoff; its derivation is not shown in this cell
channels = [4,3,2]


# NOTE(review): sample(3) has no random_state, so the cases differ on every run.
for case_id in candidates.sample(3).index.values:
    print('\n\n\n', case_id)
    df = labeled_data_updated.loc[[case_id]]
    print(df.shape)

    # The per-case re-fit is disabled below, so the res_* columns already stored
    # in labeled_data_updated are used as they are.
    print('Retaining global based residuals')
#     rerun =  pd.concat([run_af_regression(subdf) for cell_type, subdf in df.groupby('filtered_cell_type')])
#     for marker in qptiff_channels[:-1]:
#         rerun[f'res_{marker}'] = rerun[f'residual_{marker}_cond_autofluorescence']
#     df = rerun
    
    # Integer code per cell: 0 = not `cell_type`, 2 = `cell_type` with residual
    # <= min_residual, 3 = `cell_type` with residual > min_residual.
    df[f'res_{marker}_pos'] = df[f'res_{marker}'] > min_residual
    df[f'res_{marker}_pos_subset'] = (df[f'filtered_cell_type'] == cell_type) & df[f'res_{marker}_pos']
    df[f'res_{marker}_pos_subset'] = df[f'res_{marker}_pos_subset'].astype(int)+2
    df.loc[df[f'filtered_cell_type'] != cell_type, f'res_{marker}_pos_subset'] = 0
    
    # NOTE(review): the panel columns below are hard-coded to cd8 / pd1 and do not
    # follow `marker`.
    set_rc(20,5)
    fig,axes = plt.subplots(1,4)
    subset = df.loc[df['meta'] == 'Tumor (H&E)']
    subset = subset.loc[subset['filtered_cell_type'] == cell_type]
    sns.histplot(x='cd8', y='pd1', data=subset.reset_index(), ax=axes[0])
    sns.histplot(x='autofluorescence', y='pd1', data=subset.reset_index(), ax=axes[1])
    sns.histplot(x='autofluorescence', y='res_pd1', data=subset.reset_index(), ax=axes[2])
    sns.histplot(x='pd1', y='res_pd1', data=subset.reset_index(), ax=axes[3])
    plt.show()
    
    
    print(named_color_map)
    cell_samples = df.loc[(case_id, tumor_parses.loc[case_id].index.values),:]

    # FOVs with more than 25 cells of `cell_type` (fallback pool).
    crit = cell_samples.value_counts([f'fov','filtered_cell_type',],sort=False).loc[(slice(None),cell_type)] > 25
    filtered_indices = get_indices(crit)
    
    # FOVs with more than 5 positive (code 3) cells (preferred pool).
    # NOTE(review): raises KeyError if the case has no code-3 cell at all.
    alt_crit = cell_samples.value_counts([f'fov',f'res_{marker}_pos_subset',],sort=False).loc[(slice(None),3)] > 5
    alt_indices = get_indices(alt_crit)
    
    if len(alt_indices) >= n_samples:
        print('Found examples')
        tumor_parses_subset = tumor_parses.loc[(case_id, alt_indices),:]
        print(len(tumor_parses_subset))
    else:
        print('cant find enough examples')
        tumor_parses_subset = tumor_parses.loc[(case_id, filtered_indices),:]
        print(len(tumor_parses_subset))


    # mask_key has no entry for code 3, so positive cells are painted via the
    # helper's fallback index (len(mask_key) + 1).
    parse_samples, seg, data, fill_mask, fov_calls, store = visualize_fov_samples_postcutoff(case_id, df, tumor_parses_subset,
                                                                                             channels=channels, lower_clip_percentile=5, upper_clip_percentile=95,
                                                                                             cell_label_col=f'res_{marker}_pos_subset', n_samples=n_samples,
                                     mask_key={0:0, 1:1, 2:2}, full_color_map=color_map, return_samples=True )

        
    # crit = cell_samples.value_counts([f'fov','res_{marker}_pos_subset',],sort=False).loc[(slice(None),3)] < 5
    # alt_crit = cell_samples.value_counts([f'fov','res_{marker}_pos_subset',],sort=False).loc[(slice(None),3)] > 5
    # # alt_crit = crit & alt_crit

#     try:
#         neg_samples = pd.Series(get_indices(crit)).sample(5)
#         pos_samples = pd.Series(get_indices(alt_crit)).sample(5)


#         tumor_parses_subset = tumor_parses.loc[(case_id, list(neg_samples) + list(pos_samples)),:]

#         parse_samples, seg, data, fill_mask, fov_calls, store = visualize_fov_samples_postcutoff(case_id, df, tumor_parses_subset,
#                                                                                                  channels=[5,2,3], lower_clip_percentile=5, upper_clip_percentile=95,
#                                                                                                  cell_label_col='res_{marker}_pos_subset', n_samples=6,
#                                          mask_key={0:0, 1:1, 2:2}, full_color_map=full_color_map, return_samples=True,
#                                                                                             )
#         print(parse_samples)
#     except Exception as e:
#         print(e)
#         print('Probably not enough examples!')

In [None]:
pdl1_mode_check

In [None]:
# All tumor+ cells across every case (rebinds the notebook-level `subset`).
subset = get_tumor(labeled_data_updated)

In [None]:
# Keep only cases whose manual entry in `pdl1_mode_check` is 2.
# `get_indices` is a project helper, presumably returning the index labels where
# the boolean Series is True -- confirm. Depends on the interactive annotation cell.
subset = subset.loc[get_indices(pd.Series(pdl1_mode_check) == 2)]

In [None]:
# Pooled PD-L1 residual distribution for the current `subset`.
set_rc(6,6)
sns.histplot(x='res_pdl1', data=subset.reset_index())

In [None]:
# Same distribution, one panel per case_id.
set_rc(6,6)
sns.displot(x='res_pdl1', col='case_id', data=subset.reset_index())

In [None]:
# Per-case PD-L1 residual cutoff for cases annotated with 2 in `pdl1_mode_check`.
cutoffs = []
for case_id in get_indices(pd.Series(pdl1_mode_check) == 2):
    print(case_id)
    tmp = subset.loc[case_id]

    # Project helper; presumably writes the 'gmm_res_pdl1' group column into `tmp`
    # (third argument presumably the number of mixture components) -- confirm.
    run_gating_flex(tmp, 'res_pdl1', 3)

    # Cutoff = the largest of the per-group minimum residuals, i.e. the lower edge
    # of the group that starts highest.
    cut = tmp.groupby('gmm_res_pdl1')['res_pdl1'].min().max()
    cutoffs.append(cut)
    sns.histplot(x='res_pdl1', data=tmp.reset_index())
    plt.axvline(cut, c='r')
    plt.show()

In [None]:
np.array(cutoffs).mean()

In [None]:
pd.Series(cutoffs).describe()

In [None]:
# Reset `subset` to all tumor+ cells, undoing the earlier per-annotation filter.
subset = get_tumor(labeled_data_updated)

In [None]:
# Same cutoff procedure for the cases NOT annotated with 2 in `pdl1_mode_check`.
# NOTE: overwrites the `cutoffs` list built for the 2-annotated cases.
cutoffs = []
for case_id in get_indices(pd.Series(pdl1_mode_check) != 2):
    print(case_id)
    tmp = subset.loc[case_id]

    # Project helper; presumably writes the 'gmm_res_pdl1' group column into `tmp` -- confirm.
    run_gating_flex(tmp, 'res_pdl1', 3)

    # Cutoff = the largest of the per-group minimum residuals.
    cut = tmp.groupby('gmm_res_pdl1')['res_pdl1'].min().max()
    cutoffs.append(cut)
    set_rc(5,5)
    sns.histplot(x='res_pdl1', data=tmp.reset_index())
    plt.axvline(cut, c='r')
    plt.show()
pd.Series(cutoffs).describe()

In [None]:
# Per-case PD-1 residual cutoff on CD8+ cells, for every candidate case.
subset = get_cd8(labeled_data_updated)
marker = 'pd1'
cutoffs = []
for case_id in candidates.index.values:
    print(case_id)
    tmp = subset.loc[case_id]

    # Project helper; presumably writes the f'gmm_res_{marker}' group column into `tmp` -- confirm.
    run_gating_flex(tmp, f'res_{marker}', 3)

    # Cutoff = the largest of the per-group minimum residuals.
    cut = tmp.groupby(f'gmm_res_{marker}')[f'res_{marker}'].min().max()
    cutoffs.append(cut)
    # NOTE(review): the disabled plot below still references 'res_pdl1', not f'res_{marker}'.
    # set_rc(5,5)
    # sns.histplot(x='res_pdl1', data=tmp.reset_index())
    # plt.axvline(cut, c='r')
    # plt.show()
pd.Series(cutoffs).describe()

In [None]:
crit

In [None]:
# subset = get_cd8(labeled_data_updated)
# marker = 'pd1'
# cutoffs = []
# for case_id in candidates.index.values:
#     print(case_id)
#     tmp = subset.loc[case_id]
#     tmp = tmp.loc[tmp['meta'] == 'Tumor (H&E)']
#     lb = tmp[f'res_{marker}'].quantile(0.01)
#     ub = tmp[f'res_{marker}'].quantile(0.99)

#     crit = (tmp[f'res_{marker}'] < ub) & (tmp[f'res_{marker}'] > lb)
#     tmp = tmp.loc[crit]

#     run_gating_flex(tmp, f'res_{marker}', 2)

#     cut = tmp.groupby(f'gmm_res_{marker}')[f'res_{marker}'].min().max()
#     cutoffs.append(cut)
#     set_rc(5,5)
#     sns.histplot(x=f'res_{marker}', data=tmp.reset_index())
#     plt.axvline(cut, c='r')
#     plt.axvline(0.463, c='g')
#     plt.show()
# pd.Series(cutoffs).describe()

In [None]:
# subset = get_cd8(labeled_data_updated)
# marker = 'pd1'
# cutoffs = []
# for case_id in candidates.index.values:
#     print(case_id)
#     tmp = subset.loc[case_id]
#     tmp = tmp.loc[tmp['meta'] == 'Tumor (H&E)']
#     lb = tmp[f'res_{marker}'].quantile(0.01)
#     ub = tmp[f'res_{marker}'].quantile(0.99)

#     crit = (tmp[f'res_{marker}'] < ub) & (tmp[f'res_{marker}'] > lb)
#     tmp = tmp.loc[crit]

#     run_gating_flex(tmp, f'res_{marker}', 2)

#     cut = tmp.groupby(f'gmm_res_{marker}')[f'res_{marker}'].min().max()
#     cutoffs.append(cut)
#     set_rc(5,5)
#     sns.histplot(x=f'res_{marker}', data=tmp.reset_index())
#     plt.axvline(cut, c='r')
#     plt.axvline(0.463, c='g')
#     plt.show()
# pd.Series(cutoffs).describe()

In [None]:
# subset = get_tumor(labeled_data_updated)
# marker = 'pdl1'
# cutoffs = []
# for case_id in candidates.index.values:
#     print(case_id)
#     tmp = subset.loc[case_id]
#     tmp = tmp.loc[tmp['meta'] == 'Tumor (H&E)']
#     lb = tmp[f'res_{marker}'].quantile(0.01)
#     ub = tmp[f'res_{marker}'].quantile(0.99)

#     crit = (tmp[f'res_{marker}'] < ub) & (tmp[f'res_{marker}'] > lb)
#     tmp = tmp.loc[crit]

#     run_gating_flex(tmp, f'res_{marker}', 2)

#     cut = tmp.groupby(f'gmm_res_{marker}')[f'res_{marker}'].min().max()
#     cutoffs.append(cut)
#     set_rc(5,5)
#     sns.histplot(x=f'res_{marker}', data=tmp.reset_index())
#     plt.axvline(cut, c='r')
#     plt.show()
# pd.Series(cutoffs).describe()

In [None]:
# Per-case PD-L1 cutoff on tumor+ cells inside the H&E tumor area, with the
# autofluorescence regression re-fit after trimming residual outliers.
subset = get_tumor(labeled_data_updated)
marker = 'pdl1'
cutoffs = []
for case_id in candidates.index.values:
    print(case_id)
    tmp = subset.loc[case_id]
    tmp = tmp.loc[tmp['meta'] == 'Tumor (H&E)']
    # Keep cells strictly between the 1st and 99th percentile of the EXISTING residual.
    lb = tmp[f'res_{marker}'].quantile(0.01)
    ub = tmp[f'res_{marker}'].quantile(0.99)

    crit = (tmp[f'res_{marker}'] < ub) & (tmp[f'res_{marker}'] > lb)
    tmp = tmp.loc[crit]
    
    # Re-fit on the trimmed cells and overwrite the res_* columns. `tmp` holds a
    # single filtered_cell_type here (get_tumor), so the groupby yields one group.
    print('Rerunning Residuals')
    rerun =  pd.concat([run_af_regression(subdf) for cell_type, subdf in tmp.groupby('filtered_cell_type')])
    for col in qptiff_channels[:-1]:
        rerun[f'res_{col}'] = rerun[f'residual_{col}_cond_autofluorescence']
    tmp = rerun
    

    # Project helper; presumably writes the f'gmm_res_{marker}' group column into `tmp` -- confirm.
    run_gating_flex(tmp, f'res_{marker}', 2)

    # Cutoff = the largest of the per-group minimum residuals.
    cut = tmp.groupby(f'gmm_res_{marker}')[f'res_{marker}'].min().max()
    cutoffs.append(cut)
    set_rc(5,5)
    sns.histplot(x=f'res_{marker}', data=tmp.reset_index())
    plt.axvline(cut, c='r')
    plt.show()
pd.Series(cutoffs).describe()

In [None]:
# Per-case PD-1 cutoff on CD8+ cells inside the H&E tumor area, with the
# autofluorescence regression re-fit after trimming residual outliers.
# NOTE(review): near-duplicate of the PD-L1 cell above; only the cell type,
# marker and the printed helper return value differ.
subset = get_cd8(labeled_data_updated)
marker = 'pd1'
cutoffs = []
for case_id in candidates.index.values:
    print(case_id)
    tmp = subset.loc[case_id]
    tmp = tmp.loc[tmp['meta'] == 'Tumor (H&E)']
    # Keep cells strictly between the 1st and 99th percentile of the EXISTING residual.
    lb = tmp[f'res_{marker}'].quantile(0.01)
    ub = tmp[f'res_{marker}'].quantile(0.99)

    crit = (tmp[f'res_{marker}'] < ub) & (tmp[f'res_{marker}'] > lb)
    tmp = tmp.loc[crit]
    
    # Re-fit on the trimmed cells and overwrite the res_* columns. `tmp` holds a
    # single filtered_cell_type here (get_cd8), so the groupby yields one group.
    print('Rerunning Residuals')
    rerun =  pd.concat([run_af_regression(subdf) for cell_type, subdf in tmp.groupby('filtered_cell_type')])
    for col in qptiff_channels[:-1]:
        rerun[f'res_{col}'] = rerun[f'residual_{col}_cond_autofluorescence']
    tmp = rerun
    

    # Project helper; its return value is printed for inspection. It presumably
    # also writes the f'gmm_res_{marker}' group column into `tmp` -- confirm.
    a = run_gating_flex(tmp, f'res_{marker}', 2)
    print(a)

    # Cutoff = the largest of the per-group minimum residuals.
    cut = tmp.groupby(f'gmm_res_{marker}')[f'res_{marker}'].min().max()
    cutoffs.append(cut)
    set_rc(5,5)
    sns.histplot(x=f'res_{marker}', data=tmp.reset_index())
    plt.axvline(cut, c='r')
    plt.show()
pd.Series(cutoffs).describe()

In [None]:
# IPython introspection: `??` shows the source of run_gating_flex.
# Interactive aid only; it does not affect any analysis state.
run_gating_flex??

In [None]:
# Visual QC of a fixed PD-L1 residual cutoff on tumor cells, for 3 randomly drawn cases.
# Per case: re-fit the autofluorescence regression, flag cells above `min_residual`,
# plot residual vs. autofluorescence, then render example FOVs coloured by the flag.
#
# Label coding written to res_<marker>_pos_subset:
#   0 = any other cell type, 2 = `cell_type` at/below the cutoff, 3 = `cell_type` above it.
#
# NOTE(review): candidates.sample() is unseeded, so the cases shown differ between runs.
n_samples = 5
marker = 'pdl1'
cell_type = 'filtered_cell_label_tumor+'
min_residual = 0.095
channels = [5,2,3]  # presumably positions in qptiff_channels (pdl1, tumor_specific, cd8) -- confirm


for case_id in candidates.sample(3).index.values:
    print('\n\n\n', case_id)
    df = labeled_data_updated.loc[[case_id]]
    print(df.shape)

    print('Rerunning AF Residuals')
    # Comprehension variables are local to the comprehension, so the global `cell_type`
    # defined above is NOT overwritten by the groupby key.
    rerun = pd.concat([run_af_regression(subdf) for cell_type, subdf in df.groupby('filtered_cell_type')])
    # Refresh the short res_<channel> columns for every channel except the last (autofluorescence).
    for col in qptiff_channels[:-1]:
        rerun[f'res_{col}'] = rerun[f'residual_{col}_cond_autofluorescence']
    df = rerun

    df[f'res_{marker}_pos'] = df[f'res_{marker}'] > min_residual
    df[f'res_{marker}_pos_subset'] = (df[f'filtered_cell_type'] == cell_type) & df[f'res_{marker}_pos']
    df[f'res_{marker}_pos_subset'] = df[f'res_{marker}_pos_subset'].astype(int)+2
    df.loc[df[f'filtered_cell_type'] != cell_type, f'res_{marker}_pos_subset'] = 0

    set_rc(6,6)
    subset = df.loc[df['meta'] == 'Tumor (H&E)']
    subset = subset.loc[subset['filtered_cell_type'] == cell_type]
    sns.histplot(x='autofluorescence', y=f'res_{marker}', data=subset.reset_index(), )
    plt.axhline(min_residual, c='r')
    plt.show()

    print(named_color_map)
    # Restrict to the FOVs listed for this case in tumor_parses.
    cell_samples = df.loc[(case_id, tumor_parses.loc[case_id].index.values),:]

    # FOVs holding more than 25 cells of the target type (fallback pool).
    crit = cell_samples.value_counts([f'fov','filtered_cell_type',],sort=False).loc[(slice(None),cell_type)] > 25
    filtered_indices = get_indices(crit)

    # FOVs holding more than 5 above-cutoff cells (label 3). When no cell in the case is
    # above the cutoff, label 3 never appears in the counts and the `.loc` lookup used to
    # raise KeyError before the fallback branch below could run; treat it as "none found".
    pos_counts = cell_samples.value_counts([f'fov',f'res_{marker}_pos_subset',],sort=False)
    if 3 in pos_counts.index.get_level_values(1):
        alt_crit = pos_counts.loc[(slice(None),3)] > 5
        alt_indices = get_indices(alt_crit)
    else:
        alt_crit = pd.Series(dtype=bool)
        alt_indices = []

    if len(alt_indices) >= n_samples:
        print('Found examples')
        tumor_parses_subset = tumor_parses.loc[(case_id, alt_indices),:]
        print(len(tumor_parses_subset))
    else:
        print('cant find enough examples')
        tumor_parses_subset = tumor_parses.loc[(case_id, filtered_indices),:]
        print(len(tumor_parses_subset))

    # NOTE(review): mask_key has no entry for label 3 although the label column takes the
    # values 0/2/3 -- confirm how visualize_fov_samples_postcutoff treats unmapped labels.
    parse_samples, seg, data, fill_mask, fov_calls, store = visualize_fov_samples_postcutoff(case_id, df, tumor_parses_subset,
                                                                                             channels=channels, lower_clip_percentile=5, upper_clip_percentile=95,
                                                                                             cell_label_col=f'res_{marker}_pos_subset', n_samples=n_samples,
                                     mask_key={0:0, 1:1, 2:2}, full_color_map=color_map, return_samples=True )

In [None]:
# Visual QC of a fixed PD-L1 residual cutoff on tumor cells, for 5 randomly drawn cases.
# Per case: re-fit the autofluorescence regression, flag cells above `min_residual`,
# plot residual vs. autofluorescence, then render example FOVs coloured by the flag.
#
# Label coding written to res_<marker>_pos_subset:
#   0 = any other cell type, 2 = `cell_type` at/below the cutoff, 3 = `cell_type` above it.
#
# NOTE(review): candidates.sample() is unseeded, so the cases shown differ between runs.
n_samples = 5
min_pos_cells = 25  # an FOV needs more than this many above-cutoff cells to count as an example
marker = 'pdl1'
cell_type = 'filtered_cell_label_tumor+'
min_residual = 0.095
channels = [5,2,3]  # presumably positions in qptiff_channels (pdl1, tumor_specific, cd8) -- confirm


for case_id in candidates.sample(5).index.values:
    print('\n\n\n', case_id)
    df = labeled_data_updated.loc[[case_id]]
    print(df.shape)

    print('Rerunning AF Residuals')
    # Comprehension variables are local to the comprehension, so the global `cell_type`
    # defined above is NOT overwritten by the groupby key.
    rerun = pd.concat([run_af_regression(subdf) for cell_type, subdf in df.groupby('filtered_cell_type')])
    # Refresh the short res_<channel> columns for every channel except the last (autofluorescence).
    for col in qptiff_channels[:-1]:
        rerun[f'res_{col}'] = rerun[f'residual_{col}_cond_autofluorescence']
    df = rerun

    df[f'res_{marker}_pos'] = df[f'res_{marker}'] > min_residual
    df[f'res_{marker}_pos_subset'] = (df[f'filtered_cell_type'] == cell_type) & df[f'res_{marker}_pos']
    df[f'res_{marker}_pos_subset'] = df[f'res_{marker}_pos_subset'].astype(int)+2
    df.loc[df[f'filtered_cell_type'] != cell_type, f'res_{marker}_pos_subset'] = 0

    set_rc(6,6)
    subset = df.loc[df['meta'] == 'Tumor (H&E)']
    subset = subset.loc[subset['filtered_cell_type'] == cell_type]
    sns.histplot(x='autofluorescence', y=f'res_{marker}', data=subset.reset_index(), )
    plt.axhline(min_residual, c='r')
    plt.show()

    print(named_color_map)
    # Restrict to the FOVs listed for this case in tumor_parses.
    cell_samples = df.loc[(case_id, tumor_parses.loc[case_id].index.values),:]

    # FOVs holding more than 25 cells of the target type (fallback pool).
    crit = cell_samples.value_counts([f'fov','filtered_cell_type',],sort=False).loc[(slice(None),cell_type)] > 25
    filtered_indices = get_indices(crit)

    # FOVs holding more than `min_pos_cells` above-cutoff cells (label 3). When no cell in
    # the case is above the cutoff, label 3 never appears in the counts and the `.loc`
    # lookup used to raise KeyError before the fallback branch below could run; treat it
    # as "none found".
    pos_counts = cell_samples.value_counts([f'fov',f'res_{marker}_pos_subset',],sort=False)
    if 3 in pos_counts.index.get_level_values(1):
        alt_crit = pos_counts.loc[(slice(None),3)] > min_pos_cells
        alt_indices = get_indices(alt_crit)
    else:
        alt_crit = pd.Series(dtype=bool)
        alt_indices = []

    if len(alt_indices) >= n_samples:
        print('Found examples')
        tumor_parses_subset = tumor_parses.loc[(case_id, alt_indices),:]
        print(len(tumor_parses_subset))
    else:
        print('cant find enough examples')
        tumor_parses_subset = tumor_parses.loc[(case_id, filtered_indices),:]
        print(len(tumor_parses_subset))

    # NOTE(review): mask_key has no entry for label 3 although the label column takes the
    # values 0/2/3 -- confirm how visualize_fov_samples_postcutoff treats unmapped labels.
    parse_samples, seg, data, fill_mask, fov_calls, store = visualize_fov_samples_postcutoff(case_id, df, tumor_parses_subset,
                                                                                             channels=channels, lower_clip_percentile=5, upper_clip_percentile=95,
                                                                                             cell_label_col=f'res_{marker}_pos_subset', n_samples=n_samples,
                                     mask_key={0:0, 1:1, 2:2}, full_color_map=color_map, return_samples=True )

In [None]:
# Visual QC of a fixed PD-1 residual cutoff on CD8 cells, for 3 randomly drawn cases.
# Per case: re-fit the autofluorescence regression, flag cells above `min_residual`,
# plot residual vs. autofluorescence, then render example FOVs coloured by the flag.
#
# Label coding written to res_<marker>_pos_subset:
#   0 = any other cell type, 2 = `cell_type` at/below the cutoff, 3 = `cell_type` above it.
#
# NOTE(review): candidates.sample() is unseeded, so the cases shown differ between runs.
n_samples = 5
min_pos_cells = 25  # an FOV needs more than this many above-cutoff cells to count as an example
marker = 'pd1'
cell_type = 'filtered_cell_label_cd8+'
min_residual = 0.089
# channels = [5,2,3]
channels = [4,3,2]  # presumably positions in qptiff_channels (pd1, cd8, tumor_specific) -- confirm

for case_id in candidates.sample(3).index.values:
    print('\n\n\n', case_id)
    df = labeled_data_updated.loc[[case_id]]
    print(df.shape)

    print('Rerunning AF Residuals')
    # Comprehension variables are local to the comprehension, so the global `cell_type`
    # defined above is NOT overwritten by the groupby key.
    # `rerun` stays in the kernel after the loop and holds only the last case processed.
    rerun = pd.concat([run_af_regression(subdf) for cell_type, subdf in df.groupby('filtered_cell_type')])
    # Refresh the short res_<channel> columns for every channel except the last (autofluorescence).
    for col in qptiff_channels[:-1]:
        rerun[f'res_{col}'] = rerun[f'residual_{col}_cond_autofluorescence']
    df = rerun

    df[f'res_{marker}_pos'] = df[f'res_{marker}'] > min_residual
    df[f'res_{marker}_pos_subset'] = (df[f'filtered_cell_type'] == cell_type) & df[f'res_{marker}_pos']
    df[f'res_{marker}_pos_subset'] = df[f'res_{marker}_pos_subset'].astype(int)+2
    df.loc[df[f'filtered_cell_type'] != cell_type, f'res_{marker}_pos_subset'] = 0

    set_rc(6,6)
    subset = df.loc[df['meta'] == 'Tumor (H&E)']
    subset = subset.loc[subset['filtered_cell_type'] == cell_type]
    sns.histplot(x='autofluorescence', y=f'res_{marker}', data=subset.reset_index(), )
    plt.axhline(min_residual, c='r')
    plt.show()

    print(named_color_map)
    # Restrict to the FOVs listed for this case in tumor_parses.
    cell_samples = df.loc[(case_id, tumor_parses.loc[case_id].index.values),:]

    # FOVs holding more than 25 cells of the target type (fallback pool).
    crit = cell_samples.value_counts([f'fov','filtered_cell_type',],sort=False).loc[(slice(None),cell_type)] > 25
    filtered_indices = get_indices(crit)

    # FOVs holding more than `min_pos_cells` above-cutoff cells (label 3). When no cell in
    # the case is above the cutoff, label 3 never appears in the counts and the `.loc`
    # lookup used to raise KeyError before the fallback branch below could run; treat it
    # as "none found".
    pos_counts = cell_samples.value_counts([f'fov',f'res_{marker}_pos_subset',],sort=False)
    if 3 in pos_counts.index.get_level_values(1):
        alt_crit = pos_counts.loc[(slice(None),3)] > min_pos_cells
        alt_indices = get_indices(alt_crit)
    else:
        alt_crit = pd.Series(dtype=bool)
        alt_indices = []

    if len(alt_indices) >= n_samples:
        print('Found examples')
        tumor_parses_subset = tumor_parses.loc[(case_id, alt_indices),:]
        print(len(tumor_parses_subset))
    else:
        print('cant find enough examples')
        tumor_parses_subset = tumor_parses.loc[(case_id, filtered_indices),:]
        print(len(tumor_parses_subset))

    # NOTE(review): mask_key has no entry for label 3 although the label column takes the
    # values 0/2/3 -- confirm how visualize_fov_samples_postcutoff treats unmapped labels.
    parse_samples, seg, data, fill_mask, fov_calls, store = visualize_fov_samples_postcutoff(case_id, df, tumor_parses_subset,
                                                                                             channels=channels, lower_clip_percentile=5, upper_clip_percentile=95,
                                                                                             cell_label_col=f'res_{marker}_pos_subset', n_samples=n_samples,
                                     mask_key={0:0, 1:1, 2:2}, full_color_map=color_map, return_samples=True )

In [None]:
# n_samples = 5
# marker = 'pd1'
# cell_type = 'filtered_cell_label_cd8+'
# min_residual = 0.4
# # channels = [5,2,3]
# channels = [4,3,2]

# for case_id in candidates.sample(3).index.values:
#     print('\n\n\n', case_id)
#     df = labeled_data_updated.loc[[case_id]]
#     print(df.shape)

# #     print('Rerunning AF Residuals')
# #     rerun =  pd.concat([run_af_regression(subdf) for cell_type, subdf in df.groupby('filtered_cell_type')])
# #     for col in qptiff_channels[:-1]:
# #         rerun[f'res_{col}'] = rerun[f'residual_{col}_cond_autofluorescence']
# #     df = rerun
    
#     df[f'res_{marker}_pos'] = df[f'res_{marker}'] > min_residual
#     df[f'res_{marker}_pos_subset'] = (df[f'filtered_cell_type'] == cell_type) & df[f'res_{marker}_pos']
#     df[f'res_{marker}_pos_subset'] = df[f'res_{marker}_pos_subset'].astype(int)+2
#     df.loc[df[f'filtered_cell_type'] != cell_type, f'res_{marker}_pos_subset'] = 0
    

#     set_rc(6,6)
#     subset = df.loc[df['meta'] == 'Tumor (H&E)']
#     subset = subset.loc[subset['filtered_cell_type'] == cell_type]
#     sns.histplot(x='autofluorescence', y=f'res_{marker}', data=subset.reset_index(), )
#     plt.axhline(min_residual, c='r')
#     plt.show()
        
    
#     print(named_color_map)
#     cell_samples = df.loc[(case_id, tumor_parses.loc[case_id].index.values),:]

#     crit = cell_samples.value_counts([f'fov','filtered_cell_type',],sort=False).loc[(slice(None),cell_type)] > 25
#     filtered_indices = get_indices(crit)
    
#     alt_crit = cell_samples.value_counts([f'fov',f'res_{marker}_pos_subset',],sort=False).loc[(slice(None),3)] > 5
#     alt_indices = get_indices(alt_crit)
    
#     if len(alt_indices) >= n_samples:
#         print('Found examples')
#         tumor_parses_subset = tumor_parses.loc[(case_id, alt_indices),:]
#         print(len(tumor_parses_subset))
#     else:
#         print('cant find enough examples')
#         tumor_parses_subset = tumor_parses.loc[(case_id, filtered_indices),:]
#         print(len(tumor_parses_subset))


#     parse_samples, seg, data, fill_mask, fov_calls, store = visualize_fov_samples_postcutoff(case_id, df, tumor_parses_subset,
#                                                                                              channels=channels, lower_clip_percentile=5, upper_clip_percentile=95,
#                                                                                              cell_label_col=f'res_{marker}_pos_subset', n_samples=n_samples,
#                                      mask_key={0:0, 1:1, 2:2}, full_color_map=color_map, return_samples=True )

In [None]:
# Shape of the leftover `rerun` frame, i.e. whatever the most recently executed loop last assigned to it.
rerun.shape

In [None]:
# PD-1 cutoff scan on CD8 cells using the leftover `rerun` frame as-is (no regression re-fit
# inside this loop). Each processed case is plotted with its cutoff.
#
# NOTE(review): `rerun` is whatever the most recently executed loop left behind. When the
# notebook is run top to bottom it comes from the last iteration of a per-case loop and so
# contains a single case; looking up every candidate in it raised KeyError. Cases that are
# not present are now reported and skipped instead.
subset = get_cd8(rerun)
marker = 'pd1'
cutoffs = []
# First index level is the case id (it is what `subset.loc[case_id]` looks up).
available_cases = set(subset.index.get_level_values(0))
for case_id in candidates.index.values:
    print(case_id)
    if case_id not in available_cases:
        print('  not present in `rerun`, skipping')
        continue
    tmp = subset.loc[case_id]
    tmp = tmp.loc[tmp['meta'] == 'Tumor (H&E)']
    # Trim both tails of the residual (strictly inside the 1st-99th percentile band).
    lb = tmp[f'res_{marker}'].quantile(0.01)
    ub = tmp[f'res_{marker}'].quantile(0.99)

    crit = (tmp[f'res_{marker}'] < ub) & (tmp[f'res_{marker}'] > lb)
    # Explicit copy: run_gating_flex appears to write the gmm_res_<marker> column onto the
    # frame it receives (the groupby below reads it), which should not target a slice.
    tmp = tmp.loc[crit].copy()

    a = run_gating_flex(tmp, f'res_{marker}', 2)
    print(a)

    # Largest of the per-group minima: the smallest residual in whichever group starts highest.
    cut = tmp.groupby(f'gmm_res_{marker}')[f'res_{marker}'].min().max()
    cutoffs.append(cut)
    set_rc(5,5)
    sns.histplot(x=f'res_{marker}', data=tmp.reset_index())
    plt.axvline(cut, c='r')
    plt.show()
# Summary statistics of the cutoffs that could be computed
pd.Series(cutoffs).describe()

In [None]:
# PD-1 cutoff scan on CD8 cells using the leftover `rerun` frame as-is (no regression re-fit
# inside this loop); plotting is disabled in this variant.
#
# NOTE(review): `rerun` is whatever the most recently executed loop left behind. When the
# notebook is run top to bottom it comes from the last iteration of a per-case loop and so
# contains a single case; looking up every candidate in it raised KeyError. Cases that are
# not present are now reported and skipped instead.
subset = get_cd8(rerun)
marker = 'pd1'
cutoffs = []
# First index level is the case id (it is what `subset.loc[case_id]` looks up).
available_cases = set(subset.index.get_level_values(0))
for case_id in candidates.index.values:
    print(case_id)
    if case_id not in available_cases:
        print('  not present in `rerun`, skipping')
        continue
    tmp = subset.loc[case_id]
    tmp = tmp.loc[tmp['meta'] == 'Tumor (H&E)']
    # Trim both tails of the residual (strictly inside the 1st-99th percentile band).
    lb = tmp[f'res_{marker}'].quantile(0.01)
    ub = tmp[f'res_{marker}'].quantile(0.99)

    crit = (tmp[f'res_{marker}'] < ub) & (tmp[f'res_{marker}'] > lb)
    # Explicit copy: run_gating_flex appears to write the gmm_res_<marker> column onto the
    # frame it receives (the groupby below reads it), which should not target a slice.
    tmp = tmp.loc[crit].copy()

    a = run_gating_flex(tmp, f'res_{marker}', 2)
    print(a)

    # Largest of the per-group minima: the smallest residual in whichever group starts highest.
    cut = tmp.groupby(f'gmm_res_{marker}')[f'res_{marker}'].min().max()
    cutoffs.append(cut)
    # set_rc(5,5)
    # sns.histplot(x=f'res_{marker}', data=tmp.reset_index())
    # plt.axvline(cut, c='r')
    # plt.show()
# Summary statistics of the cutoffs that could be computed
pd.Series(cutoffs).describe()

In [None]:
# Peek at the first rows of the leftover `rerun` frame (state from the most recently executed loop).
rerun.head()

In [None]:
# Per-case PD-1 cutoff scan on the frame returned by get_cd8(labeled_data_updated), with
# plotting disabled. Order of operations: keep 'Tumor (H&E)' rows -> trim the tails of the
# stored res_pd1 -> re-fit the autofluorescence regression -> gate into 2 groups.
subset = get_cd8(labeled_data_updated)
marker = 'pd1'
cutoffs = []
for case_id in candidates.index.values:
    print(case_id)
    tmp = subset.loc[case_id]
    tmp = tmp.loc[tmp['meta'] == 'Tumor (H&E)']
    # 1st / 99th percentile of the residual as currently stored (before the re-fit below)
    lb = tmp[f'res_{marker}'].quantile(0.01)
    ub = tmp[f'res_{marker}'].quantile(0.99)

    # strict inequalities: rows exactly on either percentile are dropped too
    crit = (tmp[f'res_{marker}'] < ub) & (tmp[f'res_{marker}'] > lb)
    tmp = tmp.loc[crit]
    
    print('Rerunning Residuals')
    # One regression per filtered_cell_type group; comprehension variables stay local to it.
    rerun =  pd.concat([run_af_regression(subdf) for cell_type, subdf in tmp.groupby('filtered_cell_type')])
    # Refresh the short res_<channel> columns for every channel except the last (autofluorescence).
    for col in qptiff_channels[:-1]:
        rerun[f'res_{col}'] = rerun[f'residual_{col}_cond_autofluorescence']
    tmp = rerun
    

    # The return value is only printed for inspection; its contents are defined by run_gating_flex.
    a = run_gating_flex(tmp, f'res_{marker}', 2)
    print(a)

    # Largest of the per-group minima: the smallest residual in whichever group starts highest.
    cut = tmp.groupby(f'gmm_res_{marker}')[f'res_{marker}'].min().max()
    cutoffs.append(cut)
    # set_rc(5,5)
    # sns.histplot(x=f'res_{marker}', data=tmp.reset_index())
    # plt.axvline(cut, c='r')
    # plt.show()
# Summary statistics of the per-case cutoffs (displayed as the cell output)
pd.Series(cutoffs).describe()

In [None]:
# Per-case PD-1 cutoff scan on the frame returned by get_cd8(labeled_data_updated), with
# plotting disabled. Order of operations in this variant: keep 'Tumor (H&E)' rows ->
# re-fit the autofluorescence regression -> trim the tails of the REFRESHED res_pd1 ->
# gate into 2 groups.
subset = get_cd8(labeled_data_updated)
marker = 'pd1'
cutoffs = []
for case_id in candidates.index.values:
    print(case_id)
    tmp = subset.loc[case_id]
    tmp = tmp.loc[tmp['meta'] == 'Tumor (H&E)']

    print('Rerunning Residuals')
    # One regression per filtered_cell_type group; comprehension variables stay local to it.
    rerun = pd.concat([run_af_regression(subdf) for cell_type, subdf in tmp.groupby('filtered_cell_type')])
    # Refresh the short res_<channel> columns for every channel except the last (autofluorescence).
    for col in qptiff_channels[:-1]:
        rerun[f'res_{col}'] = rerun[f'residual_{col}_cond_autofluorescence']
    tmp = rerun

    # Trim both tails of the refreshed residual (strictly inside the 1st-99th percentile band).
    lb = tmp[f'res_{marker}'].quantile(0.01)
    ub = tmp[f'res_{marker}'].quantile(0.99)

    crit = (tmp[f'res_{marker}'] < ub) & (tmp[f'res_{marker}'] > lb)
    # Explicit copy: run_gating_flex appears to write the gmm_res_<marker> column onto the
    # frame it receives (the groupby below reads it), which should not target a slice of `rerun`.
    tmp = tmp.loc[crit].copy()

    a = run_gating_flex(tmp, f'res_{marker}', 2)
    print(a)

    # Largest of the per-group minima: the smallest residual in whichever group starts highest.
    cut = tmp.groupby(f'gmm_res_{marker}')[f'res_{marker}'].min().max()
    cutoffs.append(cut)
    # set_rc(5,5)
    # sns.histplot(x=f'res_{marker}', data=tmp.reset_index())
    # plt.axvline(cut, c='r')
    # plt.show()
# Summary statistics of the per-case cutoffs (displayed as the cell output)
pd.Series(cutoffs).describe()

In [None]:
# Per-case PD-L1 cutoff scan on the frame returned by get_tumor(labeled_data_updated), with
# plotting disabled. Order of operations in this variant: keep 'Tumor (H&E)' rows ->
# re-fit the autofluorescence regression -> trim the tails of the REFRESHED res_pdl1 ->
# gate into 2 groups.
subset = get_tumor(labeled_data_updated)
marker = 'pdl1'
cutoffs = []
for case_id in candidates.index.values:
    print(case_id)
    tmp = subset.loc[case_id]
    tmp = tmp.loc[tmp['meta'] == 'Tumor (H&E)']

    print('Rerunning Residuals')
    # One regression per filtered_cell_type group; comprehension variables stay local to it.
    rerun = pd.concat([run_af_regression(subdf) for cell_type, subdf in tmp.groupby('filtered_cell_type')])
    # Refresh the short res_<channel> columns for every channel except the last (autofluorescence).
    for col in qptiff_channels[:-1]:
        rerun[f'res_{col}'] = rerun[f'residual_{col}_cond_autofluorescence']
    tmp = rerun

    # Trim both tails of the refreshed residual (strictly inside the 1st-99th percentile band).
    lb = tmp[f'res_{marker}'].quantile(0.01)
    ub = tmp[f'res_{marker}'].quantile(0.99)

    crit = (tmp[f'res_{marker}'] < ub) & (tmp[f'res_{marker}'] > lb)
    # Explicit copy: run_gating_flex appears to write the gmm_res_<marker> column onto the
    # frame it receives (the groupby below reads it), which should not target a slice of `rerun`.
    tmp = tmp.loc[crit].copy()

    a = run_gating_flex(tmp, f'res_{marker}', 2)
    print(a)

    # Largest of the per-group minima: the smallest residual in whichever group starts highest.
    cut = tmp.groupby(f'gmm_res_{marker}')[f'res_{marker}'].min().max()
    cutoffs.append(cut)
    # set_rc(5,5)
    # sns.histplot(x=f'res_{marker}', data=tmp.reset_index())
    # plt.axvline(cut, c='r')
    # plt.show()
# Summary statistics of the per-case cutoffs (displayed as the cell output)
pd.Series(cutoffs).describe()

In [None]:
%%capture
# Recompute the autofluorescence residuals for the whole table in one pass: one
# run_af_regression call per (case_id, filtered_cell_type) group, concatenated into a
# single frame. The %%capture magic (which must stay the first line of the cell)
# suppresses everything the cell would print or display.
residual_rerun =  pd.concat([run_af_regression(subdf) for (case_id, cell_type), subdf in labeled_data_updated.groupby(['case_id','filtered_cell_type'])])
# Expose each refreshed residual under the short res_<channel> name; the [:-1] slice
# leaves out the last channel in qptiff_channels (autofluorescence).
for col in qptiff_channels[:-1]:
    residual_rerun[f'res_{col}'] = residual_rerun[f'residual_{col}_cond_autofluorescence']

In [None]:
# Shape of the rebuilt all-case residual table (quick size check after the concat).
residual_rerun.shape

In [None]:
# Per-case PD-L1 cutoff scan on the frame returned by get_tumor(residual_rerun).
# The residuals stored in residual_rerun are used as-is (no regression re-fit in the loop);
# only rows whose `meta` is 'Tumor (H&E)' are considered.
subset = get_tumor(residual_rerun)
marker = 'pdl1'
cutoffs = []
for case_id in candidates.index.values:
    print(case_id)
    tmp = subset.loc[case_id]
    tmp = tmp.loc[tmp['meta'] == 'Tumor (H&E)']

    # Trim both tails of the residual (strictly inside the 1st-99th percentile band).
    lb = tmp[f'res_{marker}'].quantile(0.01)
    ub = tmp[f'res_{marker}'].quantile(0.99)

    crit = (tmp[f'res_{marker}'] < ub) & (tmp[f'res_{marker}'] > lb)
    # Explicit copy: run_gating_flex appears to write the gmm_res_<marker> column onto the
    # frame it receives (the groupby below reads it), which should not target a slice of
    # residual_rerun.
    tmp = tmp.loc[crit].copy()

    a = run_gating_flex(tmp, f'res_{marker}', 2)
    print(a)

    # Largest of the per-group minima: the smallest residual in whichever group starts highest.
    cut = tmp.groupby(f'gmm_res_{marker}')[f'res_{marker}'].min().max()
    cutoffs.append(cut)
    set_rc(5,5)
    sns.histplot(x=f'res_{marker}', data=tmp.reset_index())
    plt.axvline(cut, c='r')
    plt.show()
# Summary statistics of the per-case cutoffs (displayed as the cell output)
pd.Series(cutoffs).describe()

In [None]:
# Per-case PD-L1 cutoff scan on the frame returned by get_tumor(residual_rerun).
# Variant WITHOUT the 'Tumor (H&E)' region filter: every row of the case is used.
# The residuals stored in residual_rerun are used as-is (no regression re-fit in the loop).
subset = get_tumor(residual_rerun)
marker = 'pdl1'
cutoffs = []
for case_id in candidates.index.values:
    print(case_id)
    tmp = subset.loc[case_id]
    # tmp = tmp.loc[tmp['meta'] == 'Tumor (H&E)']

    # Trim both tails of the residual (strictly inside the 1st-99th percentile band).
    lb = tmp[f'res_{marker}'].quantile(0.01)
    ub = tmp[f'res_{marker}'].quantile(0.99)

    crit = (tmp[f'res_{marker}'] < ub) & (tmp[f'res_{marker}'] > lb)
    # Explicit copy: run_gating_flex appears to write the gmm_res_<marker> column onto the
    # frame it receives (the groupby below reads it), which should not target a slice of
    # residual_rerun.
    tmp = tmp.loc[crit].copy()

    a = run_gating_flex(tmp, f'res_{marker}', 2)
    print(a)

    # Largest of the per-group minima: the smallest residual in whichever group starts highest.
    cut = tmp.groupby(f'gmm_res_{marker}')[f'res_{marker}'].min().max()
    cutoffs.append(cut)
    set_rc(5,5)
    sns.histplot(x=f'res_{marker}', data=tmp.reset_index())
    plt.axvline(cut, c='r')
    plt.show()
# Summary statistics of the per-case cutoffs (displayed as the cell output)
pd.Series(cutoffs).describe()

In [None]:
# Per-case PD-1 cutoff scan on the frame returned by get_cd8(residual_rerun).
# The residuals stored in residual_rerun are used as-is (no regression re-fit in the loop);
# only rows whose `meta` is 'Tumor (H&E)' are considered.
subset = get_cd8(residual_rerun)
marker = 'pd1'
cutoffs = []
for case_id in candidates.index.values:
    print(case_id)
    tmp = subset.loc[case_id]
    tmp = tmp.loc[tmp['meta'] == 'Tumor (H&E)']

    # Trim both tails of the residual (strictly inside the 1st-99th percentile band).
    lb = tmp[f'res_{marker}'].quantile(0.01)
    ub = tmp[f'res_{marker}'].quantile(0.99)

    crit = (tmp[f'res_{marker}'] < ub) & (tmp[f'res_{marker}'] > lb)
    # Explicit copy: run_gating_flex appears to write the gmm_res_<marker> column onto the
    # frame it receives (the groupby below reads it), which should not target a slice of
    # residual_rerun.
    tmp = tmp.loc[crit].copy()

    a = run_gating_flex(tmp, f'res_{marker}', 2)
    print(a)

    # Largest of the per-group minima: the smallest residual in whichever group starts highest.
    cut = tmp.groupby(f'gmm_res_{marker}')[f'res_{marker}'].min().max()
    cutoffs.append(cut)
    set_rc(5,5)
    sns.histplot(x=f'res_{marker}', data=tmp.reset_index())
    plt.axvline(cut, c='r')
    plt.show()
# Summary statistics of the per-case cutoffs (displayed as the cell output)
pd.Series(cutoffs).describe()

In [None]:
# Histogram of the residual pooled over everything in `subset`, with a fixed reference
# line at 0.185.
# NOTE(review): `marker` and `subset` are leftovers from whichever cutoff cell ran last, so
# the plot depends on execution order; 0.185 is a hand-entered value with no visible derivation.
sns.histplot(x=f'res_{marker}', data=subset.reset_index())
plt.axvline(0.185, c='r')


In [None]:
# Per-case PD-1 cutoff scan on the frame returned by get_cd8(residual_rerun).
# Variant WITHOUT the 'Tumor (H&E)' region filter: every row of the case is used.
# The residuals stored in residual_rerun are used as-is (no regression re-fit in the loop).
subset = get_cd8(residual_rerun)
marker = 'pd1'
cutoffs = []
for case_id in candidates.index.values:
    print(case_id)
    tmp = subset.loc[case_id]
    # tmp = tmp.loc[tmp['meta'] == 'Tumor (H&E)']

    # Trim both tails of the residual (strictly inside the 1st-99th percentile band).
    lb = tmp[f'res_{marker}'].quantile(0.01)
    ub = tmp[f'res_{marker}'].quantile(0.99)

    crit = (tmp[f'res_{marker}'] < ub) & (tmp[f'res_{marker}'] > lb)
    # Explicit copy: run_gating_flex appears to write the gmm_res_<marker> column onto the
    # frame it receives (the groupby below reads it), which should not target a slice of
    # residual_rerun.
    tmp = tmp.loc[crit].copy()

    a = run_gating_flex(tmp, f'res_{marker}', 2)
    print(a)

    # Largest of the per-group minima: the smallest residual in whichever group starts highest.
    cut = tmp.groupby(f'gmm_res_{marker}')[f'res_{marker}'].min().max()
    cutoffs.append(cut)
    set_rc(5,5)
    sns.histplot(x=f'res_{marker}', data=tmp.reset_index())
    plt.axvline(cut, c='r')
    plt.show()
# Summary statistics of the per-case cutoffs (displayed as the cell output)
pd.Series(cutoffs).describe()

In [None]:
# Visual QC of the PD-1 residual cutoff on CD8 cells, for 3 randomly drawn cases, using the
# residuals stored in residual_rerun (no regression re-fit here).
# Per case: flag cells above `min_residual`, plot residual vs. autofluorescence, then
# render example FOVs coloured by the flag.
#
# Label coding written to res_<marker>_pos_subset:
#   0 = any other cell type, 2 = `cell_type` at/below the cutoff, 3 = `cell_type` above it.
#
# NOTE(review): candidates.sample() is unseeded, so the cases shown differ between runs.
n_samples = 5
min_pos_cells = 25  # an FOV needs more than this many above-cutoff cells to count as an example
marker = 'pd1'
cell_type = 'filtered_cell_label_cd8+'
min_residual = 0.133567
# channels = [5,2,3]
channels = [4,3,2]  # presumably positions in qptiff_channels (pd1, cd8, tumor_specific) -- confirm

for case_id in candidates.sample(3).index.values:
    print('\n\n\n', case_id)
    # Work on an explicit copy: the flag columns added below are scratch columns for this
    # cell and must not be written onto (or warn about) a slice of residual_rerun.
    df = residual_rerun.loc[[case_id]].copy()
    print(df.shape)

    df[f'res_{marker}_pos'] = df[f'res_{marker}'] > min_residual
    df[f'res_{marker}_pos_subset'] = (df[f'filtered_cell_type'] == cell_type) & df[f'res_{marker}_pos']
    df[f'res_{marker}_pos_subset'] = df[f'res_{marker}_pos_subset'].astype(int)+2
    df.loc[df[f'filtered_cell_type'] != cell_type, f'res_{marker}_pos_subset'] = 0

    set_rc(6,6)
    subset = df.loc[df['meta'] == 'Tumor (H&E)']
    subset = subset.loc[subset['filtered_cell_type'] == cell_type]
    sns.histplot(x='autofluorescence', y=f'res_{marker}', data=subset.reset_index(), )
    plt.axhline(min_residual, c='r')
    plt.show()

    print(named_color_map)
    # Restrict to the FOVs listed for this case in tumor_parses.
    cell_samples = df.loc[(case_id, tumor_parses.loc[case_id].index.values),:]

    # FOVs holding more than 25 cells of the target type (fallback pool).
    crit = cell_samples.value_counts([f'fov','filtered_cell_type',],sort=False).loc[(slice(None),cell_type)] > 25
    filtered_indices = get_indices(crit)

    # FOVs holding more than `min_pos_cells` above-cutoff cells (label 3). When no cell in
    # the case is above the cutoff, label 3 never appears in the counts and the `.loc`
    # lookup used to raise KeyError before the fallback branch below could run; treat it
    # as "none found".
    pos_counts = cell_samples.value_counts([f'fov',f'res_{marker}_pos_subset',],sort=False)
    if 3 in pos_counts.index.get_level_values(1):
        alt_crit = pos_counts.loc[(slice(None),3)] > min_pos_cells
        alt_indices = get_indices(alt_crit)
    else:
        alt_crit = pd.Series(dtype=bool)
        alt_indices = []

    if len(alt_indices) >= n_samples:
        print('Found examples')
        tumor_parses_subset = tumor_parses.loc[(case_id, alt_indices),:]
        print(len(tumor_parses_subset))
    else:
        print('cant find enough examples')
        tumor_parses_subset = tumor_parses.loc[(case_id, filtered_indices),:]
        print(len(tumor_parses_subset))

    # NOTE(review): mask_key has no entry for label 3 although the label column takes the
    # values 0/2/3 -- confirm how visualize_fov_samples_postcutoff treats unmapped labels.
    parse_samples, seg, data, fill_mask, fov_calls, store = visualize_fov_samples_postcutoff(case_id, df, tumor_parses_subset,
                                                                                             channels=channels, lower_clip_percentile=5, upper_clip_percentile=95,
                                                                                             cell_label_col=f'res_{marker}_pos_subset', n_samples=n_samples,
                                     mask_key={0:0, 1:1, 2:2}, full_color_map=color_map, return_samples=True )

In [None]:
# Visual QC of the PD-L1 residual cutoff on tumor cells, for EVERY candidate case in random
# order (sample(frac=1) is a shuffle), using the residuals stored in residual_rerun
# (no regression re-fit here).
# Per case: flag cells above `min_residual`, plot residual vs. autofluorescence, then
# render example FOVs coloured by the flag.
#
# Label coding written to res_<marker>_pos_subset:
#   0 = any other cell type, 2 = `cell_type` at/below the cutoff, 3 = `cell_type` above it.
n_samples = 5
min_pos_cells = 25  # an FOV needs more than this many above-cutoff cells to count as an example
marker = 'pdl1'
cell_type = 'filtered_cell_label_tumor+'
min_residual = 0.108897
channels = [5,2,3]  # presumably positions in qptiff_channels (pdl1, tumor_specific, cd8) -- confirm
# channels = [4,3,2]

for case_id in candidates.sample(frac=1).index.values:
    print('\n\n\n', case_id)
    # Work on an explicit copy: the flag columns added below are scratch columns for this
    # cell and must not be written onto (or warn about) a slice of residual_rerun.
    df = residual_rerun.loc[[case_id]].copy()
    print(df.shape)

    df[f'res_{marker}_pos'] = df[f'res_{marker}'] > min_residual
    df[f'res_{marker}_pos_subset'] = (df[f'filtered_cell_type'] == cell_type) & df[f'res_{marker}_pos']
    df[f'res_{marker}_pos_subset'] = df[f'res_{marker}_pos_subset'].astype(int)+2
    df.loc[df[f'filtered_cell_type'] != cell_type, f'res_{marker}_pos_subset'] = 0

    set_rc(6,6)
    subset = df.loc[df['meta'] == 'Tumor (H&E)']
    subset = subset.loc[subset['filtered_cell_type'] == cell_type]
    sns.histplot(x='autofluorescence', y=f'res_{marker}', data=subset.reset_index(), )
    plt.axhline(min_residual, c='r')
    plt.show()

    print(named_color_map)
    # Restrict to the FOVs listed for this case in tumor_parses.
    cell_samples = df.loc[(case_id, tumor_parses.loc[case_id].index.values),:]

    # FOVs holding more than 25 cells of the target type (fallback pool).
    crit = cell_samples.value_counts([f'fov','filtered_cell_type',],sort=False).loc[(slice(None),cell_type)] > 25
    filtered_indices = get_indices(crit)

    # FOVs holding more than `min_pos_cells` above-cutoff cells (label 3). When no cell in
    # the case is above the cutoff, label 3 never appears in the counts and the `.loc`
    # lookup used to raise KeyError before the fallback branch below could run; treat it
    # as "none found".
    pos_counts = cell_samples.value_counts([f'fov',f'res_{marker}_pos_subset',],sort=False)
    if 3 in pos_counts.index.get_level_values(1):
        alt_crit = pos_counts.loc[(slice(None),3)] > min_pos_cells
        alt_indices = get_indices(alt_crit)
    else:
        alt_crit = pd.Series(dtype=bool)
        alt_indices = []

    if len(alt_indices) >= n_samples:
        print('Found examples')
        tumor_parses_subset = tumor_parses.loc[(case_id, alt_indices),:]
        print(len(tumor_parses_subset))
    else:
        print('cant find enough examples')
        tumor_parses_subset = tumor_parses.loc[(case_id, filtered_indices),:]
        print(len(tumor_parses_subset))

    # NOTE(review): mask_key has no entry for label 3 although the label column takes the
    # values 0/2/3 -- confirm how visualize_fov_samples_postcutoff treats unmapped labels.
    parse_samples, seg, data, fill_mask, fov_calls, store = visualize_fov_samples_postcutoff(case_id, df, tumor_parses_subset,
                                                                                             channels=channels, lower_clip_percentile=5, upper_clip_percentile=95,
                                                                                             cell_label_col=f'res_{marker}_pos_subset', n_samples=n_samples,
                                     mask_key={0:0, 1:1, 2:2}, full_color_map=color_map, return_samples=True )

In [None]:
# Residual cutoffs used to split each gated population into High / Low subtypes.
# These are the same values used as `min_residual` in the two visual-check cells that
# read from residual_rerun.
subtype_cutoffs = dict([
    ('residual_pd1_in_cd8+', 0.133567),      # PD-1 residual threshold applied to CD8+ cells
    ('residual_pdl1_in_tumor+', 0.108897),   # PD-L1 residual threshold applied to tumor+ cells
])

In [None]:
# List the distinct filtered_cell_type labels present, i.e. the values the subtype rules can match on.
residual_rerun['filtered_cell_type'].unique()

In [None]:
# Start detailed_cell_type as a copy of filtered_cell_type; rows that no subtype rule
# overwrites afterwards keep their filtered label.
residual_rerun['detailed_cell_type'] = residual_rerun['filtered_cell_type']

In [None]:
# Split the CD8+ and tumor+ populations into High / Low subtypes by comparing the relevant
# residual with its entry in subtype_cutoffs (>= cutoff -> High, < cutoff -> Low).
# Rows of other cell types are left untouched, as are rows whose residual is NaN
# (both comparisons are False for NaN).
# Each rule: (filtered label, residual column, cutoff key, High label, Low label)
_subtype_rules = (
    ('filtered_cell_label_cd8+', 'res_pd1', 'residual_pd1_in_cd8+', 'CD8+ PD1 High', 'CD8+ PD1 Low'),
    ('filtered_cell_label_tumor+', 'res_pdl1', 'residual_pdl1_in_tumor+', 'Tumor+ PDL1 High', 'Tumor+ PDL1 Low'),
)
for _base_label, _res_col, _cutoff_key, _high_label, _low_label in _subtype_rules:
    _is_base = residual_rerun['filtered_cell_type'] == _base_label
    _cutoff = subtype_cutoffs[_cutoff_key]

    crit = _is_base & (residual_rerun[_res_col] >= _cutoff)
    residual_rerun.loc[crit, 'detailed_cell_type'] = _high_label

    crit = _is_base & (residual_rerun[_res_col] < _cutoff)
    residual_rerun.loc[crit, 'detailed_cell_type'] = _low_label


In [None]:
# Number of cells per detailed_cell_type label after the High / Low split.
residual_rerun.value_counts(['detailed_cell_type'])

In [None]:
# Display `f_update`, which is defined outside this chunk
# (presumably the path of the input table, shown for reference when naming the output below -- confirm).
f_update

In [None]:
# Persist the table (now carrying the refreshed res_* columns and detailed_cell_type) as a pickle.
# NOTE(review): absolute, machine-specific path; an existing file at this location is overwritten.
f_rerun = '/mnt/disks/image_data/immunoprofile/ccrcc_subset_processing/ccrcc_ip14_arcsinh_quant_agg_with_basic_cutoffs__rerun_correct_grade_seg__hybrid_manual_global_louvain__morph_flag__cellsubtypes.pkl'
residual_rerun.to_pickle(f_rerun)