## Configuration

In [1]:
from collections import Counter

import pandas as pd

FANMOD_path = "C:\\Users\\User\\source\\repos\\fanmod-cmd\\out\\build\\x64-release"
FANMOD_exe = "LocalFANMOD.exe"

raw_data_folder = './../../data'
raw_data_folder_dataset = './TNBC_MIBITOF'
output_dir = './../../fanmod_output'
cache_dir = './../../parse_cache'
results_dir = './../../results/TNBC_MIBITOF'
motif_size = 3
iterations = 1000
force_run_fanmod = False
force_parse = False

In [2]:
cells_type = {
 0: 'B cells',
 1: 'CD3 T cells',
 2: 'CD4 t cells',
 3: 'CD8 T cells',
 4: 'DC',
 5: 'DC/Mono',
 6: 'Endothelial',
 7: 'Immune other',
 8: 'Macrophages',
 9: 'Mesenchyme',
 10: 'Mono/Neu',
 11: 'NK cells',
 12: 'Neutrophils',
 13: 'Tregs',
 14: 'Tumor',
 15: 'Unidentified'
}

Distant_metastases_class = {
    'POSITIVE': 'POSITIVE',
    'NEGATIVE': 'NEGATIVE',
}

survival_rate_cutoff = 1000

def map_survival_rate_to_class(survival_rate: int):
    if survival_rate<survival_rate_cutoff:
        return 'POSITIVE'
    else:
        return 'NEGATIVE'

class_to_color = {
    'POSITIVE': 'green',
    'NEGATIVE': 'orange',
}

## Steps
1. Initiate CISM object
2. Load datasets
3. Initiate CISMTask

In [3]:
from cism.cism import CISM

In [4]:
cism = CISM(fanmod_exe=FANMOD_exe,
            fanmod_path=FANMOD_path,
            network_dataset_root_path=raw_data_folder,
            fanmod_output_root_path=output_dir,
            fanmod_cache_root_path=cache_dir,
            motif_size=motif_size,
            iterations=iterations)

# adding Melanoma dataset
cism.add_dataset(raw_data_folder_dataset, 'Disease', 'TNBC', force_run_fanmod=force_run_fanmod, force_parse=force_parse, n_jobs=1, quantile_threshold=1)

  0%|          | 0/42 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

In [5]:
import pandas as pd

survival_days_df = pd.read_csv(raw_data_folder + raw_data_folder_dataset + '/patient_class_survival_days.csv', names=['Patient', 'Survival Days'])

In [6]:
from cism.cism import TissueStateDiscriminativeMotifs
from cism.cism import DiscriminativeFeatureKey

discriminator = TissueStateDiscriminativeMotifs(cism=cism,
                                                tissue_state_csv_path=raw_data_folder + raw_data_folder_dataset + '/patient_class_survival_days.csv',
                                                tissue_state_to_string=None,
                                                common_cells_type=cells_type,
                                                tissue_state_func=map_survival_rate_to_class)

In [7]:
discriminator.get_patients_class(classes=None).groupby('patient_class').count()

Unnamed: 0_level_0,patient_class_id
patient_class,Unnamed: 1_level_1
NEGATIVE,30
POSITIVE,7


In [8]:
discriminator.get_patients_class(classes=None).sort_values('patient_class')

Unnamed: 0,patient_class_id,patient_class
TNBC1,2612,NEGATIVE
TNBC29,1319,NEGATIVE
TNBC3,3130,NEGATIVE
TNBC31,1009,NEGATIVE
TNBC32,1568,NEGATIVE
TNBC33,1738,NEGATIVE
TNBC34,2832,NEGATIVE
TNBC28,3767,NEGATIVE
TNBC35,2759,NEGATIVE
TNBC37,2853,NEGATIVE


In [13]:
results_table = None

In [14]:
from cism.cism import HardDiscriminativeFC, SoftDiscriminativeFC, DiscriminativeFeatureKey
import itertools

for left_class, right_class in itertools.combinations(['POSITIVE', 'NEGATIVE'], 2):
    for th in [0.2, 0.3, 0.4, 0.45, 0.5, 0.6, 0.7, 0.75, 0.8]:
        try:
            print(f'task: {left_class} - {right_class}')
            featureConf = HardDiscriminativeFC(
                extract_by=DiscriminativeFeatureKey.STRUCTURE_AND_CELL_IDENTITIES,
                use_cells_type_composition=False,
                use_motifs=True,
                shared_percentage=th,
                max_class_features=30,
                labels=[left_class, right_class])

            motif_space_features = (discriminator
                                    .get_features(feature_conf=featureConf,
                                                  exclude_patients=[],
                                                  n_jobs=1))

            optimal_result = (discriminator
                              .analyze_motifs(feature_conf=featureConf,
                                              exclude_patients=[],
                                              n_jobs=1))

            print(f'classes: {[left_class, right_class]} '
                  f'th:{th} score: {optimal_result.get_roc_auc_score()}')

            all_motif_features = []
            for idx, row in motif_space_features.iterrows():
                all_motif_features += row['features']
            all_motif_features = list(set(all_motif_features))

            motif_freq = []
            motif_count = []
            motifs_dataset = discriminator.cism.motifs_dataset
            for motif_id in all_motif_features:
                motif_mean_freq = motifs_dataset[motifs_dataset['ID'] == motif_id]['Freq'].mean()
                motif_mean_count = motifs_dataset[motifs_dataset['ID'] == motif_id]['Count'].mean()
                motif_patients_count = motifs_dataset[motifs_dataset['ID'] == motif_id]['Patient_uId'].nunique()
                motif_freq.append(motif_mean_freq)
                motif_count.append(motif_mean_count)

            results_table = pd.concat([pd.DataFrame(
                                            {'th': th,
                                             'task': left_class + '-' + right_class,
                                             'left_class': left_class,
                                             'right_class': right_class,
                                             'roc_auc_score': optimal_result.get_roc_auc_score(),
                                             'all_motif_features': all_motif_features,
                                             'motif_mean_freq': motif_freq,
                                             'motif_mean_count': motif_count,
                                             'patient_count': motif_patients_count}),
                                             results_table], ignore_index=True)
        except Exception as e:
            print(e.with_traceback())
            break

task: POSITIVE - NEGATIVE


  0%|          | 0/37 [00:00<?, ?it/s]

  0%|          | 0/37 [00:00<?, ?it/s]

classes: ['POSITIVE', 'NEGATIVE'] th:0.2 score: 0.6309523809523809
task: POSITIVE - NEGATIVE


  0%|          | 0/37 [00:00<?, ?it/s]

  0%|          | 0/37 [00:00<?, ?it/s]

classes: ['POSITIVE', 'NEGATIVE'] th:0.3 score: 0.8142857142857143
task: POSITIVE - NEGATIVE


  0%|          | 0/37 [00:00<?, ?it/s]

  0%|          | 0/37 [00:00<?, ?it/s]

classes: ['POSITIVE', 'NEGATIVE'] th:0.4 score: 0.819047619047619
task: POSITIVE - NEGATIVE


  0%|          | 0/37 [00:00<?, ?it/s]

  0%|          | 0/37 [00:00<?, ?it/s]

classes: ['POSITIVE', 'NEGATIVE'] th:0.45 score: 0.8523809523809524
task: POSITIVE - NEGATIVE


  0%|          | 0/37 [00:00<?, ?it/s]

  0%|          | 0/37 [00:00<?, ?it/s]

classes: ['POSITIVE', 'NEGATIVE'] th:0.5 score: 0.7333333333333333
task: POSITIVE - NEGATIVE


  0%|          | 0/37 [00:00<?, ?it/s]

  0%|          | 0/37 [00:00<?, ?it/s]

classes: ['POSITIVE', 'NEGATIVE'] th:0.6 score: 0.7333333333333334
task: POSITIVE - NEGATIVE


  0%|          | 0/37 [00:00<?, ?it/s]

  0%|          | 0/37 [00:00<?, ?it/s]

classes: ['POSITIVE', 'NEGATIVE'] th:0.7 score: 0.7333333333333333
task: POSITIVE - NEGATIVE


  0%|          | 0/37 [00:00<?, ?it/s]

  0%|          | 0/37 [00:00<?, ?it/s]

classes: ['POSITIVE', 'NEGATIVE'] th:0.75 score: 0.7333333333333334
task: POSITIVE - NEGATIVE


  0%|          | 0/37 [00:00<?, ?it/s]

  0%|          | 0/37 [00:00<?, ?it/s]

TypeError: BaseException.with_traceback() takes exactly one argument (0 given)

In [15]:
results_table.to_csv(results_dir+f'/benchmark_across_th_{motif_size}_max_motifs.csv')