#### What are the stages to run CISM?

#### 1. Creating the Graph object and building it
The Graph object contains the following:
* cells_csv: the DataFrame of the cells data
* common_cells_mapper: the mapper of the cells
* colnames_mapper_dict: the column-names mapper, which tells the graph builder exactly which columns hold the cell types and the patient id

After creating the object, we need to call the build_graph method, which takes the following args:
* path_to_output_dir (str): where to output the files
* max_distance (int): the maximum distance between cells to be considered neighbours (the resolution is 0.5 µm).
* exclude_cell_type (str): TODO - document this argument; the example below passes `None`.

#### 2. Initiate the CISM
The CISM class is initiated with the following args:
* fanmod_path (str): filesystem path to fanmod+ tool.
* fanmod_exe (str): the filename of fanmod+ executable.
* network_dataset_root_path (str): filesystem path to the network dataset root directory.
* fanmod_output_root_path (str): filesystem path to output fanmod+ artifacts in leda format
* fanmod_cache_root_path (str): filesystem path to output fanmod+ cache artifacts
* motif_size (int): the number of nodes in each motif
* iterations (int): the number of random networks to be generated for assessing motif significance

After initiating the CISM object itself, we run the add_dataset method, which receives:
* dataset_folder (str): the relative filesystem sub-folder of the dataset
* dataset_type (str): the dataset type
* dataset_name (str): the dataset name
* n_jobs (int): number of jobs allowed to run in parallel.
* prefer (str): joblib Parallel prefer parameter.
* force_run_fanmod (bool): whether to override fanmod+ existing results.
* force_parse (bool): whether to override existing parse files of fanmod+ outputs.

#### 3. Create the discriminator object for investigating motifs
We create the TissueStateDiscriminativeMotifs object, which holds many methods for investigating our motifs.
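This class is constructed with the following attributes:
* CISM - the cism object we built in the previous stage
* tissue_state_csv_path (str) - path to the CSV that holds each patient's id and class (NN/NP etc.)
* tissue_state_to_string (str) - TODO: document (the example below passes `None`)
* common_cells_type (dict) - TODO: document (the example below passes the `cells_type` dict built in stage 1)
* tissue_state_func (func) - a method needed for transforming the tissue-state column (in TNBC it turned the continuous value into 0 (Short) or 1 (Long))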

#### 4. Run the main loop of discrimination


In [1]:
# imports
import pandas as pd
import pathlib
import seaborn as sns
import numpy as np
import sys
import os

In [2]:
# constants

# --- input tables ---------------------------------------------------------
# Per-cell table (updated data for amos with the new cell mapping).
raw_csv_file = "./for_amos_23_12_24.csv"
# Patient -> class table; written in stage 3 and read back by the discriminator.
raw_patient_class_file_name = "patient_class.csv"
raw_patient_class_file = "./" + raw_patient_class_file_name

# --- graph dataset location -----------------------------------------------
# The two parts are joined by plain string concatenation further down.
root_data_dir = "../../data"
output_graphs_dir = "/Barak_testing_with_new_data/"

# --- FANMOD+ --------------------------------------------------------------
FANMOD_exe = "LocalFANMOD.exe"
# Directory where the FANMOD executable is located.
FANMOD_path = "C://Users//milsh//OneDrive//שולחן העבודה//FANMODPlus//out//build//x64-debug"
output_dir = "./../../fanmod_output"
cache_dir = "./../../parse_cache"

# --- import plumbing ------------------------------------------------------
# Needed because of importing issues with the CISM project objects.
sys.path.append("../")
os.chdir(os.getcwd())

### Stage 1

In [3]:
# Load the per-cell table; the first CSV column becomes the index.
raw_df = pd.read_csv(raw_csv_file, index_col=0)
raw_df

Unnamed: 0,fov,patient number,meta_pred,grouped_meta_phenotype,cell_id,Group,centroid-0,centroid-1
0,FOV10,3,Unidentified,Unidentified,5,NP,3.343373,333.198795
1,FOV10,3,CD4 T cell,Mem CD4 T cell,11,NP,2.814159,1764.212389
2,FOV10,3,Vessels,Hevs,15,NP,3.445860,356.878981
3,FOV10,3,CD8 T cell,Other CD8 T cell,26,NP,3.671756,1738.076336
4,FOV10,3,CD4 T cell,Mem CD4 T cell,27,NP,3.485714,1789.857143
...,...,...,...,...,...,...,...,...
1652269,FOV98,104,Bcell,Germinal center cell,12395,PP,2022.567308,1446.775641
1652270,FOV98,104,CD4 T cell,Naive CD4 T cell,12401,PP,2024.406977,405.038760
1652271,FOV98,104,CD4 T cell,Naive CD4 T cell,12437,PP,2028.045000,496.570000
1652272,FOV98,104,Bcell,B cell,12457,PP,2032.712766,1208.500000


In [4]:
# Distinct values present in the `meta_pred` column.
raw_df['meta_pred'].unique()

array(['Unidentified', 'CD4 T cell', 'Vessels', 'CD8 T cell', 'Immune',
       'Mac', 'Stroma', 'Bcell', 'DCs', 'Tumor'], dtype=object)

In [5]:
# Number of distinct FOVs in each Group.
raw_df.groupby('Group')['fov'].nunique()

Group
NN    40
NP    36
PN    42
PP    59
Name: fov, dtype: int64

In [8]:
# Read the per-cell table again (same file as before).
raw_df = pd.read_csv(raw_csv_file, index_col=0)

# Mapping from the label found in the data column to the label we want.
# In the tutorial this maps a number (the numeric `pred` column) to a string;
# here it maps one string to another string.
# Every label maps to itself unless it is listed in `label_overrides`.
label_overrides = {'Bcell': 'B cell'}
known_labels = (
    'Bcell',
    'CD4 APC',
    'CD4 T cell',
    'CD4 Treg',
    'CD8 T cell',
    'DCs',
    'Germinal Center B cell',
    'Hevs',
    'Mac',
    'Memory CD4 T Cell',
    'NK cell',
    'Neutrophil',
    'Stroma',
    'Tumor',
    'Unidentified',
    'Vessels',
    'Immune',
)
cells_mapper = {label: label_overrides.get(label, label) for label in known_labels}

# Tells the graph builder which column holds the cell type and which one
# holds the patient id.
colnames_mapper_dict = dict(cell_types='meta_pred',
                            patient_id='patient number')


In [23]:
from cism.graph.create_formatted_graph import GraphBuilder

g = GraphBuilder(raw_df, cells_mapper, colnames_mapper_dict)

# Output folder for the graph files; create it (and its parents) if missing.
graphs_output_path = root_data_dir + output_graphs_dir
pathlib.Path(graphs_output_path).mkdir(parents=True, exist_ok=True)

g.build_graph(
    path_to_output_dir=graphs_output_path,
    max_distance=100,
    exclude_cell_type=None,
)

KeyboardInterrupt: 

In [9]:
from cism.graph.create_formatted_graph import GraphBuilder

g = GraphBuilder(raw_df, cells_mapper, colnames_mapper_dict)

# Invert the builder's common cell-type mapper so that its values become the
# keys and its keys become the values.
common_mapper = g.common_cell_type_mapper
cells_type = dict(zip(common_mapper.values(), common_mapper.keys()))
cells_type

{0: 'B cell',
 1: 'CD4 T cell',
 2: 'CD8 T cell',
 3: 'DCs',
 4: 'Immune',
 5: 'Mac',
 6: 'Stroma',
 7: 'Tumor',
 8: 'Unidentified',
 9: 'Vessels'}

### Stage 2

In [10]:
from cism.cism import CISM

motif_size = 3      # number of nodes in each motif
iterations = 1000   # number of random networks generated per run

# Constructor arguments gathered in one place, then unpacked into CISM.
cism_settings = dict(
    fanmod_exe=FANMOD_exe,
    fanmod_path=FANMOD_path,
    network_dataset_root_path=root_data_dir,
    fanmod_output_root_path=output_dir,
    fanmod_cache_root_path=cache_dir,
    motif_size=motif_size,
    iterations=iterations,
)
cism = CISM(**cism_settings)

# Register the dataset: sub-folder, dataset type, dataset name.
# Existing fanmod+ results and parse files are not overridden.
cism.add_dataset(
    output_graphs_dir,
    'Disease',
    'Melanoma',
    n_jobs=12,
    force_run_fanmod=False,
    force_parse=False,
)

  0%|          | 0/177 [00:00<?, ?it/s]

C://Users//milsh//OneDrive//שולחן העבודה//FANMODPlus//out//build//x64-debug/LocalFANMOD.exe -i ../../data/Barak_testing_with_new_data//Patient_100_FOVFOV308.txt -o ./../../fanmod_output/Barak_testing_with_new_data//3//100_FOVFOV308.csv -r 1000 -s 3 --colored_vertcies
C://Users//milsh//OneDrive//שולחן העבודה//FANMODPlus//out//build//x64-debug/LocalFANMOD.exe -i ../../data/Barak_testing_with_new_data//Patient_100_FOVFOV306.txt -o ./../../fanmod_output/Barak_testing_with_new_data//3//100_FOVFOV306.csv -r 1000 -s 3 --colored_vertcies
C://Users//milsh//OneDrive//שולחן העבודה//FANMODPlus//out//build//x64-debug/LocalFANMOD.exe -i ../../data/Barak_testing_with_new_data//Patient_102_FOVFOV84.txt -o ./../../fanmod_output/Barak_testing_with_new_data//3//102_FOVFOV84.csv -r 1000 -s 3 --colored_vertcies
C://Users//milsh//OneDrive//שולחן העבודה//FANMODPlus//out//build//x64-debug/LocalFANMOD.exe -i ../../data/Barak_testing_with_new_data//Patient_100_FOVFOV302.txt -o ./../../fanmod_output/Barak_testin

C://Users//milsh//OneDrive//שולחן העבודה//FANMODPlus//out//build//x64-debug/LocalFANMOD.exe -i ../../data/Barak_testing_with_new_data//Patient_112_FOVFOV212.txt -o ./../../fanmod_output/Barak_testing_with_new_data//3//112_FOVFOV212.csv -r 1000 -s 3 --colored_vertcies
C://Users//milsh//OneDrive//שולחן העבודה//FANMODPlus//out//build//x64-debug/LocalFANMOD.exe -i ../../data/Barak_testing_with_new_data//Patient_114_FOVFOV322.txt -o ./../../fanmod_output/Barak_testing_with_new_data//3//114_FOVFOV322.csv -r 1000 -s 3 --colored_vertcies
C://Users//milsh//OneDrive//שולחן העבודה//FANMODPlus//out//build//x64-debug/LocalFANMOD.exe -i ../../data/Barak_testing_with_new_data//Patient_114_FOVFOV324.txt -o ./../../fanmod_output/Barak_testing_with_new_data//3//114_FOVFOV324.csv -r 1000 -s 3 --colored_vertcies
C://Users//milsh//OneDrive//שולחן העבודה//FANMODPlus//out//build//x64-debug/LocalFANMOD.exe -i ../../data/Barak_testing_with_new_data//Patient_115_FOVFOV326.txt -o ./../../fanmod_output/Barak_test

KeyboardInterrupt: 

### Stage 3

In [None]:
# Build the tissue_state_csv: the patient id together with the patient's
# class (NN/NP etc.), one row per distinct (patient number, Group) pair.
per_cell_columns = ['fov', 'meta_pred', 'grouped_meta_phenotype',
                    'cell_id', 'centroid-0', 'centroid-1']
df = (raw_df
      .drop(columns=per_cell_columns)
      .drop_duplicates(subset=['patient number', 'Group']))
df.to_csv(raw_patient_class_file_name, index=False)

In [None]:
from cism.cism import TissueStateDiscriminativeMotifs
from cism.cism import DiscriminativeFeatureKey


def tissue_state_func(tissue_state):
    """Return the tissue state unchanged.

    In TNBC this hook was used to turn the continuous value into 0 (Short)
    or 1 (Long). Here we don't really need a transformation, so it is the
    identity.
    """
    return tissue_state


# NOTE(review): the original cell left `tissue_state_func =` unfinished (a
# SyntaxError) and never passed it on. The notes at the top of this notebook
# list `tissue_state_func` as a constructor attribute, so it is passed
# explicitly here - confirm the keyword against the constructor signature.
discriminator = TissueStateDiscriminativeMotifs(cism=cism,
                                                tissue_state_csv_path=raw_patient_class_file,
                                                tissue_state_to_string=None,
                                                common_cells_type=cells_type,
                                                tissue_state_func=tissue_state_func)

### Stage 4

In [17]:
from cism.cism import HardDiscriminativeFC, SoftDiscriminativeFC, DiscriminativeFeatureKey
import itertools

# Accumulates one row per (task, threshold, motif); stays None when no
# iteration completes successfully.
results_table = None

# Loop invariants: the motif table and the thresholds that are swept.
motifs_dataset = discriminator.cism.motifs_dataset
thresholds = [0.2, 0.3, 0.4, 0.42, 0.44, 0.46, 0.48, 0.5, 0.6, 0.7, 0.72, 0.74, 0.76, 0.8]

for left_class, right_class in itertools.combinations(['NN', 'NP'], 2):
    for th in thresholds:
        try:
            print(f'task: {left_class} - {right_class}')
            featureConf = HardDiscriminativeFC(
                extract_by=DiscriminativeFeatureKey.STRUCTURE_AND_CELL_IDENTITIES,
                use_cells_type_composition=False,
                use_motifs=True,
                shared_percentage=th,
                max_class_features=30,
                labels=[left_class, right_class])

            motif_space_features = (discriminator
                                    .get_features(feature_conf=featureConf,
                                                  exclude_patients=[],
                                                  n_jobs=1))

            optimal_result = (discriminator
                              .analyze_motifs(feature_conf=featureConf,
                                              exclude_patients=[],
                                              n_jobs=1))

            # Computed once; used for both the log line and the table.
            roc_auc = optimal_result.get_roc_auc_score()
            print(f'classes: {[left_class, right_class]} '
                  f'th:{th} score: {roc_auc}')

            # Unique motif ids across every row of the feature table.
            all_motif_features = list(set(
                itertools.chain.from_iterable(motif_space_features['features'])))

            # Mean frequency / count of each motif over the motif table,
            # filtering the table once per motif.
            motif_freq = []
            motif_count = []
            for motif_id in all_motif_features:
                motif_rows = motifs_dataset[motifs_dataset['ID'] == motif_id]
                motif_freq.append(motif_rows['Freq'].mean())
                motif_count.append(motif_rows['Count'].mean())

            # Newest rows go first; pd.concat silently drops the initial None.
            results_table = pd.concat([pd.DataFrame(
                                            {'th': th,
                                             'task': left_class + '-' + right_class,
                                             'left_class': left_class,
                                             'right_class': right_class,
                                             'roc_auc_score': roc_auc,
                                             'all_motif_features': all_motif_features,
                                             'motif_mean_freq': motif_freq,
                                             'motif_mean_count': motif_count}),
                                             results_table], ignore_index=True)
        except Exception as exc:
            # A bare `except:` used to swallow every error silently, including
            # KeyboardInterrupt. Report the failure so it is visible.
            # NOTE(review): `break` is kept from the original - presumably the
            # remaining thresholds are expected to fail too; confirm, or switch
            # to `continue`.
            print(f'stopping task {left_class}-{right_class} at th={th}: '
                  f'{type(exc).__name__}: {exc}')
            break