# Community Detection Process
* Last Updated: 1-March-2023
* Pipeline for determining gamma range to run community detection on
* Factors that contribute to determining gamma range:
    - (1) Percentage of nodes in the top 8 modules
    - (2) How well a partition recovers the reference partition
    - (3) How much of the Auditory network gets separated into its own module

In [None]:
from collections import Counter
import pickle as pkl
import os
import sys
import Towlson_group_code.data_io as myFunc
import Towlson_group_code.community_detection.functional_brain_community as brain_community
import OAM.OAM_functions as ov_fct

# Show the interpreter version in the cell output.
print(sys.version)
# Project directories, given relative to the current working directory.
BRAINNET_PATH = '../Ovarian_hormone/BrainNet_Viewer/'
PICKLE_PATH = '../Ovarian_hormone/pickles/'
FIGURE_PATH = '../Ovarian_hormone/Figures/'

In [None]:
def split_ranges(grange):
    """Split gamma intervals into sub-ranges of at most 50 steps of 0.001.

    Args:
        grange: flat sequence ``[lower_0, upper_0, lower_1, upper_1, ...]``.
            An odd number of entries raises ``IndexError`` when the unpaired
            last entry is reached.

    Returns:
        List of ``(start, end)`` tuples. Full chunks are
        ``(start, start + 0.05)``; the final chunk of every interval ends at
        ``upper + 0.001``. Intervals whose rounded step count is not positive
        contribute nothing.
    """
    step = 0.001
    chunk = 50
    pieces = []
    for pos in range(0, len(grange), 2):
        lo, hi = grange[pos], grange[pos + 1]
        remaining = round((hi - lo)/step)
        while remaining > chunk:
            nxt = lo + chunk*step
            pieces.append((lo, nxt))
            lo = nxt
            if lo == hi:
                # Exact hit on the upper bound: still emit the closing
                # single-step range via the tail append below.
                remaining = 1
                break
            remaining = round((hi - lo)/step)
        if remaining > 0:
            # Remainder (50 steps or fewer), extended one step past `hi`.
            pieces.append((lo, hi + step))
    return pieces

def community_detection2(connectome, gamma):
    """Run community detection on `connectome` at a single `gamma`.

    Delegates to ``brain_community.get_partitions`` with
    ``B='negative_asym'`` and ``rep=20`` (presumably the number of
    repetitions - confirm against that helper).

    Returns:
        The ``(partitions, modularity)`` pair produced by the helper.
    """
    detected, quality = brain_community.get_partitions(
        connectome,
        gamma,
        B='negative_asym',
        rep=20,
    )
    return detected, quality

def find_icns(reference_partition, reference_FN_assignment, partition):
    """Merge `partition` towards the reference and score network recovery.

    The modules of `partition` are matched against the reference modules with
    the ``ov_fct`` preference/matching helpers (``poly_threshold=0.6``) and
    merged accordingly. For each of the Visual, Somamotor, DMN and Auditory
    networks the score is the fraction of the reference module's nodes that
    also lie in the merged module with the same index.

    Args:
        reference_partition: partition used as the reference.
        reference_FN_assignment: mapping from module index to network name;
            it is inverted here to look networks up by name.
        partition: partition to be merged and scored.

    Returns:
        ``(merged_partition, fn_overlap, avg_overlap)`` where `fn_overlap`
        maps network name to its overlap fraction and `avg_overlap` is the
        *sum* of the Visual and Auditory fractions (it is not divided by two).
        If a scored reference module is empty, a message is printed and
        ``([], {}, 0)`` is returned.
    """
    reference_modules = ov_fct.get_modules(reference_partition, reference_FN_assignment)
    candidate_modules, normalised_partition = ov_fct.get_all_modules(partition)
    reference_prefs = ov_fct.get_preference_list(reference_modules, candidate_modules)
    candidate_prefs = ov_fct.get_preference_list(candidate_modules, reference_modules)
    matching = ov_fct.get_poly_matching(candidate_prefs, reference_prefs, poly_threshold=0.6)
    merged_partition = ov_fct.merge_poly_modules(normalised_partition, matching)
    merged_modules = ov_fct.get_modules(merged_partition, reference_FN_assignment)

    name_to_index = {name: index for index, name in reference_FN_assignment.items()}
    fn_overlap = {}
    avg_overlap = 0
    for fn_name in ('Visual', 'Somamotor', 'DMN', 'Auditory'):
        fn_idx = name_to_index[fn_name]
        reference_nodes = reference_modules[fn_idx]
        shared_nodes = reference_nodes.intersection(merged_modules[fn_idx])
        reference_size = len(reference_nodes)
        if reference_size == 0:
            print(f"The reference partition is missing the {fn_name} network?!")
            return [], {},0
        fraction = len(shared_nodes) / reference_size
        fn_overlap[fn_name] = fraction
        if fn_name in ('Visual', 'Auditory'):
            avg_overlap += fraction
    return merged_partition, fn_overlap, avg_overlap

def find_optimal_range(reference_partition, reference_FN_assignment, connectome, n_nodes=1054):
    """Search for a gamma window whose partitions recover the reference networks.

    Starting from the window [1.1, 1.3], community detection is run at both
    ends of the window and each best partition is scored with ``find_icns``.
    The window is shifted or narrowed until it is at most 100 steps of 0.001
    wide, a good Visual/Auditory overlap is found at the lower end, or the
    share of nodes in the eight largest communities at the upper end drops
    below 70%.

    Args:
        reference_partition: reference partition forwarded to ``find_icns``.
        reference_FN_assignment: module-index -> network-name mapping
            forwarded to ``find_icns``.
        connectome: network forwarded to ``community_detection2``.
        n_nodes: denominator used for the "percentage of nodes in the top 8
            communities" figures. Defaults to 1054, the value that used to be
            hard-coded here (presumably the node count of the connectome).

    Returns:
        ``(min_gamma, max_gamma)``. ``(-1, -1)`` is returned when
        ``find_icns`` reports an unusable reference, or when the search has
        to fall back on its history and no gamma with more than 70% of the
        nodes in the top 8 communities was recorded.

    NOTE(review): the loop has no iteration cap; if a connectome never yields
    eight communities the window keeps sliding upwards indefinitely.
    """
    min_g = 1.1
    max_g = 1.3
    # Entries: (gamma, % of nodes in top 8, Auditory overlap, sum of all scored overlaps)
    history = []
    error = False
    while (max_g-min_g)/0.001 > 100:
        partitions, _ = community_detection2(connectome, min_g)
        min_partition = brain_community.get_best_partition(partitions)
        min_sizes = Counter(min_partition)
        if len(min_sizes) < 8:
            # Too few communities at the lower end: slide the window up.
            min_g += 0.05
            max_g += 0.05
            continue
        min_merged_partition, min_fn_overlap, _ = find_icns(reference_partition, reference_FN_assignment, min_partition)
        if len(min_merged_partition) == 0:
            error = True
            break

        partitions, _ = community_detection2(connectome, max_g)
        max_partition = brain_community.get_best_partition(partitions)
        max_sizes = Counter(max_partition)
        if len(max_sizes) < 8:
            # Too few communities even at the upper end: slide further.
            min_g += 0.1
            max_g += 0.1
            continue
        max_merged_partition, max_fn_overlap, _ = find_icns(reference_partition, reference_FN_assignment, max_partition)
        # Check the *max* result here; the original re-tested the min result,
        # so a failed find_icns call at max_g went unnoticed.
        if len(max_merged_partition) == 0:
            error = True
            break

        top_8_min = sum(size for _, size in min_sizes.most_common(8))*100/n_nodes
        print(f"\t Min gamma {min_g}: {len(min_sizes)} communities. {top_8_min:0.2f}% are in top 8.", min_fn_overlap)
        if top_8_min > 70:
            history.append((min_g, top_8_min, min_fn_overlap['Auditory'], sum(min_fn_overlap.values())))

        top_8_max = sum(size for _, size in max_sizes.most_common(8))*100/n_nodes
        print(f"\t Max gamma {max_g}: {len(max_sizes)} communities. {top_8_max:0.2f}% are in top 8.", max_fn_overlap)
        if top_8_max > 70:
            history.append((max_g, top_8_max, max_fn_overlap['Auditory'], sum(max_fn_overlap.values())))

        # Do we see good Visual+Auditory at min_g?
        if min_fn_overlap['Visual']>=0.75 and min_fn_overlap['Auditory'] >= 0.5:
            max_g = min_g + 0.12
            break
        elif max_fn_overlap['Visual']>=0.75 and max_fn_overlap['Auditory'] >= 0.5:
            # Good overlap only at the upper end: narrow the window from below.
            min_g += 0.05
        elif top_8_max < 70:
            # The partition is fragmenting; fall back on the best gamma seen so far.
            if not history:
                # Nothing qualified for the history, so there is no gamma to
                # centre a window on. Report it through the error sentinel
                # instead of raising IndexError on an empty list.
                print("No gamma with more than 70% of nodes in the top 8 communities was found.")
                return -1, -1
            # Prefer the entry with the best Auditory overlap ...
            best_history = max(history, key=lambda entry: entry[2])
            if best_history[2] < 0.01:
                # ... unless Auditory is essentially absent everywhere; then
                # rank by the summed overlap instead.
                best_history = max(history, key=lambda entry: entry[3])
            if best_history[1] >= 75:
                return best_history[0] - 0.06, best_history[0] + 0.06
            else:
                return best_history[0] - 0.1, best_history[0] + 0.02
        else:
            min_g = max_g
            max_g += 0.12
            print(f"Skipping new range to: {min_g} - {max_g}")

    if error:
        return -1, -1

    # Always hand back a window of at least 0.12, anchored at max_g.
    if (max_g - min_g)/0.001 < 120:
        return max_g - 0.12, max_g
    return min_g, max_g

def load_connectome(idx, phase):
    """Load the first pickled connectome of subject `idx` whose name contains `phase`.

    Walks ``../Ovarian_hormone/pickles/individual_connectomes/<idx>/``
    (including sub-directories) and unpickles the first ``.pkl`` file whose
    file name contains ``str(phase)``.

    Args:
        idx: subject directory name/number.
        phase: substring that the file name must contain.

    Returns:
        The unpickled object, or ``[]`` when no matching file exists.
    """
    path = f'../Ovarian_hormone/pickles/individual_connectomes/{idx}/'
    for root, dirs, files in os.walk(path):
        for file in files:
            if not file.endswith('.pkl'):
                continue
            if f'{phase}' in file:
                # os.walk yields sub-directory roots without a trailing
                # separator, so plain string concatenation would build a
                # wrong path for nested files.
                # NOTE: pickle can execute code on load; only use trusted files.
                with open(os.path.join(root, file), 'rb') as f:
                    return pkl.load(f)
    return []

# Load the EF input-parameter table and build a lookup from the first field
# of each entry to its second field.
# presumably "<name>.pkl" -> subject index, matching how it is indexed with
# ef_name + ".pkl" in the processing loop - TODO confirm
ef_input_params = myFunc.load_from_pickle(PICKLE_PATH+"/individual_connectomes/", "ef_input_params.pkl")
ef_name_to_idx = {x[0]: x[1] for x in ef_input_params.values()}

Load every cycle and use the previous phase's partition as the reference partition. For the EF phase of each
individual, the reference partition is the best EF partition of the averaged EF-phase connectome.

In [None]:
# Each entry of all_cycles is unpacked below as (ef_name, lf_name, ml_name).
all_cycles = myFunc.load_from_pickle('../Ovarian_hormone/pickles/', 'all_cycles.pkl')

reference_FN_assignment = ov_fct.AVG_EF_FN_AUDITORY
# reference_partition = myFunc.load_from_pickle(PICKLE_PATH, f'EF_best_partition_auditory.pkl')
# input_params: running number -> [connectome file name, subject idx, gamma sub-ranges]
input_params = {}
fname_i = 1
c = 0
# (counter, reference name) of cycles for which find_optimal_range returned -1
bad_refs = []
# input_params = myFunc.load_from_pickle('../Ovarian_hormone/pickles/individual_connectomes/', 'ef_input_params.pkl')
for ef_name, lf_name, ml_name in all_cycles:
    # As written, the EF partition is the reference and the ML connectome is
    # the network being processed; lf_name is not used in this loop.
    ref_name = ef_name
    network_name = ml_name
    print(c, ref_name, network_name)
    idx = ef_name_to_idx[ef_name+".pkl"]
    # NOTE: c is incremented before it is stored in bad_refs, so the stored
    # value is one higher than the value printed above.
    c += 1
    # ********************************************************************
    reference_partition = myFunc.load_from_pickle(PICKLE_PATH+"/best_subject_auditory/", f'{ref_name}_auditory.pkl')
    # load_connectome returns [] when no matching file is found; that case is
    # not checked before the search below.
    connectome = load_connectome(idx, network_name)
    print(f"Processing {network_name}")
    # ********************************************************************

    min_r, max_r = find_optimal_range(reference_partition, reference_FN_assignment, connectome)
    # -1 is the error sentinel of find_optimal_range.
    if (min_r == -1):
        bad_refs.append((c, ref_name))
        continue
    # Split the chosen window into sub-ranges of at most 50 gamma steps.
    gamma_ranges = split_ranges([min_r, max_r])
    print("Returned ranges: ", min_r, max_r, gamma_ranges)
    # Flag windows that do not split into exactly three sub-ranges.
    if len(gamma_ranges) != 3:
        print("****** WARNING ******")
        print("\n"*2)
    input_params[fname_i] = [network_name+".pkl", idx, gamma_ranges]
    fname_i += 1

    print("\n"*2)
    # myFunc.save_to_pickle(input_params, '../Ovarian_hormone/pickles/individual_connectomes/', 'ml_input_params_2.pkl')

---
# Community Detection of Yeo et al.
Code for assembling partition info from LUT file for Yeo et al's cortical parcellation.
- DO NOT USE FOR OTHER PURPOSES

### Calculating color distance
Conclusion:
- If delta_e (color distance) is > 10, color changed.
- Extracting partition list of Yeo et al.'s 7 network partition: new_lut

In [None]:
from colormath.color_objects import sRGBColor, LabColor
from colormath.color_conversions import convert_color
from colormath.color_diff import delta_e_cie2000

def update_color_sample(a_color, my_sample, my_lut):
    """Assign `a_color` to a community, registering it if it is a new color.

    `a_color` is compared against every color in `my_sample` with
    ``delta_e_cie2000``; the first one closer than 10 decides the community
    (its 1-based position). If none is that close, the color is appended to
    `my_sample` and gets the next community number.

    Both lists are modified in place and also returned.

    Returns:
        ``(my_sample, my_lut, community_num)`` with `community_num` being the
        number that was appended to `my_lut`.
    """
    for position, known_color in enumerate(my_sample, start=1):
        if delta_e_cie2000(a_color, known_color) < 10:
            # Seen before: reuse the community of the matching sample color.
            my_lut.append(position)
            return my_sample, my_lut, position

    # No sample color is close enough, so this is a new community.
    my_sample.append(a_color)
    community_num = len(my_sample)
    my_lut.append(community_num)
    return my_sample, my_lut, community_num

# lut_f = '../Ovarian_hormone/Brain_Atlas/Schaefer2018_1000Parcels_7Networks_order.lut'
# lut_f = '../Ovarian_hormone/Brain_Atlas/Schaefer2018_1000Parcels_17Networks_order.lut'
lut_f = '../PREVENT_Study/Brain_Atlas/Schaefer2018_200Parcels_7Networks_order.lut'
# Read the whole LUT up front; the context manager closes the file handle,
# which the bare open() call never did.
with open(lut_f, "r") as f:
    file_content = f.readlines()
prev_lab_color = None
# List of unique colors in LUT file
color_sample = []
# Community number of every LUT row, in file order
new_lut = []
current_community = -1
for raw_line in file_content:
    line = raw_line.strip()
    if not line:
        # A blank row (e.g. a trailing newline) has no color columns and
        # would otherwise raise IndexError below.
        continue
    line = line.split(" ")
    # Columns 1-3 of each row are read as the R, G, B components.
    r = float(line[1])
    g = float(line[2])
    b = float(line[3])
    current_color = sRGBColor(r, g, b)
    current_lab_color = convert_color(current_color, LabColor)
    if prev_lab_color is None:
        # First color always unique. Add to color sample.
        color_sample.append(current_lab_color)
        current_community = len(color_sample)
        new_lut.append(current_community)
    else:
        delta_e = delta_e_cie2000(current_lab_color, prev_lab_color)
        if delta_e > 10:
            # Color changed relative to the previous row: look it up in (or
            # add it to) the sample of known colors.
            color_sample, new_lut, current_community = update_color_sample(current_lab_color, color_sample, new_lut)
        else:
            # Same color => same community
            new_lut.append(current_community)
    prev_lab_color = current_lab_color

In [None]:
# Yeo et al's partition constructed. Saving:
# NOTE: this rebinds the global PICKLE_PATH to the PREVENT_Study directory.
PICKLE_PATH = '../PREVENT_Study/pickles/'
# NOTE(review): unlike the other pickle names in this file, this one has no
# ".pkl" suffix - confirm whether save_to_pickle adds it.
myFunc.save_to_pickle(new_lut, PICKLE_PATH, 'yeo_7_network_partition_200')

# Comparing Yeo et al's 7 partition network to our partitions
Calculate the NMI value between Yeo's partition and each of our best partitions.
- [X] EF
- [X] LF
- [X] ML

In [None]:
from sklearn.metrics.cluster import normalized_mutual_info_score
# Best group-level partition files, one per phase (EF, LF, ML).
my_best_partition_files = ['EF_best_partition_76-26-09-2022.pkl', 'LF_best_partition_91-26-09-2022.pkl', 'ML_best_partition_7-26-09-2022.pkl']
# yeo_partition_file = 'yeo_7_network_partition.pkl'
yeo_partition_file = 'yeo_17_network_partition.pkl'
# NOTE(review): files are read from whatever PICKLE_PATH currently points to -
# confirm it is the intended directory when this cell is run.
yeo_partition = myFunc.load_from_pickle(PICKLE_PATH, yeo_partition_file)

for file in my_best_partition_files:
    my_partition = myFunc.load_from_pickle(PICKLE_PATH, file)
    # Keep only the first 1000 entries (the cortical part, per the warning
    # printed below) so the labels line up with the Yeo partition.
    my_cortical = my_partition[:1000]
    nmi = normalized_mutual_info_score(yeo_partition, my_cortical)
    # file[:2] is the phase prefix of the file name (EF / LF / ML).
    print(f"{file[:2]} partition has a NMI value of {nmi} compared to Yeo's 17 network partition.*")

print("*WARNING: comparison done above only uses cortical data since Yeo et al. did not have subcortical data.")

# Subject Level Community Detection preparation for ARC
## Print out correctly divided gamma ranges for running on ARC
On ARC, if community detection is run over more than 50 gamma values in one job, the job may be terminated for running out of memory or exceeding its time limit.
Hence it is best to divide the desired gamma range into subranges of at most 50 gammas. The script below takes the desired range
and splits it accordingly to create the start and end input conditions for running subject-level community detection on ARC.

In [None]:
# NOTE: rebinds the global PICKLE_PATH to the individual-connectome directory.
PICKLE_PATH = '../Ovarian_hormone/pickles/individual_connectomes/'
# Gamma ranges to be re-run; indexed further down by "<name>_result.pkl".
redo_gamma_ranges = myFunc.load_from_pickle(PICKLE_PATH,'gamma_ranges_3.pkl')

# gamma_ranges = myFunc.load_from_pickle(PICKLE_PATH,'gamma_ranges_1.pkl')
# Subject directory indices to scan (the order is that of the strings sorted
# alphabetically: 0, 1, 10, 12, ...).
IDX_LIST = [0,1,10,12,13,14,15,18,2,20,22,23,27,28,29,3,4,5,6,7,8,9]

def load_data_filenames(idx, N):
    """List the rough-gamma result pickles of subject `idx` for run `N`.

    Walks ``../Ovarian_hormone/ARC/subject_level/Rough_Gamma_Results/<idx>``
    (sub-directories included) and collects the bare file names - not paths -
    of every ``.pkl`` file whose name contains ``gamma-<N>``.

    NOTE(review): this is a substring test, so ``N=1`` also matches names
    containing ``gamma-10``, ``gamma-11``, ... - confirm that is intended.

    Returns:
        List of matching file names; empty if the directory is missing or
        nothing matches.
    """
    results_dir = f'../Ovarian_hormone/ARC/subject_level/Rough_Gamma_Results/{idx}'
    tag = f'gamma-{N}'
    return [
        fname
        for _root, _dirs, fnames in os.walk(results_dir)
        for fname in fnames
        if fname.endswith('.pkl') and tag in fname
    ]

def split_ranges(grange):
    """Cut each (lower, upper) gamma interval into ARC-sized sub-ranges.

    `grange` is a flat sequence of bounds, ``[lower, upper, lower, upper,
    ...]``. Every interval is cut into chunks of 50 steps of 0.001; the last
    chunk of an interval runs to ``upper + 0.001``. An interval whose rounded
    number of steps is zero or negative yields no sub-range. An unpaired
    trailing bound raises ``IndexError``.

    Returns:
        List of ``(start, end)`` tuples in input order.
    """
    gamma_step = 0.001
    max_steps = 50
    sub_ranges = []
    for k in range(0, len(grange), 2):
        start = grange[k]
        stop = grange[k + 1]
        steps_left = round((stop - start)/gamma_step)
        while steps_left > max_steps:
            chunk_end = start + max_steps*gamma_step
            sub_ranges.append((start, chunk_end))
            start = chunk_end
            if start == stop:
                # Landed exactly on the upper bound: the tail append below
                # still has to emit the closing one-step range.
                steps_left = 1
                break
            steps_left = round((stop - start)/gamma_step)
        if steps_left > 0:
            sub_ranges.append((start, stop + gamma_step))
    return sub_ranges

# Build the ARC input-parameter table for every result file whose gamma range
# has to be re-run, save it, and print ready-to-paste `sbatch --array=` lines.
i = 0
input_params = {}
# redo_list = list(redo_gamma_ranges.keys())
# Names are the first 13 characters of the keys that have a non-empty range.
redo_list = [fn[:13] for fn, r in redo_gamma_ranges.items() if len(r) > 0]
print(redo_list)
for idx in IDX_LIST:
    data_list = load_data_filenames(idx, 1)
    for fname in data_list:
        name = fname[:13]
        # ranges = gamma_ranges[fname]
        if name in redo_list:
            ranges = redo_gamma_ranges[name+"_result.pkl"]
            new_ranges = split_ranges(ranges)
            input_params[i] = [name+".pkl", idx, new_ranges]
            print(i, idx, name, new_ranges)
        # i counts every result file, redone or not, so the keys of
        # input_params are positions within the full file listing.
        i += 1

myFunc.save_to_pickle(input_params, path=PICKLE_PATH, pickle_name='input_params_3.pkl')
print("\n"*2)
# Emit the array indices in groups that close at every key divisible by 10.
sbatch_prefix = 'sbatch --array='
pending = []
for k in input_params:
    pending.append(str(k))
    if k % 10 == 0 and k != 0:
        print(sbatch_prefix + ','.join(pending) + " ")
        pending = []
# Only print a final line when indices are left over; previously an empty
# remainder produced a truncated "sbatch --array " line.
if pending:
    print(sbatch_prefix + ','.join(pending) + " ")