# Objective

The objective of this notebook is to analyze the three types of graph (FA, GM, RS) separately by applying node metrics at different thresholds, from 0 to 0.95 in steps of 0.05, and then to determine at which threshold each graph gives us the most information about the subjects' MS state.

In [1]:
#IMPORTS
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
import copy
import pickle
from scipy.stats import t
import seaborn as sns
from scipy import stats
import inspect
import pickle
import warnings
warnings.filterwarnings('ignore')

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas

# LOADS

In [2]:
def load():
    """
    Read all input data files from the current working directory.

    The ten adjacency-matrix files are loaded with ``np.load`` in a fixed
    order (FAv1, FAv2, GMv1C, GMv1, GMv2C, GMv2, RS, RSnA, RSnC, RSnCnA),
    followed by the clinical table.

    :return clinic: pandas DataFrame with the contents of 'clinic.csv'
    :return adj_mat: list with the ten loaded numpy arrays, in the order above
    """
    # Order matters: callers index this list by position.
    matrix_files = (
        'FAs_v1-corrected.npy',
        'FAs_v2-corrected.npy',
        'GMs_v1-no-thr_corrected.npy',
        'GMs_v1-no-thr_no-corrected.npy',
        'GMs_v2-thr_corrected.npy',
        'GMs_v2-thr_no-corrected.npy',
        'RSs_corrected_abs.npy',
        'RSs_corrected_no-abs.npy',
        'RSs_no-corrected_abs.npy',
        'RSs_no-corrected_no-abs.npy',
    )
    adj_mat = [np.load(file_name) for file_name in matrix_files]
    clinic = pd.read_csv('clinic.csv')
    return clinic, adj_mat

In [3]:
# Names of the adjacency matrices, listed in the same order as the list returned by load()
adj_mat_str = [
    'FAv1', 'FAv2',
    'GMv1C', 'GMv1', 'GMv2C', 'GMv2',
    'RS', 'RSnA', 'RSnC', 'RSnCnA',
]

# Node table; its 'region_name' column is read by metricsCalculation
nodes = pd.read_csv('noms_nodes.csv')

# Graph Creation

In [4]:
# These functions build the graphs from an adjacency matrix, removing edges below the specified threshold
def _thresholded_graphs(clinic, adj_mat, index, threshold):
    """
    Build one NetworkX graph per subject from ``adj_mat[index]``.

    Every weight strictly below ``threshold`` is set to 0 before the graphs
    are built. The thresholding is done on a copy, so the arrays in
    ``adj_mat`` are left untouched.

    :clinic: pandas DataFrame; only its length is used, to decide how many subjects to convert
    :adj_mat: list of numpy arrays indexed as [subject][row][column]
    :index: position in ``adj_mat`` of the array to convert
    :threshold: minimum weight an edge must have to be kept
    :return: list of nx.Graph objects, one per subject
    """
    matrices = np.array(adj_mat[index][:len(clinic)], copy=True)
    # Vectorised replacement for the element-by-element comparison.
    # NaN entries compare False and are therefore left unchanged.
    matrices[matrices < threshold] = 0.
    return [nx.Graph(subject_matrix) for subject_matrix in matrices]


def clinicPandaFA(clinic, adj_mat, adj_mat_str, thrFA):
    """
    clinicPandaFA creates the graphs of the FA adjacency matrix stored at
    position 1 of ``adj_mat``, using the specified threshold

    :clinic: pandas DataFrame with patients + volunteers data
    :adj_mat: a list with all adjacency matrices loaded in numpy structures
    :adj_mat_str: a list of strings with all adjacency matrices names
    :thrFA: minimum threshold for edges weights in the graph
    :return clinic: the same DataFrame with the created graphs stored in column ``adj_mat_str[1]``

    """
    fa_index = 1
    clinic[adj_mat_str[fa_index]] = _thresholded_graphs(clinic, adj_mat, fa_index, thrFA)
    return clinic


def clinicPandaGM(clinic, adj_mat, adj_mat_str, thrGM):
    """
    clinicPandaGM creates the graphs of the GM adjacency matrix stored at
    position 4 of ``adj_mat``, using the specified threshold

    :clinic: pandas DataFrame with patients + volunteers data
    :adj_mat: a list with all adjacency matrices loaded in numpy structures
    :adj_mat_str: a list of strings with all adjacency matrices names
    :thrGM: minimum threshold for edges weights in the graph
    :return clinic: the same DataFrame with the created graphs stored in column ``adj_mat_str[4]``

    """
    gm_index = 4
    clinic[adj_mat_str[gm_index]] = _thresholded_graphs(clinic, adj_mat, gm_index, thrGM)
    return clinic


def clinicPandaRS(clinic, adj_mat, adj_mat_str, thrRS):
    """
    clinicPandaRS creates the graphs of the RS adjacency matrix stored at
    position 6 of ``adj_mat``, using the specified threshold

    :clinic: pandas DataFrame with patients + volunteers data
    :adj_mat: a list with all adjacency matrices loaded in numpy structures
    :adj_mat_str: a list of strings with all adjacency matrices names
    :thrRS: minimum threshold for edges weights in the graph
    :return clinic: the same DataFrame with the created graphs stored in column ``adj_mat_str[6]``

    """
    rs_index = 6
    clinic[adj_mat_str[rs_index]] = _thresholded_graphs(clinic, adj_mat, rs_index, thrRS)
    return clinic

# Node Strength Implementation

In [5]:
#NetworkX does not have Node Strength metric
def node_strength(G, node):
    """
    Compute the strength of a node: the sum of the 'weight' attribute over
    all edges reported by ``G.edges(node)``.

    :G: NetworkX graph object
    :node: node index of the graph
    :return total: sum of the edge weights (0 when the node has no edges)

    """
    return sum(G.get_edge_data(*edge)['weight'] for edge in G.edges(node))

# Metrics Execution

In [6]:
#Calculates the metrics, applying student's t-test and saving passed t-tests according to p-values
def metricsCalculation(clinic,matrix, metric, pvalue, p_passed, p_passed_all,p_passed_partial, n_nodes=76, node_names=None):
    """
    metricsCalculation computes a node metric on every subject's graph, splits the
    values by MS group and applies Student's t-test to every pair of groups, node by node.

    The t-tests are run exactly once per node, after the values of all subjects
    have been collected. (Previously, for metrics other than 'node_strength', the
    tests were executed inside the per-subject loop, i.e. on incomplete samples,
    and their results were appended once per subject.)

    Groups are taken from ``clinic['mstype']``: -1, 0, 1 and 2. Subjects with any
    other value are ignored. The six tested pairs, in order (this position is the
    pair index stored in ``p_passed``), are:
    0: (-1, 0), 1: (-1, 1), 2: (-1, 2), 3: (0, 1), 4: (0, 2), 5: (1, 2).

    :clinic: pandas DataFrame with patients + volunteers data + graph objects
    :matrix: String with the matrix type name (column of ``clinic`` holding the graphs)
    :metric: the string 'node_strength', or a callable taking a graph and returning
             a mapping indexable by node index
    :pvalue: a test is considered passed when its p-value is strictly below this value
    :p_passed: list extended in place with [matrix, node name, metric name, pair index, p-value]
    :p_passed_all: list extended in place with [matrix, node name, metric name, six p-values]
                   for nodes where all six tests passed
    :p_passed_partial: same layout, for nodes where more than one test passed
    :n_nodes: number of nodes evaluated per graph (default 76)
    :node_names: optional sequence of node names; when omitted, the 'region_name'
                 column of the module-level ``nodes`` DataFrame is used
    :return p_passed: the same list object received, after extension
    :return p_passed_all: the same list object received, after extension
    :return p_passed_partial: the same list object received, after extension

    """
    if node_names is None:
        node_names = list(nodes['region_name'])

    # Resolve how per-node values are obtained and under which name they are reported.
    if isinstance(metric, str) and metric == 'node_strength':
        metric_name = 'node_strength'

        def node_values(graph):
            return [node_strength(graph, i) for i in range(n_nodes)]
    else:
        # __name__ is more robust than parsing the repr of the callable.
        metric_name = getattr(metric, '__name__', str(metric))

        def node_values(graph):
            per_node = metric(graph)
            return [per_node[i] for i in range(n_nodes)]

    # groups[label][i] holds the metric values of node i for every subject of that group.
    groups = {label: [[] for _ in range(n_nodes)] for label in (-1, 0, 1, 2)}
    for ms_type, graph in zip(clinic['mstype'], clinic[matrix]):
        per_node_lists = groups.get(ms_type)
        if per_node_lists is None:
            continue
        for i, value in enumerate(node_values(graph)):
            per_node_lists[i].append(value)

    group_pairs = [(-1, 0), (-1, 1), (-1, 2), (0, 1), (0, 2), (1, 2)]
    for i in range(n_nodes):
        node_name = node_names[i]

        p_values = []
        for first, second in group_pairs:
            p = stats.ttest_ind(groups[first][i], groups[second][i])[1]
            # A NaN p-value (e.g. empty or constant samples) is treated as "not significant".
            p_values.append(1 if np.isnan(p) else p)

        passed = 0
        for pair_index, p in enumerate(p_values):
            if p < pvalue:
                p_passed.append([matrix, node_name, metric_name, pair_index, p])
                passed += 1

        # All six tests passed for this node
        if passed == len(group_pairs):
            p_passed_all.append([matrix, node_name, metric_name, list(p_values)])

        # More than one test passed for this node
        if passed > 1:
            p_passed_partial.append([matrix, node_name, metric_name, list(p_values)])

    return p_passed, p_passed_all,p_passed_partial

In [7]:
def mainFA(thrFA, metrics,  pvalue):
    """
    Run the whole graph-analysis pipeline for the FA matrix at one threshold.

    The data is reloaded from disk, the FA graphs are built with ``thrFA`` and
    every metric in ``metrics`` is evaluated on the 'FAv2' column. Each metric
    is printed before it is evaluated.

    :thrFA: float in the range [0-0.95] marking the minimum weight of an edge
    :metrics: list of all the metrics to evaluate
    :pvalue: p-value limit to pass Student's t-test
    :return p_passed: list with all the passed Student's t-tests
    :return p_passed_all: list with the nodes where all the t-tests passed
    :return p_passed_partial: list with the nodes where more than one t-test passed

    """
    clinic, adj_mat = load()
    clinic = clinicPandaFA(clinic, adj_mat, adj_mat_str, thrFA)

    # Accumulators filled in place by metricsCalculation
    p_passed, p_passed_all, p_passed_partial = [], [], []
    for current_metric in metrics:
        print(current_metric)
        metricsCalculation(clinic, 'FAv2', current_metric, pvalue,
                           p_passed, p_passed_all, p_passed_partial)

    return p_passed, p_passed_all, p_passed_partial

def mainGM(thrGM, metrics, pvalue):
    """
    Run the whole graph-analysis pipeline for the GM matrix at one threshold.

    The data is reloaded from disk, the GM graphs are built with ``thrGM`` and
    every metric in ``metrics`` is evaluated on the 'GMv2C' column. 'GM' and
    then each metric are printed as progress output.

    :thrGM: float in the range [0-0.95] marking the minimum weight of an edge
    :metrics: list of all the metrics to evaluate
    :pvalue: p-value limit to pass Student's t-test
    :return p_passed: list with all the passed Student's t-tests
    :return p_passed_all: list with the nodes where all the t-tests passed
    :return p_passed_partial: list with the nodes where more than one t-test passed

    """
    clinic, adj_mat = load()
    clinic = clinicPandaGM(clinic, adj_mat, adj_mat_str, thrGM)

    # Accumulators filled in place by metricsCalculation
    p_passed, p_passed_all, p_passed_partial = [], [], []
    print('GM')
    for current_metric in metrics:
        print(current_metric)
        metricsCalculation(clinic, 'GMv2C', current_metric, pvalue,
                           p_passed, p_passed_all, p_passed_partial)

    return p_passed, p_passed_all, p_passed_partial
def mainRS(thrRS, metrics, pvalue):
    """
    Run the whole graph-analysis pipeline for the RS matrix at one threshold.

    The data is reloaded from disk, the RS graphs are built with ``thrRS`` and
    every metric in ``metrics`` is evaluated on the 'RS' column. 'RS' and
    then each metric are printed as progress output.

    :thrRS: float in the range [0-0.95] marking the minimum weight of an edge
    :metrics: list of all the metrics to evaluate
    :pvalue: p-value limit to pass Student's t-test
    :return p_passed: list with all the passed Student's t-tests
    :return p_passed_all: list with the nodes where all the t-tests passed
    :return p_passed_partial: list with the nodes where more than one t-test passed

    """
    clinic, adj_mat = load()
    clinic = clinicPandaRS(clinic, adj_mat, adj_mat_str, thrRS)

    # Accumulators filled in place by metricsCalculation
    p_passed, p_passed_all, p_passed_partial = [], [], []
    print('RS')
    for current_metric in metrics:
        print(current_metric)
        metricsCalculation(clinic, 'RS', current_metric, pvalue,
                           p_passed, p_passed_all, p_passed_partial)
    return p_passed, p_passed_all, p_passed_partial

# Function calls

In [8]:
# Run the FA graph analysis for 20 thresholds (0, 0.05, ..., 0.95) and 3 different p-values
pv = [0.05, 0.01, 0.001]
metrics = [
    'node_strength',
    nx.degree_centrality,
    nx.closeness_centrality,
    nx.betweenness_centrality,
    nx.clustering,
    nx.square_clustering,
    nx.pagerank,
]
resultatsFA = []
for i, p_limit in enumerate(pv):
    # Each entry starts with its p-value, followed by one
    # [passed, passed_all, passed_partial] triple per threshold.
    resultatsFA.append([p_limit])
    for thr in range(20):
        pssd, psdd_all, psdd_partial = mainFA(thr / 20, metrics, p_limit)
        resultatsFA[i].append([
            copy.deepcopy(pssd),
            copy.deepcopy(psdd_all),
            copy.deepcopy(psdd_partial),
        ])

# Save results
with open("resultatsFA.pkl", "wb") as fp:   # Pickling
    pickle.dump(resultatsFA, fp)

node_strength
<function degree_centrality at 0x7f79f0a2b820>
<function closeness_centrality at 0x7f79f0a1d940>
<function betweenness_centrality at 0x7f79f0a15c10>
<function clustering at 0x7f79f09e71f0>
<function square_clustering at 0x7f79f09e7310>
<function pagerank at 0x7f79f09c3700>
node_strength
<function degree_centrality at 0x7f79f0a2b820>
<function closeness_centrality at 0x7f79f0a1d940>
<function betweenness_centrality at 0x7f79f0a15c10>
<function clustering at 0x7f79f09e71f0>
<function square_clustering at 0x7f79f09e7310>
<function pagerank at 0x7f79f09c3700>
node_strength
<function degree_centrality at 0x7f79f0a2b820>
<function closeness_centrality at 0x7f79f0a1d940>
<function betweenness_centrality at 0x7f79f0a15c10>
<function clustering at 0x7f79f09e71f0>
<function square_clustering at 0x7f79f09e7310>
<function pagerank at 0x7f79f09c3700>
node_strength
<function degree_centrality at 0x7f79f0a2b820>
<function closeness_centrality at 0x7f79f0a1d940>
<function betweenness_cen

In [9]:
# Run the GM graph analysis for 20 thresholds (0, 0.05, ..., 0.95) and 3 different p-values
pv = [0.05, 0.01, 0.001]
metrics = [
    'node_strength',
    nx.degree_centrality,
    nx.closeness_centrality,
    nx.betweenness_centrality,
    nx.clustering,
    nx.square_clustering,
    nx.pagerank,
]
resultatsGM = []
for i, p_limit in enumerate(pv):
    # Each entry starts with its p-value, followed by one
    # [passed, passed_all, passed_partial] triple per threshold.
    resultatsGM.append([p_limit])
    for thr in range(20):
        pssd, psdd_all, psdd_partial = mainGM(thr / 20, metrics, p_limit)
        resultatsGM[i].append([
            copy.deepcopy(pssd),
            copy.deepcopy(psdd_all),
            copy.deepcopy(psdd_partial),
        ])

# Save results
with open("resultatsGM.pkl", "wb") as fp:   # Pickling
    pickle.dump(resultatsGM, fp)

GM
node_strength
<function degree_centrality at 0x7f79f0a2b820>
<function closeness_centrality at 0x7f79f0a1d940>
<function betweenness_centrality at 0x7f79f0a15c10>
<function clustering at 0x7f79f09e71f0>
<function square_clustering at 0x7f79f09e7310>
<function pagerank at 0x7f79f09c3700>
GM
node_strength
<function degree_centrality at 0x7f79f0a2b820>
<function closeness_centrality at 0x7f79f0a1d940>
<function betweenness_centrality at 0x7f79f0a15c10>
<function clustering at 0x7f79f09e71f0>
<function square_clustering at 0x7f79f09e7310>
<function pagerank at 0x7f79f09c3700>
GM
node_strength
<function degree_centrality at 0x7f79f0a2b820>
<function closeness_centrality at 0x7f79f0a1d940>
<function betweenness_centrality at 0x7f79f0a15c10>
<function clustering at 0x7f79f09e71f0>
<function square_clustering at 0x7f79f09e7310>
<function pagerank at 0x7f79f09c3700>
GM
node_strength
<function degree_centrality at 0x7f79f0a2b820>
<function closeness_centrality at 0x7f79f0a1d940>
<function bet

In [10]:
# Run the RS graph analysis for 20 thresholds (0, 0.05, ..., 0.95) and 3 different p-values
pv = [0.05, 0.01, 0.001]
metrics = [
    'node_strength',
    nx.degree_centrality,
    nx.closeness_centrality,
    nx.betweenness_centrality,
    nx.clustering,
    nx.square_clustering,
    nx.pagerank,
]
resultatsRS = []
for i, p_limit in enumerate(pv):
    # Each entry starts with its p-value, followed by one
    # [passed, passed_all, passed_partial] triple per threshold.
    resultatsRS.append([p_limit])
    for thr in range(20):
        pssd, psdd_all, psdd_partial = mainRS(thr / 20, metrics, p_limit)
        resultatsRS[i].append([
            copy.deepcopy(pssd),
            copy.deepcopy(psdd_all),
            copy.deepcopy(psdd_partial),
        ])

# Save results
with open("resultatsRS.pkl", "wb") as fp:   # Pickling
    pickle.dump(resultatsRS, fp)

RS
node_strength
<function degree_centrality at 0x7f79f0a2b820>
<function closeness_centrality at 0x7f79f0a1d940>
<function betweenness_centrality at 0x7f79f0a15c10>
<function clustering at 0x7f79f09e71f0>
<function square_clustering at 0x7f79f09e7310>
<function pagerank at 0x7f79f09c3700>
RS
node_strength
<function degree_centrality at 0x7f79f0a2b820>
<function closeness_centrality at 0x7f79f0a1d940>
<function betweenness_centrality at 0x7f79f0a15c10>
<function clustering at 0x7f79f09e71f0>
<function square_clustering at 0x7f79f09e7310>
<function pagerank at 0x7f79f09c3700>
RS
node_strength
<function degree_centrality at 0x7f79f0a2b820>
<function closeness_centrality at 0x7f79f0a1d940>
<function betweenness_centrality at 0x7f79f0a15c10>
<function clustering at 0x7f79f09e71f0>
<function square_clustering at 0x7f79f09e7310>
<function pagerank at 0x7f79f09c3700>
RS
node_strength
<function degree_centrality at 0x7f79f0a2b820>
<function closeness_centrality at 0x7f79f0a1d940>
<function bet