In [1]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from adjustText import adjust_text
import numpy as np
from scipy import stats
import scipy

In [2]:
colors = ["#66CDAA","#4682B4","#AB63FA","#FFA15A"]
group_num = "group_5"
df_main = pd.read_excel("/Users/yilewang/workspaces/data4project/prediction_project/Multigroups.xlsx",sheet_name=group_num)

### Workflow

1. take the z-score
2. identify cases with z-score > 1.6
3. get the frequency of being an outlier in all features
4. get the final list

In [3]:
df_zscore = pd.read_excel("/Users/yilewang/workspaces/data4project/prediction_project/prediction_data_zscores.xlsx")
# for each variable, find the one larger than 1.6.
df_zscore_hyper = df_zscore.copy()
upper_bound = 1.6 # 90 percentile
# # ignition
# for i in range(2, 18):
#     df_zscore_hyper[df_zscore_hyper.columns[i]] = df_zscore[df_zscore.columns[i]].apply(lambda x: 1 if x > upper_bound else 0)
# TVB parameters and simulation
for i in range(18, 34):
    df_zscore_hyper[df_zscore_hyper.columns[i]] = df_zscore[df_zscore.columns[i]].apply(lambda x: 1 if x > upper_bound else 0)
# Lateralization Index
for i in range(34, 38):
    df_zscore_hyper[df_zscore_hyper.columns[i]] = df_zscore[df_zscore.columns[i]].apply(lambda x: 1 if x < -1*upper_bound else 0)
# # structural
# for i in range(38, 64):
#     df_zscore_hyper[df_zscore_hyper.columns[i]] = df_zscore[df_zscore.columns[i]].apply(lambda x: 1 if x > upper_bound else 0)


In [5]:
df_zscore_hyper["rest_sum"] = df_zscore_hyper.iloc[:,18:38].sum(axis=1)
df_zscore_hyper['subgroups'] = df_zscore_hyper['rest_sum']
df_zscore_hyper[df_zscore_hyper['group'] == "MCI"][['caseid', 'subgroups']]

Unnamed: 0,caseid,subgroups
26,0578A,0
27,0720A,3
28,0743A,0
29,1070A,0
30,1428A,3
31,1669A,3
32,1775A,3
33,2019A,1
34,2050A,1
35,2166A,0


In [6]:
df_zscore_hyper[df_zscore_hyper['group'] == "NC"][['caseid', 'subgroups']]

Unnamed: 0,caseid,subgroups
10,0316A,0
11,0318A,2
12,0319A,0
13,0423A,0
14,0556A,0
15,0587A,1
16,0591A,3
17,0705A,0
18,0795A,0
19,0819A,1


### get the list of predicted converters in MCI

In [9]:
predicted_converters_MCI = ['0720A',
'1669A',
'2166A',
'3117A',
'4628A',
'5914A',
'6516A',
'7873A',
'2019A',
'2050A',
'4216A']

In [10]:
def get_overlap(pd_df, predicted_converters= predicted_converters_MCI):
    # get the overlap between predicted converters and the subgroups
    for case in pd_df.caseid:
        if case in predicted_converters:
            print(case)

In [12]:
high_hyperexcitability = df_zscore_hyper[df_zscore_hyper['group'] == 'MCI'][df_zscore_hyper['subgroups'] > 0][['caseid', 'subgroups']]
print(high_hyperexcitability)
get_overlap(high_hyperexcitability)

   caseid  subgroups
27  0720A          3
30  1428A          3
31  1669A          3
32  1775A          3
33  2019A          1
34  2050A          1
36  2385A          3
37  2391A          1
45  4602A          1
46  4628A          2
49  5781A          2
51  6516A          1
54  7827A          1
55  7841A          1
60  9270A          2
0720A
1669A
2019A
2050A
4628A
6516A


  high_hyperexcitability = df_zscore_hyper[df_zscore_hyper['group'] == 'MCI'][df_zscore_hyper['subgroups'] > 0][['caseid', 'subgroups']]


### Summary

Metrics: 
- Gc
- Go
- G_max
- G_max - Gc
- Gamma_Freq
- Theta_Freq
- Gamma_Amp
- Theta_Amp
- Delay
- LI (Lateralization Index)

If groups = 5, then
- SNC
- NC
- MCI-converter
- MCI-non_converter
- AD

If groups = 6, then
- SNC
- NC
- MCI-converter_hypoexcitability
- MCI-converter_hyperexcitability
- MCI-non_converter
- AD

If groups = 7, then
- SNC
- NC-converter
- NC-non-converter
- MCI-converter_hypoexcitability
- MCI-converter_hyperexcitability
- MCI-non_converter
- AD

In [29]:
### If group = 5

# get the list of caseid in high_hyperexcitability or caseid in predicted_converters_MCI
MCI_converters = np.unique(np.concatenate((high_hyperexcitability['caseid'], predicted_converters_MCI)))
# create a pd dataframe for MCI_converters
MCI_converters = pd.DataFrame(MCI_converters, columns = ["caseid"])
MCI_converters['group'] = "MCI_converters"

# get the list of caseid in MCI excluding the group_5
_MCI = df_zscore_hyper[df_zscore_hyper['group'] == 'MCI'][['caseid', 'subgroups']]
MCI_nonconverters = _MCI[~_MCI['caseid'].isin(MCI_converters["caseid"])]

MCI_nonconverters['group'] = "MCI_nonconverters"
MCI_nonconverters = MCI_nonconverters[['caseid', 'group']]

# put them together
AD = df_zscore_hyper[df_zscore_hyper['group'] == 'AD'][['caseid', 'group']][:-1]
NC = df_zscore_hyper[df_zscore_hyper['group'] == 'NC'][['caseid', 'group']]
SNC = df_zscore_hyper[df_zscore_hyper['group'] == 'SNC'][['caseid', 'group']]

# concatenate them
df_group5 = pd.concat([SNC, NC, MCI_converters, MCI_nonconverters, AD], ignore_index=True)

print(df_group5)

   caseid group
0   2820A   SNC
1   3168A   SNC
2   3358A   SNC
3   3610A   SNC
4   4073A   SNC
..    ...   ...
69  3255A    AD
70  4612A    AD
71  5368A    AD
72  5571A    AD
73  7673A    AD

[74 rows x 2 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  MCI_nonconverters['group'] = "MCI_nonconverters"


In [34]:

### If group = 6

# get the list in high_hyperexcitability excluding the predicted converters
MCI_hyperexcitability = high_hyperexcitability[~high_hyperexcitability['caseid'].isin(predicted_converters_MCI)]
MCI_hyperexcitability['group'] = "MCI_hyperexcitability"
MCI_hyperexcitability = MCI_hyperexcitability[['caseid', 'group']]

# get the hypoexcitability
MCI_hypoexcitability = pd.DataFrame({"caseid": predicted_converters_MCI, "group": "MCI_hypoexcitability"})

# get the list of caseid in MCI excluding the MCI_hyperexcitability and MCI_hypoexcitability
_MCI = df_zscore_hyper[df_zscore_hyper['group'] == 'MCI'][['caseid', 'subgroups']]
MCI_nonconverters = _MCI[~_MCI['caseid'].isin(MCI_hyperexcitability['caseid'])]
MCI_nonconverters = MCI_nonconverters[~MCI_nonconverters['caseid'].isin(MCI_hypoexcitability['caseid'])]
MCI_nonconverters['group'] = "MCI_nonconverters"
MCI_nonconverters = MCI_nonconverters[['caseid', 'group']]

# put them together
AD = df_zscore_hyper[df_zscore_hyper['group'] == 'AD'][['caseid', 'group']][:-1]
NC = df_zscore_hyper[df_zscore_hyper['group'] == 'NC'][['caseid', 'group']]
SNC = df_zscore_hyper[df_zscore_hyper['group'] == 'SNC'][['caseid', 'group']]

# concatenate them
df_group6 = pd.concat([SNC, NC, MCI_hyperexcitability, MCI_hypoexcitability, MCI_nonconverters, AD], ignore_index=True)

print(df_group6)


   caseid group
0   2820A   SNC
1   3168A   SNC
2   3358A   SNC
3   3610A   SNC
4   4073A   SNC
..    ...   ...
69  3255A    AD
70  4612A    AD
71  5368A    AD
72  5571A    AD
73  7673A    AD

[74 rows x 2 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  MCI_hyperexcitability['group'] = "MCI_hyperexcitability"


In [42]:
### If group = 7

NC_hyperexcitability = df_zscore_hyper[df_zscore_hyper['group'] == 'NC'][df_zscore_hyper['subgroups'] > 0][['caseid', 'subgroups']]
NC_hyperexcitability['group'] = "NC_hyperexcitability"

# get the list of caseid in NC excluding the high_hyperexcitability_NC
_NC = df_zscore_hyper[df_zscore_hyper['group'] == 'NC'][['caseid', 'subgroups']]
NC_nonconverters = _NC[~_NC['caseid'].isin(NC_hyperexcitability['caseid'])]
NC_nonconverters['group'] = "NC_nonconverters"
NC_nonconverters = NC_nonconverters[['caseid', 'group']]

# get the list in high_hyperexcitability excluding the predicted converters
MCI_hyperexcitability = high_hyperexcitability[~high_hyperexcitability['caseid'].isin(predicted_converters_MCI)]
MCI_hyperexcitability['group'] = "MCI_hyperexcitability"
MCI_hyperexcitability = MCI_hyperexcitability[['caseid', 'group']]

# get the hypoexcitability
MCI_hypoexcitability = pd.DataFrame({"caseid": predicted_converters_MCI, "group": "MCI_hypoexcitability"})

# get the list of caseid in MCI excluding the MCI_hyperexcitability and MCI_hypoexcitability
_MCI = df_zscore_hyper[df_zscore_hyper['group'] == 'MCI'][['caseid', 'subgroups']]
MCI_nonconverters = _MCI[~_MCI['caseid'].isin(MCI_hyperexcitability['caseid'])]
MCI_nonconverters = MCI_nonconverters[~MCI_nonconverters['caseid'].isin(MCI_hypoexcitability['caseid'])]
MCI_nonconverters['group'] = "MCI_nonconverters"
MCI_nonconverters = MCI_nonconverters[['caseid', 'group']]

# put them together
AD = df_zscore_hyper[df_zscore_hyper['group'] == 'AD'][['caseid', 'group']][:-1]
SNC = df_zscore_hyper[df_zscore_hyper['group'] == 'SNC'][['caseid', 'group']]

# concatenate them
df_group7 = pd.concat([SNC, NC_nonconverters, NC_hyperexcitability, MCI_hyperexcitability, MCI_hypoexcitability, MCI_nonconverters, AD], ignore_index=True)
df_group7 = df_group7[['caseid', 'group']]
print(df_group7)

   caseid group
0   2820A   SNC
1   3168A   SNC
2   3358A   SNC
3   3610A   SNC
4   4073A   SNC
..    ...   ...
69  3255A    AD
70  4612A    AD
71  5368A    AD
72  5571A    AD
73  7673A    AD

[74 rows x 2 columns]


  NC_hyperexcitability = df_zscore_hyper[df_zscore_hyper['group'] == 'NC'][df_zscore_hyper['subgroups'] > 0][['caseid', 'subgroups']]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  NC_nonconverters['group'] = "NC_nonconverters"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  MCI_hyperexcitability['group'] = "MCI_hyperexcitability"
