In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import warnings
import matplotlib.patches as mpatches

In [None]:
# Silence every warning for the whole kernel session.
# NOTE(review): this also hides pandas/seaborn deprecation warnings raised by later cells.
warnings.filterwarnings('ignore')
# Use seaborn's 'darkgrid' style for all figures drawn below.
sns.set_style('darkgrid')

## 1. PREPARE INTERACTION MATRIX

### 1.1. read matrix

In [None]:
# Paths of the per-subject interaction matrices (the '*_lag2_clr.csv' files).
# Files named '<subject>_interaction_5lags_clr.csv' were referenced here before
# (donorA, donorB, male, female); if you switch back to them, read them with lag=5.
file_donorA = 'interaction_matrices/donorA_interaction_lag2_clr.csv'
file_donorB = 'interaction_matrices/donorB_interaction_lag2_clr.csv'
file_M = 'interaction_matrices/male_interaction_lag2_clr.csv'
file_F = 'interaction_matrices/female_interaction_lag2_clr.csv'

In [None]:
def read_interaction_matrix(file, lag=5):
    '''Read an interaction matrix created by fitting a BigVAR model.

    The CSV must contain an ``otu`` column (it becomes the row index) and an
    intercept column ``V1`` (dropped). The remaining columns are renamed to
    ``<otu>_<lag>``, lag by lag, in the order of the otu index.

    Parameters
    ----------
    file : str or path-like
        CSV file to read; its first column is consumed as a throw-away index.
    lag : int, default 5
        Number of lags stored in the matrix.

    Returns
    -------
    pandas.DataFrame
        Float matrix with one row per otu and ``n_otus * lag`` columns.

    Raises
    ------
    ValueError
        If the number of coefficient columns is not ``n_otus * lag``.
    '''
    interaction_matrix = pd.read_csv(file, index_col = [0])
    interaction_matrix = interaction_matrix.set_index(['otu'])
    # otu ids are concatenated into column names below, so keep them as text
    # even when the file stores them as numbers
    interaction_matrix.index = interaction_matrix.index.astype(str)

    #drop intercept
    interaction_matrix = interaction_matrix.drop(['V1'], axis = 1)

    # fail early with an explicit message instead of pandas' generic
    # "Length mismatch" when `lag` does not fit the file
    n_otus = len(interaction_matrix.index)
    n_columns = interaction_matrix.shape[1]
    if n_columns != n_otus * lag:
        raise ValueError(
            'cannot name columns with lag={}: {} has {} coefficient columns '
            'for {} otus (expected {})'.format(lag, file, n_columns, n_otus, n_otus * lag)
        )

    #name columns: all otus for lag 1, then all otus for lag 2, ...
    names = [col + "_" + str(i) for i in range(1, lag + 1) for col in interaction_matrix.index]
    interaction_matrix.columns = names
    interaction_matrix = interaction_matrix.astype(float)
    return interaction_matrix

In [None]:
# Read the four per-subject matrices in one pass; each is loaded with lag 2.
donorA_M, donorB_M, male_M, female_M = (
    read_interaction_matrix(matrix_path, 2)
    for matrix_path in (file_donorA, file_donorB, file_M, file_F)
)

### 1.2 change [n x n * lag] matrix into pairwise dataframe and drop interactions with coeff = 0

In [None]:
def make_pairwise_interactions(df, subject):
    '''Turn an interaction matrix into a long table of pairwise interactions.

    Every cell of `df` becomes one row; cells whose coefficient equals 0 are
    dropped. Column names of `df` must look like ``<otu>_<lag>``: the part
    after the LAST underscore is taken as the lag, so otu names may contain
    underscores themselves.

    Parameters
    ----------
    df : pandas.DataFrame
        Interaction matrix, rows = target otus, columns = ``<otu>_<lag>``.
    subject : str
        Label written to the 'subject' column of every row.

    Returns
    -------
    pandas.DataFrame
        Columns 'interactor', 'coeff', 'target', 'lag' (kept as text),
        'subject' and 'abs_coeff', with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If a column name of `df` contains no underscore.
    '''
    output_columns = ['interactor', 'coeff', 'target', 'lag', 'subject', 'abs_coeff']
    if df.empty:
        # nothing to unpivot; return the expected schema instead of failing in concat
        return pd.DataFrame(columns=output_columns)

    per_target = []
    for target, row in df.iterrows():
        pairs = pd.DataFrame({'interactor': row.index.astype(str).to_numpy(),
                              'coeff': row.to_numpy(),
                              'target': target})
        # split on the last underscore only: "otu_12_2" -> ("otu_12", "2")
        name_parts = pairs['interactor'].str.rsplit('_', n=1, expand=True)
        if name_parts.shape[1] != 2 or name_parts.isna().any().any():
            raise ValueError("interaction matrix columns must be named '<otu>_<lag>'")
        pairs['interactor'] = name_parts[0]
        pairs['lag'] = name_parts[1]
        per_target.append(pairs)

    interactions_df = pd.concat(per_target, ignore_index=True)
    #remove 0 coeffs
    interactions_df = interactions_df[interactions_df['coeff'] != 0].reset_index(drop=True)
    interactions_df['subject'] = subject
    interactions_df['abs_coeff'] = np.abs(interactions_df['coeff'])

    return interactions_df

In [None]:
# One long-format interaction table per subject, built in the same order as before.
(interaction_lag_df_donorA,
 interaction_lag_df_donorB,
 interaction_lag_df_male,
 interaction_lag_df_female) = (
    make_pairwise_interactions(matrix, label)
    for matrix, label in (
        (donorA_M, 'donorA'),
        (donorB_M, 'donorB'),
        (male_M, 'male'),
        (female_M, 'female'),
    )
)

### 1.3 remove weak interactions - below the .1 quantile of |coeff|

In [None]:
# Pool the interactions of all four subjects so a single cut-off is shared by everyone.
# pd.concat replaces the chained DataFrame.append calls (append was removed in pandas 2.0);
# row order and index labels are the same as with append.
interactions_df = pd.concat([interaction_lag_df_donorA,
                             interaction_lag_df_donorB,
                             interaction_lag_df_male,
                             interaction_lag_df_female])

# threshold: the 0.1 quantile of the absolute coefficients across all subjects
# (the misspelled name `treshold` is kept because the filter function reads it)
interactions_df['abs_coeff'] = np.abs(interactions_df['coeff'])
treshold = (interactions_df['abs_coeff']).quantile(.1)

In [None]:
def filter_interactions_above_treshold(interaction_df, threshold=None):
    '''Keep only the interactions whose absolute coefficient exceeds a cut-off.

    Parameters
    ----------
    interaction_df : pandas.DataFrame
        Must contain an 'abs_coeff' column.
    threshold : float, optional
        Cut-off (strictly greater-than). When omitted, the notebook-level
        variable `treshold` is used, which is what existing callers rely on.

    Returns
    -------
    pandas.DataFrame
        New frame (the input is not modified) with a fresh 0..n-1 index and an
        extra column 'n' holding that row number.
    '''
    if threshold is None:
        # backward-compatible default: the pooled quantile computed in the notebook
        threshold = treshold

    is_strong = interaction_df['abs_coeff'] > threshold
    filtered_df = interaction_df[is_strong].reset_index(drop=True)
    filtered_df['n'] = filtered_df.index

    return filtered_df

In [None]:
# Apply the shared cut-off to every subject's table, in the original order.
(filtered_interaction_lag_df_donorA,
 filtered_interaction_lag_df_donorB,
 filtered_interaction_lag_df_male,
 filtered_interaction_lag_df_female) = (
    filter_interactions_above_treshold(subject_frame)
    for subject_frame in (
        interaction_lag_df_donorA,
        interaction_lag_df_donorB,
        interaction_lag_df_male,
        interaction_lag_df_female,
    )
)

## 2. ANALYSE INTERACTIONS

In [None]:
# All filtered interactions of the four subjects stacked in one frame.
# pd.concat replaces the chained DataFrame.append calls (append was removed in pandas 2.0);
# each subject's own 0..n-1 index labels are kept, exactly as append did.
INTERACTIONS_DF = pd.concat([filtered_interaction_lag_df_donorA,
                             filtered_interaction_lag_df_donorB,
                             filtered_interaction_lag_df_male,
                             filtered_interaction_lag_df_female])

### 2.1 count number of interactions per subject

In [None]:
# Count rows per subject; after reset_index() the counts sit in a column labelled 0.
n_interactions_df = INTERACTIONS_DF.groupby(['subject']).size().reset_index()

# Shared bar styling; the next two bar-plot cells reuse this notebook-level `kwargs`.
kwargs={'alpha':.7}

plt.figure(figsize = [6, 3])
sns.barplot(x = n_interactions_df['subject'],
            y = n_interactions_df[0],
            ci=None,
            edgecolor = 'black',
            linestyle = '-.',
            palette = 'Set3',
            **kwargs
           )

plt.title('number of interactions among 4 subjects')
plt.tight_layout()
# Set after plotting so it replaces the label seaborn derives from the column name 0.
plt.ylabel('number of interactions among otus')
#plt.savefig('plots/number_of_interactions.png')

### 2.2 count number of interactions per subject per lag

In [None]:
# Count rows per (subject, lag); the counts end up in a column labelled 0.
n_interactions_lag_df = INTERACTIONS_DF.groupby(['subject', 'lag']).size().reset_index()

plt.figure(figsize = [8, 3])
# Relies on the notebook-level `kwargs` defined in the previous plotting cell.
ax = sns.barplot(x = n_interactions_lag_df['subject'],
                 y = n_interactions_lag_df[0],
                hue = n_interactions_lag_df['lag'],
                ci=None,
                edgecolor = 'black',
                linestyle = '-.',
                palette = 'Set3',
                **kwargs
                )
# Anchor the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.05, 1.0))
plt.title('number of interactions in each lag among 4 subjects')
plt.tight_layout()
plt.ylabel('number of interactions among otus')
#plt.savefig('plots/number_of_interactions_per_lag_per_subject.png')

### 2.3 plot strength of interactions per subject per lag

In [None]:
# One row per distinct (subject, lag, abs_coeff) combination, with its count in column 0.
# NOTE(review): grouping by abs_coeff collapses interactions that share the same value,
# so the bars below aggregate distinct coefficient values rather than all interactions;
# plotting INTERACTIONS_DF directly may be what was intended - confirm.
coeff_strength_df = pd.DataFrame(INTERACTIONS_DF.groupby(['subject', 'lag', 'abs_coeff']).size()).reset_index()#.unstack(fill_value=0)

plt.figure(figsize = [8, 3])
# Bar height is seaborn's aggregate of 'abs_coeff' per subject/lag (no error bars: ci=None).
# Relies on the notebook-level `kwargs` defined in an earlier plotting cell.
ax = sns.barplot(data = coeff_strength_df,
            x = 'subject',
            y = 'abs_coeff',
            hue = 'lag',
            palette="Set3", 
            linewidth=.6,
            edgecolor = 'black',
            linestyle="-.",
            ci=None,
            **kwargs)
# Anchor the legend outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.15, 1.0))
plt.title('strength of interactions in each lag among 4 subjects')
plt.tight_layout()
plt.ylabel('strength of interactions among otus')
#plt.savefig('plots/strength_of_interactions_per_lag_per_subject.png')

### 2.4 plot direction of interactions 

In [None]:
# Tag every filtered interaction with the sign of its coefficient
# (a coefficient below zero is 'negative', everything else 'positive').
# The column is added in place on each per-subject frame.
for _subject_frame in (filtered_interaction_lag_df_donorA,
                       filtered_interaction_lag_df_donorB,
                       filtered_interaction_lag_df_male,
                       filtered_interaction_lag_df_female):
    _subject_frame['coeff_sign'] = np.where(_subject_frame['coeff'] < 0, 'negative', 'positive')

In [None]:
# Stack the four sign-annotated frames.
# pd.concat replaces the chained DataFrame.append calls (append was removed in pandas 2.0).
# Index labels are kept as they are, so they repeat across subjects - do not use them as row ids.
coefficient_direction_df = pd.concat([filtered_interaction_lag_df_donorA,
                                      filtered_interaction_lag_df_donorB,
                                      filtered_interaction_lag_df_male,
                                      filtered_interaction_lag_df_female])

In [None]:
# Count interactions per (subject, lag, coeff_sign); the counts land in a column labelled 0.
coeff_dir_df = pd.DataFrame(coefficient_direction_df.groupby(['subject', 'lag', 'coeff_sign']).size()).reset_index()#.unstack(fill_value=0)

plt.figure(figsize = [8, 3])
# Each box summarises the per-lag counts of one subject / sign combination.
ax = sns.boxplot(data = coeff_dir_df,
                 x = 'subject',
                 y = coeff_dir_df[0],
                 hue = 'coeff_sign',
                 palette="Set3", 
                 linewidth=.8,
                 width=.7)

# Anchor the legend outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.2, 1.0))
plt.title('interaction direction per subject')
plt.tight_layout()
plt.ylabel('number of interactions among otus')
#plt.savefig('plots/interactions_direction.png')

### 2.5 autocorrelation

### 2.5.1 autocorrelation strength

In [None]:
def find_autocorr(df):
    '''Return the rows of `df` in which an otu acts on itself.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'interactor' and 'target'.

    Returns
    -------
    pandas.DataFrame
        Rows with ``interactor == target``, all columns and index labels kept.
        Rows are grouped by target in order of the target's first appearance
        in `df`, and keep their original relative order within a target.
        An input without such rows (or without rows at all) yields an empty
        frame with the same columns instead of raising.
    '''
    is_self = (df['interactor'] == df['target']).to_numpy()
    self_rows = df[is_self]

    # reproduce the target-by-target ordering of the former per-target loop
    first_seen = pd.Index(df['target'].unique())
    target_rank = first_seen.get_indexer(self_rows['target'])
    autoregression_df = self_rows.iloc[np.argsort(target_rank, kind='stable')]

    return autoregression_df

In [None]:
# Give every row a unique positional id. The frame's own index cannot serve as one:
# it restarts at 0 for each subject, so index-based ids would collide across subjects
# and any lookup by `idx` would match rows of the wrong subject.
coefficient_direction_df['idx'] = np.arange(len(coefficient_direction_df))
autoregression_df = find_autocorr(coefficient_direction_df)

In [None]:
# One row per distinct (subject, lag, coeff) among the self-interactions, count in column 0.
# NOTE(review): identical coefficient values within a subject/lag collapse into one point - confirm intended.
autoregression_df_grouped = pd.DataFrame(autoregression_df.groupby(['subject', 'lag', 'coeff']).size()).reset_index()


plt.figure(figsize  = [10, 5])
# One marker per self-interaction coefficient, split by lag within each subject (dodge=True).
ax=sns.swarmplot(data  = autoregression_df_grouped,
              x     = 'subject',
              y     = 'coeff',
              hue   = 'lag',
              dodge = True,
              alpha = .6,
              s     = 5,
              palette   = 'Set2',
              edgecolor = 'black',
              linewidth =.3
             )
plt.title('strength of autoregression in otus per subject index per lag')
plt.tight_layout()
plt.ylabel('autoregression coeff')
# Legend is moved outside the axes after tight_layout() has already run.
ax.legend(bbox_to_anchor=(1.1, 1.0))
# Writes into the relative 'plots/' directory.
plt.savefig('plots/autoregression_svar_lag2.png')

In [None]:
#find autoregressive interactions
# `auto_idx` is kept for any later cell that still reads it.
auto_idx = autoregression_df['idx']
# Label rows directly from their content (an otu acting on itself) rather than by
# matching `idx` values: `idx` is copied from an index that restarts for every subject,
# so an isin() lookup would also flag unrelated rows of other subjects.
_is_self_interaction = coefficient_direction_df['interactor'] == coefficient_direction_df['target']
coefficient_direction_df['interaction'] = np.where(_is_self_interaction, 'autogressive', 'non-autogressive')

### plot scatterplot of autoregression vs other

In [None]:
# Split the labelled table into self-interactions and all other interactions.
# reset_index() keeps the previous index as an 'index' column in both frames.
df_autoreg = coefficient_direction_df.loc[
    coefficient_direction_df['interaction'].eq('autogressive')
].reset_index()
df_nonautoreg = coefficient_direction_df.loc[
    coefficient_direction_df['interaction'].eq('non-autogressive')
].reset_index()

In [None]:
def plot_autocorr_dist(coeff_df, subject):
    '''Compare, for one subject, self- and cross-otu coefficient strength per target otu.

    For every target otu the mean 'abs_coeff' of its 'autogressive' rows and of
    its 'non-autogressive' rows is computed (NaN when a target has no rows of
    that kind). Two figures are drawn, saved under 'plots/' and shown:
    a line plot of both means across otus, and a scatter plot of one mean
    against the other.

    Parameters
    ----------
    coeff_df : pandas.DataFrame
        Needs the columns 'subject', 'target', 'interaction' and 'abs_coeff'.
    subject : str
        Value of the 'subject' column to plot; also used in titles and file names.
    '''
    subject_df = coeff_df[coeff_df['subject'] == subject]

    # targets in order of first appearance; this order defines the x axis of the line plot
    otus = subject_df['target'].unique()

    def _mean_strength(kind):
        # mean |coeff| per target for one interaction kind, aligned to `otus`
        rows = subject_df[subject_df['interaction'] == kind]
        return rows.groupby('target')['abs_coeff'].mean().reindex(otus).to_numpy()

    df = pd.DataFrame({'otu': otus,
                       'auto': _mean_strength('autogressive'),
                       'non-auto': _mean_strength('non-autogressive')})

    #lineplot
    plt.figure(figsize = [10, 3])
    kwargs = {'s':50, 'alpha':.6, 'edgecolor':'black'}
    # first colour of the active colour cycle; set explicitly so the legend patch
    # is guaranteed to match the plotted series
    inter_color = 'C0'

    sns.lineplot(x = df.index, y=df['auto'], color='black', linestyle='-.')
    sns.scatterplot(x = df.index, y=df['auto'], color='black', **kwargs)
    sns.lineplot(x = df.index, y=df['non-auto'], color=inter_color, linewidth = .7)
    sns.scatterplot(x = df.index, y=df['non-auto'], color=inter_color, **kwargs)

    black_patch = mpatches.Patch(color='black', label='auto-causality coeff')
    blue_patch = mpatches.Patch(color=inter_color, label='inter-causality coeff')
    plt.legend(handles=[black_patch, blue_patch])
    plt.title(subject)
    # axis labels must be set before savefig, otherwise the saved file lacks them
    plt.xlabel('otu')
    # both series share the axis, so replace the label inherited from the last series
    plt.ylabel('mean abs coeff')
    plt.savefig('plots/auto_vs_nonauto_{}_lineplot.png'.format(subject))
    plt.show()

    #scatterplot
    kwargs = {'s':120, 'alpha':.6, 'edgecolor':'black'}
    plt.figure(figsize = [5, 5])

    sns.scatterplot(data = df, y = 'auto', x = 'non-auto', **kwargs)
    # fixed, identical limits on both axes; points above 0.25 are not shown
    plt.xlim(0, 0.25)
    plt.ylim(0, 0.25)
    plt.title(subject)
    plt.savefig('plots/auto_vs_nonauto_{}_scatterplot.png'.format(subject))
    plt.show()


In [None]:
# Draw and save the auto-vs-non-auto figures once per subject present in the table.
for subject in coefficient_direction_df['subject'].unique():
    
    plot_autocorr_dist(coefficient_direction_df, subject)

In [None]:
# Per subject: absolute coefficient against lag, self-interactions in the default
# colour and all other interactions in orange, drawn on the same axes.
for subject in df_autoreg['subject'].unique():
    
    plt.figure(figsize = [10, 3])

    # NOTE: this rebinds the notebook-level `kwargs` used by the earlier bar-plot cells.
    kwargs = {'s':120, 'alpha':.2, 'edgecolor':'white'}
    sns.scatterplot(data = df_autoreg[df_autoreg['subject'] == subject], x ='lag',  y= 'abs_coeff',  **kwargs)
    sns.scatterplot(data = df_nonautoreg[df_nonautoreg['subject'] == subject], x ='lag',  y= 'abs_coeff', color = 'orange', **kwargs)
    plt.title('auto and inter causation coefficients {}'.format(subject))
    # NOTE(review): the file name says 'lag1' although all lags present are plotted - confirm.
    plt.savefig('plots/lag1_auto_inter_causality_{}.png'.format(subject))
    plt.show()

### 2.5.2 autocorrelation index

In [None]:
def calculate_autoregressive_idx(df, subject):
    '''Compute, per target otu of one subject, the autoregressive index.

    The index of a target is the summed 'abs_coeff' of its rows labelled
    'autogressive' divided by the summed 'abs_coeff' of all its rows.

    Returns a frame with the columns 'target', 'autoregression_idx' and
    'subject', one row per target in order of first appearance.
    '''
    subject_rows = df[df['subject'] == subject]

    def _auto_share(target):
        # share of the target's total coefficient mass that comes from itself
        rows = subject_rows[subject_rows['target'] == target]
        total = rows['abs_coeff'].sum()
        own = rows.loc[rows['interaction'] == 'autogressive', 'abs_coeff'].sum()
        return own / total

    records = [(target, _auto_share(target)) for target in subject_rows['target'].unique()]

    autoreg_idx_df = pd.DataFrame(records, columns = ['target', 'autoregression_idx'])
    autoreg_idx_df['subject'] = subject

    return autoreg_idx_df

In [None]:
# Autoregressive index per otu, computed separately for each subject.
auto_idx_donorA = calculate_autoregressive_idx(coefficient_direction_df, 'donorA')
auto_idx_donorB = calculate_autoregressive_idx(coefficient_direction_df, 'donorB')
auto_idx_male   = calculate_autoregressive_idx(coefficient_direction_df, 'male')
auto_idx_female = calculate_autoregressive_idx(coefficient_direction_df, 'female')

# pd.concat replaces the chained DataFrame.append calls (append was removed in pandas 2.0);
# row order and index labels are the same as with append.
auto_idx_df     = pd.concat([auto_idx_donorA, auto_idx_donorB, auto_idx_male, auto_idx_female])

In [None]:
# One marker per otu: its autoregressive index, grouped by subject.
plt.figure(figsize = [10, 5])
sns.swarmplot(data=auto_idx_df,
              x= 'subject',
              y='autoregression_idx',
              dodge=True,
              alpha = .6,
              s = 7,
              palette = 'Set2',
              edgecolor = 'black',
              linewidth=.3
             )
plt.title('autoregressive index per otu')
plt.tight_layout()
# Writes into the relative 'plots/' directory.
# NOTE(review): the file name says 'lag1' while the matrices read above are the lag2 files - confirm.
plt.savefig('plots/autoregression_per_otu_lag1.png')

In [None]:
# TODO: autocorrelation on sVAR lag 1 and lag 2 - how does our model differ from the modified naive one (naive regression)? Is the rest of the microbiome crucial, or just an add-on?