## Faith Phylogenetic Diversity plots

In [2]:
# Import Python packages
import pandas as pd
import numpy as np
import biom
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import cycle
import os
from matplotlib.colors import ListedColormap
from matplotlib.colors import to_rgba

In [8]:
# Taxonomic rank used throughout the notebook: it selects the colour palette
# defined below and is embedded in the file names of the saved figures.
taxa_level = 'Genus'

In [7]:
# Read the tab-separated sample metadata, then tally how many samples fall
# into each case_type group (shown as the cell output).
metadata_path = '../Data/Metadata/updated_clean_ant_skin_metadata.tab'
metadata = pd.read_table(metadata_path)
metadata['case_type'].value_counts()

case_type
case-nonlesional skin       111
case-anterior nares         108
case-lesional skin          107
control-anterior nares       89
control-nonlesional skin     87
Name: count, dtype: int64

In [9]:
# Map each dataset key to the path of its collapsed (genus-level,
# relative-abundance) BIOM table. The plotting loop at the end of the notebook
# iterates over this dict; only the 16S V4 dataset is listed here.
biom_paths = {
    '16S_V4': '../Data/Tables/Relative_Abundance_Tables/df_16S_filtered_feature_table_rare_Genus_relative_abundance.biom'
}

In [10]:
# Predefined colour palette for specific genera, plus a catch-all 'Others' entry.
# NOTE: taxa_colors is only defined when taxa_level == "Genus"; any other value
# leaves the name unbound.
# NOTE(review): the genus keys carry a leading space -- presumably so they match
# labels obtained by splitting a '; '-separated taxonomy string; confirm before
# normalising them.
if taxa_level == "Genus":
    taxa_colors = {
        ' g__Cutibacterium': '#ffa505',  # Bright orange
        ' g__Staphylococcus': '#92f0f0',      # Fluorescent light blue
        ' g__Streptococcus': '#FF0000',    # Red
        ' g__Corynebacterium': '#ffe59a',        # Pastel yellow
        ' g__Lawsonella': '#70a8dc',         # Light blue
        ' g__Veillonella': '#c5bce0',         # Pastel purple
        ' g__Micrococcus':'#f4cccd',           # Pastel pink
        ' g__Alloprevotella': '#bcbcbc',        # Light gray
        ' g__Lactobacillus': '#daead3',     # Pale mint green
        ' g__Neisseria': '#f6475f',         # Reddish pink
        'Others': '#ededed'                 # Very light gray
    }

In [11]:
# Additional Python packages for alpha diversity analyses
from skbio.diversity import alpha_diversity
from scipy.stats import mannwhitneyu

In [13]:
def darken_color(color, amount=0.3):
    """
    Produce a darker shade of a colour by scaling its RGB channels.

    Parameters:
    - color: The base colour, in any form accepted by matplotlib's ``to_rgba``
      (e.g. a hex string or a colour name).
    - amount: Fraction removed from each RGB channel (default: 0.3); every
      channel is multiplied by ``1 - amount``.

    Returns:
    - An ``(r, g, b, a)`` tuple of floats. The alpha channel is passed through
      unchanged. Note that this is a tuple, not a hex string.
    """
    red, green, blue, alpha = to_rgba(color)
    keep = 1 - amount
    return (red * keep, green * keep, blue * keep, alpha)

In [19]:
def plot_faith_pd_histo(biom_path, metadata_path, group_col, title_suffix,
                        faith_pd_path="~/16S_AD_South-Africa/Data/Tables/Faith_PD Tables/vector.tsv",
                        dataset_key=None,
                        output_dir='../Plots/Analysis_figures/Diversity'):
    """
    Draw a box + strip plot of Faith's Phylogenetic Diversity per sample group,
    annotate every pairwise Mann-Whitney U comparison, and save the figure.

    Despite the name, no histogram is drawn.

    Parameters:
    - biom_path: Path of the dataset's BIOM table. Accepted for interface
      compatibility only; the Faith PD values are read from ``faith_pd_path``.
    - metadata_path: Tab-separated metadata file with a '#sample-id' column
      and the ``group_col`` column.
    - group_col: Metadata column holding the group labels (the five
      'control-…'/'case-…' categories listed in ``desired_order`` below).
    - title_suffix: Text placed in parentheses in the plot title (e.g. 'V4').
    - faith_pd_path: Tab-separated Faith PD table; the first column holds the
      sample IDs and the first remaining column holds the Faith PD values.
    - dataset_key: Prefix of the output file names. When None it is derived
      from ``title_suffix`` ('16S_<title_suffix>', or '16S' if the suffix is
      empty), so the function no longer depends on a notebook-global ``key``.
    - output_dir: Directory receiving the PNG and SVG; created if missing.

    Reads the module-level ``taxa_level`` (used in the output file names) and
    calls the sibling helper ``darken_color``.

    Returns:
    - dict mapping '<group1> vs <group2>' to the two-sided Mann-Whitney U
      p-value. The p-values are NOT corrected for multiple testing.

    Raises:
    - ValueError: if the metadata and the Faith PD table share no sample IDs.
    """
    if dataset_key is None:
        dataset_key = f'16S_{title_suffix}' if title_suffix else '16S'

    # Load the metadata, indexed by sample ID
    metadata = pd.read_csv(metadata_path, sep='\t').set_index('#sample-id')

    # Load the Faith PD table and index it by its first column (sample ID)
    faith_pd_table = pd.read_csv(faith_pd_path, sep="\t")
    faith_pd_table = faith_pd_table.set_index(faith_pd_table.columns[0])

    # Keep only samples present in both tables and attach their Faith PD value
    common_samples = metadata.index.intersection(faith_pd_table.index)
    if len(common_samples) == 0:
        raise ValueError("No sample IDs are shared between the metadata and the Faith PD table")
    metadata = metadata.loc[common_samples].copy()
    # Select the value column explicitly: assigning a whole DataFrame to one
    # column only works when the table happens to have exactly one data column.
    metadata['Faith_PD'] = faith_pd_table.loc[common_samples].iloc[:, 0]

    # Plotting order: skin groups first (healthy, AD non-lesional, AD lesional),
    # then the anterior-nares groups (healthy, AD)
    desired_order = ['control-nonlesional skin', 'case-nonlesional skin', 'case-lesional skin',
                     'control-anterior nares', 'case-anterior nares']

    # Box colour per group
    palette = {
        'control-nonlesional skin': '#3333B3',  # Dark blue: healthy skin
        'case-nonlesional skin': '#5cbccb',     # Blue: AD non-lesional skin
        'case-lesional skin': '#f16c52',        # Red: AD lesional skin
        'control-anterior nares': '#008000',    # Green: healthy anterior nares
        'case-anterior nares': '#FFC0CB'        # Pink: AD anterior nares
    }

    # Tick label per group; defined unconditionally so the labels never depend
    # on which dataset is being plotted
    display_labels = {
        'control-nonlesional skin': 'Healthy skin',
        'case-nonlesional skin': 'AD NL skin',
        'case-lesional skin': 'AD L skin',
        'control-anterior nares': 'Healthy anterior nares',
        'case-anterior nares': 'AD anterior nares'
    }

    fig, ax = plt.subplots(figsize=(10, 10))

    # Box plot of Faith PD per group
    sns.boxplot(x=group_col, y='Faith_PD', data=metadata, palette=palette, order=desired_order, ax=ax)

    # Overlay the individual samples in a darker shade of each box colour
    darker_palette = {group: darken_color(color) for group, color in palette.items()}
    sns.stripplot(x=group_col, y='Faith_PD', data=metadata, palette=darker_palette, jitter=True,
                  dodge=False, ax=ax, linewidth=0.6, order=desired_order)

    # Title and axis labels
    ax.set_title(f'16S rRNA ({title_suffix}) Faith Phylogenetic Diversity', fontsize=18)
    ax.set_xlabel(' ')
    ax.set_ylabel('Faith PD', fontsize=16)
    ax.set_xticks(range(len(desired_order)))
    ax.set_xticklabels([display_labels[group] for group in desired_order],
                       rotation=45, ha='center', fontsize=16)

    # Pairwise significance testing (two-sided Mann-Whitney U, uncorrected)
    values_by_group = {
        group: metadata.loc[metadata[group_col] == group, 'Faith_PD'].dropna()
        for group in desired_order
    }
    p_values = {}

    # Brackets are stacked above the tallest data point, one level per comparison
    y_max = metadata['Faith_PD'].max() + 0.1
    height_step = 1.5  # Vertical gap between consecutive brackets

    for i, group1 in enumerate(desired_order):
        for j in range(i + 1, len(desired_order)):
            group2 = desired_order[j]
            group1_values = values_by_group[group1]
            group2_values = values_by_group[group2]

            # mannwhitneyu raises on empty input; skip groups without samples
            if group1_values.empty or group2_values.empty:
                print(f"Skipping {group1} vs {group2}: at least one group has no samples")
                continue

            stat, p = mannwhitneyu(group1_values, group2_values, alternative='two-sided')
            p_values[f'{group1} vs {group2}'] = p

            # Prefix the p-value with stars according to the usual thresholds
            if p >= 0.05:
                label = f"{p:.2e}"
            elif p < 0.001:
                label = '***  ' + f"{p:.2e}"
            elif p < 0.01:
                label = '**  ' + f"{p:.2e}"
            else:
                label = '*  ' + f"{p:.2e}"

            # Box positions on the x axis equal the group indices
            x1, x2 = i, j
            y = y_max + height_step  # Baseline of this bracket

            # Draw the bracket and write the label above its centre
            ax.plot([x1, x1, x2, x2], [y, y + 0.1, y + 0.1, y], lw=1, color='black')
            ax.text((x1 + x2) * 0.5, y + 0.2, label, ha='center', va='bottom', fontsize=12)

            # Raise the baseline for the next comparison
            y_max += height_step + 0.5

    # Save once, after every annotation has been drawn
    os.makedirs(output_dir, exist_ok=True)
    output_stem = os.path.join(output_dir, f'{dataset_key}_{taxa_level}_FaithPD')
    fig.savefig(f'{output_stem}.png', dpi=600, bbox_inches='tight', pad_inches=0.1)  # Save as png
    fig.savefig(f'{output_stem}.svg')  # Save as svg

    # Print pairwise p-values in scientific notation
    print("Pairwise Mann-Whitney U test p-values:")
    for comparison, p_value in p_values.items():
        print(f"{comparison}: p-value = {p_value:.2e}")

    return p_values


In [20]:
# Draw the Faith PD plot for every dataset listed in biom_paths
# (only the 16S V4 dataset is configured there)
for key, biom_path in biom_paths.items():
    region_label = 'V4' if key == '16S_V4' else ''
    plot_faith_pd_histo(
        biom_path=biom_path,
        metadata_path=metadata_path,
        group_col='case_type',
        title_suffix=region_label,
    )


  sns.stripplot(x=group_col, y='Faith_PD', data=metadata, palette=darker_palette, jitter=True, dodge=False, ax=ax, linewidth=0.6, order=desired_order)
  with pd.option_context('mode.use_inf_as_na', True):
  with pd.option_context('mode.use_inf_as_na', True):
  data_subset = grouped_data.get_group(pd_key)


Pairwise Mann-Whitney U test p-values:
control-nonlesional skin vs case-nonlesional skin: p-value = 2.21e-04
control-nonlesional skin vs case-lesional skin: p-value = 7.43e-03
control-nonlesional skin vs control-anterior nares: p-value = 3.04e-16
control-nonlesional skin vs case-anterior nares: p-value = 8.50e-15
case-nonlesional skin vs case-lesional skin: p-value = 6.62e-02
case-nonlesional skin vs control-anterior nares: p-value = 8.91e-13
case-nonlesional skin vs case-anterior nares: p-value = 3.22e-11
case-lesional skin vs control-anterior nares: p-value = 3.33e-16
case-lesional skin vs case-anterior nares: p-value = 1.86e-15
control-anterior nares vs case-anterior nares: p-value = 5.82e-01
