# Alpha Diversity (Faith PD)

In [21]:
# Import Python packages
import colorsys
import os
from itertools import cycle

import biom
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.transforms as transforms
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from biom import load_table
from matplotlib.colors import ListedColormap
from matplotlib.colors import to_hex
from matplotlib.colors import to_rgba
from matplotlib.patches import Circle
from matplotlib.patches import Ellipse
from scipy.stats import mannwhitneyu
from skbio.diversity import alpha_diversity
from statsmodels.stats.multitest import multipletests

In [22]:
# Read in the genus-labelled, ASV-level (non-collapsed) abundance table
biom_path = '../Data/Tables/Absolute_Abundance_Tables/209766_filtered_by_prevalence_1pct_rare_filtered_Genus-ASV-non-collapse.biom'

# Transpose the BIOM table so that each row is a sample and each column a feature
biom_tbl = load_table(biom_path)
df = biom_tbl.to_dataframe().T
df

Unnamed: 0,g__Streptococcus_ASV-1,g__Streptococcus_ASV-2,g__Corynebacterium_ASV-1,g__Corynebacterium_ASV-3,g___ASV-3,g__Cutibacterium_ASV-1,g___ASV-16,g___ASV-25,g___ASV-27,g__Cutibacterium_ASV-2,...,g__Copromonas_ASV-2,g__Microvirga_ASV-1,g__Leptotrichia_A_993758_ASV-15,g___ASV-126,g__Leptotrichia_A_993758_ASV-16,g__Capnocytophaga_820688_ASV-7,g__UBA6175_ASV-2,g__Comamonas_F_589250_ASV-4,g__Bosea_ASV-2,g___ASV-119
900344,156.0,95.0,23.0,17.0,1.0,2.0,1.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
900459,21.0,30.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
900221,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
900570,18.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
900092,174.0,104.0,10.0,1.0,0,0,0,0,2.0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
900294,6.0,2.0,0,0,0,41.0,0,0,0,32.0,...,0,0,0,0,0,0,0,0,0,0
9003972,38.0,21.0,0,0,1.0,18.0,0,0,0,12.0,...,0,0,0,0,0,0,0,0,0,0
900097,3.0,0,0,0,0,3.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
900498,4.0,6.0,0,0,0,13.0,0,6.0,0,13.0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
# Read the tab-separated sample metadata and add `o_scorad_adj`,
# a copy of `o_scorad` in which missing scores are replaced by 0
metadata_path = '../Data/Metadata/updated_clean_ant_skin_metadata.tab'
metadata = pd.read_csv(metadata_path, sep='\t').assign(
    o_scorad_adj=lambda frame: frame['o_scorad'].fillna(0)
)

metadata

Unnamed: 0,#sample-id,PlateNumber,PlateLocation,i5,i5Sequence,i7,i7Sequence,identifier,Sequence,Plate ID,...,sex,enrolment_date,enrolment_season,hiv_exposure,hiv_status,household_size,o_scorad,FWD_filepath,REV_filepath,o_scorad_adj
0,Ca009ST_L,1,A1,SA501,ATCGTACG,SA701,CGAGAGTT,SA701SA501,CGAGAGTT-ATCGTACG,1.010000e+21,...,male,4/16/2015,Autumn,Unexposed,negative,4.0,40,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,40
1,900221,1,B1,SA502,ACTATCTG,SA701,CGAGAGTT,SA701SA502,CGAGAGTT-ACTATCTG,1.010000e+21,...,female,8/11/2015,Winter,Unexposed,negative,7.0,34,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,34
2,Ca010EB_L,1,C1,SA503,TAGCGAGT,SA701,CGAGAGTT,SA701SA503,CGAGAGTT-TAGCGAGT,1.010000e+21,...,female,11/20/2014,Spring,Unexposed,negative,7.0,21,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,21
3,900460,1,D1,SA504,CTGCGTGT,SA701,CGAGAGTT,SA701SA504,CGAGAGTT-CTGCGTGT,1.010000e+21,...,female,9/23/2015,Spring,Unexposed,,4.0,40,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,40
4,900051,1,E1,SA505,TCATCGAG,SA701,CGAGAGTT,SA701SA505,CGAGAGTT-TCATCGAG,1.010000e+21,...,male,4/21/2015,Autumn,Unexposed,negative,7.0,41,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,41
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
497,Ca006ON_L_2,6,H1,SA508,GACACCGT,SB701,CTCGACTT,SB701SA508,CTCGACTT-GACACCGT,1.010000e+21,...,female,3/25/2015,Autumn,Unexposed,negative,3.0,34,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,34
498,Ca006ON_NL,6,F2,SA506,CGTGAGTG,SB702,CGAAGTAT,SB702SA506,CGAAGTAT-CGTGAGTG,1.010000e+21,...,female,3/25/2015,Autumn,Unexposed,negative,3.0,34,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,34
499,Ca006ON_NL_2,6,H2,SA508,GACACCGT,SB702,CGAAGTAT,SB702SA508,CGAAGTAT-GACACCGT,1.010000e+21,...,female,3/25/2015,Autumn,Unexposed,negative,3.0,34,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,34
500,Ca006ON_PN,6,F3,SA506,CGTGAGTG,SB703,TAGCAGCT,SB703SA506,TAGCAGCT-CGTGAGTG,1.010000e+21,...,female,3/25/2015,Autumn,Unexposed,negative,3.0,34,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,34


In [24]:
# Combine case type and sampling area into one grouping label ("<case_type> <area>")
metadata = metadata.assign(
    individual_case_location=metadata['case_type'] + ' ' + metadata['area']
)
metadata['individual_case_location'].value_counts()

individual_case_location
case-lesional skin Umtata             61
case-nonlesional skin Umtata          61
case-anterior nares Umtata            61
control-anterior nares Umtata         55
control-nonlesional skin Umtata       52
case-nonlesional skin Cape Town       50
case-anterior nares Cape Town         47
case-lesional skin Cape Town          46
control-nonlesional skin Cape Town    35
control-anterior nares Cape Town      34
Name: count, dtype: int64

In [25]:
def darken_color(color, amount=0.3):
    """
    Darken a given color by scaling its RGB channels toward black.

    Parameters:
    - color: The base color, in any form accepted by matplotlib's `to_rgba`
      (e.g. a hex string or color name).
    - amount: Fraction by which each RGB channel is reduced (default: 0.3);
      0 leaves the color unchanged, 1 gives black.

    Returns:
    - The darkened color as an (r, g, b, a) tuple of floats. The alpha
      channel is passed through unchanged. Note that this is a tuple, not
      a hex string.
    """
    red, green, blue, alpha = to_rgba(color)
    scale = 1 - amount

    def _scaled(channel):
        # Clamp so that an `amount` outside [0, 1] cannot yield an invalid color.
        return min(max(channel * scale, 0.0), 1.0)

    return (_scaled(red), _scaled(green), _scaled(blue), alpha)

In [26]:
def brighten_color(color, amount=0.3):
    """
    Brighten a color by scaling up its RGB channels (no blending with white).

    Parameters:
    - color: color name or hex code
    - amount: boost factor; every RGB channel is multiplied by (1 + amount)
      and capped at 1.0, so 0 means no change

    Returns:
    - Hex string of the brightened color.
    """
    *rgb_channels, alpha = to_rgba(color)
    gain = 1 + amount
    boosted = [min(channel * gain, 1.0) for channel in rgb_channels]
    return to_hex((*boosted, alpha))

In [27]:
def _lighten_color(color, amount=0.3):
    """Return `color` as an (r, g, b) tuple with its HLS lightness raised.

    The lightness `l` is replaced by `1 - amount * (1 - l)` while hue and
    saturation are kept. `amount=1` leaves the color unchanged and
    `amount=0` gives white.
    """
    hue, lightness, saturation = colorsys.rgb_to_hls(*to_rgba(color)[:3])
    return colorsys.hls_to_rgb(hue, 1 - amount * (1 - lightness), saturation)


def _annotate_pairwise(ax, groups, df, group_col, y_col):
    """Run pairwise Mann-Whitney U tests and annotate Bonferroni-significant pairs.

    Every pair of `groups` is compared on `y_col` (two-sided test). The
    uncorrected p-value and U statistic of each pair are printed. Pairs whose
    Bonferroni-corrected p-value is below 0.05 get a bracket and a label on
    `ax`, stacked upwards from the maximum of `y_col`.

    Parameters:
    - ax: matplotlib Axes whose x positions 0..len(groups)-1 match `groups`.
    - groups: ordered list of group labels found in `df[group_col]`.
    - df: DataFrame holding `group_col` and `y_col`.
    - group_col: name of the grouping column.
    - y_col: name of the numeric column to compare.
    """
    comparisons, raw_pvals, u_stats = [], [], []
    for i in range(len(groups)):
        for j in range(i + 1, len(groups)):
            # Drop missing values so a single NaN cannot turn the test result into NaN.
            vals_i = df.loc[df[group_col] == groups[i], y_col].dropna()
            vals_j = df.loc[df[group_col] == groups[j], y_col].dropna()
            if vals_i.empty or vals_j.empty:
                # A skipped pair is also left out of the Bonferroni correction.
                print(f"{groups[i]} vs {groups[j]}: skipped (no data)")
                continue
            stat, p = mannwhitneyu(vals_i, vals_j, alternative='two-sided')
            comparisons.append((i, j))
            raw_pvals.append(p)
            u_stats.append(stat)
            print(f"{groups[i]} vs {groups[j]}: p={p:.2}, U={stat:.0f}")

    if not raw_pvals:
        return

    # Correct p-values across all comparisons of this panel
    _, pvals_corr, _, _ = multipletests(raw_pvals, method='bonferroni')

    y_top = df[y_col].max()
    spacing = 1.5  # vertical gap between the data (or previous bracket) and the next bracket
    for (i, j), p_corr, stat in zip(comparisons, pvals_corr, u_stats):
        if p_corr >= 0.05:
            continue
        y = y_top + spacing
        ax.plot([i, i, j, j], [y, y + 0.25, y + 0.25, y], lw=1, color='black')
        label = f"p={p_corr:.2}   U={stat:.0f}"
        ax.text((i + j) / 2, y + 0.3, label, ha='center', va='bottom', fontsize=9)
        y_top += spacing + 0.5  # ensure the next bracket stacks above this one


def _plot_site_panel(data, group_col, groups, palette, title, tick_step, out_path):
    """Draw one box + strip panel of Faith PD for a single site and save it as a PNG.

    Parameters:
    - data: DataFrame with `group_col` and a 'Faith_PD' column.
    - group_col: name of the grouping column.
    - groups: the three group labels in plotting order (healthy,
      AD non-lesional, AD lesional); they are labelled H / ADNL / ADL.
    - palette: dict mapping each group label to its box color.
    - title: panel title.
    - tick_step: spacing of the major y ticks.
    - out_path: path the figure is written to.
    """
    counts = data[group_col].value_counts().to_dict()
    tick_labels = [
        f"{abbr}\n(n={counts.get(group, 0)})"
        for abbr, group in zip(('H', 'ADNL', 'ADL'), groups)
    ]

    fig, ax = plt.subplots(figsize=(2.5, 4.5))
    sns.boxplot(x=group_col, y='Faith_PD', data=data, palette=palette,
                order=groups, ax=ax)
    # Points are drawn in a paler shade of the corresponding box color.
    point_palette = {group: _lighten_color(col, amount=0.3) for group, col in palette.items()}
    sns.stripplot(x=group_col, y='Faith_PD', data=data, palette=point_palette,
                  jitter=True, dodge=False, linewidth=0.6, order=groups, ax=ax)

    ax.set_title(title, fontsize=16, y=1)
    ax.set_xlabel('')
    ax.set_ylabel('Faith PD', fontsize=13)
    ax.set_xticks(range(len(tick_labels)))
    ax.set_xticklabels(tick_labels, ha='center', fontsize=13)
    ax.yaxis.set_major_locator(ticker.MultipleLocator(tick_step))
    ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f'{int(x)}'))

    _annotate_pairwise(ax, groups, data, group_col, 'Faith_PD')

    fig.savefig(out_path, dpi=600, bbox_inches='tight', pad_inches=0.1)


def plot_faith_pd_histo_split(metadata, group_col, umtata_healthy_n=22, random_state=42):
    """Plot Faith PD by skin status, one panel per site (Cape Town and Umtata).

    Faith PD values are read from "../Data/Faith_PD/vector.tsv" (first column
    = sample id, next column = value) and joined to `metadata` on
    '#sample-id'; only samples present in both are kept. Each site gets one
    panel comparing healthy control, AD non-lesional and AD lesional skin.
    Pairwise Mann-Whitney U results are printed and Bonferroni-significant
    pairs are annotated.

    Parameters:
    - metadata: DataFrame with a '#sample-id' column and `group_col`.
    - group_col: column holding labels such as
      'control-nonlesional skin Cape Town'.
    - umtata_healthy_n: number of Umtata healthy controls to keep, drawn
      without replacement (default 22). It is capped at the number available;
      None keeps all of them.
    - random_state: seed for that subsample (default 42).

    Side effects:
    - Prints the Cape Town group counts and the pairwise test results.
    - Writes '../Figures/Supplementary/Suppl_Fig_3A.png' (Cape Town) and
      '../Figures/Supplementary/Suppl_Fig_3C.png' (Umtata).
    """
    metadata = metadata.set_index('#sample-id')

    faith_pd = pd.read_csv("../Data/Faith_PD/vector.tsv", sep="\t")
    faith_pd = faith_pd.set_index(faith_pd.columns[0])

    common_samples = metadata.index.intersection(faith_pd.index)
    metadata = metadata.loc[common_samples].copy()
    # Take the value column explicitly instead of assigning a whole DataFrame.
    metadata['Faith_PD'] = faith_pd.loc[common_samples].iloc[:, 0]

    # ============ CAPE TOWN ============
    cape_groups = [
        'control-nonlesional skin Cape Town',
        'case-nonlesional skin Cape Town',
        'case-lesional skin Cape Town'
    ]
    cape_palette = {
        'control-nonlesional skin Cape Town': '#7FBCEB',
        'case-nonlesional skin Cape Town': '#FAD5A5',
        'case-lesional skin Cape Town': '#C9A34F',
    }
    cape_data = metadata[metadata[group_col].isin(cape_groups)].copy()
    print(cape_data[group_col].value_counts().to_dict())
    _plot_site_panel(cape_data, group_col, cape_groups, cape_palette,
                     title='Cape Town', tick_step=5,
                     out_path='../Figures/Supplementary/Suppl_Fig_3A.png')

    # ============ UMTATA ============
    umtata_groups = [
        'control-nonlesional skin Umtata',
        'case-nonlesional skin Umtata',
        'case-lesional skin Umtata'
    ]
    umtata_palette = {
        'control-nonlesional skin Umtata': '#66C2EE',
        'case-nonlesional skin Umtata': '#FAD5A5',
        'case-lesional skin Umtata': '#F0806B',
    }
    umtata_all = metadata[metadata[group_col].isin(umtata_groups)]
    umtata_healthy, umtata_adnl, umtata_adl = (
        umtata_all[umtata_all[group_col] == group] for group in umtata_groups
    )
    if umtata_healthy_n is not None:
        # Cap at the available count so a small cohort does not raise in .sample().
        umtata_healthy = umtata_healthy.sample(
            n=min(umtata_healthy_n, len(umtata_healthy)), random_state=random_state
        )
    umtata_data = pd.concat([umtata_healthy, umtata_adnl, umtata_adl])
    _plot_site_panel(umtata_data, group_col, umtata_groups, umtata_palette,
                     title='Umtata', tick_step=10,
                     out_path='../Figures/Supplementary/Suppl_Fig_3C.png')


In [28]:
# Plot Faith PD by skin status (H / ADNL / ADL), one figure per site
# (Cape Town and Umtata)
plot_faith_pd_histo_split(metadata, group_col='individual_case_location')


{'case-nonlesional skin Cape Town': 38, 'case-lesional skin Cape Town': 37, 'control-nonlesional skin Cape Town': 22}
control-nonlesional skin Cape Town vs case-nonlesional skin Cape Town: p=0.73, U=395
control-nonlesional skin Cape Town vs case-lesional skin Cape Town: p=0.31, U=342
case-nonlesional skin Cape Town vs case-lesional skin Cape Town: p=0.27, U=598
control-nonlesional skin Umtata vs case-nonlesional skin Umtata: p=0.0011, U=694
control-nonlesional skin Umtata vs case-lesional skin Umtata: p=0.0091, U=793
case-nonlesional skin Umtata vs case-lesional skin Umtata: p=0.16, U=908


  sns.stripplot(x=group_col, y='Faith_PD', data=cape_data, palette=brighter_ct,
  with pd.option_context('mode.use_inf_as_na', True):
  with pd.option_context('mode.use_inf_as_na', True):
  data_subset = grouped_data.get_group(pd_key)
  sns.stripplot(x=group_col, y='Faith_PD', data=umtata_data, palette=brighter_um,
  with pd.option_context('mode.use_inf_as_na', True):
  with pd.option_context('mode.use_inf_as_na', True):
  data_subset = grouped_data.get_group(pd_key)


In [29]:
def _plot_region_pair(data, order, palette, title, out_path):
    """Draw one Umtata-vs-Cape-Town Faith PD panel, annotate it and save it.

    Parameters:
    - data: DataFrame with 'region_group' and 'Faith_PD' columns.
    - order: two 'region_group' labels in plotting order, (Umtata, Cape Town).
    - palette: dict mapping each label in `order` to a color.
    - title: panel title.
    - out_path: path the figure is written to.

    Prints the per-group sample counts, then annotates the panel with the
    two-sided Mann-Whitney U p-value and U statistic (Cape Town values are
    passed as the first sample).
    """
    umtata_label, cape_label = order
    counts = data['region_group'].value_counts().to_dict()
    print(counts)

    fig, ax = plt.subplots(figsize=(2.5, 4.5))
    sns.boxplot(x='region_group', y='Faith_PD', data=data, palette=palette,
                order=order, ax=ax)
    sns.stripplot(x='region_group', y='Faith_PD', data=data, palette=palette,
                  jitter=True, dodge=False, linewidth=0.6, order=order, ax=ax)

    ax.set_title(title, fontsize=14, y=1.02)
    ax.set_xlabel('')
    ax.set_ylabel('Faith PD', fontsize=13)
    # Pin the tick positions before relabelling them.
    ax.set_xticks(range(len(order)))
    ax.set_xticklabels([
        f"Umtata\n(n={counts.get(umtata_label, 0)})",
        f"Cape Town\n(n={counts.get(cape_label, 0)})"
    ], fontsize=12)
    ax.yaxis.set_major_locator(ticker.MultipleLocator(5))

    stat, p = mannwhitneyu(
        data.loc[data['region_group'] == cape_label, 'Faith_PD'].dropna(),
        data.loc[data['region_group'] == umtata_label, 'Faith_PD'].dropna(),
        alternative='two-sided'
    )
    # The label sits 4 units below the largest value, midway between the two boxes.
    ax.text(0.5, data['Faith_PD'].max() - 4, f"p={p:.2}\nU={stat:.0f}",
            ha='center', va='bottom', fontsize=10)

    fig.savefig(out_path, dpi=600, bbox_inches='tight', pad_inches=0.1)


def plot_faith_pd_histo_HvsAD(metadata, group_col):
    """Compare Faith PD between Umtata and Cape Town within three skin-status groups.

    Faith PD values are read from "../Data/Faith_PD/vector.tsv" (first column
    = sample id, next column = value) and joined to `metadata` on
    '#sample-id'; only samples present in both are kept. Three panels are
    drawn, each with a two-sided Mann-Whitney U test:

    - healthy controls            -> '../Figures/Main/Fig_2A.png'
    - AD non-lesional             -> '../Figures/Main/Fig_2B.png'
    - AD (lesional + non-lesional) -> '../Figures/Main/Fig_2C.png'

    Parameters:
    - metadata: DataFrame with a '#sample-id' column and `group_col`.
    - group_col: column holding labels such as
      'control-nonlesional skin Cape Town'.

    Side effects: prints the per-panel group counts and writes the three PNGs.

    NOTE(review): the Umtata groups are resampled WITH replacement, which
    duplicates observations before the rank test. plot_faith_pd_histo_split
    samples without replacement. Confirm this is intended.
    """
    metadata = metadata.set_index('#sample-id')
    faith_pd = pd.read_csv("../Data/Faith_PD/vector.tsv", sep="\t")
    faith_pd = faith_pd.set_index(faith_pd.columns[0])

    common_samples = metadata.index.intersection(faith_pd.index)
    metadata = metadata.loc[common_samples].copy()
    # Take the value column explicitly instead of assigning a whole DataFrame.
    metadata['Faith_PD'] = faith_pd.loc[common_samples].iloc[:, 0]

    # Define region_group labels
    metadata['region_group'] = None
    metadata.loc[metadata[group_col] == 'control-nonlesional skin Cape Town', 'region_group'] = 'H_CapeTown'
    metadata.loc[metadata[group_col] == 'control-nonlesional skin Umtata', 'region_group'] = 'H_Umtata'
    metadata.loc[metadata[group_col].isin([
        'case-nonlesional skin Cape Town',
        'case-lesional skin Cape Town']), 'region_group'] = 'AD_CapeTown'
    metadata.loc[metadata[group_col].isin([
        'case-nonlesional skin Umtata',
        'case-lesional skin Umtata']), 'region_group'] = 'AD_Umtata'

    # ============ H vs H ============
    # Resample Umtata healthy to n=22 (with replacement)
    h_umtata = metadata[metadata['region_group'] == 'H_Umtata'].sample(
        n=22, random_state=42, replace=True
    )
    h_cape = metadata[metadata['region_group'] == 'H_CapeTown']
    _plot_region_pair(
        pd.concat([h_umtata, h_cape]),  # Umtata first
        order=['H_Umtata', 'H_CapeTown'],
        palette={'H_Umtata': '#66C2EE', 'H_CapeTown': '#7FBCEB'},
        title='Healthy Diversity',
        out_path='../Figures/Main/Fig_2A.png',
    )

    # ============ AD vs AD ============
    # Resample Umtata ADL to n=75 (with replacement). All Umtata ADNL samples
    # are then added unchanged; both already carry the 'AD_Umtata' label.
    # NOTE(review): the panel title says 'AD Les' but both sites include
    # non-lesional samples here. Confirm title or data selection.
    adl_umtata_sub = metadata[metadata[group_col] == 'case-lesional skin Umtata'].sample(
        n=75, random_state=42, replace=True
    )
    adnl_umtata_all = metadata[metadata[group_col] == 'case-nonlesional skin Umtata']
    ad_cape = metadata[metadata['region_group'] == 'AD_CapeTown']
    _plot_region_pair(
        pd.concat([adl_umtata_sub, adnl_umtata_all, ad_cape]),  # Umtata first
        order=['AD_Umtata', 'AD_CapeTown'],
        palette={'AD_Umtata': '#F0806B', 'AD_CapeTown': '#f1b970'},
        title='AD Les Diversity',
        out_path='../Figures/Main/Fig_2C.png',
    )

    # ============ ADNL vs ADNL ============
    # Resample Umtata ADNL to n=38 (with replacement). `.assign` returns a new
    # frame, so relabelling never writes to a slice of `metadata`.
    adnl_umtata = (
        metadata[metadata[group_col] == 'case-nonlesional skin Umtata']
        .sample(n=38, random_state=17, replace=True)
        .assign(region_group='ADNL_Umtata')
    )
    adnl_cape = (
        metadata[metadata[group_col] == 'case-nonlesional skin Cape Town']
        .assign(region_group='ADNL_CapeTown')
    )
    _plot_region_pair(
        pd.concat([adnl_umtata, adnl_cape]),
        order=['ADNL_Umtata', 'ADNL_CapeTown'],
        palette={'ADNL_Umtata': '#FAD5A5', 'ADNL_CapeTown': '#FAD5A5'},
        title='AD Non-les Diversity',
        out_path='../Figures/Main/Fig_2B.png',
    )



In [30]:
# Compare Faith PD between Umtata and Cape Town within the healthy,
# AD non-lesional and combined AD groups (Fig. 2A-C)
plot_faith_pd_histo_HvsAD(metadata, group_col='individual_case_location')


{'H_Umtata': 22, 'H_CapeTown': 22}
{'AD_Umtata': 117, 'AD_CapeTown': 75}


  sns.stripplot(x='region_group', y='Faith_PD', data=h_data, palette=h_palette,
  with pd.option_context('mode.use_inf_as_na', True):
  with pd.option_context('mode.use_inf_as_na', True):
  data_subset = grouped_data.get_group(pd_key)
  sns.stripplot(x='region_group', y='Faith_PD', data=ad_data, palette=ad_palette,
  with pd.option_context('mode.use_inf_as_na', True):
  with pd.option_context('mode.use_inf_as_na', True):
  data_subset = grouped_data.get_group(pd_key)


{'ADNL_Umtata': 38, 'ADNL_CapeTown': 38}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adnl_cape['region_group'] = 'ADNL_CapeTown'
  sns.stripplot(x='region_group', y='Faith_PD', data=adnl_data, palette=adnl_palette,
  with pd.option_context('mode.use_inf_as_na', True):
  with pd.option_context('mode.use_inf_as_na', True):
  data_subset = grouped_data.get_group(pd_key)
