## Alpha Diversity by Individual AD Status and Region

In [186]:
# Import Python packages
import pandas as pd
import numpy as np
import biom
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import cycle
import os
from matplotlib.colors import ListedColormap
from matplotlib.colors import to_rgba
from scipy.stats import mannwhitneyu
from skbio.diversity import alpha_diversity
from matplotlib.patches import Ellipse
import matplotlib.transforms as transforms
from matplotlib.patches import Circle
from matplotlib.colors import to_hex
from matplotlib.colors import to_rgba


In [187]:
# Load the metadata
metadata_path = '../Data/Metadata/updated_clean_ant_skin_metadata.tab'
metadata = pd.read_csv(metadata_path, sep='\t')
metadata['case_type'].value_counts()

case_type
case-nonlesional skin       111
case-anterior nares         108
case-lesional skin          107
control-anterior nares       89
control-nonlesional skin     87
Name: count, dtype: int64

In [188]:
# Create mapping dictionary for case types to individual cases
case_type_mapping = {
    'case-nonlesional skin': 'AD skin',
    'case-anterior nares': 'AD nares', 
    'case-lesional skin': 'AD skin',
    'control-anterior nares': 'H nares',
    'control-nonlesional skin': 'H skin'
}

# Create new column using the mapping
metadata['individual_case'] = metadata['case_type'].map(case_type_mapping)

metadata['individual_case_location'] = metadata['individual_case'] + ' ' + metadata['area']
metadata['individual_case_location'].value_counts()

individual_case_location
AD skin Umtata        122
AD skin Cape Town      96
AD nares Umtata        61
H nares Umtata         55
H skin Umtata          52
AD nares Cape Town     47
H skin Cape Town       35
H nares Cape Town      34
Name: count, dtype: int64

In [189]:
metadata.to_csv('../Data/Metadata/updated_clean_ant_skin_metadata_microbiome_type-location.tab', sep='\t')

In [190]:
def darken_color(color, amount=0.3):
    """
    Darken a given color by a certain amount.
    
    Parameters:
    - color: The base color (as a hex string or color name).
    - amount: The amount to darken the color by (default: 0.3).
    
    Returns:
    - A darkened color as a hex string.
    """
    c = to_rgba(color)
    return (c[0] * (1 - amount), c[1] * (1 - amount), c[2] * (1 - amount), c[3])

In [191]:
def brighten_color(color, amount=0.3):
    """
    Brighten a color by increasing its RGB intensity without blending with white.
    
    Parameters:
    - color: color name or hex code
    - amount: brightness boost factor (0 = no change, 1 = full brightness)

    Returns:
    - Hex string of the brightened color.
    """
    rgba = to_rgba(color)
    r = min(rgba[0] * (1 + amount), 1.0)
    g = min(rgba[1] * (1 + amount), 1.0)
    b = min(rgba[2] * (1 + amount), 1.0)
    return to_hex((r, g, b, rgba[3]))

In [192]:
def plot_faith_pd_histo(metadata, group_col):
    metadata = metadata.set_index('#sample-id')

    # Load Faith PD data
    feature_table = pd.read_csv(
        "../Analyses/Xebec/Output/results/alpha_div/phylo/faith_pd/vector.tsv", sep="\t"
    )
    feature_table = feature_table.set_index(feature_table.columns[0])


    # Keep only overlapping samples
    common_samples = metadata.index.intersection(feature_table.index)
    metadata = metadata.loc[common_samples].copy()
    metadata['Faith_PD'] = feature_table.loc[common_samples]

    # Desired group order
    desired_order = ['H nares Cape Town', 'AD nares Cape Town', 'H nares Umtata', 'AD nares Umtata']

    # Color palette
    palette = {
        'H nares Cape Town': '#A7C7E7',
        'H nares Umtata': '#ADD8E6',
        'AD nares Cape Town': '#d2b48c',
        'AD nares Umtata': '#fa8072'
    }

    # Add group counts for x-axis labels
    group_counts = metadata[group_col].value_counts().to_dict()
    new_labels = [
        f"H child\n(n={group_counts.get('H nares Cape Town', 0)})",
        f"AD child\n(n={group_counts.get('AD nares Cape Town', 0)})",
        f"H child\n(n={group_counts.get('H nares Umtata', 0)})",
        f"AD child\n(n={group_counts.get('AD nares Umtata', 0)})"
    ]

    # Start figure
    plt.figure(figsize=(8, 4.5))
    ax = sns.boxplot(x=group_col, y='Faith_PD', data=metadata, palette=palette, order=desired_order)

    # Darker dots on top
    # darker_palette = {k: darken_color(v) for k, v in palette.items()}
    brighter_palette = {k: brighten_color(v, amount=0.3) for k, v in palette.items()}

    sns.stripplot(x=group_col, y='Faith_PD', data=metadata, palette=brighter_palette,
                  jitter=True, dodge=False, ax=ax, linewidth=0.6, order=desired_order)

    # Customize axes and labels
    plt.suptitle(f'Alpha Diversity by Region', fontsize=20, y = 1.03)
    plt.title(f'Cape Town                      Umtata', fontsize=16, y = 1)

    plt.xlabel(' ')
    plt.ylabel('Faith PD', fontsize=16)
    plt.xticks(ticks=range(len(new_labels)), labels=new_labels, ha='center', fontsize=16)
    ax.set_ylim(0, 35)

    # Pairwise significance testing (only p <= 0.05 shown)
    groups = desired_order
    p_values = {}
    y_max = metadata['Faith_PD'].max() - 1
    height_step = 0.4

    for i, group1 in enumerate(groups):
        for j, group2 in enumerate(groups):
            if i < j:
                vals1 = metadata[metadata[group_col] == group1]['Faith_PD']
                vals2 = metadata[metadata[group_col] == group2]['Faith_PD']
                stat, p = mannwhitneyu(vals1, vals2, alternative='two-sided')
                p_values[f'{group1} vs {group2}'] = p

                if p <= 0.05:
                    if p < 0.001:
                        label = '***  ' + f"{p:.2}"
                    elif p < 0.01:
                        label = '**  ' + f"{p:.2}"
                    else:
                        label = '*  ' + f"{p:.2}"

                    x1, x2 = i, j
                    y = y_max
                    plt.plot([x1, x1, x2, x2], [y, y + 0.1, y + 0.1, y], lw=1, color='black')
                    plt.text((x1 + x2) * 0.5, y + 0.1, label, ha='center', va='bottom', fontsize=10)
                    y_max += height_step

    # Save the figure
    plt.savefig('../Plots/Analysis_figures/Diversity/CapeTown_Umtata_FaithPD_nares.png', dpi=600, bbox_inches='tight', pad_inches=0.1)
    plt.savefig('../Plots/Analysis_figures/Diversity/CapeTown_Umtata_FaithPD_nares.svg')

    # Print p-values
    print("Pairwise Mann-Whitney U test p-values:")
    for comparison, p_value in p_values.items():
        print(f"{comparison}: p-value = {p_value:.2e}")

In [193]:
# Plot Alpha Diversity plots for both V1-V3 and V4
plot_faith_pd_histo(
    metadata=metadata,
    group_col='individual_case_location'
)


  sns.stripplot(x=group_col, y='Faith_PD', data=metadata, palette=brighter_palette,
  with pd.option_context('mode.use_inf_as_na', True):
  with pd.option_context('mode.use_inf_as_na', True):
  data_subset = grouped_data.get_group(pd_key)


Pairwise Mann-Whitney U test p-values:
H nares Cape Town vs AD nares Cape Town: p-value = 5.74e-02
H nares Cape Town vs H nares Umtata: p-value = 1.67e-02
H nares Cape Town vs AD nares Umtata: p-value = 3.11e-02
AD nares Cape Town vs H nares Umtata: p-value = 4.40e-01
AD nares Cape Town vs AD nares Umtata: p-value = 8.35e-01
H nares Umtata vs AD nares Umtata: p-value = 5.34e-01


## Beta Diversity by Individual AD Status and Region

In [194]:
# import Python packages
import pandas as pd
import numpy as np
from numpy import array
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.gridspec import GridSpec
import scipy
import scipy.stats as ss
from skbio.stats.distance import permanova
import biom
from biom import load_table
from biom.util import biom_open
from gemelli.rpca import rpca

### See RPCA standalone python tutorial: 
# https://github.com/biocore/gemelli/blob/master/ipynb/tutorials/RPCA-moving-pictures-standalone-cli-and-api.ipynb

In [195]:
# # read in biom table
# biom_tbl = biom.load_table("/Users/annanguyen/16S_AD_South-Africa/Data/Tables/Relative_Abundance_Tables/df_16S_filtered_feature_table_rare_Genus_relative_abundance.biom")

# print(biom_tbl.ids(axis= 'sample'))
# len(biom_tbl.ids(axis= 'sample'))
# type(biom_tbl.ids(axis = 'sample'))

# Function to load BIOM table, collapse by taxa, sort rows by row sum, remove specified samples, and convert to relative abundance
def load_biom_table(biom_path, metadata_path):
    # Load BIOM table and convert to a DataFrame
    table = biom.load_table(biom_path)
    df = pd.DataFrame(table.matrix_data.toarray(),
                      index=table.ids(axis='observation'),
                      columns=table.ids(axis='sample'))
    
    # df = df.T
    # Sort rows by row sum in descending order
    df['row_sum'] = df.sum(axis=1)
    df = df.sort_values(by='row_sum', ascending=False)
    
    # Drop the 'row_sum' column before proceeding
    df = df.drop(columns=['row_sum'])

    # Replace ' g__' rows with ' g__Unknown'
    df.index = df.index.map(lambda x: ' g__Unknown' if x == ' g__' else x)

    # Remove '15564.' prefix from columns
    df.columns = df.columns.str.replace('15564.', '')

     # Load metadata as a DataFrame from the file path
    metadata = pd.read_csv(metadata_path, sep='\t')
    metadata['#sample-id'] = metadata['#sample-id'].str.replace('_', '')

    # Set Sample-ID as the index for the metadata dataframe 
    metadata = metadata.set_index('#sample-id')

    # Subset metadata to only contain the same samples as in the BIOM df
    metadata_sub = metadata.loc[df.columns]

    # Get a list of the sample ids that are just nares
    metadataindex = metadata_sub[metadata_sub['case_type'].str.endswith('skin')].index.tolist()
    
    
    # Drop all sample-ids that correspond to nares in the BIOM table DataFrame
    df = df.drop(columns= metadataindex)

    # Subsets the Metadata to only contain the sample ids that are in the newly subsetted biom table DataFrame with only Skin
    metadata_sub = metadata.loc[df.columns]

    
    #returns BIOM table DataFrame and the metadata that has been subsetted to only contain skin
    return df, metadata_sub


# converts the subsetted skin only BIOM table DataFrame back into a BIOM to run RPCA
def convert_df_to_biom(table, biom_output_file):
    obs_ids = table.index
    samp_ids = table.columns
    biom_table = biom.table.Table(table.values, observation_ids=obs_ids, sample_ids=samp_ids) # Convert df back to a biom table
    #biom_output_file = f"../tables/{name}_subset.biom" # Path to output file

    with biom_open(biom_output_file, 'w') as f: # This does the actual saving!
        biom_table.to_hdf5(f, generated_by="subsetted tables")

    return biom_table



In [196]:
# specify the path to the BIOM table and Metadata
# biom_path = "../Data/Tables/Relative_Abundance_Tables/df_16S_filtered_feature_table_rare_Genus_relative_abundance.biom"
biom_path = "../Data/Tables/Absolute_Abundance_Tables/209766_filtered_feature_table.biom"
metadata_path = '../Data/Metadata/updated_clean_ant_skin_metadata_microbiome_type-location.tab'
skin_only_table, metadata_sub = load_biom_table(biom_path, metadata_path)

skin_only_table

Unnamed: 0,900459,900092,900391,900466,9003932,900556,900612,900301,900563,900237,...,900069,900142,900616,Co004LNPN,900328a,900547,900086,900304,900580,900484
GTGCCAGCAGCCGCGGTAATACGTAGGTCCCGAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTAGATAAGTCTGAAGTTAAAGGCTG,118.0,3106.0,688.0,9133.0,2107.0,3417.0,6.0,82.0,611.0,6.0,...,5110.0,13048.0,6.0,1101.0,372.0,1825.0,37.0,3406.0,7314.0,2575.0
GTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTATCCGGAATTATTGGGCGTAAAGCGCGCGTAGGCGGTTTTTTAAGTCTGATGTGAAAGCCCA,14.0,0.0,0.0,18.0,0.0,16.0,0.0,33673.0,0.0,0.0,...,0.0,0.0,0.0,29.0,7.0,33.0,35.0,22.0,354.0,0.0
GTGCCAGCAGCCGCGGTAATACGGAGGGTGCGAGCGTTAATCGGAATAACTGGGCGTAAAGGGCACGCAGGCGGTTATTTAAGTGAGGTGTGAAAGCCCC,432.0,0.0,0.0,0.0,1209.0,1308.0,719.0,2.0,30.0,76.0,...,0.0,58.0,0.0,0.0,279.0,6122.0,0.0,79.0,7.0,458.0
GTGCCAGCCGCCGCGGTAATACGTAGGTGGCAAGCGTTATCCGGAATTATTGGGCGTAAAGCGCGCGTAGGCGGTTTTTTAAGTCTGATGTGAAAGCCCA,19.0,0.0,0.0,0.0,0.0,0.0,0.0,19375.0,0.0,12.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,280.0,0.0
GTGCCAGCCGCCGCGGTAATACGTAGGTCCCGAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTAGATAAGTCTGAAGTTAAAGGCTG,106.0,1707.0,0.0,5406.0,857.0,1204.0,0.0,0.0,377.0,0.0,...,0.0,4334.0,4.0,0.0,0.0,0.0,0.0,0.0,5114.0,2150.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GTGCCAGCAGCCGCGGTAATACGGAGGGTCCGAGCGTTATCCGGAATCATTGGGTTTAAAGGGTCCGTAGGCGGCTTTATAAGTCAGTGGTGAAATCCGG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
GTGCCAGCAGCCGCGGTAATACGTAGGGTGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCTGTGAAATTCCG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
GTGCCAGCCGCCGCGGTAATACGTAGGGCGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGGTGTGAAAACTCG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
GTGCCAGAGCCGCGGTAATACGTAGGTCCCGAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTAGATAAGTCTGAAGTTAAAGGCTGT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [197]:
# Create the path to the skin BIOM file by using the prefix of the original BIOM file
nares_only_biom_path = biom_path.removesuffix('.biom') + '_nares_only.biom'

# Print the path that was created
print(f'{nares_only_biom_path = }')

# Converting df to BIOM table and saving the biom table to the path specified above
biom_tbl = convert_df_to_biom(skin_only_table, nares_only_biom_path)

nares_only_biom_path = '../Data/Tables/Absolute_Abundance_Tables/209766_filtered_feature_table_nares_only.biom'


In [198]:
# perform RPCA with auto rank estimation
np.seterr(divide = 'ignore')
ordination, distance = rpca(biom_tbl)

# extract and view sample ordinations from RPCA result
spca_df = ordination.samples

# Add a case type column into the spca_df using the meta_data and matching by indices by the join function
spca_df = spca_df.join(metadata_sub['individual_case_location'])

spca_df

Unnamed: 0,PC1,PC2,PC3,individual_case_location
900459,-0.010591,-0.088512,0.049942,AD nares Umtata
900092,0.045115,0.070730,-0.080180,AD nares Cape Town
900391,-0.039030,0.000598,-0.045942,H nares Umtata
900466,0.000768,0.004813,-0.166650,AD nares Umtata
9003932,0.087608,-0.028640,-0.023018,H nares Umtata
...,...,...,...,...
900547,0.086773,-0.084143,0.028597,H nares Cape Town
900086,-0.059003,0.072043,0.070186,AD nares Cape Town
900304,0.045750,-0.024506,-0.042610,AD nares Umtata
900580,0.018258,0.175107,-0.096730,AD nares Cape Town


In [199]:
# extract and view feature ordinations from RPCA result
fpca_df = ordination.features
fpca_df.head()

Unnamed: 0,PC1,PC2,PC3
GTGCCAGCAGCCGCGGTAATACGTAGGTCCCGAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTAGATAAGTCTGAAGTTAAAGGCTG,0.098794,-0.397388,-0.528729
GTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTATCCGGAATTATTGGGCGTAAAGCGCGCGTAGGCGGTTTTTTAAGTCTGATGTGAAAGCCCA,-0.171917,-0.118308,0.028445
GTGCCAGCAGCCGCGGTAATACGGAGGGTGCGAGCGTTAATCGGAATAACTGGGCGTAAAGGGCACGCAGGCGGTTATTTAAGTGAGGTGTGAAAGCCCC,0.488259,-0.48781,0.358997
GTGCCAGCCGCCGCGGTAATACGTAGGTGGCAAGCGTTATCCGGAATTATTGGGCGTAAAGCGCGCGTAGGCGGTTTTTTAAGTCTGATGTGAAAGCCCA,-0.128159,-0.070724,0.010761
GTGCCAGCCGCCGCGGTAATACGTAGGTCCCGAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTAGATAAGTCTGAAGTTAAAGGCTG,0.186306,-0.185556,-0.648762


In [200]:
def permanova_on_case_type_subset(df, dist_matrix, case_type_subset):
    """
    Perform PERMANOVA on a subset of the data.
    
    Parameters:
    - df: DataFrame with metadata, must include the grouping variable
    - dist_matrix: DistanceMatrix object from scikit-bio
    - case_type_subset: list of case_type groups to include in the test
    
    Returns:
    - PERMANOVA result (dict-like with p-value, test statistic, etc.)
    """
    # Subset the DataFrame
    print(df)
    subset_df = df[df['individual_case_location'].isin(case_type_subset)]
    
    # Get the matching IDs and subset distance matrix
    ids = subset_df.index
    print(ids)
    sub_dm = dist_matrix.filter(ids, strict=False)
    
    # Run PERMANOVA
    result = permanova(sub_dm, grouping=subset_df['individual_case_location'], permutations=999)
    
    return result

In [201]:
# calculate permanova F-statistic for all combinations between Healthy, case lesional and case non lesional

case_type_subsets = [
    ["H nares Cape Town", "AD nares Cape Town"],
    ["H nares Umtata", "AD nares Umtata"]]

perma_res = {}

for i, case_type_subset in enumerate(case_type_subsets):
    # print("Subset case_type:", case_type_subset)
    # print("Available sample IDs:", spca_df.index.tolist())

    result = permanova_on_case_type_subset(spca_df, distance, case_type_subset)
    group_label = (
        "Nares of Healthy vs. AD Children in Cape Town" if i == 0 else
        "Nares of Healthy vs. AD Children in Umtata"
    )
    f_val = result["test statistic"]
    p_val = result["p-value"]
    
    perma_res[group_label] = {
        "p": f"{p_val:.2e}",
        "f": f"{f_val:.2f}"
    }

perma_res



              PC1       PC2       PC3 individual_case_location
900459  -0.010591 -0.088512  0.049942          AD nares Umtata
900092   0.045115  0.070730 -0.080180       AD nares Cape Town
900391  -0.039030  0.000598 -0.045942           H nares Umtata
900466   0.000768  0.004813 -0.166650          AD nares Umtata
9003932  0.087608 -0.028640 -0.023018           H nares Umtata
...           ...       ...       ...                      ...
900547   0.086773 -0.084143  0.028597        H nares Cape Town
900086  -0.059003  0.072043  0.070186       AD nares Cape Town
900304   0.045750 -0.024506 -0.042610          AD nares Umtata
900580   0.018258  0.175107 -0.096730       AD nares Cape Town
900484   0.031543 -0.029679 -0.023330          AD nares Umtata

[197 rows x 4 columns]
Index(['900092', '900556', '900612', '900563', '900601', 'Ca009STPN', '900577',
       '900623', '900543', '900144', '900065', '900059', '900541', 'Co001SMPN',
       '900080', '900072', '900109', 'Ca007NKPN', 'Ca006ONPN

{'Nares of Healthy vs. AD Children in Cape Town': {'p': '6.90e-02',
  'f': '2.47'},
 'Nares of Healthy vs. AD Children in Umtata': {'p': '3.50e-02', 'f': '3.19'}}

In [202]:
spca_df['individual_case_location'] = spca_df['individual_case_location'].replace({
    'H nares Cape Town': 'H Child CT',
    'AD nares Cape Town': 'AD Child CT',
    'H nares Umtata': 'H Child UM',
    'AD nares Umtata': 'AD Child UM'
})

spca_df

Unnamed: 0,PC1,PC2,PC3,individual_case_location
900459,-0.010591,-0.088512,0.049942,AD Child UM
900092,0.045115,0.070730,-0.080180,AD Child CT
900391,-0.039030,0.000598,-0.045942,H Child UM
900466,0.000768,0.004813,-0.166650,AD Child UM
9003932,0.087608,-0.028640,-0.023018,H Child UM
...,...,...,...,...
900547,0.086773,-0.084143,0.028597,H Child CT
900086,-0.059003,0.072043,0.070186,AD Child CT
900304,0.045750,-0.024506,-0.042610,AD Child UM
900580,0.018258,0.175107,-0.096730,AD Child CT


In [203]:
print("Columns in spca_df:", spca_df.columns)

Columns in spca_df: Index(['PC1', 'PC2', 'PC3', 'individual_case_location'], dtype='object')


In [204]:
spca_df["simplified_label"] = spca_df["individual_case_location"].replace({
    "H Child CT": "H Child",
    "AD Child CT": "AD Child",
    "H Child UM": "H Child",
    "AD Child UM": "AD Child"
})


In [205]:
# Set the color palette for the groups in the correct order
palette = {
    'H Child CT': '#A7C7E7',
    'H Child UM': '#ADD8E6',
    'AD Child CT': '#d2b48c',
    'AD Child UM': '#fa8072'
}

In [206]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.patches import Circle

# Create label map with (n=#)
label_map = spca_df["individual_case_location"].value_counts().to_dict()
label_map = {k: f"{k} (n={v})" for k, v in label_map.items()}

# Add labeled group column
spca_df["group_label"] = spca_df["individual_case_location"].map(label_map)

# Create figure
fig, axes = plt.subplots(1, 2, figsize=(9, 5), sharex=True, sharey=True)

# Rename ordination axes
fpca_df.columns = [f"PC{i+1}" for i in range(fpca_df.shape[1])]

split_map = {
    "Cape Town": ["H Child CT", "AD Child CT"],
    "Umtata": ["H Child UM", "AD Child UM"]
}

for ax, (title, groups) in zip(axes, split_map.items()):
    subset_df = spca_df[spca_df["individual_case_location"].isin(groups)].copy()
    subset_df["group_label"] = subset_df["individual_case_location"].map(label_map)
    
    sns.scatterplot(
        data=subset_df,
        x="PC1",
        y="PC2",
        hue="group_label",
        hue_order=[label_map[g] for g in groups],
        s=50,
        edgecolor="black",
        linewidth=0.5,
        palette={label_map[g]: palette[g] for g in groups},
        ax=ax
    )
    
    for case_type, case_type_df in subset_df.groupby("individual_case_location"):
        color = palette[case_type]
        points = case_type_df[["PC1", "PC2"]].values

        centroid = points.mean(axis=0)
        dists = np.linalg.norm(points - centroid, axis=1)
        radius = np.percentile(dists, 90)

        circle = Circle(centroid, radius, edgecolor=color, facecolor=color, alpha=0.2, lw=1, zorder=0)
        ax.add_patch(circle)

    ax.set_title(title, fontsize=16)
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.legend(frameon=False, fontsize=9, loc='upper right')

# Shared axis labels and ticks
pc1_pct, pc2_pct, _ = [f"RPCA PC{i+1} ({x*100:.2f}%)" for i, x in enumerate(ordination.proportion_explained)]
axes[0].set_ylabel(pc2_pct, fontsize=14)
for ax in axes:
    ax.set_xlabel(pc1_pct, fontsize=14)
    ax.set_xticks([-0.2, -0.1, 0.0, 0.1, 0.2, 0.3])
    ax.set_xticklabels([-0.2, -0.1, 0.0, 0.1, 0.2, 0.3], fontsize=12)
    ax.set_yticks([-0.2, -0.1, 0.0, 0.1, 0.2, 0.3])
    ax.set_yticklabels([-0.2, -0.1, 0.0, 0.1, 0.2, 0.3], fontsize=12)

# PERMANOVA stats
axes[0].text(
    -0.2, 0.27,
    f"p={float(perma_res['Nares of Healthy vs. AD Children in Cape Town']['p']):.3f}, "
    f"F={float(perma_res['Nares of Healthy vs. AD Children in Cape Town']['f']):.2f}",
    fontsize=12
)
axes[1].text(
    -0.2, 0.27,
    f"p={float(perma_res['Nares of Healthy vs. AD Children in Umtata']['p']):.3f}, "
    f"F={float(perma_res['Nares of Healthy vs. AD Children in Umtata']['f']):.2f}",
    fontsize=12
)

# Final touches
fig.suptitle("Beta Diversity by Region", fontsize=20, y=0.92)
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.savefig("../Plots/Analysis_figures/Diversity/16S_Beta_Diversity_RPCA_skin_by-location_split_nares.png", dpi=600)
