# Beta Diversity (RPCA)

In [15]:
# import Python packages
import pandas as pd
import numpy as np
from numpy import array
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.gridspec import GridSpec
import scipy
import scipy.stats as ss
from skbio.stats.distance import permanova
import biom
from biom import load_table
from biom.table import Table
from biom.util import biom_open
import h5py
from gemelli.rpca import rpca
from matplotlib.patches import Circle
from matplotlib.colors import to_hex
import statsmodels.api as sm
from skbio import DistanceMatrix
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri, r
from rpy2.robjects.conversion import localconverter


### See RPCA standalone python tutorial: 
# https://github.com/biocore/gemelli/blob/master/ipynb/tutorials/RPCA-moving-pictures-standalone-cli-and-api.ipynb

In [16]:
def load_biom_table_skin_only(biom_path, metadata_path):
    """
    Load BIOM table and metadata, remove non-skin samples (e.g., nares),
    sort by total abundance, and return both DataFrame and filtered metadata.
    """
    import pandas as pd
    import biom

    # Load BIOM table
    table = biom.load_table(biom_path)
    df = pd.DataFrame(
        table.matrix_data.toarray(),
        index=table.ids(axis='observation'),
        columns=table.ids(axis='sample')
    )

    # Sort rows by total abundance (descending)
    df['row_sum'] = df.sum(axis=1)
    df = df.sort_values(by='row_sum', ascending=False)
    df = df.drop(columns=['row_sum'])

    # Replace ' g__' rows with ' g__Unknown'
    df.index = df.index.map(lambda x: ' g__Unknown' if x.strip() == 'g__' or x.strip() == 'g' else x)

    # Remove '15564.' prefix from columns
    df.columns = df.columns.str.replace('15564.', '', regex=False)

    # Load metadata
    metadata = pd.read_csv(metadata_path, sep='\t')

    # Normalize sample IDs to match BIOM column names
    metadata['#sample-id'] = metadata['#sample-id'].str.replace('_', '', regex=False)
    metadata = metadata.set_index('#sample-id')

    # Add individual_case_location
    metadata['individual_case_location'] = metadata['case_type'] + ' ' + metadata['area']

    # Convert SCORAD to numeric
    metadata['o_scorad'] = pd.to_numeric(metadata['o_scorad'], errors='coerce')

    # Keep only skin samples
    skin_metadata = metadata[metadata['case_type'].str.contains('skin', na=False)]

    # Subset BIOM table to matching skin samples
    shared_samples = df.columns.intersection(skin_metadata.index)
    df_skin = df[shared_samples]
    skin_metadata = skin_metadata.loc[shared_samples]

    print(f"Skin-only BIOM table created with {len(shared_samples)} samples and {df_skin.shape[0]} features.")

    return df_skin, skin_metadata


biom_path = '../Data/Tables/Count_Tables/3_209766_feature_table_dedup_prev-filt-1pct.biom'
metadata_path = '../Metadata/16S_AD_South-Africa_metadata_subset.tsv'

df_skin, metadata_skin = load_biom_table_skin_only(biom_path, metadata_path)

# ==============================================
# ===== FILTER FOR AD (SCORAD 15-40) AND HEALTHY =====
# ==============================================
print(f"\nBefore filtering: {len(metadata_skin)} samples")
print(f"Case type distribution:\n{metadata_skin['case_type'].value_counts()}")

# Filter for:
# 1. AD cases with SCORAD between 15-40, OR
# 2. Control/healthy samples
metadata_filtered = metadata_skin[
    (
        (metadata_skin['case_type'].str.contains('case', case=False, na=False)) &
        (metadata_skin['o_scorad'].between(15, 40))
    ) |
    (metadata_skin['case_type'].str.contains('control', case=False, na=False))
].copy()

print(f"\nAfter filtering (AD SCORAD 15-40 + Healthy): {len(metadata_filtered)} samples")
print(f"Case type distribution:\n{metadata_filtered['case_type'].value_counts()}")

# Check SCORAD distribution for cases
print(f"\nSCORAD statistics for AD cases:")
case_mask = metadata_filtered['case_type'].str.contains('case', case=False, na=False)
print(metadata_filtered.loc[case_mask, 'o_scorad'].describe())

# Subset BIOM table to match filtered metadata
df_filtered = df_skin[metadata_filtered.index]

print(f"\nFiltered BIOM table: {df_filtered.shape[0]} features × {df_filtered.shape[1]} samples")

# Update variable names for downstream analysis
df_skin = df_filtered
metadata_skin = metadata_filtered

print("\n=== Final dataset ready for analysis ===")
print(f"Samples: {len(metadata_skin)}")
print(f"Features: {df_skin.shape[0]}") 

Skin-only BIOM table created with 282 samples and 868 features.

Before filtering: 282 samples
Case type distribution:
case_type
case-lesional skin          99
case-nonlesional skin       99
control-nonlesional skin    84
Name: count, dtype: int64

After filtering (AD SCORAD 15-40 + Healthy): 182 samples
Case type distribution:
case_type
control-nonlesional skin    84
case-lesional skin          49
case-nonlesional skin       49
Name: count, dtype: int64

SCORAD statistics for AD cases:
count    98.000000
mean     30.428571
std       6.166086
min      21.000000
25%      25.000000
50%      30.000000
75%      36.000000
max      40.000000
Name: o_scorad, dtype: float64

Filtered BIOM table: 868 features × 182 samples

=== Final dataset ready for analysis ===
Samples: 182
Features: 868


In [17]:
def save_df_as_biom(df, output_path):
    """
    Convert a pandas DataFrame (samples as rows, features as columns)
    into a BIOM table and save to disk.
    """
    print(f"Saving DataFrame as BIOM: {output_path}")
    print(f"Input shape: {df.shape} (samples x features)")

    # Ensure the correct orientation: rows = samples, columns = features
    obs_ids = df.index
    samp_ids = df.columns
    biom_obj = Table(df.values, observation_ids=obs_ids, sample_ids=samp_ids)

    # Save in HDF5 BIOM format
    with biom_open(output_path, 'w') as f:
        biom_obj.to_hdf5(f, "Generated from pandas DataFrame")

    print(f"BIOM table successfully saved to: {output_path}")


# Define output path
output_path = '../Data/Tables/Count_Tables/3_209766_feature_table_dedup_prev-filt-1pct_skin-only_subset.biom'

# Save DataFrame as BIOM
save_df_as_biom(df_skin, output_path)



Saving DataFrame as BIOM: ../Data/Tables/Count_Tables/3_209766_feature_table_dedup_prev-filt-1pct_skin-only_subset.biom
Input shape: (868, 182) (samples x features)
BIOM table successfully saved to: ../Data/Tables/Count_Tables/3_209766_feature_table_dedup_prev-filt-1pct_skin-only_subset.biom


In [18]:
# Reload the BIOM table
biom_table = load_table(output_path) # *skin-only.biom

# Perform RPCA on the BIOM Table (not DataFrame)
np.seterr(divide='ignore')
ordination, distance = rpca(biom_table)


# Save RPCA distance matrix
distance_df = pd.DataFrame(distance.data,
                           index=distance.ids,
                           columns=distance.ids)

# Remove index name
distance_df.index.name = None

# Save the distance matrix
distance_df.to_csv('../Data/Beta_Diversity/rpca_distance_matrix.tsv', sep='\t')

# Extract per-sample ordinations
spca_df = ordination.samples
print(f"RPCA ordination shape: {spca_df.shape}")

# Add metadata for plotting
spca_df = spca_df.join(metadata_skin['individual_case_location'])
spca_df.head()

RPCA ordination shape: (182, 3)


Unnamed: 0,PC1,PC2,PC3,individual_case_location
900344,-0.01465,-0.067463,0.039545,control-nonlesional skin Umtata
900221,0.071628,0.010093,0.147081,case-lesional skin Umtata
900570,0.08201,-0.09851,-0.036723,case-nonlesional skin Cape Town
900321,-0.014285,-0.01034,0.041141,control-nonlesional skin Umtata
900245,0.086925,-0.013606,0.038745,control-nonlesional skin Umtata


In [19]:
# extract and view feature ordinations from RPCA result
fpca_df = ordination.features
fpca_df.head()

Unnamed: 0,PC1,PC2,PC3
GTGCCAGCAGCCGCGGTAATACGTAGGTCCCGAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTAGATAAGTCTGAAGTTAAAGGCTG,-0.087769,-0.327797,0.083257
GTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTATCCGGAATTATTGGGCGTAAAGCGCGCGTAGGCGGTTTTTTAAGTCTGATGTGAAAGCCCA,0.035283,-0.174349,-0.558086
GTGCCAGCCGCCGCGGTAATACGTAGGTCCCGAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTTAGATAAGTCTGAAGTTAAAGGCTG,-0.102951,-0.285263,0.0941
GTGCCAGCCGCCGCGGTAATACGTAGGTGGCAAGCGTTATCCGGAATTATTGGGCGTAAAGCGCGCGTAGGCGGTTTTTTAAGTCTGATGTGAAAGCCCA,0.009097,-0.125812,-0.512744
GTGCCAGCAGCCGCGGTAATACGGAGGGTGCGAGCGTTAATCGGAATAACTGGGCGTAAAGGGCACGCAGGCGGTTATTTAAGTGAGGTGTGAAAGCCCC,-0.09095,-0.151078,0.122585


In [20]:
def permanova_on_case_type_subset(df, dist_matrix, case_type_subset):
    """
    Perform PERMANOVA on a subset of the data.
    
    Parameters:
    - df: DataFrame with metadata, must include the grouping variable
    - dist_matrix: DistanceMatrix object from scikit-bio
    - case_type_subset: list of case_type groups to include in the test
    
    Returns:
    - PERMANOVA result (dict-like with p-value, test statistic, etc.)
    """
    # Subset the DataFrame
    subset_df = df[df['individual_case_location'].isin(case_type_subset)]
    print(subset_df)
    group_counts = subset_df['individual_case_location'].value_counts()
    print("Group counts:", group_counts)

    # Get the matching IDs and subset distance matrix
    ids = subset_df.index
    sub_dm = dist_matrix.filter(ids, strict=False)
    
    # Run PERMANOVA
    result = permanova(sub_dm, grouping=subset_df['individual_case_location'], permutations=999)
    
    return result

In [21]:
# Set the color palette for the groups in the correct order
palette = {
    'control-nonlesional skin Cape Town': '#7FBCEB',
    'control-nonlesional skin Umtata': '#66C2EE',
    'case-nonlesional skin Cape Town': '#FAD5A5',
    'case-nonlesional skin Umtata': '#FAD5A5',
    'case-lesional skin Cape Town': '#cd853f',
    'case-lesional skin Umtata': '#fa8072'
}

In [22]:
def permanova_on_case_type_subset(df, dist_matrix, case_type_subset):
    """
    Perform PERMANOVA on a subset of the data.
    
    Parameters:
    - df: DataFrame with metadata, must include the grouping variable
    - dist_matrix: DistanceMatrix object from scikit-bio
    - case_type_subset: list of case_type groups to include in the test
    
    Returns:
    - PERMANOVA result (dict-like with p-value, test statistic, etc.)
    """
    # Subset the DataFrame
    subset_df = df[df['individual_case_location'].isin(case_type_subset)]
    print(subset_df)
    group_counts = subset_df['individual_case_location'].value_counts()
    print("Group counts:", group_counts)

    # Get the matching IDs and subset distance matrix
    ids = subset_df.index
    sub_dm = dist_matrix.filter(ids, strict=False)
    
    # Run PERMANOVA
    result = permanova(sub_dm, grouping=subset_df['individual_case_location'], permutations=999)
    
    return result

In [23]:
# Define all pairwise combinations for both locations
case_type_subsets = [
    # Cape Town
    ["control-nonlesional skin Cape Town", "case-nonlesional skin Cape Town"],
    ["control-nonlesional skin Cape Town", "case-lesional skin Cape Town"],
    ["case-nonlesional skin Cape Town", "case-lesional skin Cape Town"],
    # Umtata
    ["control-nonlesional skin Umtata", "case-nonlesional skin Umtata"],
    ["control-nonlesional skin Umtata", "case-lesional skin Umtata"],
    ["case-nonlesional skin Umtata", "case-lesional skin Umtata"]
]

perma_res = {}

for case_type_subset in case_type_subsets:
    print("Subset case_type:", case_type_subset)

    result = permanova_on_case_type_subset(spca_df, distance, case_type_subset)

    # Build descriptive key for results dictionary
    area = "Cape Town" if "Cape Town" in case_type_subset[0] else "Umtata"
    g1, g2 = [c.replace(f" {area}", "") for c in case_type_subset]
    label = f"PERMANOVA {g1} vs. {g2} ({area})"

    # Extract F and p values
    f_val = result["test statistic"]
    p_val = result["p-value"]

    # Store formatted results
    perma_res[label] = {
        "p": f"{p_val:.2e}",
        "f": f"{f_val:.2f}"
    }

# Display results
for k, v in perma_res.items():
    print(f"{k}: F = {v['f']}, p = {v['p']}")


Subset case_type: ['control-nonlesional skin Cape Town', 'case-nonlesional skin Cape Town']
                PC1       PC2       PC3            individual_case_location
900570     0.082010 -0.098510 -0.036723     case-nonlesional skin Cape Town
900145     0.107163  0.124847 -0.035255  control-nonlesional skin Cape Town
900544     0.073279 -0.025068 -0.102265  control-nonlesional skin Cape Town
900600    -0.009069  0.096791  0.027176  control-nonlesional skin Cape Town
900596    -0.018431  0.087119 -0.143788  control-nonlesional skin Cape Town
900085     0.003394 -0.015436  0.042003     case-nonlesional skin Cape Town
900139    -0.042749  0.063169  0.019771  control-nonlesional skin Cape Town
900111     0.073847  0.004133 -0.032691     case-nonlesional skin Cape Town
900120     0.017555 -0.001397 -0.023798     case-nonlesional skin Cape Town
900074    -0.067960 -0.022744  0.090514     case-nonlesional skin Cape Town
900137    -0.080143  0.012079 -0.196547  control-nonlesional skin Cape T

In [24]:
# Set seed for reproducibility
np.random.seed(42)

# Identify Healthy samples for each region
um_h_samples = spca_df.loc[
    spca_df["individual_case_location"] == "control-nonlesional skin Umtata"
].index
ct_h_samples = spca_df.loc[
    spca_df["individual_case_location"] == "control-nonlesional skin Cape Town"
].index

# Subsample Healthy samples
um_h_subsample = np.random.choice(um_h_samples, size=26, replace=False)
ct_h_subsample = np.random.choice(ct_h_samples, size=23, replace=False)

# Create a mask to keep only those
keep_samples = spca_df.index[
    spca_df["individual_case_location"].isin([
        "case-lesional skin Umtata", "case-nonlesional skin Umtata",
        "case-lesional skin Cape Town", "case-nonlesional skin Cape Town"
    ])
].union(um_h_subsample).union(ct_h_subsample)

# Subset the full spca_df to keep only those samples
spca_df = spca_df.loc[keep_samples]

print(f"Umtata Healthy subsampled: {len(um_h_subsample)} samples")
print(f"Cape Town Healthy subsampled: {len(ct_h_subsample)} samples")
print(f"Final total samples: {spca_df.shape[0]}")


Umtata Healthy subsampled: 26 samples
Cape Town Healthy subsampled: 23 samples
Final total samples: 147


In [25]:
# Set the color palette for the groups in the correct order
palette = {
    'control-nonlesional skin Cape Town': '#7FBCEB',
    'control-nonlesional skin Umtata': '#66C2EE',
    'case-nonlesional skin Cape Town': '#FAD5A5',
    'case-nonlesional skin Umtata': '#FAD5A5',
    'case-lesional skin Cape Town': '#cd853f',
    'case-lesional skin Umtata': '#fa8072'
}

In [26]:
print("Umtata H count:", sum(spca_df["individual_case_location"] == "control-nonlesional skin Umtata"))
print("Cape Town H count:", sum(spca_df["individual_case_location"] == "control-nonlesional skin Cape Town"))
print(spca_df["individual_case_location"].value_counts())


Umtata H count: 26
Cape Town H count: 23
individual_case_location
case-lesional skin Umtata             26
case-nonlesional skin Umtata          26
control-nonlesional skin Umtata       26
case-lesional skin Cape Town          23
case-nonlesional skin Cape Town       23
control-nonlesional skin Cape Town    23
Name: count, dtype: int64


In [27]:
# Reload metadata for global access
metadata = pd.read_csv(metadata_path, sep='\t')
metadata['#sample-id'] = metadata['#sample-id'].str.replace('_', '', regex=False)
metadata = metadata.set_index('#sample-id')

# Map area column to spca_df
spca_df = spca_df.join(metadata[['area']], how='left')

# Map age_months column to spca_df
spca_df = spca_df.join(metadata[['age_months']], how='left')

# Map enrolment_season column to spca_df
spca_df = spca_df.join(metadata[['enrolment_season']], how='left')

# Map case_type column to spca_df
spca_df = spca_df.join(metadata[['case_type']], how='left')

# Define confounders identified from previous analysis
confounders_to_adjust = ['age_months', 'enrolment_season']

# ==============================================
# ===== PREPARE LABELS AND COLORS =====
# ==============================================
group_short_labels = {
    "case-lesional skin Umtata": "UM-ADL",
    "case-lesional skin Cape Town": "CT-ADL",
    "case-nonlesional skin Umtata": "UM-ADNL",
    "case-nonlesional skin Cape Town": "CT-ADNL",
    "control-nonlesional skin Umtata": "UM-H",
    "control-nonlesional skin Cape Town": "CT-H"
}

# Add short label column
spca_df["short_label"] = spca_df["individual_case_location"].map(group_short_labels)

# Create label map with (n=#)
label_map = spca_df["short_label"].value_counts().to_dict()
label_map = {k: f"{k} (n={v})" for k, v in label_map.items()}
spca_df["group_label"] = spca_df["short_label"].map(label_map)

# Define PC variance
pc1_var = ordination.proportion_explained['PC1'] * 100
pc2_var = ordination.proportion_explained['PC2'] * 100

# Define short names for display
short_names = {
    "control-nonlesional skin": "H",
    "case-nonlesional skin": "ADNL",
    "case-lesional skin": "ADL"
}

# Define color palette
palette = {
    "control-nonlesional skin Cape Town": "#7FBCEB",
    "case-nonlesional skin Cape Town": "#FAD5A5",
    "case-lesional skin Cape Town": "#C9A34F",
    "control-nonlesional skin Umtata": "#66C2EE",
    "case-nonlesional skin Umtata": "#FAD5A5",
    "case-lesional skin Umtata": "#F0806B"
}

def format_p(p):
    """Format p-values in plain decimal form with a lower bound."""
    if p < 0.001:
        return "<0.001"
    else:
        return f"{p:.3f}"

In [28]:
def downsample_per_group(df, group_col, n_per_group, random_state=42):
    """Randomly downsample each group to n_per_group samples (without replacement)."""
    return (
        df.groupby(group_col, group_keys=False)
          .apply(lambda x: x.sample(n=min(len(x), n_per_group), random_state=random_state))
    )


# ==============================================
# ===== SUPPLEMENTARY FIGURE: CAPE TOWN (ADJUSTED) =====
# ==============================================
ct_full_groups = [
    "control-nonlesional skin Cape Town",
    "case-lesional skin Cape Town",
    "case-nonlesional skin Cape Town"
]
ct_subset = spca_df[spca_df["individual_case_location"].isin(ct_full_groups)].copy()

# Downsample each group to 23 samples
ct_subset = downsample_per_group(ct_subset, "individual_case_location", n_per_group=23)

fig_ct, ax_ct = plt.subplots(figsize=(7, 6))
sns.scatterplot(
    data=ct_subset,
    x="PC1", y="PC2",
    hue="group_label",
    hue_order=[label_map[group_short_labels[g]] for g in ct_full_groups],
    s=50, edgecolor="black", linewidth=0.5,
    palette={label_map[group_short_labels[g]]: palette[g] for g in ct_full_groups},
    ax=ax_ct
)

# Confidence ellipses
for group in ct_full_groups:
    df = ct_subset[ct_subset["individual_case_location"] == group]
    color = palette[group]
    pts = df[["PC1", "PC2"]].values
    if len(pts) == 0:
        continue
    center = pts.mean(axis=0)
    radius = np.percentile(np.linalg.norm(pts - center, axis=1), 90)
    ax_ct.add_patch(Circle(center, radius, edgecolor=color, facecolor=color, alpha=0.2, lw=1, zorder=0))

# Axis + style
ax_ct.set_xlabel(f"RPCA PC1 ({pc1_var:.1f}%)", fontsize=16)
ax_ct.set_ylabel(f"RPCA PC2 ({pc2_var:.1f}%)", fontsize=16)
ax_ct.tick_params(labelsize=12)
ax_ct.legend(frameon=False, fontsize=12, loc='upper right')
ax_ct.set_title("Cape Town)", fontsize=18)

# ==== Adjusted PERMANOVA annotations ====
y_text = 0.1
line_height = 0.05
i = 0
order = ["H vs. ADL", "H vs. ADNL", "ADNL vs. ADL"]

for comparison in order:
    for label, stats in perma_res.items():
        if "Cape Town" in label:
            parts = label.replace("PERMANOVA ", "").replace(" (Cape Town)", "").split(" vs. ")
            short_label = f"{short_names[parts[0]]} vs. {short_names[parts[1]]}"
            if short_label == comparison:
                p_val = stats['p']
                if isinstance(p_val, str) and p_val.startswith('<'):
                    p_disp = p_val
                else:
                    p_disp = f"{float(p_val):.3f}"
                ax_ct.text(
                    0.78, y_text - i * line_height,
                    f"{short_label}: F={float(stats['f']):.1f}, p={p_disp}",
                    fontsize=10,
                    ha='center', transform=ax_ct.transAxes
                )
                i += 0.75

plt.tight_layout()
fig_ct.savefig("../Figures/Main/Fig_4C.jpg", dpi=600, bbox_inches='tight')
plt.show()


# ==============================================
# ===== SUPPLEMENTARY FIGURE: UMTATA (ADJUSTED) =====
# ==============================================
um_full_groups = [
    "control-nonlesional skin Umtata",
    "case-lesional skin Umtata",
    "case-nonlesional skin Umtata"
]
um_subset = spca_df[spca_df["individual_case_location"].isin(um_full_groups)].copy()

# Downsample each group to 26 samples
um_subset = downsample_per_group(um_subset, "individual_case_location", n_per_group=26)

fig_um, ax_um = plt.subplots(figsize=(7, 6))
sns.scatterplot(
    data=um_subset,
    x="PC1", y="PC2",
    hue="group_label",
    hue_order=[label_map[group_short_labels[g]] for g in um_full_groups],
    s=50, edgecolor="black", linewidth=0.5,
    palette={label_map[group_short_labels[g]]: palette[g] for g in um_full_groups},
    ax=ax_um
)

# Confidence ellipses
for group in um_full_groups:
    df = um_subset[um_subset["individual_case_location"] == group]
    color = palette[group]
    pts = df[["PC1", "PC2"]].values
    if len(pts) == 0:
        continue
    center = pts.mean(axis=0)
    radius = np.percentile(np.linalg.norm(pts - center, axis=1), 90)
    ax_um.add_patch(Circle(center, radius, edgecolor=color, facecolor=color, alpha=0.2, lw=1, zorder=0))

# Axis + style
ax_um.set_xlabel(f"RPCA PC1 ({pc1_var:.1f}%)", fontsize=16)
ax_um.set_ylabel(f"RPCA PC2 ({pc2_var:.1f}%)", fontsize=16)
ax_um.tick_params(labelsize=12)
ax_um.legend(frameon=False, fontsize=12, loc='upper right')
ax_um.set_title("Umtata (rural", fontsize=18)

# ==== Adjusted PERMANOVA annotations ====
y_text = 0.1
line_height = 0.05
i = 0
order = ["H vs. ADL", "H vs. ADNL", "ADNL vs. ADL"]

for comparison in order:
    for label, stats in perma_res.items():
        if "Umtata" in label:
            parts = label.replace("PERMANOVA ", "").replace(" (Umtata)", "").split(" vs. ")
            short_label = f"{short_names[parts[0]]} vs. {short_names[parts[1]]}"
            if short_label == comparison:
                p_val = stats['p']
                if isinstance(p_val, str) and p_val.startswith('<'):
                    p_disp = p_val
                else:
                    p_disp = f"{float(p_val):.3f}"
                ax_um.text(
                    0.78, y_text - i * line_height,
                    f"{short_label}: F={float(stats['f']):.1f}, p={p_disp}",
                    fontsize=10,
                    ha='center', transform=ax_um.transAxes
                )
                i += 0.75

plt.tight_layout()
fig_um.savefig("../Figures/Main/Fig_4B.jpg", dpi=600, bbox_inches='tight')
plt.show()

print("✓ Downsampled adjusted PERMANOVA RPCA plots saved.")


  .apply(lambda x: x.sample(n=min(len(x), n_per_group), random_state=random_state))
  plt.show()
  .apply(lambda x: x.sample(n=min(len(x), n_per_group), random_state=random_state))


✓ Downsampled adjusted PERMANOVA RPCA plots saved.


  plt.show()
