# Visualization of FEAST source tracking outputs

In [20]:
import pandas as pd
import re
import os
import seaborn as sns
import matplotlib.pyplot as plt
import networkx as nx


In [21]:
# FEAST result tables, keyed by path. Each entry records which skin group the
# table belongs to and the direction of transmission (source→sink) it encodes.
feast_dir = "../Analyses/FEAST"
feast_runs = [
    # (file name, skin group, direction)
    ("1_case-lesional_top21_nares-source.tsv", "case-lesional", "nares→skin"),
    ("2_case-lesional_top21_skin-source.tsv", "case-lesional", "skin→nares"),
    ("3_case-nonlesional_top21_nares-source.tsv", "case-nonlesional", "nares→skin"),
    ("4_case-nonlesional_top21_skin-source.tsv", "case-nonlesional", "skin→nares"),
    ("5_healthy_top21_nares-source.tsv", "healthy", "nares→skin"),
    ("6_healthy_top21_skin-source.tsv", "healthy", "skin→nares"),
]

# path -> (skin group, direction); insertion order follows the run numbering
file_metadata = {
    f"{feast_dir}/{basename}": (group_name, flow)
    for basename, group_name, flow in feast_runs
}

In [22]:
# Subject IDs look like "Ca-" + three digits + "-" + two uppercase letters.
_SUBJECT_ID_PATTERN = re.compile(r'Ca-\d{3}-[A-Z]{2}')


def extract_subject_id(name: object) -> "str | None":
    """Return the first subject ID embedded in a sample name.

    Parameters
    ----------
    name
        Sample label to search. Anything that is not a string (for example a
        missing or numeric label) is treated as carrying no subject ID.

    Returns
    -------
    str or None
        The first substring matching ``Ca-ddd-XX`` (three digits, two
        uppercase letters), or ``None`` when there is no match.
    """
    # Guard against non-string labels, which would make re.search raise TypeError.
    if not isinstance(name, str):
        return None
    match = _SUBJECT_ID_PATTERN.search(name)
    return match.group(0) if match else None

# Contribution cutoff for a "strong" match; 0.1 corresponds to the ≥10% filter.
STRONG_CONTRIBUTION_THRESHOLD = 0.1

# Explicit column schema so the summary table has these columns even when no
# within-subject pair is found (an empty frame would otherwise lack
# "Contribution" and the filter below would raise KeyError).
MATCH_COLUMNS = ["Sink", "Source", "Subject_ID", "Contribution", "Direction", "Skin_Group"]

# Initialize list to hold all subject-matched contributions
all_matches = []

# Loop through files and process
for filename, (skin_group, direction) in file_metadata.items():
    print(f"Processing {filename}...")
    # Rows are sinks, columns are sources (first column holds the sink labels).
    feast_output = pd.read_csv(filename, sep="\t", index_col=0)

    # Extract subject IDs for every sink (row label) and source (column label)
    sink_subject_ids = feast_output.index.to_series().apply(extract_subject_id)
    source_subject_ids = pd.Series(feast_output.columns).apply(extract_subject_id)
    source_subject_ids.index = feast_output.columns

    # Group source columns by subject once per file so each sink only visits
    # its own subject's sources instead of scanning every column.
    sources_by_subject = {}
    for source, source_subj in source_subject_ids.items():
        if source_subj is None:
            continue
        sources_by_subject.setdefault(source_subj, []).append(source)

    # Record intra-subject contributions (sink and source share a subject ID).
    # Sinks are visited in row order and sources in column order.
    for sink, sink_subj in sink_subject_ids.items():
        if sink_subj is None:
            continue
        for source in sources_by_subject.get(sink_subj, []):
            all_matches.append({
                "Sink": sink,
                "Source": source,
                "Subject_ID": sink_subj,
                "Contribution": feast_output.at[sink, source],
                "Direction": direction,
                "Skin_Group": skin_group
            })

# Convert to DataFrame (columns fixed up front; see MATCH_COLUMNS)
summary_df = pd.DataFrame(all_matches, columns=MATCH_COLUMNS)

# Filter for strong contributions (≥10%), strongest first
strong_matches_df = (
    summary_df[summary_df["Contribution"] >= STRONG_CONTRIBUTION_THRESHOLD]
    .sort_values(by="Contribution", ascending=False)
)

# Save both full and filtered versions
summary_df.to_csv("../Analyses/FEAST/all_matched_contributions.tsv", sep="\t", index=False)
strong_matches_df.to_csv("../Analyses/FEAST/strong_matched_contributions.tsv", sep="\t", index=False)

print("✅ Done! Saved:")
print("- all_matched_contributions.tsv")
print("- strong_matched_contributions.tsv")

Processing ../Analyses/FEAST/1_case-lesional_top21_nares-source.tsv...
Processing ../Analyses/FEAST/2_case-lesional_top21_skin-source.tsv...
Processing ../Analyses/FEAST/3_case-nonlesional_top21_nares-source.tsv...
Processing ../Analyses/FEAST/4_case-nonlesional_top21_skin-source.tsv...
Processing ../Analyses/FEAST/5_healthy_top21_nares-source.tsv...
Processing ../Analyses/FEAST/6_healthy_top21_skin-source.tsv...
✅ Done! Saved:
- all_matched_contributions.tsv
- strong_matched_contributions.tsv


In [61]:
# Load the strong subject-matched contributions from the TSV on disk
df = pd.read_csv("../Analyses/FEAST/strong_matched_contributions.tsv", sep="\t")

# Single source of truth for the x-axis: key order is the bar-group order and
# the values are the tick labels, so labels cannot drift from the categories.
group_labels = {
    "case-lesional": "AD-L",
    "case-nonlesional": "AD-NL",
    "healthy": "H",
}

# Define custom colors for direction; key order also fixes the hue order
direction_palette = {
    "nares→skin": "#ffd17e",   # Yellow
    "skin→nares": "#947fff"    # Violet
}

# Count matches by group and direction
count_df = df.groupby(["Skin_Group", "Direction"]).size().reset_index(name="Count")

# Ensure all combinations are represented (even 0)
all_combos = pd.MultiIndex.from_product(
    [list(group_labels), list(direction_palette)],
    names=["Skin_Group", "Direction"],
)
count_df = count_df.set_index(["Skin_Group", "Direction"]).reindex(all_combos, fill_value=0).reset_index()

# Set style (before creating the figure so it applies) and figure size
sns.set(style="whitegrid", context="talk")
fig, ax = plt.subplots(figsize=(6, 6))

# Create barplot with explicit category and hue order
sns.barplot(
    data=count_df,
    x="Skin_Group",
    y="Count",
    hue="Direction",
    order=list(group_labels),
    hue_order=list(direction_palette),
    palette=direction_palette,
    ax=ax,
)

# Relabel x-axis ticks to AD-L, AD-NL, H. Tick positions are fixed first so
# the labels are guaranteed to land on the category positions 0..n-1.
ax.set_xticks(list(range(len(group_labels))))
ax.set_xticklabels(list(group_labels.values()), fontsize=18)

# Customize bar width: narrow each bar and shift it by half the width
# difference so it stays centered where seaborn originally placed it.
bar_width = 0.25
for bar in ax.patches:
    current_width = bar.get_width()
    diff = current_width - bar_width
    bar.set_width(bar_width)
    bar.set_x(bar.get_x() + diff / 2)

# Titles and labels
fig.suptitle("SourceTracking Matched Skin↔Nares", fontsize=20, y=0.96, x=0.55)
ax.set_title("Bacterial Transfer Within Individual Child", fontsize=16)
ax.set_ylabel("# of Subject-Matched Contributions (≥10%)", fontsize=16)
ax.set_xlabel("")
# One unit of headroom above the tallest bar
ax.set_ylim(0, count_df["Count"].max() + 1)
ax.legend(title="Direction (source→sink)", fontsize=14, title_fontsize=14)

# Save figure (create the output directory first so savefig cannot fail on a
# missing folder)
plot_path = "../Plots/Analysis_figures/FEAST/within_subject_transfer_summary.png"
os.makedirs(os.path.dirname(plot_path), exist_ok=True)
fig.tight_layout()
fig.savefig(plot_path, dpi=300)
print("✅ Plot saved as 'within_subject_transfer_summary.png'")


✅ Plot saved as 'within_subject_transfer_summary.png'
