In [1]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import biom
from biom import load_table
import numpy as np
from scipy.stats import spearmanr
from scipy.stats import mannwhitneyu
from statsmodels.stats.multitest import multipletests
import itertools
import matplotlib.patches
from scipy import stats
from scipy.stats import pearsonr



In [2]:
# Load the metadata
metadata_path = '../Data/Metadata/updated_clean_ant_skin_metadata.tab'
metadata = pd.read_csv(metadata_path, sep='\t')

metadata['#sample-id'] = metadata['#sample-id'].str.replace('_', '')
# Set Sample-ID as the index for the metadata dataframe 
metadata = metadata.set_index('#sample-id')


# Create group column based on case_type to simplify group names
metadata['group'] = metadata['case_type'].map({
    'case-lesional skin': 'skin-ADL',
    'case-nonlesional skin': 'skin-ADNL', 
    'control-nonlesional skin': 'skin-H',
    'case-anterior nares': 'nares-AD',
    'control-anterior nares': 'nares-H'
})

metadata

Unnamed: 0_level_0,PlateNumber,PlateLocation,i5,i5Sequence,i7,i7Sequence,identifier,Sequence,Plate ID,Well location,...,sex,enrolment_date,enrolment_season,hiv_exposure,hiv_status,household_size,o_scorad,FWD_filepath,REV_filepath,group
#sample-id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Ca009STL,1,A1,SA501,ATCGTACG,SA701,CGAGAGTT,SA701SA501,CGAGAGTT-ATCGTACG,1.010000e+21,A1,...,male,4/16/2015,Autumn,Unexposed,negative,4.0,40,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,skin-ADL
900221,1,B1,SA502,ACTATCTG,SA701,CGAGAGTT,SA701SA502,CGAGAGTT-ACTATCTG,1.010000e+21,B1,...,female,8/11/2015,Winter,Unexposed,negative,7.0,34,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,skin-ADL
Ca010EBL,1,C1,SA503,TAGCGAGT,SA701,CGAGAGTT,SA701SA503,CGAGAGTT-TAGCGAGT,1.010000e+21,C1,...,female,11/20/2014,Spring,Unexposed,negative,7.0,21,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,skin-ADL
900460,1,D1,SA504,CTGCGTGT,SA701,CGAGAGTT,SA701SA504,CGAGAGTT-CTGCGTGT,1.010000e+21,D1,...,female,9/23/2015,Spring,Unexposed,,4.0,40,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,skin-ADL
900051,1,E1,SA505,TCATCGAG,SA701,CGAGAGTT,SA701SA505,CGAGAGTT-TCATCGAG,1.010000e+21,E1,...,male,4/21/2015,Autumn,Unexposed,negative,7.0,41,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,skin-ADL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Ca006ONL2,6,H1,SA508,GACACCGT,SB701,CTCGACTT,SB701SA508,CTCGACTT-GACACCGT,1.010000e+21,H1,...,female,3/25/2015,Autumn,Unexposed,negative,3.0,34,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,skin-ADL
Ca006ONNL,6,F2,SA506,CGTGAGTG,SB702,CGAAGTAT,SB702SA506,CGAAGTAT-CGTGAGTG,1.010000e+21,F2,...,female,3/25/2015,Autumn,Unexposed,negative,3.0,34,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,skin-ADNL
Ca006ONNL2,6,H2,SA508,GACACCGT,SB702,CGAAGTAT,SB702SA508,CGAAGTAT-GACACCGT,1.010000e+21,H2,...,female,3/25/2015,Autumn,Unexposed,negative,3.0,34,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,skin-ADNL
Ca006ONPN,6,F3,SA506,CGTGAGTG,SB703,TAGCAGCT,SB703SA506,TAGCAGCT-CGTGAGTG,1.010000e+21,F3,...,female,3/25/2015,Autumn,Unexposed,negative,3.0,34,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,/Users/yac027/Gallo_lab/16S_AD_Dube_Dupont/ato...,nares-AD


In [3]:
# Read in table at ASV level
biom_path = '../Data/Tables/Absolute_Abundance_Tables/feature_table_with_tax_labels_Genus.biom'
# biom_path = '../Data/Tables/Absolute_Abundance_Tables/209766_filtered_by_prevalence_1pct_rare_Genus-ASV-non-collapse.biom'
biom_tbl = load_table(biom_path)
df = pd.DataFrame(biom_tbl.to_dataframe().T)

# delete the prefix from the index
df.index = df.index.str.replace('15564.', '')

# Get samples that don't start with 'skin' in metadata
skin_samples = metadata[metadata['group'].str.startswith('skin')].index

# Filter df to keep only skin samples
df_skin = df.loc[skin_samples]

# Map the 'pid' column from metadata to df_skin samples
df_skin = df_skin.assign(pid=metadata.loc[df_skin.index, 'pid'])

# Group by 'pid' and calculate mean for all numeric columns
df_skin = df_skin.groupby('pid').mean()


df_skin

Unnamed: 0_level_0,g__Streptococcus_ASV-1,g__Streptococcus_ASV-2,g__Corynebacterium_ASV-1,g__Corynebacterium_ASV-3,g___ASV-3,g__Bacillus_P_294101_ASV-2,g__Cutibacterium_ASV-1,g___ASV-18,g___ASV-28,g___ASV-25,...,g__Blautia_A_141781_ASV-7,g___ASV-358,g__Peptoniphilus_A_ASV-7,g___ASV-154,g___ASV-169,g__UBA952_ASV-1,g__Petroclostridium_ASV-1,g__Capnocytophaga_820690_ASV-4,g___ASV-202,g__Streptococcus_ASV-37
pid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Ca-0006-KM,3.0,1.0,0,0,0,0,2.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Ca-006-ON,6.5,0.5,0,0,0,0,7.75,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Ca-007-NK,0,0,0,0,0,0,0,3.0,0,0,...,0,0,0,0,0,0,0,0,0,0
Ca-008-AF,0,0,0,0,0,0,1.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Ca-008-HN,3.5,0,0,0,0,0,0.5,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Co-149-LM,3.0,1.0,0,0,0,0,2.0,1.0,0,0,...,0,0,0,0,0,0,0,0,0,0
Co-150-LN,0,0,0,0,0,0,7.0,1.0,1.0,0,...,0,0,0,0,0,0,0,0,0,0
Co-151-AN,1.0,0,0,0,0,0,4.0,5.0,0,0,...,0,0,0,0,0,0,0,0,0,0
Co-152-AF,2.0,1.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
# Read in table at ASV level
biom_path = '../Data/Tables/Absolute_Abundance_Tables/feature_table_with_tax_labels_Genus.biom'
# biom_path = '../Data/Tables/Absolute_Abundance_Tables/209766_filtered_by_prevalence_1pct_rare_Genus-ASV-non-collapse.biom'
biom_tbl = load_table(biom_path)
df = pd.DataFrame(biom_tbl.to_dataframe().T)

# delete the prefix from the index
df.index = df.index.str.replace('15564.', '')

# Get samples that don't start with 'skin' in metadata
nares_samples = metadata[metadata['group'].str.startswith('nares')].index

# Filter df to keep only skin samples
df_nares = df.loc[nares_samples]

# Map the 'pid' column from metadata to df_nares samples
df_nares = df_nares.assign(pid=metadata.loc[df_nares.index, 'pid'])

# Group by 'pid' and calculate mean for all numeric columns
df_nares = df_nares.groupby('pid').mean()

df_nares

Unnamed: 0_level_0,g__Streptococcus_ASV-1,g__Streptococcus_ASV-2,g__Corynebacterium_ASV-1,g__Corynebacterium_ASV-3,g___ASV-3,g__Bacillus_P_294101_ASV-2,g__Cutibacterium_ASV-1,g___ASV-18,g___ASV-28,g___ASV-25,...,g__Blautia_A_141781_ASV-7,g___ASV-358,g__Peptoniphilus_A_ASV-7,g___ASV-154,g___ASV-169,g__UBA952_ASV-1,g__Petroclostridium_ASV-1,g__Capnocytophaga_820690_ASV-4,g___ASV-202,g__Streptococcus_ASV-37
pid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Ca-006-ON,0,0,41.5,0,3.0,0,6.5,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Ca-007-NK,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Ca-008-HN,262.0,76.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Ca-009-ST,51.0,0,191.0,0,0,0,0,2.0,0,0,...,0,0,0,0,0,0,0,0,0,0
Ca-009-ZN,160.0,0,36.0,0,0,0,1.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Co-149-LM,129.0,115.0,17.0,10.0,0,0,0,0,0,2.0,...,0,0,0,0,0,0,0,0,0,0
Co-150-LN,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Co-151-AN,178.0,0,5.0,1.0,17.0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Co-152-AF,172.5,75.5,29.0,1.5,0,0,0,0,0,1.5,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Get indexes that appear in both dataframes
common_indexes = df_skin.index.intersection(df_nares.index)

# Filter both dataframes to only include common indexes
df_skin_common = df_skin.loc[common_indexes]
df_nares_common = df_nares.loc[common_indexes]

# Add the values from both dataframes
df_combined = df_skin_common + df_nares_common

df_combined


Unnamed: 0_level_0,g__Streptococcus_ASV-1,g__Streptococcus_ASV-2,g__Corynebacterium_ASV-1,g__Corynebacterium_ASV-3,g___ASV-3,g__Bacillus_P_294101_ASV-2,g__Cutibacterium_ASV-1,g___ASV-18,g___ASV-28,g___ASV-25,...,g__Blautia_A_141781_ASV-7,g___ASV-358,g__Peptoniphilus_A_ASV-7,g___ASV-154,g___ASV-169,g__UBA952_ASV-1,g__Petroclostridium_ASV-1,g__Capnocytophaga_820690_ASV-4,g___ASV-202,g__Streptococcus_ASV-37
pid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Ca-006-ON,6.5,0.5,41.5,0.0,3.0,0.0,14.25,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ca-007-NK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ca-008-HN,265.5,76.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ca-009-ST,58.5,0.0,191.0,0.0,0.0,0.0,6.5,2.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ca-009-ZN,167.0,0.0,36.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Co-149-LM,132.0,116.0,17.0,10.0,0.0,0.0,2.0,1.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Co-150-LN,0.0,0.0,0.0,0.0,0.0,0.0,7.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Co-151-AN,179.0,0.0,5.0,1.0,17.0,0.0,4.0,5.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Co-152-AF,174.5,76.5,29.0,1.5,0.0,0.0,0.0,0.0,0.0,1.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
def rclr_transform(df, pseudocount=1e-6):
    """
    Applies Robust Centered Log-Ratio (RCLR) transformation to a DataFrame.
    Zeros are ignored in the geometric mean calculation per sample.
    """
    # Convert sparse to dense if needed
    df = df.sparse.to_dense()

    # Replace 0 with np.nan to ignore in log and mean
    df_masked = df.replace(0, np.nan)

    # Apply log (with pseudocount only where needed)
    log_df = np.log(df_masked + pseudocount)

    # Subtract mean of each row (feature)
    rclr_df = log_df.sub(log_df.mean(axis=1, skipna=True), axis=0)

    return rclr_df



df_combined = rclr_transform(df_combined)
df_combined

Unnamed: 0_level_0,g__Streptococcus_ASV-1,g__Streptococcus_ASV-2,g__Corynebacterium_ASV-1,g__Corynebacterium_ASV-3,g___ASV-3,g__Bacillus_P_294101_ASV-2,g__Cutibacterium_ASV-1,g___ASV-18,g___ASV-28,g___ASV-25,...,g__Blautia_A_141781_ASV-7,g___ASV-358,g__Peptoniphilus_A_ASV-7,g___ASV-154,g___ASV-169,g__UBA952_ASV-1,g__Petroclostridium_ASV-1,g__Capnocytophaga_820690_ASV-4,g___ASV-202,g__Streptococcus_ASV-37
pid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Ca-006-ON,1.461292,-1.103655,3.315183,,0.688103,,2.246247,,,,...,,,,,,,,,,
Ca-007-NK,,,,,,,,-0.647995,,,...,,,,,,,,,,
Ca-008-HN,4.655161,3.404280,,,,,-1.619599,,,,...,,,,,,,,,,
Ca-009-ST,2.837385,,4.020631,,,,0.640160,-0.538494,,,...,,,,,,,,,,
Ca-009-ZN,4.034382,,2.499907,,,,0.015000,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Co-149-LM,3.876483,3.747272,1.826895,1.296266,,,-0.313171,-1.006318,,-0.313171,...,,,,,,,,,,
Co-150-LN,,,,,,,0.797662,-1.148247,-1.148247,,...,,,,,,,,,,
Co-151-AN,3.593336,,0.015389,-1.594048,1.239164,,-0.207755,0.015389,,,...,,,,,,,,,,
Co-152-AF,4.279805,3.455171,2.485176,-0.476654,,,,,,-0.476654,...,,,,,,,,,,


In [7]:
# Map o_scorad from metadata to df_combined based on pid
df_combined = df_combined.assign(o_scorad=metadata.groupby('pid')['o_scorad'].first().loc[df_combined.index])

# Remove rows where o_scorad is not a valid numeric value (None or NaN)
df_combined = df_combined.dropna(subset=['o_scorad'])

df_combined

Unnamed: 0_level_0,g__Streptococcus_ASV-1,g__Streptococcus_ASV-2,g__Corynebacterium_ASV-1,g__Corynebacterium_ASV-3,g___ASV-3,g__Bacillus_P_294101_ASV-2,g__Cutibacterium_ASV-1,g___ASV-18,g___ASV-28,g___ASV-25,...,g___ASV-358,g__Peptoniphilus_A_ASV-7,g___ASV-154,g___ASV-169,g__UBA952_ASV-1,g__Petroclostridium_ASV-1,g__Capnocytophaga_820690_ASV-4,g___ASV-202,g__Streptococcus_ASV-37,o_scorad
pid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Ca-006-ON,1.461292,-1.103655,3.315183,,0.688103,,2.246247,,,,...,,,,,,,,,,34
Ca-007-NK,,,,,,,,-0.647995,,,...,,,,,,,,,,22
Ca-008-HN,4.655161,3.404280,,,,,-1.619599,,,,...,,,,,,,,,,30
Ca-009-ST,2.837385,,4.020631,,,,0.640160,-0.538494,,,...,,,,,,,,,,40
Ca-010-EB,3.736792,-1.591082,3.260947,,3.508783,,1.544411,,,,...,,,,,,,,,,21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Ca-158-LC,3.681608,1.412925,4.072185,,3.555788,,0.314313,,,,...,,,,,,,,,,54
Ca-159-AS,4.699391,4.121313,,,-0.025781,,1.806800,,,,...,,,,,,,,,,32
Ca-160-LB,3.924813,3.812696,-0.670306,-1.363453,1.469760,,,,0.582457,,...,,,,,,,,,,10
Ca-161-OM,3.952776,0.369258,2.017916,,,,-0.141568,,,,...,,,,,,,,,,21


In [8]:
taxa_list = ['g__Streptococcus_ASV-1', 'g__Streptococcus_ASV-2', 'g__Staphylococcus_ASV-1', 'g__Micrococcus_ASV-1', 'g__Veillonella_A_ASV-1']
# Create a figure with 5 subplots
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.flatten()

# For each taxon, create a scatter plot with regression line
for i, taxon in enumerate(taxa_list):
    print(i, taxon)
    # Get data
    x = df_combined['o_scorad']
    print(x)
    y = df_combined[taxon]
    print(y)
    
    # Calculate correlation coefficient and p-value
    # mask = ~np.isnan(x) & ~np.isnan(y)
    r, p = stats.pearsonr(x, y)
    
    # Create scatter plot
    sns.regplot(data=df_combined, x='o_scorad', y=taxon, ax=axes[i])
    
    # Add correlation coefficient and p-value to plot
    axes[i].text(0.05, 0.95, f'r = {r:.2f}\np = {p:.3f}', 
                 transform=axes[i].transAxes,
                 verticalalignment='top')
    
    # Clean up axis labels
    axes[i].set_xlabel('SCORAD')
    axes[i].set_ylabel('RCLR Abundance')
    axes[i].set_title(taxon.split('__')[1].replace('_', ' '))

# Remove the empty subplot
axes[-1].remove()

# Adjust layout
plt.tight_layout()
plt.savefig('combined_skin-nares_severity.png', dpi=600)



0 g__Streptococcus_ASV-1
pid
Ca-006-ON    34
Ca-007-NK    22
Ca-008-HN    30
Ca-009-ST    40
Ca-010-EB    21
             ..
Ca-158-LC    54
Ca-159-AS    32
Ca-160-LB    10
Ca-161-OM    21
Co-109-AG    43
Name: o_scorad, Length: 101, dtype: object
pid
Ca-006-ON    1.461292
Ca-007-NK         NaN
Ca-008-HN    4.655161
Ca-009-ST    2.837385
Ca-010-EB    3.736792
               ...   
Ca-158-LC    3.681608
Ca-159-AS    4.699391
Ca-160-LB    3.924813
Ca-161-OM    3.952776
Co-109-AG    3.233835
Name: g__Streptococcus_ASV-1, Length: 101, dtype: float64


TypeError: unsupported operand type(s) for +: 'float' and 'str'