In [85]:
'''
Realtive abundance plots
Gets the data from the mesocosm experiment and plot the relative abundance of all species as a stackplot
and the relative abundance of a specific taxon in a time course
'''

'\nRealtive abundance plots\nGets the data from the mesocosm experiment and plot the relative abundance of all species as a stackplot\nand the relative abundance of a specific taxon in a time course\n'

In [86]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib qt

In [87]:
def find_indices(list_to_check, item_to_find):
    array = np.array(list_to_check)
    indices = np.where(array == item_to_find)[0]
    return list(indices)

In [88]:
def plot_relative_abundance(abundance, blue_dots, red_dots):
    fig, ax = plt.subplots(figsize=(5,2), dpi = 300)
    y = abundance.sort_index()
    x = list(y.index)
    x = [float(n) for n in x]
    
    markers_on=list(np.where(np.isin(x,blue_dots)))
    markers_on2=list(np.where(np.isin(x,red_dots)))
    ax = plt.plot(x,list(y),
                   '-bo',  c='#D5BB67', mfc='blue', mec='k'
         #         ,color = '#D5BB67'
                  ,markevery=markers_on
                 )
    plt.plot(x,list(y),
               '-bo',  linestyle='',c='#D5BB67', mfc='red', mec='k'
     #         ,color = '#D5BB67'
              ,markevery=markers_on2)
    plt.xlabel("Time (days)")
    plt.ylabel("Relative abundance")
   # plt.ylim(0, 0.0)
    plt.xlim(0,max(x))

In [89]:
def get_relative_abundance(abundance,taxonomy, taxa_dict):
    #fig, ax = plt.subplots(figsize=(5,2), dpi = 300)
    list_y = []
    df = pd.DataFrame(index = abundance.index, columns = taxa_dict.keys())
    for key,value in taxa_dict.items():
        asv_data = list(taxonomy[taxonomy[value] ==key].index)
        df[key] = abundance[asv_data].sum(axis = 1).sort_index()
    return df

In [90]:
dict_taxa = {'Bacillariophyta': 'Class',
            'Labyrinthulea': 'Class',
            'Katablepharidaceae': 'Class',
            'Prymnesiophyceae': 'Class',
            'Dinoflagellata': 'Division',
            'Cercozoa': 'Division',
            'Chrysophyceae': 'Class',
            'MAST': 'Class',
            'Ciliophora': 'Division'}


In [91]:
dict_colors = {'Bacillariophyta': '#8C613C',
                'Labyrinthulea':'black',
                'Katablepharidaceae': '#D5BB67',
                'Prymnesiophyceae': '#DC7EC0',
                'Dinoflagellata': '#EE854A',
                'Cercozoa': '#6ACC64',
                'Chrysophyceae': '#4878D0',
                'MAST': '#D65F5F',
                'Ciliophora': '#82C6E2',
                'Other eukaryotes': 'grey'}

In [92]:
def get_relative_abundances(ps_core_sample,ps_core_otu,ps_core_taxonomy,bag,taxa):
    bag4_dict = ps_core_sample[ps_core_sample['Bag'] == bag]['TimePoint'].to_dict()
    ps_core_otu_b4 = ps_core_otu.loc[bag4_dict.keys()]

    # exclude the metazoans from the analysis
    list_metazoa = list(ps_core_taxonomy[ps_core_taxonomy['Division'] =='Metazoa'].index)
    ps_core_otu_nometazoa = ps_core_otu_b4.drop(list_metazoa, axis = 1)
    relative_abundance_nometazoa = ps_core_otu_nometazoa.div(ps_core_otu_nometazoa.sum(axis=1), axis=0)
    relative_abundance_nometazoa.index = relative_abundance_nometazoa.index.map(bag4_dict)

    relative_all = get_relative_abundance(relative_abundance_nometazoa,ps_core_taxonomy, dict_taxa)
    
    relative_all['Other eukaryotes'] = 1-relative_all.sum(axis = 1)
    relative_all = relative_all.sort_index()
    relative_all = relative_all.iloc[:, ::-1]
    # get the asv of the only the Katablepharidaceae
    asv_taxa = list(ps_core_taxonomy[ps_core_taxonomy['Class'] == taxa].index)
    
    return relative_all, asv_taxa

In [93]:
def plot_all(relative_all):
    fig, ax = plt.subplots(figsize=(5,2), dpi = 300)

    # Define your time course with irregular time points and starting point
    time_course = relative_all.index.tolist()


    # Define your data as a list of lists, where each inner list corresponds to the frequency of a variable at each time point
    data = relative_all.T.values.tolist()

    # Create a stacked bar plot

    for n in range(len(data)):
        i = relative_all.columns[n]

        bottom = np.sum(data[:n], axis=0)
        ax.bar(time_course, list(relative_all[i]), color=dict_colors[i], bottom=bottom,width=0.4)

    # Set the axis labels and title

    # Set the x-axis limits and ticks
    ax.set_xlim([-0.3, 24])
    ax.set_ylim([0, 1])

    ax.set_xticks([0,2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24])
    ax.set_xticklabels([0,2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24])

    ax.set_ylabel("Relative abundance", fontsize = 10)
    ax.set_xlabel("Time (days)", fontsize = 10)
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[::-1], labels[::-1],
            #  loc = 'upper center',
       bbox_to_anchor=(0.028,1), 
               fontsize = 8,
                  ncol = 3,
              columnspacing = 0.5,
                   frameon=False,
                  handletextpad = 0.3,
                  labelspacing = 0.2)

    # Show the plot
    return
    plt.show()


In [94]:

def main():
    # path to mesocosm files
    path = '/Users/amirf/Dropbox (Weizmann Institute)/scGVDB/pipeline_files/'
    # name of OTU file 
    ps_core_otu = pd.read_table(path + "ps_core_otu_table.txt", sep = " ")
    # name of the table containing sample names and what day and bag they represent
    ps_core_sample = pd.read_table(path + "ps_core_sample_data.txt", sep = " ")
    # name of the table containing taxonomy data and their corresponding ASV number
    ps_core_taxonomy = pd.read_table(path + "ps_core_taxonomy_table.txt", sep = " ")
    
    bag = '4'
    taxa = 'Katablepharidaceae'
    
    relative_all, asv_taxa = get_relative_abundances(ps_core_sample,ps_core_otu,ps_core_taxonomy,
                                                     bag = bag,
                                                     taxa = taxa)
    
    plot_all(relative_all)
    
    # plots the relative abundance of one taxon. mark sampling time points (for single-cell)
    # as blue dots and an interesting time point in red
    sampling_tp = [13,15,19]
    interest_tp = [20]
    
    plot_relative_abundance(relative_all[taxa],sampling_tp,interest_tp)


In [95]:
if __name__ == "__main__":
    main()

  ax = plt.plot(x,list(y),
  plt.plot(x,list(y),
  plt.plot(x,list(y),
