In [1]:
%matplotlib inline
import qiime2 as q2

from biom import Table

import pandas as pd, numpy as np, seaborn as sns
import qiime2.plugins.taxa as taxa
import qiime2.plugins.feature_table as FT

In [2]:
cd trimmed-150nts/

/Users/yoshikivazquezbaeza/Documents/PDF/KnightLaboratory/HastyWater/trimmed-150nts


In [3]:
# Sample metadata as a DataFrame.
mf = (
    q2.Metadata
    .load('mapping-file.alpha.tsv')
    .to_dataframe()
)

# Feature table artifact viewed as a DataFrame (rows are samples, columns are
# features, as the per-sample sums computed later in the notebook show).
table = (
    q2.Artifact
    .load('table-deblur.qza')
    .view(pd.DataFrame)
)

In [4]:
# Auxiliary feature tables: the columns of `normalization` are collapsed into
# the spike reference, and the columns of `discardable` are removed from the
# main table in the next cell.
normalization = (
    q2.Artifact
    .load('feature-table.acinetos.normalization-features.qza')
    .view(pd.DataFrame)
)
discardable = (
    q2.Artifact
    .load('feature-table.acinetos.discardable-features.qza')
    .view(pd.DataFrame)
)

Collapse the normalization features into a single `spike-collapsed` column, then remove both the individual normalization features and the discardable features from the table.

In [5]:
# Per-sample total of all normalization features, stored as one reference
# column (assignment aligns on the sample index).
table['spike-collapsed'] = normalization.sum(axis=1)

# Remove the individual discardable and normalization feature columns.
# NOTE: this mutates `table` in place, so re-running this cell raises KeyError
# because the columns are already gone; reload `table` before re-running.
table.drop(columns=discardable.columns, inplace=True)
table.drop(columns=normalization.columns, inplace=True)

In [6]:
def normalize_by_sequence(row, reference=None):
    """Rescale one sample's counts so that the reference count becomes 10,000.

    Parameters
    ----------
    row : pd.Series
        Counts for a single sample, indexed by feature identifier.
    reference : label or list of labels
        Feature(s) in ``row`` whose (summed) count is used as the denominator.

    Returns
    -------
    pd.Series
        ``row * 10000 / ref`` cast to integer (fractions are truncated).

    Raises
    ------
    ValueError
        If the reference count is zero, since the scaling is then undefined.
    """
    ref = row[reference].sum()
    if ref == 0:
        raise ValueError(
            'Reference count is zero for sample %r; cannot normalize'
            % (getattr(row, 'name', None),))

    # Use the builtin ``int``: the ``np.int`` alias was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    return ((row * 10000) / ref).astype(int)

# Apply the per-sample rescaling row by row, using the collapsed spike column
# as the reference.
norm = table.apply(
    normalize_by_sequence,
    axis=1,
    reference='spike-collapsed',
)

After normalization, the reference (spike) count is the same in every sample, so the following operation should return 10,000.

In [7]:
# Average spike count per sample after normalization (total divided by the
# number of samples).
norm.loc[:, 'spike-collapsed'].sum() / norm.shape[0]

10000.0

Remove the spike altogether.

In [8]:
# The reference column has served its purpose; remove it in place.
norm.drop(columns=['spike-collapsed'], inplace=True)

Save as a new artifact and rarefy the table.

In [9]:
# Wrap the normalized DataFrame in a QIIME 2 artifact.
normalized_feature_table = q2.Artifact.import_data(
    'FeatureTable[Frequency]',
    norm,
)

In [10]:
# Write the normalized table to disk (relative to the current directory).
normalized_feature_table.save(
    'feature-table.normalized.qza'
)

'feature-table.normalized.qza'

In [11]:
# Total normalized count per sample, largest first.
(
    normalized_feature_table
    .view(pd.DataFrame)
    .sum(axis=1)
    .sort_values(ascending=False)
)

11282.d0          25806886.0
11282.d1.2          106908.0
11282.d0.spike       98998.0
11282.d1.1           86985.0
11282.d1.3           78540.0
11282.d2.5           69746.0
11282.d4.6           67380.0
11282.d3.6           64047.0
11282.d1.6           61873.0
11282.d1.8           58792.0
11282.d4.2           58435.0
11282.d2.8           58289.0
11282.d4.8           56487.0
11282.d2.9           56436.0
11282.d2.6           55528.0
11282.d3.8           53972.0
11282.d4.3           53510.0
11282.d1.9           52338.0
11282.d3.5           50527.0
11282.d3.1           50346.0
11282.d2.1           50313.0
11282.d1.5           50141.0
11282.d3.9           49549.0
11282.d3.2           49069.0
11282.d1.7           48721.0
11282.d4.9           48565.0
11282.d2.18          47863.0
11282.d2.27          46846.0
11282.d2.13          46594.0
11282.d3.3           46474.0
                     ...    
11282.d2.26          31781.0
11282.d2.14          31720.0
11282.d3.12          31623.0
11282.d3.21   

In [12]:
# Rarefy the normalized table to a depth of 18813.
# NOTE(review): rarefaction subsamples at random, so the result may differ
# between runs -- confirm whether this QIIME 2 version allows fixing a seed.
res = FT.methods.rarefy(
    table=normalized_feature_table,
    sampling_depth=18813,
)

In [13]:
# Write the rarefied table to disk; the file name records the depth used.
res.rarefied_table.save(
    'feature-table.even.18813.normalized.qza'
)

'feature-table.even.18813.normalized.qza'