In [None]:
import warnings

import numpy as np
import pandas as pd

#qiime2
import qiime2
from qiime2 import Artifact, Metadata
from qiime2.plugins.feature_table.methods import rarefy
from qiime2.plugins.feature_table.visualizers import summarize

#CLR transformation
from skbio.stats.composition import clr
from scipy.stats import gmean

In [None]:
# Install a blanket "ignore" filter so no warning of any category is shown
# for the rest of the session (this also hides numpy RuntimeWarnings such as
# divide-by-zero in log).
warnings.simplefilter('ignore')

In [None]:
# Load the male feces table from disk, parsed as comma-separated text
# (pandas' default delimiter, spelled out here).
raw_data = pd.read_csv(filepath_or_buffer='raw_male_feces.csv', sep=',')

### rarefaction

In [None]:
#transform data to biom to use it in qiime2
# The shell command writes raw_male_feces.biom (HDF5) next to the input file.
# NOTE(review): `biom convert` normally expects a tab-delimited classic table,
# while the same file is read with pandas' default comma separator earlier in
# the notebook — confirm which delimiter raw_male_feces.csv actually uses.
!biom convert -i raw_male_feces.csv -o raw_male_feces.biom --table-type="OTU table" --to-hdf5
#import to qiime2 format
# The resulting artifact is typed FeatureTable[Frequency].
unrarefied_table = Artifact.import_data("FeatureTable[Frequency]", 'raw_male_feces.biom')

In [None]:
#visualize table to select suitable cutoff
# summarize() is run on the unrarefied table; the visualization is left as the
# cell's last expression so the notebook renders it inline.
SUMMARY = summarize(unrarefied_table)
SUMMARY.visualization

In [None]:
#rarefy table
# The sampling depth is hard-coded to 26500.
# NOTE(review): the trailing 16000 looks like an alternative depth that was
# considered — confirm which value is intended.
# NOTE(review): no random seed is set in this notebook, so the rarefied counts
# may differ between runs — confirm whether reproducibility is required.
rarefy_result = rarefy(table=unrarefied_table, sampling_depth=26500) #16000
rarefied_table = rarefy_result.rarefied_table
#view the rarefied table as a pandas DataFrame (it is written to disk in the next cell)
rarefied_table_df = rarefied_table.view(pd.DataFrame)

In [None]:
# Persist the rarefied table; the file is tab-separated despite its .csv name.
rarefied_table_df.to_csv(path_or_buf='male_rarefied_df.csv', sep='\t')

### clr transformation

In [None]:
# Re-bind raw_data to the unrarefied qiime2 table viewed as a DataFrame.
# This replaces the frame that was read from raw_male_feces.csv earlier in the
# notebook; the CLR and rclr cells below work on this view.
raw_data = unrarefied_table.view(pd.DataFrame)

In [None]:
# Centered log-ratio (CLR) transform, one composition per ROW of the table.
#
# The previous `df_pseudocount.apply(clr)` used apply's default axis=0, which
# hands each *column* to clr and therefore centred every feature across the
# rows. The rclr cell of this notebook works row-wise (axis=1), and skbio's
# clr treats the rows of a 2-D input as compositions, so the whole matrix is
# passed in a single call instead.
# NOTE(review): assumes rows are samples and columns are features, as in the
# rclr cell — confirm.

# A small pseudocount keeps log() finite for zero counts.
df_pseudocount = raw_data + 1e-5

# reshape() guards against clr squeezing a single-row table down to 1-D.
clr_values = clr(df_pseudocount.to_numpy()).reshape(df_pseudocount.shape)
clr_df = pd.DataFrame(
    clr_values,
    index=df_pseudocount.index,
    columns=df_pseudocount.columns,
)

# Values below the row's geometric-mean centre (negative CLR) are floored at 0.
clr_df[clr_df < 0] = 0

In [None]:
# Persist the CLR table; the file is tab-separated despite its .csv name.
clr_df.to_csv(path_or_buf='male_clr_df.csv', sep='\t')

### rclr transformation

In [None]:
def rclr(x):
    """Robust centered log-ratio (rclr) transform of one composition.

    The geometric mean is computed over the non-zero entries of ``x`` only,
    and every entry is then expressed as ``log(value / geometric_mean)``.

    Parameters
    ----------
    x : pandas.Series or numpy.ndarray
        One vector of abundances (the notebook applies this function to each
        row of the table with ``axis=1``).

    Returns
    -------
    pandas.Series or numpy.ndarray
        ``log(x / g)`` in the same container type as ``x`` (a Series keeps its
        index). Zero entries come out as ``-inf``. If ``x`` has no non-zero
        entry at all, every element of the result is NaN.
    """
    values = np.asarray(x, dtype=float)
    nonzero = values[values != 0]

    # With no non-zero entry there is nothing to centre on: use NaN directly
    # instead of asking gmean for the mean of an empty sequence.
    g = gmean(nonzero) if nonzero.size else np.nan

    # log(0) = -inf is the expected result for zero entries, so numpy's
    # divide/invalid warnings are silenced for this expression only.
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.log(x / g)

# Run rclr on every row of the table, then replace every negative value
# (including the -inf produced for zero counts) with 0.
rclr_rows = raw_data.apply(rclr, axis='columns')
rclr_df = rclr_rows.mask(rclr_rows < 0, 0)

In [None]:
# Persist the rclr table; the file is tab-separated despite its .csv name.
rclr_df.to_csv(path_or_buf='male_rclr_df.csv', sep='\t')