In [5]:
#base
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import warnings
import random
#keras
import keras.backend as K
import tensorflow as tf 
#sklearn
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler, RobustScaler, QuantileTransformer, PowerTransformer
from sklearn.model_selection import train_test_split

from sklearn.decomposition import PCA
from scipy.stats import pearsonr
from scipy.stats.mstats import gmean

In [3]:
# Silence every Python warning for the rest of the session. This keeps the
# output short, but it also hides deprecation notices from the imported libraries.
warnings.filterwarnings('ignore')
# Apply seaborn's default theme to the figures drawn later in the notebook.
sns.set_theme()

In [8]:
# Move the working directory one level up; the relative CSV path read below is
# resolved from there.
# NOTE(review): a relative `cd` is not idempotent - re-running this cell moves up
# one more level each time.
%cd ..

/home/MCB/zkarwowska/AE


In [9]:
# Number of leading rows used for training; all remaining rows form the test set.
TRAIN_SIZE = 220

# The first CSV column is used as the row index.
df = pd.read_csv('rarefied_double_interpolated_feces_male_otu.csv', index_col = [0])

# Split by row position. The test slice starts exactly where the training slice
# ends, so no row falls between the two sets.
x_train = df.iloc[:TRAIN_SIZE]
x_test = df.iloc[TRAIN_SIZE:]

In [10]:
def filter_rare_bacteria(df, max_zero_count=250):
    """Drop the columns of ``df`` that contain too many zero entries.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to filter. Zeros are counted per column over all rows.
    max_zero_count : int, default 250
        A column is dropped when it holds strictly more than this many zeros.
        This is an absolute count, so it should be chosen with the number of
        rows of ``df`` in mind.

    Returns
    -------
    pandas.DataFrame
        A new frame without the dropped columns; ``df`` itself is not modified.
    """
    # Summing the boolean mask counts the zeros in each column.
    zero_counts = (df == 0).sum(axis=0)
    rare_bacteria_cols = zero_counts[zero_counts > max_zero_count].index

    return df.drop(columns=rare_bacteria_cols)

In [11]:
# Drop the columns of the full table `df` that contain more than 250 zero entries.
# NOTE(review): `x_train` / `x_test` were sliced from the unfiltered `df`, not from
# `filtered_df` - confirm which table the later cells are meant to use.
filtered_df = filter_rare_bacteria(df)
# Optional export of the filtered table (disabled).
#filtered_df.to_csv('improve_latent/filtered_data.csv')

### declare model

In [12]:
#original model
def ae_svar(normalized_train, normalized_test, raw_train=None, epochs=50, batch_size=16):
    """Build and fit an autoencoder with an extra latent-to-latent ("svar") branch.

    Two objectives are trained jointly on consecutive rows:

    * reconstruction: row ``i`` -> encoder -> decoder -> row ``i``
    * one-step prediction: row ``i`` -> encoder -> ``svar_layer_1`` -> decoder
      -> row ``i + 1``

    The Huber losses are computed inside the graph, multiplied by the element
    weights and emitted as the outputs of the training model. Fitting those
    outputs against all-zero targets with MAE therefore minimises the weighted
    losses directly.

    Parameters
    ----------
    normalized_train, normalized_test : 2-D array-like, (n_rows, n_features)
        Scaled data. Rows must be ordered, because row ``i + 1`` is used as the
        prediction target of row ``i``.
    raw_train : 2-D array-like, optional
        Unscaled training data with the same shape as ``normalized_train``.
        Entries equal to zero get loss weight 0, all other entries weight 2.
        When omitted, the notebook-level variable ``x_train`` is used.
    epochs : int, default 50
        Number of training epochs.
    batch_size : int, default 16
        Mini-batch size.

    Returns
    -------
    ae_result
        The object returned by ``Model.fit`` for the training model.
    encoder_model : tf.keras.Model
        Maps an input row to ``[latent, svar_layer_1(latent)]``.
    decoder_model : tf.keras.Model
        Maps a latent vector back to feature space. It is built from the same
        ``first_decoder`` and ``output1`` layers that are trained for the
        reconstruction objective.

    Raises
    ------
    ValueError
        If ``raw_train`` and ``normalized_train`` have different shapes.
    """
    if raw_train is None:
        # Backward-compatible fallback to the notebook-level training frame.
        raw_train = x_train

    # Loss weights: 0 where the raw value is zero, 2 elsewhere.
    train_weights = (np.asarray(raw_train) != 0) * 2
    if train_weights.shape != tuple(np.shape(normalized_train)):
        raise ValueError(
            "raw_train has shape {} but normalized_train has shape {}; both must "
            "describe the same rows and columns".format(
                train_weights.shape, tuple(np.shape(normalized_train))
            )
        )
    # Every validation element counts equally.
    test_weights = np.ones(np.shape(normalized_test), dtype=int)

    l2_norm = 0
    h_neurons = 256
    l_neurons = 64
    input_size = normalized_train.shape[1]

    def l2_regularizer():
        # A fresh regularizer object per layer, as in the original definition.
        return tf.keras.regularizers.L2(l2_norm)

    # ---- inputs ---------------------------------------------------------
    # The targets and weights are fed as inputs because the losses are
    # evaluated inside the graph.
    weights_input = tf.keras.Input(shape=(input_size,), name='weights_input')
    weights_svar = tf.keras.Input(shape=(input_size,), name='weights_svar')
    reconstruction_targets = tf.keras.Input(shape=(input_size,), name='reconstruction_targets')
    svar_targets = tf.keras.Input(shape=(input_size,), name='svar_targets')

    input_data_1 = tf.keras.Input(shape=(input_size,), name='input_data_1')   # training path
    input_data_2 = tf.keras.Input(shape=(input_size,), name='input_data_2')   # stand-alone encoder
    input_decoder = tf.keras.Input(shape=(l_neurons,), name='input_decoder')  # stand-alone decoder

    # ---- shared layers --------------------------------------------------
    first_layer = tf.keras.layers.Dense(
        h_neurons, activation='relu', kernel_regularizer=l2_regularizer(), name='first_layer')
    second_layer = tf.keras.layers.Dense(
        l_neurons, kernel_regularizer=l2_regularizer(), name='second_layer')
    svar_layer_1 = tf.keras.layers.Dense(
        l_neurons, kernel_regularizer=tf.keras.regularizers.L1L2(l1=0.01, l2=0.01), name='svar_layer_1')
    first_decoder = tf.keras.layers.Dense(
        h_neurons, activation='relu', kernel_regularizer=l2_regularizer(), name='first_decoder')
    # One reconstruction head, shared by the training graph and by decoder_model,
    # so the stand-alone decoder ends in trained weights instead of an untouched,
    # randomly initialised layer.
    reconstruction_head = tf.keras.layers.Dense(
        input_size, activation='linear', kernel_regularizer=l2_regularizer(), name='output1')
    svar_head = tf.keras.layers.Dense(
        input_size, activation='linear', kernel_regularizer=l2_regularizer(), name='output_svar')

    # ---- encoder --------------------------------------------------------
    latent_1 = second_layer(first_layer(input_data_1))
    latent_2 = second_layer(first_layer(input_data_2))

    # ---- latent one-step transition --------------------------------------
    latent_svar_train = svar_layer_1(latent_1)
    latent_svar_encoded = svar_layer_1(latent_2)

    # ---- decoder --------------------------------------------------------
    output1 = reconstruction_head(first_decoder(latent_1))
    output_svar = svar_head(first_decoder(latent_svar_train))
    output = reconstruction_head(first_decoder(input_decoder))

    # ---- in-graph weighted losses ----------------------------------------
    # Huber depends only on |y_true - y_pred|, so the argument order does not
    # change the value.
    final_output1 = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.NONE)(output1, reconstruction_targets)
    final_output_svar = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.NONE)(output_svar, svar_targets)
    final_output1 = tf.keras.layers.Multiply()([final_output1, weights_input])
    final_output_svar = tf.keras.layers.Multiply()([final_output_svar, weights_svar])

    # ---- models ---------------------------------------------------------
    svar_autoencoder = tf.keras.Model(
        [input_data_1, weights_input, weights_svar, reconstruction_targets, svar_targets],
        [final_output1, final_output_svar],
    )
    decoder_model = tf.keras.Model(input_decoder, output)
    encoder_model = tf.keras.Model(input_data_2, [latent_2, latent_svar_encoded])

    # The model outputs are already losses, so MAE against zeros equals the mean
    # weighted loss. The prediction term is weighted 60x the reconstruction term.
    svar_autoencoder.compile(
        loss=[tf.keras.losses.MeanAbsoluteError(), tf.keras.losses.MeanAbsoluteError()],
        loss_weights=[1.0, 60.0],
        optimizer='adam',
    )

    # Row i is paired with row i + 1: "[:-1]" is the current step, "[1:]" the next.
    train_inputs = [
        normalized_train[:-1], train_weights[:-1], train_weights[1:],
        normalized_train[:-1], normalized_train[1:],
    ]
    train_targets = [np.zeros_like(normalized_train[:-1]), np.zeros_like(normalized_train[1:])]

    validation_inputs = [
        normalized_test[:-1], test_weights[:-1], test_weights[1:],
        normalized_test[:-1], normalized_test[1:],
    ]
    # Zero targets here as well, so val_loss measures the same quantity as loss.
    validation_targets = [np.zeros_like(normalized_test[:-1]), np.zeros_like(normalized_test[1:])]

    ae_result = svar_autoencoder.fit(
        train_inputs,
        train_targets,
        epochs=epochs,
        batch_size=batch_size,
        shuffle=False,  # keep the row order of the series within each epoch
        verbose=0,
        validation_data=(validation_inputs, validation_targets),
    )

    return ae_result, encoder_model, decoder_model