In [7]:
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
import numpy as np
import pandas as pd
from scipy.spatial.distance import braycurtis

In [8]:
# Load the per-sample metadata and the per-sample data table; both are indexed
# by the "sample" column so they can be joined on it later.
metadata_df = pd.read_csv("train_metadata.csv", index_col="sample")
data_df = pd.read_csv("train_data.csv", index_col="sample")

# Replace the collection date with an integer day count since 1970-01-01 so
# that differences between two samples are plain day counts.
collection_timestamps = pd.to_datetime(metadata_df["collection_date"])
metadata_df["collection_date"] = (collection_timestamps - pd.Timestamp("1970-01-01")).dt.days

In [13]:
# Attach each sample's baboon id and collection day to its data row, then order
# the rows so that consecutive rows of one baboon are consecutive in time.
microbiom_with_sample_data = data_df.join(metadata_df[["baboon_id", "collection_date"]], how="inner")
microbiom_with_sample_data = microbiom_with_sample_data.sort_values(["baboon_id", "collection_date"])

# Extract the columns once instead of slicing a one-row DataFrame per iteration.
# The join appends "baboon_id" and "collection_date" as the last two columns,
# so everything before them is the vector compared with Bray-Curtis.
abundances = microbiom_with_sample_data.iloc[:, :-2].to_numpy(dtype=float)
baboon_ids = microbiom_with_sample_data["baboon_id"].to_numpy()
collection_days = microbiom_with_sample_data["collection_date"].to_numpy()

ls = []     # Bray-Curtis distance between each pair of consecutive samples
days = []   # number of days separating the two samples of each pair
label = []  # baboon id each pair belongs to
for idx in range(len(microbiom_with_sample_data) - 1):
    # Only compare neighbouring rows that belong to the same baboon.
    if baboon_ids[idx] == baboon_ids[idx + 1]:
        ls.append(braycurtis(abundances[idx], abundances[idx + 1]))
        label.append(baboon_ids[idx])
        days.append(collection_days[idx + 1] - collection_days[idx])

# Arrays are needed downstream for boolean-mask filtering by day range.
ls = np.array(ls)
days = np.array(days)

In [14]:

app = Dash(__name__)

# Slider bounds are the smallest and largest gap (in days) between consecutive
# samples; they are computed once and shared by both sliders.
day_min = int(days.min())
day_max = int(days.max())
day_mean = days.mean()

# Initial window: halfway between each bound and the mean gap. Both midpoints
# are guaranteed to lie inside [day_min, day_max], so the sliders never start
# with an out-of-range value.
initial_min_days = int((day_mean + day_min) / 2)
initial_max_days = int((day_mean + day_max) / 2)


def _endpoint_marks():
    """Return a fresh marks dict labelling only the two ends of a slider."""
    return {day_min: f'{days.min()}', day_max: f'{days.max()}'}


# NOTE(review): the heading text is kept as-is, but it does not describe the
# Bray-Curtis histogram rendered below — consider renaming it.
app.layout = html.Div([
    html.H4('Interactive normal distribution'),
    dcc.Graph(id="graph"),
    html.P("Min days:"),
    dcc.Slider(id="min days", min=day_min, max=day_max, value=initial_min_days,
               marks=_endpoint_marks()),
    html.P("Max days:"),
    dcc.Slider(id="max days", min=day_min, max=day_max, value=initial_max_days,
               marks=_endpoint_marks()),
])


@app.callback(
    Output("graph", "figure"),
    Input("min days", "value"),
    Input("max days", "value"),
)
def display_color(min, max):
    """Redraw the distance histogram for the day window chosen on the sliders.

    Args:
        min: Current value of the "min days" slider (inclusive lower bound).
        max: Current value of the "max days" slider (inclusive upper bound).

    Returns:
        A Plotly histogram figure of the entries of ``ls`` whose matching
        entry in ``days`` lies within ``[min, max]``.
    """
    # Note: the parameter names shadow the builtins inside this function only.
    in_window = np.logical_and(days >= min, days <= max)
    histogram = px.histogram(ls[in_window])
    histogram.update_layout(
        title=f"Bray Curtis Distance Distribution min days: {min} max days: {max}"
    )
    return histogram
# Start the Dash server for the app defined above, with debug mode enabled.
# NOTE(review): newer Dash releases appear to favour `app.run(...)` over
# `run_server` — confirm against the installed Dash version before changing.
app.run_server(debug=True)