In [5]:
from pathlib import Path
import pandas as pd
import numpy as np
import seaborn as sns
import filtre
import altair as alt


np.set_printoptions(precision=4, suppress=True)
pd.set_option("display.float_format", lambda x: "{:.3f}".format(x))
pd.set_option("display.max_columns", 10)
pd.set_option("display.max_rows", 20)
pd.set_option("display.min_rows", 10)

# (11.7, 8.27) = A4 landscape
sns.set_theme(style="dark", palette="muted", font_scale=1.10, rc={"figure.figsize": (16.54, 11.7)})


DATASET_FILENAME = Path("results/activities_2022-01-29_16-33-05.csv")
dataset = pd.read_csv(DATASET_FILENAME, index_col=[0, 1, 2], header=[0, 1, 2])

all_compounds = set(dataset.index.get_level_values(1))
all_activities = set(dataset.columns.get_level_values(1))
nb_acti = len(all_activities)
nb_comp = len(all_compounds)


sort_order = [True, True, False]
dataset.sort_index(axis=1, inplace=True, ascending=sort_order)
dataset.sort_index(axis=0, inplace=True, ascending=sort_order)


assert nb_comp * 2 == len(dataset.index)
assert nb_acti * 2 == len(dataset.columns)
dataset

filtre.keep_with_or_without(dataset, "w/")

Unnamed: 0_level_0,Unnamed: 1_level_0,abiotic,abiotic,abiotic,abiotic,abiotic,...,pharmaco,pharmaco,pharmaco,pharmaco,toxicity
Unnamed: 0_level_1,Unnamed: 1_level_1,antioxidant,drought,metal,salt,uv,...,obesity,rheumatism,sedative,wound,toxicity
alkaloid,acridine,266,2,257,163,80,...,3,0,8,77,402
alkaloid,benzylamine,71,0,165,80,23,...,34,0,6,19,102
alkaloid,colchicine,192,4,84,187,20,...,172,19,34,222,866
alkaloid,cyclopeptide,57,1,168,35,16,...,45,7,4,51,171
alkaloid,imidazole,1082,8,2507,1195,302,...,297,1,486,460,1768
...,...,...,...,...,...,...,...,...,...,...,...,...
terpenoid/terpene,polyterpene,0,0,0,0,0,...,0,0,0,0,2
terpenoid/terpene,sesquiterpene,863,17,72,59,39,...,49,0,30,124,341
terpenoid/terpene,sesterterpene,7,0,0,3,0,...,1,0,1,2,12
terpenoid/terpene,tetraterpene/carotenoid/xanthophyll,7587,345,592,513,392,...,418,1,3,97,828


In [6]:
testGraph = filtre.keep_with_or_without(dataset, "w/")

comp_class = list(testGraph.index.get_level_values(0))
comp = list(testGraph.index.get_level_values(1))

acti_class = list(testGraph.T.index.get_level_values(0))
acti = [list(testGraph.T.index.get_level_values(1))]*53

source = pd.DataFrame({
    "value": ([item for sublist in testGraph.values for item in sublist]),
    "acti": [f"{acti[i][y]}" for i in range(len(acti)) for y in range(len(acti[i]))],
    "com": [item for sublist in [comp]*33 for item in sublist],
})


source.to_json("vbar_data.json", orient="records")

source

Unnamed: 0,value,acti,com
0,266,antioxidant,acridine
1,2,drought,benzylamine
2,257,metal,colchicine
3,163,salt,cyclopeptide
4,80,uv,imidazole
...,...,...,...
1744,127,obesity,polyterpene
1745,2,rheumatism,sesquiterpene
1746,22,sedative,sesterterpene
1747,203,wound,tetraterpene/carotenoid/xanthophyll


In [8]:
min_value = 0
max_value = 1000

testGraph = filtre.keep_with_or_without(dataset, "w/")
comp = list(testGraph.index.get_level_values(1))

slider = alt.binding_range(min=min_value, max=max_value, step=1)
select_slider = alt.selection_single(name="slider", fields=['cutoff'],
                                   bind=slider, init={'cutoff': 0})


input_dropdown = alt.binding_select(options=list(comp), name='selection')
selection = alt.selection_single(name="selection", fields=['com'], bind=input_dropdown)

vbar_test = alt.Chart(source).mark_bar().encode(
    x='acti:N',
    # use :Q to force the column to be sort 
    y='value:Q',
).properties(
    height= 500
).add_selection(selection, select_slider).transform_filter(
    'selection.com == datum.com && datum.value >= slider.cutoff[0]'
)


vbar_test.save('json_chart/vbar.json')