In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import seaborn as sns
import ipywidgets
from IPython.display import display
from collections import defaultdict

# Console/notebook rendering: 4 significant decimals for numpy without scientific
# notation, 3 decimals for pandas floats, and compact previews of large frames.
np.set_printoptions(suppress=True, precision=4)
pd.set_option("display.float_format", "{:.3f}".format)
pd.set_option("display.min_rows", 10)
pd.set_option("display.max_rows", 20)
pd.set_option("display.max_columns", 10)

# For reference, A4 landscape is (11.7, 8.27) inches; the default figure size set
# here, (16.54, 11.7), is twice that area, i.e. A3 landscape.
sns.set_theme(
    rc={"figure.figsize": (16.54, 11.7)},
    font_scale=1.10,
    palette="muted",
    style="dark",
)

# Labels of the third index level, positioned so that a boolean can index the list:
# SELECTORS[False] is "w/o" and SELECTORS[True] is "w/".
SELECTORS = ["w/o", "w/"]
# Label used for the margin entry (second level) and wildcard label for "any class".
MARGIN_SYMB = "Σ"
CLASS_SYMB = "*"
# Full 3-level key of the margin row, and of the margin column.
MARGIN_IDX = (CLASS_SYMB, MARGIN_SYMB, SELECTORS[1])
# Per-level `ascending` flags: first two levels ascending, third level descending.
SORT_ORDER = [True, True, False]

# DATASET_FILENAME = Path("results/activities_2022-01-29_16-33-05.csv")
DATASET_FILENAME = Path("results/pharmaco_chemistry_2022-03-17_16-43-57.csv")

# The CSV carries a 3-level index on both axes; both axes are sorted with SORT_ORDER.
dataset_with_margin = (
    pd.read_csv(DATASET_FILENAME, index_col=[0, 1, 2], header=[0, 1, 2])
    .sort_index(axis=1, ascending=SORT_ORDER)
    .sort_index(axis=0, ascending=SORT_ORDER)
)

# Cell at the intersection of the margin row and the margin column
# (presumably the grand total -- confirm against the file that produced the CSV).
N = dataset_with_margin.loc[MARGIN_IDX, MARGIN_IDX]

# Every row key / column key except the margin one.
all_comp_but_margin = pd.Series([key for key in dataset_with_margin.index if key != MARGIN_IDX])
all_acti_but_margin = pd.Series([key for key in dataset_with_margin.columns if key != MARGIN_IDX])

# Margin column restricted to the regular rows, margin row restricted to the regular columns.
comp_margin = dataset_with_margin.loc[all_comp_but_margin, MARGIN_IDX]
acti_margin = dataset_with_margin.loc[MARGIN_IDX, all_acti_but_margin]

# The table itself, with the margin row and margin column removed.
dataset = dataset_with_margin.loc[all_comp_but_margin, all_acti_but_margin]

display(dataset)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,abiotic,abiotic,abiotic,abiotic,abiotic,...,pharmaco,pharmaco,pharmaco,toxicity,toxicity
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,antioxidant,antioxidant,drought,drought,metal,...,sedative,wound,wound,toxicity,toxicity
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,w/o,w/,w/o,w/,w/o,...,w/,w/o,w/,w/o,w/
alkaloid,acridine,w/o,181187,63047,243137,1097,218950,...,2494,237393,6841,218279,25955
alkaloid,acridine,w/,2452,271,2721,2,2464,...,8,2644,79,2313,410
alkaloid,benzylamine,w/o,182861,63247,245009,1099,220732,...,2496,239207,6901,219845,26263
alkaloid,benzylamine,w/,778,71,849,0,682,...,6,830,19,747,102
alkaloid,colchicine,w/o,178008,63121,240034,1095,215672,...,2468,234432,6697,215632,25497
...,...,...,...,...,...,...,...,...,...,...,...,...,...
terpenoid/terpene,sesterterpene,w/,185,7,192,0,192,...,1,190,2,180,12
terpenoid/terpene,tetraterpene/carotenoid/xanthophyll,w/o,180613,55637,235503,747,211304,...,2499,229428,6822,210723,25527
terpenoid/terpene,tetraterpene/carotenoid/xanthophyll,w/,3026,7681,10355,352,10110,...,3,10609,98,9869,838
terpenoid/terpene,triterpene,w/o,179131,62141,240176,1096,215781,...,2480,234558,6714,215333,25939


In [2]:
# Show the column margin, the row margin and the margin/margin cell, in that order.
for margin_view in (acti_margin, comp_margin, N):
    display(margin_view)

Boilerplate : juste de la manipulation de dictionnaire/MultiIndex

In [3]:
def _names_by_class(multi_index):
    """Map each first-level label of a 3-level MultiIndex to its second-level labels.

    Parameters
    ----------
    multi_index : pandas.MultiIndex
        Index with three levels; the third one is dropped before grouping.

    Returns
    -------
    dict
        ``{class_label: sorted list of names}``, plus an entry ``CLASS_SYMB: []``.
    """
    grouped = defaultdict(set)
    for class_label, name in multi_index.droplevel(2):
        grouped[class_label].add(name)
    # The wildcard class offers no name of its own: to_slice() rejects any specific
    # name combined with CLASS_SYMB as the class.
    grouped[CLASS_SYMB] = set()
    # Convert to lists once and for all. The lists are sorted because iterating a set
    # of strings yields an order that changes between interpreter sessions (hash
    # randomisation), which would reshuffle the dropdown options at every kernel restart.
    # key=str keeps the ordering well defined even if a label is not a string.
    return {class_label: sorted(names, key=str) for class_label, names in grouped.items()}


# All chemical classes: the distinct labels of the first level of the row index.
comp_class = sorted(set(dataset.index.get_level_values(0)))
# Class -> list of the compounds belonging to that class.
get_comps = _names_by_class(dataset.index)

# Same two lookups for the activities, taken from the column index.
acti_class = sorted(set(dataset.columns.get_level_values(0)))
get_actis = _names_by_class(dataset.columns)


# get_comps


In [4]:
def to_slice(x_class=CLASS_SYMB, x_name=CLASS_SYMB):
    """Build the label slice selecting a (class, name) pair on a MultiIndex axis.

    CLASS_SYMB acts as a wildcard for either argument:

    * wildcard class and wildcard name -> ``slice(None, None)`` (everything);
    * given class, wildcard name       -> slice from ``(x_class,)`` to ``(x_class,)``;
    * given class, given name          -> slice from ``(x_class, x_name)`` to itself.

    Raises
    ------
    ValueError
        If a specific name is given together with the wildcard class, since a
        name can only refine an explicit class.
    """
    any_class = x_class == CLASS_SYMB
    any_name = x_name == CLASS_SYMB

    if any_class and not any_name:
        raise ValueError(f"x_name ({x_name}) should be a refinement of x_class wich is {CLASS_SYMB}")
    if any_class:
        return slice(None, None)

    # Same key on both ends: the slice covers exactly the entries under that prefix.
    bound = (x_class,) if any_name else (x_class, x_name)
    return slice(bound, bound)


In [5]:
# With .loc and the slices built by to_slice(), whole hierarchical groups of
# rows/columns are easy to reach.
# A notebook cell only renders its last expression, so each result is displayed
# explicitly; otherwise the first selection would be computed and silently dropped.
display(dataset.loc[to_slice("polyketide")])
display(comp_margin.xs(SELECTORS[1], level=2))


In [6]:
# Dropdown for the compound class; the wildcard CLASS_SYMB is listed first and preselected.
comp_class_widget = ipywidgets.Dropdown(
    description="C-Class",
    value=CLASS_SYMB,
    options=[CLASS_SYMB, *comp_class],
)

# Dropdown for the compound; its choices depend on the class currently selected above.
comp_widget = ipywidgets.Dropdown(
    description="Compound",
    value=CLASS_SYMB,
    options=[CLASS_SYMB, *get_comps[comp_class_widget.value]],
)

# Same pair of dropdowns for the activity class and the activity.
acti_class_widget = ipywidgets.Dropdown(
    description="A-Class",
    value=CLASS_SYMB,
    options=[CLASS_SYMB, *acti_class],
)

acti_widget = ipywidgets.Dropdown(
    description="Activity",
    value=CLASS_SYMB,
    options=[CLASS_SYMB, *get_actis[acti_class_widget.value]],
)

# Margins restricted to the SELECTORS[1] ("w/") entries; their extrema bound the sliders.
comp_margin_w = comp_margin.xs(SELECTORS[1], level=2)
acti_margin_w = acti_margin.xs(SELECTORS[1], level=2)

comp_threshold_widget = ipywidgets.IntSlider(step=10, min=comp_margin_w.min(), max=comp_margin_w.max())
acti_threshold_widget = ipywidgets.IntSlider(step=10, min=acti_margin_w.min(), max=acti_margin_w.max())


def update_compounds(*args):
    """Reload the compound dropdown with the compounds of the currently selected class.

    The arguments passed by the widget machinery are ignored; the selected class is
    read from ``comp_class_widget`` directly.
    """
    selected_class = comp_class_widget.value
    comp_widget.options = [CLASS_SYMB, *get_comps[selected_class]]


def update_actis(*args):
    """Reload the activity dropdown with the activities of the currently selected class.

    The arguments passed by the widget machinery are ignored; the selected class is
    read from ``acti_class_widget`` directly.
    """
    selected_class = acti_class_widget.value
    acti_widget.options = [CLASS_SYMB, *get_actis[selected_class]]


# Refresh each name dropdown whenever the value of its class dropdown changes.
comp_class_widget.observe(update_compounds, names="value")
acti_class_widget.observe(update_actis, names="value")


def _margin_mask(labels, margin, threshold):
    """Flag the entries of `labels` whose SELECTORS[1] margin is at least `threshold`.

    `labels` is a 3-level MultiIndex (one axis of the frame being filtered) and
    `margin` a Series keyed by the same three levels. The result is a boolean numpy
    array with one element per entry of `labels`, so it can be used positionally.
    """
    totals = margin.xs(SELECTORS[1], level=2)
    # Look the total up for every entry of `labels` via its first two levels; entries
    # sharing those levels get the same total. A label missing from `totals` becomes
    # NaN, which compares False and is therefore filtered out.
    return (totals.reindex(labels.droplevel(2)) >= threshold).to_numpy()


def filter_display(comp_class, comp, acti_class, acti, comp_threshold, acti_threshold):
    """Display the part of `dataset` matching the widget selections.

    Parameters
    ----------
    comp_class, comp : str
        Class and name selecting the rows; CLASS_SYMB is the wildcard (see to_slice).
    acti_class, acti : str
        Class and name selecting the columns; CLASS_SYMB is the wildcard.
    comp_threshold, acti_threshold : int
        Minimal SELECTORS[1] margin a row (resp. column) must reach to be kept.

    Raises
    ------
    ValueError
        Propagated from to_slice() when a name is given with the wildcard class.
    """
    df = dataset.loc[to_slice(comp_class, comp), to_slice(acti_class, acti)]
    # The margins, once reduced with xs(level=2), are keyed on two levels and cover the
    # whole dataset, whereas `df` keeps three levels and may be only a slice of it:
    # a boolean Series built directly from the margins cannot be aligned with `df`.
    # The masks are therefore computed per row / per column of `df` itself.
    keep_rows = _margin_mask(df.index, comp_margin, comp_threshold)
    keep_cols = _margin_mask(df.columns, acti_margin, acti_threshold)
    # Positional boolean selection: no label alignment involved.
    display(df.iloc[keep_rows, keep_cols])


# Each key is the name of a filter_display parameter, bound to the widget that
# supplies its value.
filter_controls = {
    "comp_class": comp_class_widget,
    "comp": comp_widget,
    "acti_class": acti_class_widget,
    "acti": acti_widget,
    "comp_threshold": comp_threshold_widget,
    "acti_threshold": acti_threshold_widget,
}
ipywidgets.interact(filter_display, **filter_controls)


interactive(children=(Dropdown(description='C-Class', options=('*', 'alkaloid', 'phenolic compound', 'polyketi…