In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import seaborn as sns
import filtre
import altair as alt


np.set_printoptions(precision=4, suppress=True)
pd.set_option("display.float_format", lambda x: "{:.3f}".format(x))
pd.set_option("display.max_columns", 10)
pd.set_option("display.max_rows", 20)
pd.set_option("display.min_rows", 10)

# (11.7, 8.27) = A4 landscape
sns.set_theme(style="dark", palette="muted", font_scale=1.10, rc={"figure.figsize": (16.54, 11.7)})


DATASET_FILENAME = Path("results/activities_2022-01-29_16-33-05.csv")
dataset = pd.read_csv(DATASET_FILENAME, index_col=[0, 1, 2], header=[0, 1, 2])

all_compounds = set(dataset.index.get_level_values(1))
all_activities = set(dataset.columns.get_level_values(1))
nb_acti = len(all_activities)
nb_comp = len(all_compounds)


sort_order = [True, True, False]
dataset.sort_index(axis=1, inplace=True, ascending=sort_order)
dataset.sort_index(axis=0, inplace=True, ascending=sort_order)


assert nb_comp * 2 == len(dataset.index)
assert nb_acti * 2 == len(dataset.columns)
dataset

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,abiotic,abiotic,abiotic,abiotic,abiotic,...,pharmaco,pharmaco,pharmaco,toxicity,toxicity
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,antioxidant,antioxidant,drought,drought,metal,...,sedative,wound,wound,toxicity,toxicity
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,w/o,w/,w/o,w/,w/o,...,w/,w/o,w/,w/o,w/
alkaloid,acridine,w/o,179092,62176,240191,1077,216324,...,2483,234561,6707,215604,25664
alkaloid,acridine,w/,2430,266,2694,2,2439,...,8,2619,77,2294,402
alkaloid,benzylamine,w/o,180754,62371,242046,1079,218089,...,2485,236360,6765,217161,25964
alkaloid,benzylamine,w/,768,71,839,0,674,...,6,820,19,737,102
alkaloid,colchicine,w/o,175968,62250,237143,1075,213101,...,2457,231656,6562,213018,25200
...,...,...,...,...,...,...,...,...,...,...,...,...,...
terpenoid/terpene,sesterterpene,w/,182,7,189,0,189,...,1,187,2,177,12
terpenoid/terpene,tetraterpene/carotenoid/xanthophyll,w/o,178534,54855,232655,734,208780,...,2488,226702,6687,208151,25238
terpenoid/terpene,tetraterpene/carotenoid/xanthophyll,w/,2988,7587,10230,345,9983,...,3,10478,97,9747,828
terpenoid/terpene,triterpene,w/o,177099,61285,237308,1076,213234,...,2469,231803,6581,212731,25653


In [2]:
testGraph = filtre.keep_with_or_without(dataset, "w/")
testGraph = filtre.by_row_name(testGraph, "alkaloid", "colchicine")
testGraph

abiotic   antioxidant    192
          drought          4
          metal           84
          salt           187
          uv              20
                        ... 
pharmaco  obesity        172
          rheumatism      19
          sedative        34
          wound          222
toxicity  toxicity       866
Name: (alkaloid, colchicine), Length: 33, dtype: int64

In [3]:
# get only the value of testGraph
# get only the name of row from testGraph



source = pd.DataFrame({
    'a': list(testGraph.index.get_level_values(1)),
    'b': list(testGraph.values),
    'test': list(testGraph.values)
})

source.b
source.a

0     antioxidant
1         drought
2           metal
3            salt
4              uv
         ...     
28        obesity
29     rheumatism
30       sedative
31          wound
32       toxicity
Name: a, Length: 33, dtype: object

In [4]:

alt.Chart(source).mark_bar().encode(
    x='a',
    y='b',
)

In [5]:
import altair as alt
from vega_datasets import data


slider = alt.binding_range(min=0, max=2000, step=10)
select_year = alt.selection_single(name="slider", fields=['cutoff'],
                                   bind=slider, init={'cutoff': 2000})

alt.Chart(source).mark_bar().encode(
    x='a',
    y='b',
    opacity=alt.condition(
        'datum.b < slider.cutoff[0]',
        alt.value(1), alt.value(0),
    )
).properties(
    width=700
).add_selection(
    select_year
).configure_facet(
    spacing=8
)

In [6]:

min_value = min(source.b)
max_value = max(source.b)

slider = alt.binding_range(min=min_value, max=max_value, step=10)
slider_test = alt.selection_single(name="slider", fields=['cutoff'],
                                   bind=slider, init={'cutoff': max_value})

slider2 = alt.binding_range(min=0, max=max_value, step=10)
slider_test2 = alt.selection_single(name="slider2", fields=['cutoff'],
                                   bind=slider, init={'cutoff': min_value})

vbar_test = alt.Chart(source).mark_bar().encode(
    x='a',
    y='b',
).properties(
    width=700
).add_selection(
).configure_facet(
    spacing=8
).add_selection(slider_test, slider_test2).transform_filter(
    'datum.b < slider.cutoff[0] &&'
    'datum.b > slider2.cutoff[0]'
)

vbar_test


In [7]:

data_w = filtre.keep_with_or_without(dataset, "w/")
data_w_o = filtre.keep_with_or_without(dataset, "w/o")
data_w = filtre.by_row_name(data_w, "alkaloid", "colchicine")
data_w_o = filtre.by_row_name(data_w_o, "alkaloid", "colchicine")

In [8]:
test = {}

keys = list(testGraph.index.get_level_values(1))
# add key to test
coms = list(testGraph.index.get_level_values(0))

test["key"] = keys
test["com"] = coms
test["value"] = list(data_w_o.values)
test["value2"] = list(data_w.values)
test = pd.DataFrame.from_dict(test)


input_dropdown = alt.binding_select(options=keys, name='Country')
selection = alt.selection_single(fields=['key'], bind=input_dropdown)


input_dropdown2 = alt.binding_select(options=list(set(coms)), name='Country')
selection2 = alt.selection_single(fields=['com'], bind=input_dropdown2)

color = alt.condition(selection,
                    alt.Color('key:N', legend=None),
                    alt.value('0'))
color2 = alt.condition(selection2,
                    alt.Color('com:N', legend=None),
                    alt.value('0'),
                    name="color")

point_test = alt.Chart(test).mark_point().encode(
    x='value',
    y='value2',
    color=color,
    tooltip=['com', 'key', 'value', 'value2']
    ).interactive(

    ).add_selection(
    selection,
    selection2
)

vbar_test  
point_test


In [9]:
test = {}

keys = list(testGraph.index.get_level_values(1))
# add key to test
coms = list(testGraph.index.get_level_values(0))

test["key"] = keys
test["com"] = coms
test["value"] = list(data_w_o.values)
test["value2"] = list(data_w.values)
test = pd.DataFrame.from_dict(test)






input_dropdown = alt.binding_select(options=list(set(coms)), name='selection')
selection = alt.selection_single(fields=['com'], bind=input_dropdown)

input_dropdown2 = alt.binding_select(options=keys, name='selection2')
selection2 = alt.selection_single(fields=['key'], bind=input_dropdown2)



color = alt.condition(selection | selection2,
                    alt.Color('key:N', legend=None) and alt.Color('com:N', legend=None),
                    alt.value('0'))


point_test = alt.Chart(test).mark_point().encode(
    x='value',
    y='value2',
    color=color,
    tooltip=['com', 'key', 'value', 'value2']
    ).interactive(

    ).add_selection(
    selection2,
    selection
)



In [10]:
vbar_test 

In [11]:
point_test

In [12]:
testGraph = filtre.keep_with_or_without(dataset, "w/")


source = pd.DataFrame({
    'acti': list(testGraph.index.get_level_values(1)),
    'com': list(testGraph.values),
    'com name': [list(testGraph.T.index.get_level_values(1))]*53,
    'com class': [list(testGraph.T.index.get_level_values(0))]*53,
    "value" : [10]*53,
})

source

Unnamed: 0,acti,com,com name,com class,value
0,acridine,"[266, 2, 257, 163, 80, 1, 0, 0, 11, 1, 0, 0, 8...","[antioxidant, drought, metal, salt, uv, antife...","[abiotic, abiotic, abiotic, abiotic, abiotic, ...",10
1,benzylamine,"[71, 0, 165, 80, 23, 0, 0, 0, 4, 6, 0, 0, 2, 0...","[antioxidant, drought, metal, salt, uv, antife...","[abiotic, abiotic, abiotic, abiotic, abiotic, ...",10
2,colchicine,"[192, 4, 84, 187, 20, 0, 0, 0, 47, 0, 0, 0, 9,...","[antioxidant, drought, metal, salt, uv, antife...","[abiotic, abiotic, abiotic, abiotic, abiotic, ...",10
3,cyclopeptide,"[57, 1, 168, 35, 16, 0, 0, 0, 18, 1, 0, 1, 13,...","[antioxidant, drought, metal, salt, uv, antife...","[abiotic, abiotic, abiotic, abiotic, abiotic, ...",10
4,imidazole,"[1082, 8, 2507, 1195, 302, 3, 0, 1, 53, 9, 0, ...","[antioxidant, drought, metal, salt, uv, antife...","[abiotic, abiotic, abiotic, abiotic, abiotic, ...",10
...,...,...,...,...,...
48,polyterpene,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[antioxidant, drought, metal, salt, uv, antife...","[abiotic, abiotic, abiotic, abiotic, abiotic, ...",10
49,sesquiterpene,"[863, 17, 72, 59, 39, 60, 5, 31, 130, 21, 0, 5...","[antioxidant, drought, metal, salt, uv, antife...","[abiotic, abiotic, abiotic, abiotic, abiotic, ...",10
50,sesterterpene,"[7, 0, 0, 3, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...","[antioxidant, drought, metal, salt, uv, antife...","[abiotic, abiotic, abiotic, abiotic, abiotic, ...",10
51,tetraterpene/carotenoid/xanthophyll,"[7587, 345, 592, 513, 392, 0, 0, 3, 309, 9, 4,...","[antioxidant, drought, metal, salt, uv, antife...","[abiotic, abiotic, abiotic, abiotic, abiotic, ...",10


In [13]:


min_value = 0
max_value = 5000

slider = alt.binding_range(min=0, max=10, step=0.1)
select_slider = alt.selection_single(name="slider", fields=['value'],
                                   bind=slider, init={'value': 5})

# slider2 = alt.binding_range(min=0, max=max_value, step=10)
# select_slider2 = alt.selection_single(name="slider2", fields=['value'],
#                                    bind=slider, init={'value': min_value})


# input_dropdown = alt.binding_select(options=list(source.acti), name='selection')
# selection = alt.selection_single(name="selection", fields=['com name'], bind=input_dropdown)

vbar_test = alt.Chart(source).mark_bar().encode(
    x='acti',
    y='value',
).properties(
    width=700
).interactive(

).configure_facet(
    spacing=80   
).add_selection(
    select_slider,
    #select_slider2, 
    #selection
).transform_filter(
    "true"
     #'selection.value > datum.value'
    #'datum.value > slider.cutoff[0]'
)

vbar_test

In [14]:
# get only the value of testGraph
# get only the name of row from testGraph

all_graph_data = []
testGraph = filtre.keep_with_or_without(dataset, "w/")

colum_class = list(testGraph.index.get_level_values(0))
colum_comp = list(testGraph.index.get_level_values(1))

rom_class = list(testGraph.T.index.get_level_values(0))
rom_activ = list(testGraph.T.index.get_level_values(1))

# get only the value of testGraph
# get only the name of row from testGraph




for i in range(len(colum_class)):
    temp_data = filtre.by_row_name(testGraph, colum_class[i], colum_comp[i])
    all_graph_data.append(
        pd.DataFrame({
            'a': rom_activ,
            'b': list(temp_data.values),
            }))
    



In [15]:
i = 0
for data in all_graph_data:
    min_value = min(data.b)
    max_value = max(data.b)
    print(max_value)

    slider = alt.binding_range(min=min_value, max=max_value, step=1)
    slider_test = alt.selection_single(name="slider", fields=['cutoff'],
                                    bind=slider, init={'cutoff': max_value})

    slider2 = alt.binding_range(min=0, max=max_value, step=1)
    slider_test2 = alt.selection_single(name="slider2", fields=['cutoff'],
                                    bind=slider, init={'cutoff': min_value})

    vbar_test = alt.Chart(data).mark_bar().encode(
        x='a',
        y='b',
    ).properties(
        width=700
    ).add_selection(
    ).configure_facet(
        spacing=1
    ).add_selection(slider_test, slider_test2).transform_filter(
        'datum.b < slider.cutoff[0] &&'
        'datum.b > slider2.cutoff[0]'
    )

    # save graph in html
    #vbar_test.save(f"test/{colum_comp[i]}.html")
    i+=1



1296


FileNotFoundError: [Errno 2] No such file or directory: 'test/acridine.html'