Load the dataset: 

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import seaborn as sns
import filtre

# Display settings: compact numpy output without scientific notation,
# 3-decimal floats in pandas, and truncated previews of large frames.
np.set_printoptions(precision=4, suppress=True)
for option_name, option_value in (
    ("display.float_format", "{:.3f}".format),
    ("display.max_columns", 10),
    ("display.max_rows", 20),
    ("display.min_rows", 10),
):
    pd.set_option(option_name, option_value)

# (16.54, 11.7) inches = A3 landscape; A4 landscape would be (11.7, 8.27).
figure_rc = {"figure.figsize": (16.54, 11.7)}
sns.set_theme(style="dark", palette="muted", font_scale=1.10, rc=figure_rc)


# The CSV carries three index levels on the rows and three header levels on
# the columns, hence the MultiIndex on both axes.
DATASET_FILENAME = Path("results/pharmaco_chemistry_cross_2022-05-19_17-18-20.csv")
dataset = pd.read_csv(DATASET_FILENAME, index_col=[0, 1, 2], header=[0, 1, 2])

# Distinct labels found on the second level of each axis.
all_compounds = set(dataset.index.get_level_values(1))
all_activities = set(dataset.columns.get_level_values(1))

# Levels 0 and 1 ascending, level 2 descending; columns are sorted first, then rows.
sort_order = [True, True, False]
dataset = dataset.sort_index(axis=1, ascending=sort_order).sort_index(
    axis=0, ascending=sort_order
)

dataset


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,*,abiotic,abiotic,abiotic,abiotic,...,pharmaco,pharmaco,pharmaco,toxicity,toxicity
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Σ,antioxidant,antioxidant,drought,drought,...,sedative,wound,wound,toxicity,toxicity
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,w/,w/o,w/,w/o,w/,...,w/,w/o,w/,w/o,w/
*,Σ,w/,250082,185802,64280,248944,1138,...,2511,243035,7047,223411,26671
alkaloid,acridine,w/o,247332,183327,64005,246196,1136,...,2503,240364,6968,221076,26256
alkaloid,acridine,w/,2750,2475,275,2748,2,...,8,2671,79,2335,415
alkaloid,benzylamine,w/o,249222,185015,64207,248084,1138,...,2505,242195,7027,222653,26569
alkaloid,benzylamine,w/,860,787,73,860,0,...,6,840,20,758,102
...,...,...,...,...,...,...,...,...,...,...,...,...,...
terpenoid/terpene,sesterterpene,w/,193,186,7,193,0,...,1,191,2,181,12
terpenoid/terpene,tetraterpene/carotenoid/xanthophyll,w/o,239238,182734,56504,238462,776,...,2507,232291,6947,213416,25822
terpenoid/terpene,tetraterpene/carotenoid/xanthophyll,w/,10844,3068,7776,10482,362,...,4,10744,100,9995,849
terpenoid/terpene,triterpene,w/o,244281,181206,63075,243146,1135,...,2489,237450,6831,218048,26233


Appliquer un filtre sur les colonnes et sur les lignes du dataset afin de retirer celles qui sont inférieures au seuil :

In [168]:
# Single threshold shared by the column filter and the row filter, so the two
# calls cannot drift apart when the value is tuned.
FILTER_THRESHOLD = 100_000

# `filtre` is a project-local module; presumably each call drops the
# columns / rows that fall below the threshold — verify against filtre.py.
dataset = filtre.column(dataset, FILTER_THRESHOLD)
dataset = filtre.row(dataset, FILTER_THRESHOLD)
dataset


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,abiotic,abiotic,abiotic,abiotic,abiotic,...,pharmaco,pharmaco,pharmaco,toxicity,toxicity
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,antioxidant,antioxidant,metal,metal,salt,...,sedative,wound,wound,toxicity,toxicity
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,w/o,w/,w/o,w/,w/o,...,w/,w/o,w/,w/o,w/
alkaloid,colchicine,w/o,175968,62250,213101,25117,225006,...,2457,231656,6562,213018,25200
alkaloid,colchicine,w/,5554,192,5662,84,5559,...,34,5524,222,4880,866
alkaloid,imidazole,w/o,166078,61360,204744,22694,215234,...,2005,221114,6324,203140,24298
alkaloid,imidazole,w/,15444,1082,14019,2507,15331,...,486,16066,460,14758,1768
alkaloid,indole,w/o,169465,61118,206864,23719,217901,...,2412,224146,6437,205889,24694
...,...,...,...,...,...,...,...,...,...,...,...,...,...
terpenoid/terpene,sesquiterpene,w/,4547,863,5338,72,5351,...,30,5286,124,5069,341
terpenoid/terpene,tetraterpene/carotenoid/xanthophyll,w/o,178534,54855,208780,24609,220503,...,2488,226702,6687,208151,25238
terpenoid/terpene,tetraterpene/carotenoid/xanthophyll,w/,2988,7587,9983,592,10062,...,3,10478,97,9747,828
terpenoid/terpene,triterpene,w/o,177099,61285,213234,25150,225044,...,2469,231803,6581,212731,25653


In [169]:
# Marker passed to the project-local helper. Judging by the displayed result,
# the "w/" / "w/o" level is gone afterwards on both axes.
# NOTE(review): this rebinds `dataset`, so re-running the cell feeds it the
# already reduced frame — confirm the helper tolerates that.
kept_marker = "w/"
dataset = filtre.KeepWithOrWithOut(dataset, kept_marker)
dataset


Unnamed: 0_level_0,Unnamed: 1_level_0,abiotic,abiotic,abiotic,abiotic,allelopathy,...,pharmaco,pharmaco,pharmaco,pharmaco,toxicity
Unnamed: 0_level_1,Unnamed: 1_level_1,antioxidant,metal,salt,uv,germination,...,cytotoxicity,obesity,sedative,wound,toxicity
alkaloid,colchicine,192,84,187,20,47,...,1257,172,34,222,866
alkaloid,imidazole,1082,2507,1195,302,53,...,2816,297,486,460,1768
alkaloid,indole,1324,1482,717,283,177,...,3683,341,79,347,1372
alkaloid,piperidine,577,292,280,66,7,...,1044,473,431,104,791
alkaloid,pyridine,1033,5330,1369,737,33,...,2941,262,811,393,1836
alkaloid,quinoline,560,1213,404,152,16,...,2005,175,54,613,1348
alkaloid,thiazole,863,511,4202,88,33,...,2729,292,106,165,1200
phenolic compound,flavonoids,25763,1764,617,1128,509,...,5403,758,120,911,3129
phenolic compound,phenol,27855,7791,1991,2614,782,...,5076,668,89,828,6648
phenolic compound,tannin,5276,918,191,214,162,...,1231,133,44,415,1556


Select the row of activity counts for a single compound (here, tannin):

In [170]:
# Row to extract, identified by its two index labels (class, then compound).
compound_class, compound_name = "phenolic compound", "tannin"

# The result is displayed below as a Series indexed by (activity class, activity).
testGraph = filtre.ByRowName(dataset, compound_class, compound_name)
testGraph

abiotic      antioxidant          5276
             metal                 918
             salt                  191
             uv                    214
allelopathy  germination           162
pharmaco     anti-inflammatory     689
             antibacterial        2157
             anticancer            150
             antidiabetic          759
             antifungal            812
             antimicrobial        1607
             antiviral             292
             arthritis             114
             cardiovascular        156
             cytotoxicity         1231
             obesity               133
             sedative               44
             wound                 415
toxicity     toxicity             1556
Name: tannin, dtype: int64

In [171]:
# Demo of combining both selectors: restrict the rows to the "alkaloid" class
# first, then restrict the columns of that result to the "abiotic" class.
tempDemo = filtre.ByColumnName(filtre.ByRowName(dataset, "alkaloid"), "abiotic")
tempDemo


Unnamed: 0,antioxidant,metal,salt,uv
colchicine,192,84,187,20
imidazole,1082,2507,1195,302
indole,1324,1482,717,283
piperidine,577,292,280,66
pyridine,1033,5330,1369,737
quinoline,560,1213,404,152
thiazole,863,511,4202,88


In [172]:
# Helper lists derived from the (activity class, activity) index of `testGraph`;
# they are consumed by the colour-mapping and plotting cells below.
test = testGraph.index

# Activity label of every bar, in index order.
name = list(test.get_level_values(1))

# Unique (class, activity) pairs and unique classes. dict.fromkeys() removes
# duplicates while keeping first-seen order, unlike set(), whose iteration
# order for strings changes from one kernel session to the next.
classActPrivate = list(dict.fromkeys(test))
classAct = list(dict.fromkeys(test.get_level_values(0)))

# Bar heights.
y = list(testGraph.values)

# Only the last expression of a cell is displayed, so show the full index once.
list(test)

[('abiotic', 'antioxidant'),
 ('abiotic', 'metal'),
 ('abiotic', 'salt'),
 ('abiotic', 'uv'),
 ('allelopathy', 'germination'),
 ('pharmaco', 'anti-inflammatory'),
 ('pharmaco', 'antibacterial'),
 ('pharmaco', 'anticancer'),
 ('pharmaco', 'antidiabetic'),
 ('pharmaco', 'antifungal'),
 ('pharmaco', 'antimicrobial'),
 ('pharmaco', 'antiviral'),
 ('pharmaco', 'arthritis'),
 ('pharmaco', 'cardiovascular'),
 ('pharmaco', 'cytotoxicity'),
 ('pharmaco', 'obesity'),
 ('pharmaco', 'sedative'),
 ('pharmaco', 'wound'),
 ('toxicity', 'toxicity')]

Start using Bokeh to create the graphics:

In [173]:
# `defaultdict` is no longer needed by this cell; the import is kept so that
# any other cell relying on it keeps working.
from collections import defaultdict
import random

# One x position per bar.
x = list(range(len(y)))

# Fixed seed plus a sorted class order: every run assigns the same colour to
# the same activity class, whatever order `classAct` happens to be in.
COLOR_SEED = 0
color_rng = random.Random(COLOR_SEED)

# Activity class -> RGB tuple with components in 0..255.
colors1 = {
    act: tuple(color_rng.randint(0, 255) for _ in range(3))
    for act in sorted(classAct)
}

# Exactly one colour per bar, taken from the class each index entry carries.
# Matching on the activity name alone would append extra colours whenever two
# classes share an activity name.
colors = [colors1[act_class] for act_class, _ in testGraph.index]

testGraph

abiotic      antioxidant          5276
             metal                 918
             salt                  191
             uv                    214
allelopathy  germination           162
pharmaco     anti-inflammatory     689
             antibacterial        2157
             anticancer            150
             antidiabetic          759
             antifungal            812
             antimicrobial        1607
             antiviral             292
             arthritis             114
             cardiovascular        156
             cytotoxicity         1231
             obesity               133
             sedative               44
             wound                 415
toxicity     toxicity             1556
Name: tannin, dtype: int64

In [175]:
from bokeh.plotting import figure, output_file, save, show, ColumnDataSource
from bokeh.models import HoverTool
from bokeh.io import output_notebook

# Render Bokeh figures inline in the notebook.
output_notebook()

# Activity class of every bar. All columns of a ColumnDataSource must have the
# same length, so the legend column needs one entry per bar rather than the
# list of unique classes.
bar_classes = list(testGraph.index.get_level_values(0))

source = ColumnDataSource(
    {
        "x": x,
        "top": y,
        "name": name,
        "colors": colors,
        "legend": bar_classes,
    }
)

# Tooltip: activity name, count, and the class ("famille") the activity belongs to.
hover = HoverTool(
    tooltips=[
        ("name", "@name"),
        ("Value", "@top"),
        ("famille", "@legend"),
    ]
)

# The title follows the plotted Series instead of a hard-coded compound name.
g = figure(
    title=str(testGraph.name),
    sizing_mode="stretch_width",
    max_width=500,
    height=500,
    tools=[hover],
)

# x/top are named explicitly instead of relying on the glyph's default field names.
g.vbar(
    x="x",
    top="top",
    width=0.5,
    bottom=0,
    color="colors",
    legend_group="legend",
    source=source,
)

g.legend.location = "top_right"
g.title.align = "center"

show(g)

