# Plot histograms for collections
<b style="color:red">WARNING</b> **This notebook is for developers to update the graphs in the documentation.**

Install additional dependencies

This notebook was written in [JupyterLab](https://jupyter.org/). If you are using Jupyter Notebook or another environment, `plotly` may not work out of the box. If you run into trouble, follow the installation instructions at https://plotly.com/python/getting-started/#installation.

In [None]:
# !pip install pandas
# !pip install plotly
# !pip install "jupyterlab>=3" "ipywidgets>=7.6"

In [None]:
# # or use conda
# !conda install pandas
# !conda install -c plotly plotly
# !conda install "jupyterlab>=3" "ipywidgets>=7.6" 

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd

from bioimageloader._experimentals import (ALL_COLLECTIONS, ROOTS,
                                           load_all_datasets)
from bioimageloader.plots import cycle_colors, to_hex_color

## Load all collections
Point to root directories

Below, I have all datasets under `../Data`

In [None]:
# Display the default ROOTS mapping imported from bioimageloader._experimentals,
# before it is replaced by the local mapping defined in the next cell.
ROOTS

In [None]:
# Collection name -> root directory of that collection's data.
# Every path is relative to this notebook and lives under `../Data`.
# The group labels below (anno / partial anno / no anno) presumably describe
# annotation availability -- confirm against the collection docs.
ROOTS = {
    # anno
    'DSB2018': '../Data/DSB2018',
    'TNBC': '../Data/TNBC_NucleiSegmentation',
    'ComputationalPathology': '../Data/ComputationalPathology',
    'S_BSST265': '../Data/BioStudies',
    'MurphyLab': '../Data/2009_ISBI_2DNuclei_code_data',
    'BBBC006': '../Data/bbbc/006',
    'BBBC007': '../Data/bbbc/007',
    'BBBC008': '../Data/bbbc/008',
    'BBBC018': '../Data/bbbc/018',
    'BBBC020': '../Data/bbbc/020',
    'BBBC039': '../Data/bbbc/039',

    # partial anno
    'DigitalPathology': '../Data/DigitalPathology',
    'UCSB': '../Data/UCSB_BioSegmentation',
    'BBBC002': '../Data/bbbc/002',

    # no anno
    'BBBC013': '../Data/bbbc/013',
    'BBBC014': '../Data/bbbc/014',
    'BBBC015': '../Data/bbbc/015',
    'BBBC016': '../Data/bbbc/016',
    'BBBC026': '../Data/bbbc/026',
    'BBBC041': '../Data/bbbc/041',
    'FRUNet': '../Data/FRU_processing',
    'BBBC021': '../Data/bbbc/021',
}



In [None]:
# Load the collections from the root directories in ROOTS (presumably one
# dataset per entry -- confirm against `load_all_datasets`). The result is
# used below as an iterable of datasets that support len() and expose an
# `.acronym` attribute.
all_datasets = load_all_datasets(roots=ROOTS)

In [None]:
# Display the loaded datasets.
all_datasets

Sort the collections by length (largest first)

In [None]:
def _sort_in_length(dset):
    return len(dset)

In [None]:
# Order the collections from largest to smallest; the lengths are printed
# before and after so the reordering is visible.
print([len(dset) for dset in all_datasets])
# `reverse=True` keeps the sort stable, so re-running this cell leaves
# collections of equal length in place. The previous `sorted(...)[::-1]`
# flipped their relative order (and hence their bar colors) on every re-run.
all_datasets = sorted(all_datasets, key=_sort_in_length, reverse=True)
print([len(dset) for dset in all_datasets])

## hist(All Collections)

In [None]:
# Choose your color map (default: tab10)
palette = px.colors.qualitative.T10
# One color per collection (presumably the palette repeats when there are
# more collections than colors -- confirm against `cycle_colors`).
colors = cycle_colors(palette, len(all_datasets))

In [None]:
# One row per collection: its acronym, its number of images and its bar color.
# Rows follow the order of `all_datasets`.
acronyms = [dset.acronym for dset in all_datasets]
lengths = [len(dset) for dset in all_datasets]
df_all = pd.DataFrame({'acronym': acronyms, 'length': lengths, 'color': colors})

In [None]:
# Preview the first rows of the table.
df_all.head()

Add percentage info

In [None]:
# Share of all images contributed by each collection. The value is stored as
# a fraction of the total (it is not multiplied by 100).
total_images = df_all['length'].sum()
df_all['perc'] = df_all['length'] / total_images
df_all.head()

In [None]:
# Hover label: "<acronym>, <#images>, <share>%".
# `customdata` carries the `perc` column, which holds a fraction of the total.
# The d3 `.2%` format multiplies by 100 and appends the percent sign; the
# previous `.2f` followed by a literal `%` rendered 0.25 as "0.25%" instead of
# "25.00%". `<extra></extra>` hides the secondary hover box.
hovertemplate = '%{x}, %{y}, %{customdata:.2%}<extra></extra>'

In [None]:
# Bar chart of the number of images per collection. `perc` is attached as
# customdata so the hover label can show each collection's share.
bars_all = go.Bar(
    x=df_all.acronym,
    y=df_all.length,
    marker_color=df_all.color,
    customdata=df_all.perc,
    hovertemplate=hovertemplate,
)
title_all = 'All Collections (#collections: {}, #images: {})'.format(
    len(df_all), df_all.length.sum())

fig = go.Figure(data=[bars_all])
fig.update_layout(title_text=title_all)
fig.show()

In [None]:
# Export the all-collections histogram as an embeddable <div> for the docs.
fig.write_html(
    '../docs/_static/hist_all_collections_div.html',
    full_html=False,  # emit only the <div>, not a complete HTML page
    # important: reference plotly.js from the CDN instead of inlining it.
    # The previous value 'cnd' was a typo: it is not a recognized option and,
    # being a truthy value, made plotly embed the whole plotly.js bundle in
    # the exported file.
    include_plotlyjs='cdn',
)

## hist(Mask Collections)

In [None]:
# Acronyms of the collections that provide masks.
# NOTE(review): 'FRUNet' is grouped under "no anno" in ROOTS but is listed
# here as having masks -- confirm which one is right.
who_has_masks = [
    'DSB2018',
    'TNBC',
    'ComPath',
    'S_BSST265',
    'MurphyLab',
    'FRUNet',
    'BBBC006',
    'BBBC007',
    'BBBC008',
    'BBBC018',
    'BBBC020',
    'BBBC039',
]

# Row position of every mask collection inside `df_all`. `list.index` raises
# ValueError when an acronym from `who_has_masks` is missing from the table.
all_acronyms = df_all.acronym.to_list()
maskdset_indices = [all_acronyms.index(who) for who in who_has_masks]

In [None]:
# Show the selected row positions and the fraction of all collections that
# are listed as having masks.
maskdset_indices, len(maskdset_indices) / len(all_datasets)

In [None]:
# Keep only the rows of the mask collections by dropping every other index
# label. `drop` returns a new frame, so `df_all` itself is left unchanged.
rows_without_masks = sorted(set(df_all.index) - set(maskdset_indices))
df_mask = df_all.drop(index=rows_without_masks)

In [None]:
# Recompute the shares relative to the mask collections only (the values
# copied over from `df_all` were relative to all collections).
mask_total = df_mask['length'].sum()
df_mask['perc'] = df_mask['length'] / mask_total
df_mask.head()

In [None]:
# Bar chart of the number of images per mask collection. It reuses the colors
# assigned in `df_all`, so each collection keeps the same color in both charts.
bars_mask = go.Bar(
    x=df_mask.acronym,
    y=df_mask.length,
    marker_color=df_mask.color,
    customdata=df_mask.perc,
    hovertemplate=hovertemplate,
)
title_mask = 'Mask Collections (#collections: {}, #images: {})'.format(
    len(df_mask), df_mask.length.sum())

fig = go.Figure(data=[bars_mask])
fig.update_layout(title_text=title_mask)
fig.show()

In [None]:
# Export the mask-collections histogram as an embeddable <div> for the docs.
# * plotly.js is deliberately left out of this file; presumably the page that
#   embeds this div already loads it (e.g. through the all-collections div)
#   -- confirm against the docs page.
mask_div_path = '../docs/_static/hist_mask_collections_div.html'
fig.write_html(mask_div_path, full_html=False, include_plotlyjs=False)