In [18]:
# conda env: datacat(Python 3.8.20)
import os
import pandas as pd

import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

from datacat4ml.const import CAT_DATA_DIR

In [19]:
# Read the six '*_dfs_len.csv' tables from CAT_DATA_DIR, keyed by file stem.
# The tuple order fixes both the read order and the order of the printed shapes.
len_tables = {
    table_name: pd.read_csv(os.path.join(CAT_DATA_DIR, f'{table_name}.csv'))
    for table_name in (
        'hhd_or_dfs_len',
        'hhd_gpcr_dfs_len',
        'mhd_or_dfs_len',
        'mhd_gpcr_dfs_len',
        'lhd_or_dfs_len',
        'lhd_gpcr_dfs_len',
    )
}

# Expose each table under the notebook-level name that later cells refer to.
hhd_or_dfs_len = len_tables['hhd_or_dfs_len']
hhd_gpcr_dfs_len = len_tables['hhd_gpcr_dfs_len']
mhd_or_dfs_len = len_tables['mhd_or_dfs_len']
mhd_gpcr_dfs_len = len_tables['mhd_gpcr_dfs_len']
lhd_or_dfs_len = len_tables['lhd_or_dfs_len']
lhd_gpcr_dfs_len = len_tables['lhd_gpcr_dfs_len']

# Report (rows, columns) for every table as a quick sanity check.
for table_name, table in len_tables.items():
    print(f'The shape of {table_name}: {table.shape}')

The shape of hhd_or_dfs_len: (12, 13)
The shape of hhd_gpcr_dfs_len: (688, 13)
The shape of mhd_or_dfs_len: (39, 13)
The shape of mhd_gpcr_dfs_len: (1120, 13)
The shape of lhd_or_dfs_len: (35, 13)
The shape of lhd_gpcr_dfs_len: (713, 13)


In [20]:
# Display the table read from 'mhd_or_dfs_len.csv'.
mhd_or_dfs_len

Unnamed: 0,df_level,use_lookup,target,effect,assay,std_type,assay_chembl_id,hhd_df,effect_type_df,plus_df,exclude_df,mhd_df,lhd_df
0,mhd,True,CHEMBL233,bind,RBA,Ki,,5682,5418,139,0,5557,
1,mhd,True,CHEMBL233,bind,RBA,IC50,,1366,668,19,0,687,
2,mhd,True,CHEMBL237,bind,RBA,Ki,,4857,4535,140,29,4646,
3,mhd,True,CHEMBL237,bind,RBA,IC50,,1049,478,7,0,485,
4,mhd,True,CHEMBL236,bind,RBA,Ki,,5152,4933,65,1,4997,
5,mhd,True,CHEMBL236,bind,RBA,IC50,,1316,930,34,0,964,
6,mhd,True,CHEMBL2014,bind,RBA,Ki,,1381,1320,12,0,1332,
7,mhd,True,CHEMBL2014,bind,RBA,IC50,,747,496,11,0,507,
8,mhd,True,CHEMBL233,agon,G-GTP,EC50,,2296,1067,80,23,1124,
9,mhd,True,CHEMBL237,agon,G-GTP,EC50,,2062,1365,103,0,1468,


In [None]:
# Count how often each distinct value occurs in the 'lhd_df' column of the
# table read from 'lhd_gpcr_dfs_len.csv'.
lhd_gpcr_dfs_len['lhd_df'].value_counts()

# Data Visualization

## Create the dfs to store the stats

In [None]:
# Gather the seven per-effect/assay collections into one list so they can be
# iterated together when building the combined table.
# NOTE(review): none of the `or_*_len_dfs` names below are created in the cells
# visible in this notebook; on a fresh kernel this cell raises NameError unless
# they are defined elsewhere — confirm where they come from.
# Each element is iterated with `.items()` downstream, so presumably each is a
# dict of DataFrames — TODO confirm.
or_combine_len_dfs = [or_bind_len_dfs, 
                    or_agon_G_GTP_len_dfs, 
                    or_agon_G_cAMP_len_dfs, 
                    or_agon_G_Ca_len_dfs, 
                    or_agon_B_arrest_len_dfs, 
                    or_antag_G_GTP_len_dfs,  
                    or_antag_B_arrest_len_dfs]

In [None]:
# Stack every length table from every collection in `or_combine_len_dfs` into a
# single frame. The frames are gathered first and concatenated once: calling
# pd.concat inside the loop re-copies the accumulated frame on every iteration
# and also feeds an empty seed frame into the concatenation.
len_frames = [
    len_df
    for len_dfs in or_combine_len_dfs
    for len_df in len_dfs.values()
]
# pd.concat raises on an empty list, so fall back to an empty frame in that case.
final_len_df = pd.concat(len_frames, axis=0, sort=False) if len_frames else pd.DataFrame()

# remove the rows where 'std_type' is 'Ke' or 'Kb'
final_len_df = final_len_df[~final_len_df['std_type'].isin(['Ke', 'Kb'])]

final_len_df
# type_df = final_df + final_out_df
# final_df = effect_type_df + plus_df - exclude_df

## Effect-wise

### donut plot for effect-wise activity space

In [None]:
def effect_activity_space(target='CHEMBL233', startangle=-90, fig_format='pdf'):
    """Draw and save a nested donut chart of one target's counts, grouped by effect.

    Rows for ``target`` are taken from the notebook-level ``final_len_df`` and
    pivoted on (``effect``, ``assay``, ``std_type``). The inner ring has one
    wedge per ``effect``; the outer ring has one wedge per pivot row, labelled
    ``"<std_type>: <assay>(<value>)"``. The figure is written to
    ``<CAT_FIG_DIR>/<target>/<target>_effect-wise_activity_space_pie.<fig_format>``
    and then closed.

    Parameters
    ----------
    target : str
        Value matched against ``final_len_df['target']``.
    startangle : float
        Start angle handed to both ``ax.pie`` calls.
    fig_format : str
        File extension used for the saved figure.

    Returns
    -------
    pandas.DataFrame
        The pivot table, indexed by (effect, assay, std_type), with the columns
        ``final_df`` and ``assay-type``.

    Raises
    ------
    ValueError
        If ``final_len_df`` contains no row for ``target``.
    """
    target_len_df = final_len_df[final_len_df['target'] == target]
    if target_len_df.empty:
        raise ValueError(f"final_len_df has no rows for target {target!r}")
    # extract columns needed for plotting
    target_len_df = target_len_df[['effect', 'assay', 'std_type', 'final_df']]
    # create the pivot table; no aggfunc is given, so pandas applies its default
    # (the mean of 'final_df' per index combination)
    target_len_df = pd.pivot_table(target_len_df, index=['effect','assay', 'std_type'], values=['final_df'])
    # make a new column for labeling the minor data
    target_len_df['assay-type'] = target_len_df.index.get_level_values('std_type') + ': ' + target_len_df.index.get_level_values('assay')

    ##########Plot the nested pie chart#####################
    # create a figure and subplots
    fig, ax = plt.subplots(figsize=(10, 6))

    width = 0.3

    # data for pie charts
    # Major category values = sum of minor category values
    effect_groups = target_len_df.groupby(level='effect')
    major_data = effect_groups['final_df'].sum()
    # Labels and colours are both derived from major_data's own index so they
    # cannot drift out of step with the wedge values.
    major_labels = major_data.index
    # Minor category values
    minor_data = target_len_df['final_df']

    def _count_text(value):
        # The aggregated values are floats; show whole numbers without a trailing '.0'.
        return str(int(value)) if float(value).is_integer() else str(value)

    minor_labels = target_len_df['assay-type'] + '(' + target_len_df['final_df'].map(_count_text) + ')'

    # Colours are keyed by effect name rather than by position: the set of
    # effects and the number of rows per effect vary with the target, and a
    # fixed-length colour list would be cycled by matplotlib and paint wedges
    # with another category's colour.
    effect_colors = {
        'agon': '#f5426c',
        'antag': '#4278f5',
        'bind': '#b0e3e6',
    }
    fallback_color = '#cccccc'  # neutral grey for an effect without a dedicated colour
    major_colors = [effect_colors.get(effect, fallback_color) for effect in major_labels]
    # One light-to-base shade per outer wedge, generated per effect group. The
    # pivot index is ordered by effect first, so extending group by group keeps
    # the shades aligned with minor_data.
    minor_colors = []
    for effect, n_rows in effect_groups.size().items():
        base_color = effect_colors.get(effect, fallback_color)
        minor_colors.extend(sns.light_palette(base_color, n_colors=int(n_rows)))

    # Draw pies
    hfont = {'fontname':'serif'}
    # pie for major category - 'effect', put in the inner circle
    ax.pie(major_data,
        radius=1-width,
        colors=major_colors,
        labels=major_labels,
        startangle=startangle,
        textprops={'fontsize': 13, 'fontname':'serif', 'fontweight':'bold'},
        wedgeprops=dict(width=width, edgecolor='w'),
        labeldistance=0.6)

    # pie for minor category - 'assay', put in the outer circle
    ax.pie(minor_data,
        radius=1,
        colors=minor_colors,
        labels=minor_labels,
        startangle=startangle,
        textprops={'fontsize': 11, 'fontname':'serif'},
        wedgeprops=dict(width=width, edgecolor='w'),
        labeldistance=1)

    # Set a title
    ax.set_title(f'{target}: effect-wise activity space', fontsize=16, fontweight='bold', **hfont)

    # save the figure
    # NOTE(review): CAT_FIG_DIR and mkdirs are not imported in the notebook's
    # import cell as visible here — confirm they are in scope before running.
    file_path = os.path.join(CAT_FIG_DIR, target)
    mkdirs(file_path)

    fig.savefig(os.path.join(file_path, f"{target}_effect-wise_activity_space_pie.{fig_format}"), dpi=300, bbox_inches='tight')
    # The figure is only needed on disk; close it so repeated calls do not
    # accumulate open figures in pyplot.
    plt.close(fig)

    return target_len_df

In [None]:
# Render the effect-wise donut for each of the four targets, in this order, and
# keep the returned pivot tables. Each target has its own start angle.
(mor_effect_len_df,
 kor_effect_len_df,
 dor_effect_len_df,
 nor_effect_len_df) = [
    effect_activity_space(target=chembl_id, startangle=angle, fig_format='pdf')
    for chembl_id, angle in (
        ('CHEMBL233', -90),
        ('CHEMBL237', -73),
        ('CHEMBL236', -70),
        ('CHEMBL2014', -50),
    )
]

In [None]:
# Pivot table returned by effect_activity_space for target 'CHEMBL233'.
mor_effect_len_df

In [None]:
# Pivot table returned by effect_activity_space for target 'CHEMBL237'.
kor_effect_len_df

In [None]:
# Pivot table returned by effect_activity_space for target 'CHEMBL236'.
dor_effect_len_df

In [None]:
# Pivot table returned by effect_activity_space for target 'CHEMBL2014'.
nor_effect_len_df

## Type-wise

### donut plot for type-wise activity space

In [None]:
def type_activity_space(target='CHEMBL233', startangle=-90, fig_format='pdf'):
    """Draw and save a nested donut chart of one target's counts, grouped by std_type.

    Rows for ``target`` are taken from the notebook-level ``final_len_df`` and
    pivoted on (``std_type``, ``effect``, ``assay``). The inner ring has one
    wedge per ``std_type``; the outer ring has one wedge per pivot row, labelled
    ``"<effect>: <assay>(<value>)"``. The figure is written to
    ``<CAT_FIG_DIR>/<target>/<target>_type-wise_activity_space_pie.<fig_format>``
    and then closed.

    Parameters
    ----------
    target : str
        Value matched against ``final_len_df['target']``.
    startangle : float
        Start angle handed to both ``ax.pie`` calls.
    fig_format : str
        File extension used for the saved figure.

    Returns
    -------
    pandas.DataFrame
        The pivot table, indexed by (std_type, effect, assay), with the columns
        ``final_df`` and ``effect-assay``.

    Raises
    ------
    ValueError
        If ``final_len_df`` contains no row for ``target``.
    """
    target_len_df = final_len_df[final_len_df['target'] == target]
    if target_len_df.empty:
        raise ValueError(f"final_len_df has no rows for target {target!r}")
    # extract columns needed for plotting
    target_len_df = target_len_df[['effect', 'assay', 'std_type', 'final_df']]
    # create the pivot table; no aggfunc is given, so pandas applies its default
    # (the mean of 'final_df' per index combination)
    target_len_df = pd.pivot_table(target_len_df, index=['std_type', 'effect','assay'], values=['final_df'])
    # make a new column for labeling the minor data
    target_len_df['effect-assay'] = target_len_df.index.get_level_values('effect') + ': ' + target_len_df.index.get_level_values('assay')

    ##########Plot the nested pie chart#####################
    # create a figure and subplots
    fig, ax = plt.subplots(figsize=(10, 6))

    width = 0.3

    # data for pie charts
    # Major category values = sum of minor category values
    type_groups = target_len_df.groupby(level='std_type')
    major_data = type_groups['final_df'].sum()
    # Labels and colours are both derived from major_data's own index so they
    # cannot drift out of step with the wedge values.
    major_labels = major_data.index
    # Minor category values
    minor_data = target_len_df['final_df']

    def _count_text(value):
        # The aggregated values are floats; show whole numbers without a trailing '.0'.
        return str(int(value)) if float(value).is_integer() else str(value)

    minor_labels = target_len_df['effect-assay'] + '(' + target_len_df['final_df'].map(_count_text) + ')'

    # Colours are keyed by std_type rather than by position: which std_types are
    # present, and how many rows each has, depends on the data. A fixed-length
    # colour list would be cycled by matplotlib, so e.g. with no 'Kb'/'Ke' rows
    # the 'Ki' wedge would receive the colour meant for 'Kb'.
    std_type_colors = {
        'EC50': '#b0daff',
        'IC50': '#19a7ce',
        'Kb': '#146c94',
        'Ke': '#164b60',
        'Ki': '#c4b0ff',  # alternatives considered: '#e5beec', '#f9f54b'
    }
    fallback_color = '#cccccc'  # neutral grey for a std_type without a dedicated colour
    major_colors = [std_type_colors.get(std_type, fallback_color) for std_type in major_labels]
    # One light-to-base shade per outer wedge, generated per std_type group. The
    # pivot index is ordered by std_type first, so extending group by group keeps
    # the shades aligned with minor_data.
    minor_colors = []
    for std_type, n_rows in type_groups.size().items():
        base_color = std_type_colors.get(std_type, fallback_color)
        minor_colors.extend(sns.light_palette(base_color, n_colors=int(n_rows)))

    # Draw pies
    hfont = {'fontname':'serif'}
    # pie for major category - 'std_type', put in the inner circle
    ax.pie(major_data,
        radius=1-width,
        colors=major_colors,
        labels=major_labels,
        startangle=startangle,
        textprops={'fontsize': 11, 'fontweight':'bold'},
        wedgeprops=dict(width=width, edgecolor='w'),
        labeldistance=0.65)
    # pie for minor category - 'effect: assay', put in the outer circle
    ax.pie(minor_data,
        radius=1,
        colors=minor_colors,
        labels=minor_labels,
        startangle=startangle,
        textprops={'fontsize': 11, 'fontname':'serif'},
        wedgeprops=dict(width=width, edgecolor='w'),
        labeldistance=1)

    # Set a title
    ax.set_title(f'{target}: type-wise activity space', fontsize=16, fontweight='bold', **hfont)

    # save the figure
    # NOTE(review): CAT_FIG_DIR and mkdirs are not imported in the notebook's
    # import cell as visible here — confirm they are in scope before running.
    file_path = os.path.join(CAT_FIG_DIR, target)
    mkdirs(file_path)

    fig.savefig(os.path.join(file_path, f"{target}_type-wise_activity_space_pie.{fig_format}"), dpi=300, bbox_inches='tight')
    print(f"Figure saved in {file_path}")
    # The figure is only needed on disk; close it so repeated calls do not
    # accumulate open figures in pyplot.
    plt.close(fig)

    return target_len_df

In [None]:
# Render the type-wise donut for each of the four targets, in this order, and
# keep the returned pivot tables. Each target has its own start angle.
(mor_type_len_df,
 kor_type_len_df,
 dor_type_len_df,
 nor_type_len_df) = [
    type_activity_space(target=chembl_id, startangle=angle, fig_format='pdf')
    for chembl_id, angle in (
        ('CHEMBL233', -90),
        ('CHEMBL237', -103),
        ('CHEMBL236', -101),
        ('CHEMBL2014', -100),
    )
]

In [None]:
# Pivot table returned by type_activity_space for target 'CHEMBL233'.
mor_type_len_df

In [None]:
# Pivot table returned by type_activity_space for target 'CHEMBL237'.
kor_type_len_df

In [None]:
# Pivot table returned by type_activity_space for target 'CHEMBL236'.
dor_type_len_df

In [None]:
# Pivot table returned by type_activity_space for target 'CHEMBL2014'.
nor_type_len_df