In [78]:
import os
import pandas as pd
import numpy as np
import altair as alt

from scipy.stats import chisquare, chi2

In [47]:
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### OVERLAP MATRIX

In [21]:
def overlap_matrix(overlapdir, genes):
    TFvsGene = pd.DataFrame(index=sorted(os.listdir(overlapdir)),columns=genes)

    for tf_file in os.listdir(overlapdir):
        filepath = os.path.join(overlapdir, tf_file)
    
        try:
            a = pd.read_table(filepath, header=None)[3].tolist()
            TFvsGene.loc[tf_file] = TFvsGene.columns.isin(a).astype(int)
        except:
            TFvsGene.loc[tf_file] = 0
    
    TFvsGene = TFvsGene.set_index(w.replace(".bed.gz", "") for w in TFvsGene.index.tolist())
    return TFvsGene

In [22]:
escapeoverlapdir = "./ReMap2022_Overlap/ReMap2022_Escape"

escape_bed = pd.read_table(os.path.join('./query/hg38_escape_55TSS.bed.gz'), names=['chr', 'start', 'end', 'Gene_Name'])
escape_genes = escape_bed['Gene_Name'].tolist()

TFvsEsGene_2022 = overlap_matrix(escapeoverlapdir, escape_genes)

numTFs_es = TFvsEsGene_2022.sum(0).to_frame(name='numTFs')
numTFs_es['type'] = 'Escape'

TFvsEsGene_2022

Unnamed: 0,GYG2,ARSD,ARSH,CXorf28,MXRA5,PRKX,NLGN4X,HDHD1,STS,PNPLA4,...,JPX,NAP1L3,TAF7L,NXF5,AK026512,ZCCHC16,HTR2C,GPR112,VGLL1,L1CAM
ARID3A,0,0,0,0,0,1,0,0,1,1,...,1,0,0,0,0,0,0,0,0,0
ARNT,0,0,0,0,0,0,0,0,1,0,...,1,0,0,0,0,0,0,0,0,0
ASH2L,0,0,0,0,0,1,0,0,1,1,...,1,0,0,0,0,0,0,0,0,0
ATF2,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
ATF3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZNF597,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZNF622,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZNF687,0,0,0,0,0,1,0,0,1,1,...,1,0,0,0,0,0,0,0,0,0
ZSCAN29,0,0,0,0,0,0,0,0,1,1,...,1,0,0,0,0,0,0,0,0,0


In [23]:
subjectoverlapdir = './ReMap2022_Overlap/ReMap2022_Subject'
subject_bed = pd.read_table(os.path.join('./query/hg38_subject_462TSS.bed.gz'), names=['chr', 'start', 'end', 'Gene_Name'])
subject_genes = subject_bed['Gene_Name'].tolist()

TFvsSubGene_2022 = overlap_matrix(subjectoverlapdir, subject_genes)

numTFs_sub = TFvsSubGene_2022.sum(0).to_frame(name='numTFs')
numTFs_sub['type'] = 'Subject'

TFvsEsGene_2022

Unnamed: 0,GYG2,ARSD,ARSH,CXorf28,MXRA5,PRKX,NLGN4X,HDHD1,STS,PNPLA4,...,JPX,NAP1L3,TAF7L,NXF5,AK026512,ZCCHC16,HTR2C,GPR112,VGLL1,L1CAM
ARID3A,0,0,0,0,0,1,0,0,1,1,...,1,0,0,0,0,0,0,0,0,0
ARNT,0,0,0,0,0,0,0,0,1,0,...,1,0,0,0,0,0,0,0,0,0
ASH2L,0,0,0,0,0,1,0,0,1,1,...,1,0,0,0,0,0,0,0,0,0
ATF2,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
ATF3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZNF597,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZNF622,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZNF687,0,0,0,0,0,1,0,0,1,1,...,1,0,0,0,0,0,0,0,0,0
ZSCAN29,0,0,0,0,0,0,0,0,1,1,...,1,0,0,0,0,0,0,0,0,0


In [24]:
all_bed = pd.read_table(os.path.join('./query/hg38_autosomes_19119TSS.bed.gz'), names=['chr', 'start', 'end', 'Gene_Name'])

In [44]:
autosomes = [ "chr" + str(num) for num in list(range(1, 23)) ]

autooverlapdir = './ReMap2022_Overlap/ReMap2022_Auto'
auto_genes = all_bed.query('chr == @autosomes')['Gene_Name'].tolist()

TFvsAutoGene_2022 = overlap_matrix(autooverlapdir, auto_genes)

numTFs_auto = TFvsAutoGene_2022.sum(0).to_frame(name='numTFs')

numTFs_auto_sep = numTFs_auto.reset_index().merge(all_bed[['chr', 'Gene_Name']], left_on='index', right_on='Gene_Name').set_index('index').rename_axis(None).rename(columns={'chr':'type'})


numTFs_auto['type'] = 'Auto'

TFvsAutoGene_2022

Unnamed: 0,OR4F5,OR4F29,OR4F16,SAMD11,NOC2L,KLHL17,PLEKHN1,PERM1,HES4,ISG15,...,NOXA1,ENTPD8,NSMF,PNPLA7,MRPL41,DPH7,ZMYND19,ARRDC1,EHMT1,CACNA1B
ARID3A,0,0,0,0,1,1,0,0,1,1,...,1,0,1,0,0,1,1,1,1,0
ARNT,0,0,0,0,0,0,0,0,1,0,...,1,0,0,0,0,0,0,0,0,0
ASH2L,0,0,0,0,1,0,1,0,0,1,...,1,0,1,0,1,1,1,1,1,0
ATF2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ATF3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZNF597,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZNF622,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZNF687,0,0,0,0,0,1,0,0,1,1,...,1,0,1,1,1,1,0,1,1,0
ZSCAN29,0,0,0,0,0,1,0,0,1,0,...,1,0,0,0,0,1,1,1,0,0


In [None]:
chrXoverlapdir = './ReMap2022_Overlap/ReMap2022_XGenes'
chrX_genes = all_bed.query('chr == "chrX"')['Gene_Name'].tolist()

TFvschrXGene_2022 = overlap_matrix(chrXoverlapdir, chrX_genes)

numTFs_chrX = TFvschrXGene_2022.sum(0).to_frame(name='numTFs')
numTFs_chrX['type'] = 'chrX'

TFvschrXGene_2022

Unnamed: 0,PLCXD1,GTPBP6,PPP2R3B,SHOX,CRLF2,CSF2RA,IL3RA,SLC25A6,ASMTL,P2RY8,...,RAB39B,CLIC2,H2AB2,F8A2,F8A3,H2AB3,TMLHE,SPRY3,VAMP7,IL9R
ARID3A,0,1,1,0,1,0,0,1,1,1,...,1,0,0,0,0,0,0,0,1,0
ARNT,0,0,0,0,1,0,0,1,0,1,...,0,1,0,0,0,0,0,0,0,1
ASH2L,0,1,1,0,0,0,1,1,1,1,...,1,0,0,0,0,0,1,1,1,0
ATF2,0,0,0,0,1,0,1,0,0,0,...,0,1,0,0,0,0,0,0,1,0
ATF3,0,1,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZNF597,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZNF622,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZNF687,0,0,1,0,0,0,0,1,0,1,...,1,1,0,0,0,0,0,0,0,0
ZSCAN29,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0


In [51]:
numTFs_essub = pd.concat([numTFs_es, numTFs_sub])

numTFs_all = pd.concat([numTFs_essub, numTFs_auto, numTFs_chrX])

### EGENES & NUMTFs

In [None]:
egenes_beforecen = escape_bed.query('start < 58605579')['Gene_Name'].tolist()
egenes_aftercen = escape_bed.query('start > 58605579')['Gene_Name'].tolist()

egenes_order = escape_bed.sort_values('start')['Gene_Name'].tolist()

eetfs_2022 = ['HSF1', 'ZFP36', 'NIPBL', 'MYB', 'STAT1']

eefs_allEgenes = TFvsEsGene_2022.loc[eetfs_2022, :]
pgk1overlap = TFvsSubGene_2022[['PGK1']].query('index == @eetfs_2022')

In [None]:
egene_bcent = alt.Chart(eefs_allEgenes[egenes_beforecen].melt(ignore_index=False).reset_index().replace(1, 'Bind').astype(str)).mark_rect().encode(
    alt.Y('index', title='TFs', axis=alt.Axis(titleFontSize=25, labelFontSize=25)),
    alt.X('variable', title='Escape genes (sorted by coordinates)', sort=egenes_order, axis=alt.Axis(titleFontSize=25, labelFontSize=25, titleX=800)),
    alt.Color('value', title='', legend=None, scale=alt.Scale(range=['lightgrey', 'green']))
).properties(height=230,width=1350, title=alt.TitleParams(text="Short Arm", fontSize=25))

egene_acent = alt.Chart(eefs_allEgenes[egenes_aftercen].melt(ignore_index=False).reset_index().replace(1, 'Bind').astype(str)).mark_rect().encode(
    alt.Y('index', title=None, axis=None),
    alt.X('variable', title=None, sort=egenes_order, axis=alt.Axis(titleFontSize=25, labelFontSize=25)),
    alt.Color('value', title='', legend=None, scale=alt.Scale(range=['lightgrey', 'green']))
).properties(height=230, width=310, title=alt.TitleParams(text="Long Arm", fontSize=25))

pgk1 = alt.Chart(pgk1overlap.melt(ignore_index=False).reset_index().replace(1, 'Bind').astype(str)).mark_rect().encode(
    alt.Y('index', title='TFs', axis=None),
    alt.X('variable', title='Subject Control)', axis=alt.Axis(titleFontSize=0, labelFontSize=25)),
    alt.Color('value', title='', legend=alt.Legend(values=['Bind']), scale=alt.Scale(range=['lightgrey', 'green']))
).properties(height=230, width=30, title=alt.TitleParams(text=["Subject","Control"], fontSize=25))


bindegenes =alt.hconcat(egene_bcent, egene_acent, spacing=40)

alt.hconcat(bindegenes, pgk1, padding=10, spacing=40).resolve_scale(y='shared').properties(title=" ").configure_title(fontSize=30)

In [None]:
EsGene_bnumTFs = pd.DataFrame(TFvsEsGene_2022[egenes_beforecen].sum()).reset_index().rename(columns={'index':'Gene_Name', 0:'numTFs'})
EsGene_anumTFs = pd.DataFrame(TFvsEsGene_2022[egenes_aftercen].sum()).reset_index().rename(columns={'index':'Gene_Name', 0:'numTFs'})

egene_numTFs_bcent = alt.Chart(EsGene_bnumTFs).mark_bar(color='black').encode(
    alt.Y('numTFs', title='Number of TFs', axis=alt.Axis(titleFontSize=25, labelFontSize=25), scale=alt.Scale(domain=(0, 154))),
    alt.X('Gene_Name', title='Escape genes (sorted by coordinates)', sort=egenes_order, axis=alt.Axis(titleFontSize=25, labelFontSize=25, titleX=800)),
).properties(height=200,width=1350, title=alt.TitleParams(text="Short Arm", fontSize=25))

egene_numTFs_acent = alt.Chart(EsGene_anumTFs).mark_bar(color='black').encode(
    alt.Y('numTFs', title='', axis=alt.Axis(titleFontSize=25, labelFontSize=0)),
    alt.X('Gene_Name', title="", sort=egenes_order, axis=alt.Axis(titleFontSize=25, labelFontSize=25, titleX=800)),
).properties(height=200, width=310, title=alt.TitleParams(text="Long Arm", fontSize=25))

pgk1_numTFs = alt.Chart(pd.DataFrame(TFvsSubGene_2022[['PGK1']].sum()).reset_index().rename(columns={0:'numTFs'})).mark_bar(color='black').encode(
    alt.Y('numTFs', title='', axis=alt.Axis(titleFontSize=25, labelFontSize=0), scale=alt.Scale(domain=(0,100))),
    alt.X('index', title="", axis=alt.Axis(titleFontSize=25, labelFontSize=25, titleX=800)),
).properties(height=200, width=30, title=alt.TitleParams(text=["Subject","Control"], fontSize=25))

egenesnumTFs = alt.hconcat(egene_numTFs_bcent, egene_numTFs_acent, spacing=40).resolve_scale(y='shared')

alt.hconcat(egenesnumTFs, pgk1_numTFs, padding=10, spacing=40).configure_title(fontSize=40).resolve_scale(y='shared')

### NUMTFs VS GENE

In [None]:
alt.Chart(numTFs_essub).transform_density(
    'numTFs', groupby=['type'], as_=['number_TFs', 'density'],
).mark_area(opacity=0.75).encode(
    alt.X('number_TFs:Q', title="Number of binding TFs per gene", axis=alt.Axis(labelFontSize=20, titleFontSize=25)),
    alt.Y('density:Q', title="Scaled Density", axis=alt.Axis(labelFontSize=20, titleFontSize=25)),
    alt.Color('type', title="", scale=alt.Scale(range=['darkorange', 'dodgerblue']), legend=alt.Legend(labelFontSize=25, titleFontSize=28))
).properties(height=500, width=650)

In [49]:
alt.Chart(numTFs_auto_sep).transform_density(
    'numTFs', groupby=['type'], as_=['number_TFs', 'density'],
).mark_area(orient='horizontal').encode(
    alt.Y('number_TFs:Q', title="Number of binding TFs per gene", axis=alt.Axis(labelFontSize=20, titleFontSize=25)),
    alt.X('density:Q', stack='center', title=None, axis=alt.Axis(labels=False, values=[0],grid=False, ticks=True)),
    alt.Color('type:N', title="", legend=None),
    alt.Column('type:N',title="", header=alt.Header(titleOrient='bottom',labelOrient='bottom',labelPadding=0, labelFontSize=20), sort=autosomes)
).properties(height=400, width=100).configure_facet(spacing=0).configure_view(stroke=None)

In [52]:
alt.Chart(numTFs_all.query('numTFs >= 0')).transform_density(
    'numTFs',  as_=['number_TFs', 'density'], groupby=['type'],
).mark_area(orient='horizontal').encode(
    alt.Y('number_TFs:Q', title="Number of binding TFs per gene", axis=alt.Axis(labelFontSize=20, titleFontSize=25), scale=alt.Scale(domain=[5, 109])),
    #alt.X('density:Q', title="Scaled Density", axis=alt.Axis(labelFontSize=20, titleFontSize=25)),
    alt.X('density:Q', stack='center', title=None, axis=alt.Axis(labels=False, values=[0], grid=False, ticks=True)),
    alt.Color('type:N', title="", scale=alt.Scale(range=['grey', 'darkorange', 'dodgerblue', 'black']), legend=None),
    alt.Column('type:N', header=alt.Header(titleOrient='bottom', titleFontSize=0, labelOrient='bottom',labelPadding=0, labelFontSize=20), sort=['Auto', 'chrX', 'Escape', 'Subject'])
).properties(height=600, width=200).configure_facet(spacing=0).configure_view(stroke=None)

### CHI SQUARE FOR DISTRIBUTIONS

In [75]:
def dfforchi(ref_df, test_df):
    test_df = test_df.merge(ref_df[['bin', 'obs/sum']])
    test_df['expected'] = round(test_df['obs/sum'] * test_df['count'].sum())
    test_df = test_df.query('expected > 0')

    dof = test_df.shape[0]-1
    chisq = chisquare(test_df['count'], f_exp=test_df['expected'])
    crit = chi2.ppf(1-0.05, df=dof)

    print("degree of freedom:", dof)
    print("Chi Square Results:", chisq)
    print("Critical Value:", crit)

    if chisq[0] < crit:
        print("Same distribution")
    else:
        print("Diff distribution")

    print("")

    return test_df

In [79]:
numTF_summary = pd.DataFrame(numTFs_all.reset_index(drop=True).groupby(['numTFs', 'type']).size()).rename(columns={0: 'count'}).reset_index()
numTF_summary['bin'] = list(pd.cut(numTF_summary['numTFs'], bins=11, labels=['0-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70-79', '80-89', '90-99', '100-109']))

numTF_bins = numTF_summary.drop(columns=['numTFs']).groupby(['type', 'bin']).sum('count').reset_index()
# numTF_bins = numTF_bins.append([{'type':'Escape', 'bin':'100-109', 'count':0},{'type':'Subject', 'bin':'100-109', 'count':0}, {'type':'chrX', 'bin':'100-109', 'count':0}])


In [80]:
numTF_bins_auto = numTF_bins.query('type == "Auto"').copy()
numTF_bins_auto['obs/sum'] = numTF_bins_auto['count'] / numTF_bins_auto['count'].sum()
 
numTF_bins_chrXtoAuto = numTF_bins.query('type == "chrX"').copy()
numTF_bins_chrXtoAuto = dfforchi(numTF_bins_auto, numTF_bins_chrXtoAuto)

numTF_bins_EscapetoAuto = numTF_bins.query('type == "Escape"').copy()#.query('bin != "90-99"')
numTF_bins_EscapetoAuto = dfforchi(numTF_bins_auto, numTF_bins_EscapetoAuto)

numTF_bins_SubjecttoAuto = numTF_bins.query('type == "Subject"').copy()#.query('bin != "90-99"')
numTF_bins_SubjecttoAuto = dfforchi(numTF_bins_auto, numTF_bins_SubjecttoAuto)

degree of freedom: 8
Chi Square Results: Power_divergenceResult(statistic=289.3132656824399, pvalue=7.732569316760849e-58)
Critical Value: 15.50731305586545
Diff distribution

degree of freedom: 8
Chi Square Results: Power_divergenceResult(statistic=7.027380952380952, pvalue=0.5336813716068505)
Critical Value: 15.50731305586545
Same distribution

degree of freedom: 8
Chi Square Results: Power_divergenceResult(statistic=195.58254363283774, pvalue=5.444138020909324e-38)
Critical Value: 15.50731305586545
Diff distribution



In [82]:
numTF_bins_Escape = numTF_bins.query('type == "Escape"').copy()
numTF_bins_Escape['obs/sum'] = numTF_bins_Escape['count'] / numTF_bins_Escape['count'].sum()

numTF_bins_SubjecttoEscape = numTF_bins.query('type == "Subject"').copy()
numTF_bins_SubjecttoEscape = dfforchi(numTF_bins_Escape, numTF_bins_SubjecttoEscape)

degree of freedom: 8
Chi Square Results: Power_divergenceResult(statistic=426.44744360902257, pvalue=4.0982091392542415e-87)
Critical Value: 15.50731305586545
Diff distribution



### EXPRESSION AWARE - numTFs Distribution

In [67]:
def matched_expr(df, type, color):
    listexpr = sorted(df.query('type == @type')['GM12878_longpolyA_expr'].tolist())

    matched_df = df.query('type == @type')

    for e in listexpr:
        try:
            matched_df = matched_df.append(df.query('GM12878_longpolyA_expr == @e & type == "Auto"').sample(1))
        except:
            # print('find closest sample for:', e)
            autoonly = df.query('GM12878_longpolyA_expr != @e & type == "Auto"')
            closest = autoonly.iloc[(autoonly['GM12878_longpolyA_expr']-e).abs().argsort()[:1]]
            matched_df = matched_df.append(closest)
            # print("Closest:", closest['GM12878_longpolyA_expr'])

    matched_chart = alt.Chart(matched_df).mark_point(opacity=0.5).encode(
        x=alt.X('numTFs', title="Number of binding TFs per gene", axis=alt.Axis(labelFontSize=20, titleFontSize=25)),
        y=alt.Y('GM12878_longpolyA_expr', scale=alt.Scale(domain=(0, 300), clamp=True), title="Gene Expression (TPM)", axis=alt.Axis(labelFontSize=20, titleFontSize=25)),
        color=alt.Color('type', title="", scale=alt.Scale(range=['grey', color]), legend=alt.Legend(labelFontSize=25, titleFontSize=28)),
        tooltip='Gene Name'
    ).properties(height=500, width=600)

    return (matched_df, matched_chart)


In [68]:
tpms = pd.read_table('./db/GM12878/tpms.tsv', skiprows=4, usecols=[0,1,16])
joint = numTFs_all.reset_index().merge(tpms, left_on='index', right_on='Gene Name').dropna().rename(
    columns={'whole cell, long polyA RNA, GM12878':'GM12878_longpolyA_expr'}).sort_values('GM12878_longpolyA_expr')

In [69]:
matchedescape, matches_chart = matched_expr(joint, "Escape", "darkorange")

matches_chart + matches_chart.transform_regression('numTFs', 'GM12878_longpolyA_expr', groupby=['type']).mark_line(color='type')

In [70]:
alt.Chart(matchedescape).transform_density(
    'numTFs',  as_=['number_TFs', 'density'], groupby=['type'],
).mark_area(orient='horizontal').encode(
    alt.Y('number_TFs:Q', title="Number of binding TFs per gene", axis=alt.Axis(labelFontSize=20, titleFontSize=25), scale=alt.Scale(domain=[5, 109])),
    #alt.X('density:Q', title="Scaled Density", axis=alt.Axis(labelFontSize=20, titleFontSize=25)),
    alt.X('density:Q', stack='center', title=None, axis=alt.Axis(labels=False, values=[0], grid=False, ticks=True)),
    alt.Color('type:N', title="", scale=alt.Scale(range=['grey', 'darkorange', 'dodgerblue', 'black']), legend=None),
    alt.Column('type:N', header=alt.Header(titleOrient='bottom', titleFontSize=0, labelOrient='bottom',labelPadding=0, labelFontSize=20), sort=['Auto', 'chrX', 'Escape', 'Subject'])
).properties(height=600, width=200).configure_facet(spacing=0).configure_view(stroke=None)

In [71]:
matchedsubject, matchsub_chart = matched_expr(joint, "Subject", "dodgerblue")

matchsub_chart + matchsub_chart.transform_regression('numTFs', 'GM12878_longpolyA_expr', groupby=['type']).mark_line(color='type')

In [72]:
alt.Chart(matchedsubject).transform_density(
    'numTFs',  as_=['number_TFs', 'density'], groupby=['type'],
).mark_area(orient='horizontal').encode(
    alt.Y('number_TFs:Q', title="Number of binding TFs per gene", axis=alt.Axis(labelFontSize=20, titleFontSize=25), scale=alt.Scale(domain=[5, 109])),
    #alt.X('density:Q', title="Scaled Density", axis=alt.Axis(labelFontSize=20, titleFontSize=25)),
    alt.X('density:Q', stack='center', title=None, axis=alt.Axis(labels=False, values=[0], grid=False, ticks=True)),
    alt.Color('type:N', title="", scale=alt.Scale(range=['grey', 'dodgerblue', 'black']), legend=None),
    alt.Column('type:N', header=alt.Header(titleOrient='bottom', titleFontSize=0, labelOrient='bottom',labelPadding=0, labelFontSize=20), sort=['Auto', 'Subject'])
).properties(height=600, width=200).configure_facet(spacing=0).configure_view(stroke=None)