In [11]:
import pandas as pd
import altair as alt

In [12]:
# Read the perturbagen info tables, one per phase: pert_info_1 comes from the
# GSE92742 file and pert_info_2 from the GSE70138 file (both tab-separated).
pert_info_1, pert_info_2 = (
    pd.read_csv(pert_info_path, sep="\t")
    for pert_info_path in (
        "GSE92742_Broad_LINCS_pert_info.txt",
        "GSE70138_Broad_LINCS_pert_info.txt",
    )
)

In [13]:
# Read the antidepressant reference table (based on the WHO and PubChem databases).
# NOTE(review): this comment previously referred to 'antidepressants_pcp.tsv',
# while the code reads "antidepressants.tsv" -- confirm which file is intended.
antidepressants = pd.read_csv(
    "antidepressants.tsv",
    delimiter="\t",  # `delimiter` is pandas' alias for `sep`
)

display(antidepressants)

Unnamed: 0,cid,atc,pert_iname
0,2995,N06AA01,desipramine
1,3696,N06AA02,imipramine
2,65589,N06AA03,imipraminoxide
3,2801,N06AA04,clomipramine
4,9417,N06AA05,opipramol
...,...,...,...
60,6918314,N06AX24,vilazodone
61,9966051,N06AX26,vortioxetine
62,182137,N06AX27,esketamine
63,6917779,N06AX28,levomilnacipran


In [18]:
# Keep the antidepressants that are present in the first 'pert_info' table.
# A left merge on pert_iname attaches pert_id / canonical_smiles to every
# antidepressant; rows that found no match (pert_id is NaN) are then dropped.
# NOTE(review): the saved output of this cell shows an exact duplicate row
# (trimipramine) -- check whether duplicates should be removed here.
antidepressants_1 = (
    antidepressants
    .merge(
        pert_info_1[["pert_id", "pert_iname", "canonical_smiles"]],
        on="pert_iname",
        how="left",
    )
    .dropna(subset=["pert_id"])
)

display(antidepressants_1)

Unnamed: 0,cid,atc,pert_iname,pert_id,canonical_smiles
0,2995,N06AA01,desipramine,BRD-K60762818,CNCCCN1c2ccccc2CCc2ccccc12
1,3696,N06AA02,imipramine,BRD-K38436528,CN(C)CCCN1c2ccccc2CCc2ccccc12
3,2801,N06AA04,clomipramine,BRD-K52989797,CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc12
4,9417,N06AA05,opipramol,BRD-K43786866,OCCN1CCN(CCCN2c3ccccc3C=Cc3ccccc23)CC1
5,5584,N06AA06,trimipramine,BRD-A19195498,CC(CN(C)C)CN1c2ccccc2CCc2ccccc12
6,5584,N06AA06,trimipramine,BRD-A19195498,CC(CN(C)C)CN1c2ccccc2CCc2ccccc12
7,3947,N06AA07,lofepramine,BRD-K82147103,CN(CCCN1c2ccccc2CCc2ccccc12)CC(=O)c1ccc(Cl)cc1
8,9419,N06AA08,dibenzepin,BRD-K79145749,CN(C)CCN1C(=O)c2ccccc2N(C)c3ccccc13
9,2160,N06AA09,amitriptyline,BRD-K53737926,CN(C)CCC=C1c2ccccc2CCc2ccccc12
10,4543,N06AA10,nortriptyline,BRD-K91263825,CNCCC=C1c2ccccc2CCc2ccccc12


In [19]:
# Same selection for the second 'pert_info' table: left-merge on pert_iname,
# then discard the antidepressants that received no pert_id.
antidepressants_2 = (
    antidepressants
    .merge(
        pert_info_2[["pert_id", "pert_iname", "canonical_smiles"]],
        on="pert_iname",
        how="left",
    )
    .dropna(subset=["pert_id"])
)

display(antidepressants_2)

Unnamed: 0,cid,atc,pert_iname,pert_id,canonical_smiles
1,3696,N06AA02,imipramine,BRD-K38436528,CN(C)CCCN1c2ccccc2CCc2ccccc12
3,2801,N06AA04,clomipramine,BRD-K52989797,CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc12
5,5584,N06AA06,trimipramine,BRD-A19195498,CC(CN(C)C)CN1c2ccccc2CCc2ccccc12
8,2160,N06AA09,amitriptyline,BRD-K53737926,CN(C)CCC=C1c2ccccc2CCc2ccccc12
9,4543,N06AA10,nortriptyline,BRD-K91263825,CNCCC=C1c2ccccc2CCc2ccccc12
10,4976,N06AA11,protriptyline,BRD-K42098891,CNCCCC1c2ccccc2C=Cc2ccccc12
11,667477,N06AA12,doxepin,BRD-K37694030,CN(C)CC\C=C1\c2ccccc2COc2ccccc12
15,5284550,N06AA16,dosulepin,BRD-K54759182,CN(C)CC\C=C1/c2ccccc2CSc2ccccc12
16,2170,N06AA17,amoxapine,BRD-K02265150,Clc1ccc2Oc3ccccc3N=C(N3CCNCC3)c2c1
18,34870,N06AA19,amineptine,BRD-K24219278,OC(=O)CCCCCCNC1c2ccccc2CCc2ccccc12


# Fase 1

In [15]:
# Keep only the rows of antidepressants that have more than one distinct
# pert_id associated with them.
# The distinct-pert_id count of each pert_iname group is broadcast back to the
# rows with transform('nunique'), so the selection is a plain boolean mask:
# row order, index and columns are the same as with groupby(...).filter(...).
# NaN pert_ids are not counted; rows without a pert_id were already dropped
# when antidepressants_1 was built.
grouped_1 = antidepressants_1.groupby('pert_iname')
filtered_rows_1 = antidepressants_1[grouped_1['pert_id'].transform('nunique') > 1]

# Build the result as a new frame instead of calling reset_index(inplace=True)
# on a column selection: later cells add columns to this frame, and mutating a
# selection of another frame can raise SettingWithCopyWarning.
filtered_antidepressants_1 = (
    filtered_rows_1[['cid', 'atc', 'pert_iname', 'pert_id', 'canonical_smiles']]
    .reset_index(drop=True)
)

display(filtered_antidepressants_1)

Unnamed: 0,cid,atc,pert_iname,pert_id,canonical_smiles
0,667477,N06AA12,doxepin,BRD-K36616567,CN(C)CCC=C1c2ccccc2COc3ccccc13
1,667477,N06AA12,doxepin,BRD-K37694030,CN(C)CC\C=C1\c2ccccc2COc2ccccc12
2,667477,N06AA12,doxepin,BRD-K54462405,CN(C)CC/C=C/1\c2ccccc2COc3ccccc13
3,4011,N06AA21,maprotiline,BRD-K03319035,CNCCC[C@]1(CC[C@H]2c3ccccc31)c4ccccc24
4,4011,N06AA21,maprotiline,BRD-K25433859,CNCCCC12CCC(c3ccccc13)c1ccccc21
5,43815,N06AB05,paroxetine,BRD-A62326914,Fc1ccc(cc1)C2CCNCC2COc3ccc4OCOc4c3
6,43815,N06AB05,paroxetine,BRD-K19277754,CN1CC[C@H]([C@H](COc2ccc3OCOc3c2)C1)c1ccc(F)cc1
7,43815,N06AB05,paroxetine,BRD-K37991163,Fc1ccc(cc1)[C@@H]1CCNC[C@H]1COc1ccc2OCOc2c1
8,68617,N06AB06,sertraline,BRD-K57174586,CN[C@H]1CC[C@H](c2ccc(Cl)c(Cl)c2)c2ccccc12
9,68617,N06AB06,sertraline,BRD-K82036761,CN[C@H]1CC[C@@H](c2ccc(Cl)c(Cl)c2)c2ccccc12


In [16]:
# Add information about how the canonical_smiles of each entry differs from the
# other entries of the same antidepressant.
# The annotations are positional: after flattening, entry i is assigned to row i
# of filtered_antidepressants_1, so the number and order of entries must match
# the rows of that frame exactly.
# Each inner list keeps one of the original blank-line-separated groups
# (presumably one group per antidepressant -- verify against the frame); the
# IDs in parentheses appear to be the pert_ids each row is compared against.
isomer_notes_by_group = [
    ['spatial (BRD-K54462405)',
     'structural (BRD-K36616567, BRD-K54462405)',
     'spatial (BRD-K36616567)'],

    ['spatial (BRD-K25433859)',
     'spatial (BRD-K03319035)'],

    ['structural (BRD-K19277754); spatial (BRD-K37991163)',
     'structural (BRD-A62326914); spatial (BRD-K37991163)',
     'spatial (BRD-A62326914, BRD-K19277754)'],

    ['spatial (BRD-K82036761)',
     'spatial (BRD-K57174586)'],

    ['structural (BRD-K72676686)',
     'structural (BRD-K31534764)'],

    ['spatial (BRD-K47029922, BRD-K88809146)',
     'spatial (BRD-A43974575, BRD-K88809146)',
     'spatial (BRD-A43974575, BRD-K47029922)'],

    ['spatial (BRD-K83153774)',
     'spatial (BRD-A64977602)'],

    ['structural (BRD-K32480324, BRD-K32814891)',
     'spatial (BRD-K32814891)',
     'spatial (BRD-K32480324)'],
]

# Flatten the groups back into the single row-ordered list.
metadata = [note for group_notes in isomer_notes_by_group for note in group_notes]

filtered_antidepressants_1['isomer_type'] = metadata

display(filtered_antidepressants_1)

Unnamed: 0,cid,atc,pert_iname,pert_id,canonical_smiles,isomer_type
0,667477,N06AA12,doxepin,BRD-K36616567,CN(C)CCC=C1c2ccccc2COc3ccccc13,spatial (BRD-K54462405)
1,667477,N06AA12,doxepin,BRD-K37694030,CN(C)CC\C=C1\c2ccccc2COc2ccccc12,"structural (BRD-K36616567, BRD-K54462405)"
2,667477,N06AA12,doxepin,BRD-K54462405,CN(C)CC/C=C/1\c2ccccc2COc3ccccc13,spatial (BRD-K36616567)
3,4011,N06AA21,maprotiline,BRD-K03319035,CNCCC[C@]1(CC[C@H]2c3ccccc31)c4ccccc24,spatial (BRD-K25433859)
4,4011,N06AA21,maprotiline,BRD-K25433859,CNCCCC12CCC(c3ccccc13)c1ccccc21,spatial (BRD-K03319035)
5,43815,N06AB05,paroxetine,BRD-A62326914,Fc1ccc(cc1)C2CCNCC2COc3ccc4OCOc4c3,structural (BRD-K19277754); spatial (BRD-K3799...
6,43815,N06AB05,paroxetine,BRD-K19277754,CN1CC[C@H]([C@H](COc2ccc3OCOc3c2)C1)c1ccc(F)cc1,structural (BRD-A62326914); spatial (BRD-K3799...
7,43815,N06AB05,paroxetine,BRD-K37991163,Fc1ccc(cc1)[C@@H]1CCNC[C@H]1COc1ccc2OCOc2c1,"spatial (BRD-A62326914, BRD-K19277754)"
8,68617,N06AB06,sertraline,BRD-K57174586,CN[C@H]1CC[C@H](c2ccc(Cl)c(Cl)c2)c2ccccc12,spatial (BRD-K82036761)
9,68617,N06AB06,sertraline,BRD-K82036761,CN[C@H]1CC[C@@H](c2ccc(Cl)c(Cl)c2)c2ccccc12,spatial (BRD-K57174586)


In [21]:
# Add a 'phase_2' column to 'filtered_antidepressants_1': 1 when the row's
# pert_id also appears among the phase-2 antidepressants, 0 otherwise.
filtered_antidepressants_1['phase_2'] = (
    filtered_antidepressants_1['pert_id']
    .isin(antidepressants_2['pert_id'])
    .astype('int64')  # boolean membership -> 0/1 integers
)

display(filtered_antidepressants_1)

# Save the annotated table as a tab-separated file, without the index column.
filtered_antidepressants_1.to_csv("antidepressants_stereo_phase1.tsv", sep="\t", index=False)

Unnamed: 0,cid,atc,pert_iname,pert_id,canonical_smiles,isomer_type,phase_2
0,667477,N06AA12,doxepin,BRD-K36616567,CN(C)CCC=C1c2ccccc2COc3ccccc13,spatial (BRD-K54462405),0
1,667477,N06AA12,doxepin,BRD-K37694030,CN(C)CC\C=C1\c2ccccc2COc2ccccc12,"structural (BRD-K36616567, BRD-K54462405)",1
2,667477,N06AA12,doxepin,BRD-K54462405,CN(C)CC/C=C/1\c2ccccc2COc3ccccc13,spatial (BRD-K36616567),0
3,4011,N06AA21,maprotiline,BRD-K03319035,CNCCC[C@]1(CC[C@H]2c3ccccc31)c4ccccc24,spatial (BRD-K25433859),0
4,4011,N06AA21,maprotiline,BRD-K25433859,CNCCCC12CCC(c3ccccc13)c1ccccc21,spatial (BRD-K03319035),1
5,43815,N06AB05,paroxetine,BRD-A62326914,Fc1ccc(cc1)C2CCNCC2COc3ccc4OCOc4c3,structural (BRD-K19277754); spatial (BRD-K3799...,0
6,43815,N06AB05,paroxetine,BRD-K19277754,CN1CC[C@H]([C@H](COc2ccc3OCOc3c2)C1)c1ccc(F)cc1,structural (BRD-A62326914); spatial (BRD-K3799...,0
7,43815,N06AB05,paroxetine,BRD-K37991163,Fc1ccc(cc1)[C@@H]1CCNC[C@H]1COc1ccc2OCOc2c1,"spatial (BRD-A62326914, BRD-K19277754)",1
8,68617,N06AB06,sertraline,BRD-K57174586,CN[C@H]1CC[C@H](c2ccc(Cl)c(Cl)c2)c2ccccc12,spatial (BRD-K82036761),0
9,68617,N06AB06,sertraline,BRD-K82036761,CN[C@H]1CC[C@@H](c2ccc(Cl)c(Cl)c2)c2ccccc12,spatial (BRD-K57174586),0


# Fase 2

In [17]:
# Keep only the rows of antidepressants that have more than one distinct
# pert_id associated with them.
# The distinct-pert_id count of each pert_iname group is broadcast back to the
# rows with transform('nunique'), so the selection is a plain boolean mask:
# row order, index and columns are the same as with groupby(...).filter(...).
# NaN pert_ids are not counted; rows without a pert_id were already dropped
# when antidepressants_2 was built.
grouped_2 = antidepressants_2.groupby('pert_iname')
filtered_rows_2 = antidepressants_2[grouped_2['pert_id'].transform('nunique') > 1]

# Build the result as a new frame instead of calling reset_index(inplace=True)
# on a column selection, so that adding columns to it later cannot raise
# SettingWithCopyWarning.
filtered_antidepressants_2 = (
    filtered_rows_2[['cid', 'atc', 'pert_iname', 'pert_id', 'canonical_smiles']]
    .reset_index(drop=True)
)

display(filtered_antidepressants_2)

Unnamed: 0,cid,atc,pert_iname,pert_id,canonical_smiles
