In [5]:
import pandas as pd
import pubchempy as pcp
from rdkit import Chem

In [2]:
# Load the curated antidepressant list from 'antidepressants.tsv'
# (built from the WHO and PubChem databases). The rendered output of this
# cell shows the columns cid, atc and pert_iname.

antidepressants = pd.read_csv(filepath_or_buffer="antidepressants.tsv", sep="\t")

display(antidepressants)

Unnamed: 0,cid,atc,pert_iname
0,2995,N06AA01,desipramine
1,3696,N06AA02,imipramine
2,65589,N06AA03,imipraminoxide
3,2801,N06AA04,clomipramine
4,9417,N06AA05,opipramol
...,...,...,...
60,6918314,N06AX24,vilazodone
61,9966051,N06AX26,vortioxetine
62,182137,N06AX27,esketamine
63,6917779,N06AX28,levomilnacipran


In [3]:
# Crear una función para obtener la estructura química canónica (SMILES) de cada antidepresivo

def get_canonical_smiles_from_cid(cid):
    """Look up the canonical SMILES string of a PubChem compound.

    Parameters
    ----------
    cid : int-like or missing
        PubChem compound identifier. Missing values (``None`` / ``NaN``)
        are skipped without issuing a request.

    Returns
    -------
    str or None
        ``compound.canonical_smiles`` as reported by PubChemPy, or ``None``
        when the identifier is missing, is not a valid integer, or PubChem
        answers with an HTTP error (the error is printed, not raised, so a
        column-wide ``apply`` keeps going).
    """
    # A missing identifier cannot be looked up; avoid a pointless request.
    if pd.isna(cid):
        return None
    try:
        # int() turns NumPy integers and float-typed ids (e.g. 2995.0, which
        # pandas produces when a column contains NaN) into a plain Python int.
        compound = pcp.Compound.from_cid(int(cid))
        return compound.canonical_smiles
    except (pcp.PubChemHTTPError, ValueError) as e:
        # ValueError covers identifiers that cannot be interpreted as an integer.
        print(f"Error fetching data for CID {cid}: {e}")
        return None

In [4]:
# Add the canonical chemical structure (SMILES) to the antidepressants dataframe.
# The column must be called 'canonical_smiles': the RDKit cell further down
# reads antidepressants['canonical_smiles'], and the saved outputs of this
# notebook show that column name. Writing it under a different name makes
# "Restart & Run All" fail with a KeyError in that later cell.
# NOTE: this issues one PubChem request per row.

antidepressants['canonical_smiles'] = antidepressants['cid'].apply(get_canonical_smiles_from_cid)

display(antidepressants)

# Save the annotated table as TSV (without the row index)

antidepressants.to_csv("antidepressants_pcp.tsv", sep="\t", index=False)

Unnamed: 0,cid,atc,pert_iname,canonical_smiles
0,2995,N06AA01,desipramine,CNCCCN1C2=CC=CC=C2CCC3=CC=CC=C31
1,3696,N06AA02,imipramine,CN(C)CCCN1C2=CC=CC=C2CCC3=CC=CC=C31
2,65589,N06AA03,imipraminoxide,C[N+](C)(CCCN1C2=CC=CC=C2CCC3=CC=CC=C31)[O-]
3,2801,N06AA04,clomipramine,CN(C)CCCN1C2=CC=CC=C2CCC3=C1C=C(C=C3)Cl
4,9417,N06AA05,opipramol,C1CN(CCN1CCCN2C3=CC=CC=C3C=CC4=CC=CC=C42)CCO
...,...,...,...,...
60,6918314,N06AX24,vilazodone,C1CN(CCN1CCCCC2=CNC3=C2C=C(C=C3)C#N)C4=CC5=C(C...
61,9966051,N06AX26,vortioxetine,CC1=CC(=C(C=C1)SC2=CC=CC=C2N3CCNCC3)C
62,182137,N06AX27,esketamine,CNC1(CCCCC1=O)C2=CC=CC=C2Cl
63,6917779,N06AX28,levomilnacipran,CCN(CC)C(=O)C1(CC1CN)C2=CC=CC=C2


In [9]:
# Re-canonicalise the PubChem SMILES with RDKit. No sanitization flag is set
# explicitly here; Chem.MolFromSmiles is called with its default arguments.
# NOTE(review): the input SMILES shown in the outputs carry no stereo markers
# (e.g. esketamine), so despite isomericSmiles=True the resulting
# 'canonical_smiles_stereo' values contain no stereochemistry either.

def _rdkit_canonical_smiles(smiles):
    """Return RDKit's canonical SMILES for `smiles`, or None if it is missing or unparsable."""
    # The PubChem lookup can leave None/NaN in the column; skip those entries.
    if not isinstance(smiles, str):
        return None
    mol = Chem.MolFromSmiles(smiles)
    # MolFromSmiles returns None (instead of raising) when parsing fails.
    if mol is None:
        return None
    return Chem.MolToSmiles(mol, isomericSmiles=True, canonical=True)

antidepressants['canonical_smiles_stereo'] = antidepressants['canonical_smiles'].apply(_rdkit_canonical_smiles)

display(antidepressants)

Unnamed: 0,cid,atc,pert_iname,canonical_smiles,canonical_smiles_stereo
0,2995,N06AA01,desipramine,CNCCCN1C2=CC=CC=C2CCC3=CC=CC=C31,CNCCCN1c2ccccc2CCc2ccccc21
1,3696,N06AA02,imipramine,CN(C)CCCN1C2=CC=CC=C2CCC3=CC=CC=C31,CN(C)CCCN1c2ccccc2CCc2ccccc21
2,65589,N06AA03,imipraminoxide,C[N+](C)(CCCN1C2=CC=CC=C2CCC3=CC=CC=C31)[O-],C[N+](C)([O-])CCCN1c2ccccc2CCc2ccccc21
3,2801,N06AA04,clomipramine,CN(C)CCCN1C2=CC=CC=C2CCC3=C1C=C(C=C3)Cl,CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc21
4,9417,N06AA05,opipramol,C1CN(CCN1CCCN2C3=CC=CC=C3C=CC4=CC=CC=C42)CCO,OCCN1CCN(CCCN2c3ccccc3C=Cc3ccccc32)CC1
...,...,...,...,...,...
60,6918314,N06AX24,vilazodone,C1CN(CCN1CCCCC2=CNC3=C2C=C(C=C3)C#N)C4=CC5=C(C...,N#Cc1ccc2[nH]cc(CCCCN3CCN(c4ccc5oc(C(N)=O)cc5c...
61,9966051,N06AX26,vortioxetine,CC1=CC(=C(C=C1)SC2=CC=CC=C2N3CCNCC3)C,Cc1ccc(Sc2ccccc2N2CCNCC2)c(C)c1
62,182137,N06AX27,esketamine,CNC1(CCCCC1=O)C2=CC=CC=C2Cl,CNC1(c2ccccc2Cl)CCCCC1=O
63,6917779,N06AX28,levomilnacipran,CCN(CC)C(=O)C1(CC1CN)C2=CC=CC=C2,CCN(CC)C(=O)C1(c2ccccc2)CC1CN


In [11]:
# Definir una función para combinar los dfs de los archivos de perturbaciones, experimentos y líneas celulares

def add_LINCS_info(antidepressants, pert_info, inst_info, cell_info):
    """Attach LINCS perturbation, experiment and cell-line metadata to the antidepressants table.

    Three joins are chained:

    1. left join with ``pert_info[["pert_id", "pert_iname"]]`` on ``pert_iname``,
       which adds the ``pert_id`` of each antidepressant;
    2. join (pandas default ``how``) with ``inst_info`` on ``pert_id``; the
       ``pert_iname`` column of ``inst_info`` is dropped first, presumably to
       avoid a duplicated name column;
    3. join (pandas default ``how``) with ``cell_info`` on ``cell_id``.

    Returns the merged DataFrame. The input frames are not modified.
    """
    return (
        antidepressants
        .merge(pert_info[["pert_id", "pert_iname"]], on="pert_iname", how='left')
        .merge(inst_info.drop(columns=["pert_iname"]), on="pert_id")
        .merge(cell_info, on="cell_id")
    )

# Fase 1 (GSE92742)

In [12]:
# Read the files needed for phase 1 (GSE92742): perturbations, experiments and cell lines

pert_info = pd.read_csv("GSE92742_Broad_LINCS_pert_info.txt", sep="\t")
# The saved output of this cell shows a warning raised on this read
# (presumably pandas' mixed-dtype DtypeWarning). low_memory=False makes pandas
# infer each column's dtype from the whole file instead of chunk by chunk.
inst_info = pd.read_csv("GSE92742_Broad_LINCS_inst_info.txt", sep="\t", low_memory=False)
cell_info = pd.read_csv("GSE92742_Broad_LINCS_cell_info.txt", sep="\t")

# Use the function defined above to build a dataframe with the phase 1 data

antidepressants_1 = add_LINCS_info(antidepressants, pert_info, inst_info, cell_info)

display(antidepressants_1)

# Save the phase 1 table as TSV (without the row index)

antidepressants_1.to_csv("LINCS_ad_phase1.tsv", sep="\t", index=False)

  inst_info = pd.read_csv("GSE92742_Broad_LINCS_inst_info.txt", sep="\t")


Unnamed: 0,cid,atc,pert_iname,canonical_smiles,canonical_smiles_stereo,pert_id,inst_id,rna_plate,rna_well,pert_type,...,modification,sample_type,primary_site,subtype,original_growth_pattern,provider_catalog_id,original_source_vendor,donor_age,donor_sex,donor_ethnicity
0,2995,N06AA01,desipramine,CNCCCN1C2=CC=CC=C2CCC3=CC=CC=C31,CNCCCN1c2ccccc2CCc2ccccc21,BRD-K60762818,CPC004_A375_6H_X1_B3_DUO52HI53LO:E11,CPC004_A375_6H_X1,E11,trt_cp,...,-666,tumor,skin,malignant melanoma,adherent,CRL-1619,ATCC,54,F,-666
1,2995,N06AA01,desipramine,CNCCCN1C2=CC=CC=C2CCC3=CC=CC=C31,CNCCCN1c2ccccc2CCc2ccccc21,BRD-K60762818,CPC004_A375_6H_X2_B3_DUO52HI53LO:E11,CPC004_A375_6H_X2,E11,trt_cp,...,-666,tumor,skin,malignant melanoma,adherent,CRL-1619,ATCC,54,F,-666
2,2995,N06AA01,desipramine,CNCCCN1C2=CC=CC=C2CCC3=CC=CC=C31,CNCCCN1c2ccccc2CCc2ccccc21,BRD-K60762818,CPC004_A375_6H_X3_B3_DUO52HI53LO:E11,CPC004_A375_6H_X3,E11,trt_cp,...,-666,tumor,skin,malignant melanoma,adherent,CRL-1619,ATCC,54,F,-666
3,2995,N06AA01,desipramine,CNCCCN1C2=CC=CC=C2CCC3=CC=CC=C31,CNCCCN1c2ccccc2CCc2ccccc21,BRD-K60762818,CPC017_A375_6H_X4_F1B5_DUO52HI53LO:M02,CPC017_A375_6H_X4,M02,trt_cp,...,-666,tumor,skin,malignant melanoma,adherent,CRL-1619,ATCC,54,F,-666
4,2995,N06AA01,desipramine,CNCCCN1C2=CC=CC=C2CCC3=CC=CC=C31,CNCCCN1c2ccccc2CCc2ccccc21,BRD-K60762818,CPC020_A375_6H_X1_B4_DUO52HI53LO:F21,CPC020_A375_6H_X1,F21,trt_cp,...,-666,tumor,skin,malignant melanoma,adherent,CRL-1619,ATCC,54,F,-666
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4656,19493,N06AF04,tranylcypromine,C1C(C1N)C2=CC=CC=C2,NC1CC1c1ccccc1,BRD-K88809146,NMH002_FIBRNPC_6H_X3_B6_DUO52HI53LO:H08,NMH002_FIBRNPC_6H_X3,H08,trt_cp,...,-666,normal,skin,-666,adherent,-666,-666,-666,-666,-666
4657,127151,N06AX18,reboxetine,CCOC1=CC=CC=C1OC(C2CNCCO2)C3=CC=CC=C3,CCOc1ccccc1OC(c1ccccc1)C1CNCCO1,BRD-K32814891,NMH001_FIBRNPC_6H_X1_B6_DUO52HI53LO:N11,NMH001_FIBRNPC_6H_X1,N11,trt_cp,...,-666,normal,skin,-666,adherent,-666,-666,-666,-666,-666
4658,127151,N06AX18,reboxetine,CCOC1=CC=CC=C1OC(C2CNCCO2)C3=CC=CC=C3,CCOc1ccccc1OC(c1ccccc1)C1CNCCO1,BRD-K32814891,NMH001_FIBRNPC_24H_X1_B6_DUO52HI53LO:N11,NMH001_FIBRNPC_24H_X1,N11,trt_cp,...,-666,normal,skin,-666,adherent,-666,-666,-666,-666,-666
4659,127151,N06AX18,reboxetine,CCOC1=CC=CC=C1OC(C2CNCCO2)C3=CC=CC=C3,CCOc1ccccc1OC(c1ccccc1)C1CNCCO1,BRD-K32814891,NMH001_FIBRNPC_24H_X3_B6_DUO52HI53LO:N11,NMH001_FIBRNPC_24H_X3,N11,trt_cp,...,-666,normal,skin,-666,adherent,-666,-666,-666,-666,-666


# Fase 2 (GSE70138)

In [13]:
# Read the files needed for phase 2 (GSE70138): perturbations, experiments and cell lines.
# NOTE: this rebinds pert_info / inst_info / cell_info, replacing whatever an
# earlier cell assigned to those names.

pert_info = pd.read_csv("GSE70138_Broad_LINCS_pert_info.txt", sep="\t")
inst_info = pd.read_csv("GSE70138_Broad_LINCS_inst_info.txt", sep="\t")
cell_info = pd.read_csv("GSE70138_Broad_LINCS_cell_info.txt", sep="\t")

# Build the phase 2 dataframe with the merge function defined earlier

antidepressants_2 = add_LINCS_info(
    antidepressants=antidepressants,
    pert_info=pert_info,
    inst_info=inst_info,
    cell_info=cell_info,
)

display(antidepressants_2)

# Save the phase 2 table as TSV (without the row index)

antidepressants_2.to_csv("LINCS_ad_phase2.tsv", sep="\t", index=False)

Unnamed: 0,cid,atc,pert_iname,canonical_smiles,canonical_smiles_stereo,pert_id,inst_id,cell_id,det_plate,det_well,...,modification,sample_type,primary_site,subtype,original_growth_pattern,provider_catalog_id,original_source_vendor,donor_age,donor_sex,donor_ethnicity
0,3696,N06AA02,imipramine,CN(C)CCCN1C2=CC=CC=C2CCC3=CC=CC=C31,CN(C)CCCN1c2ccccc2CCc2ccccc21,BRD-K38436528,REP.A013_A375_24H_X1_B24:E01,A375,REP.A013_A375_24H_X1_B24,E01,...,-666,tumor,skin,malignant melanoma,adherent,CRL-1619,ATCC,54,F,-666
1,3696,N06AA02,imipramine,CN(C)CCCN1C2=CC=CC=C2CCC3=CC=CC=C31,CN(C)CCCN1c2ccccc2CCc2ccccc21,BRD-K38436528,REP.A013_A375_24H_X1_B24:E02,A375,REP.A013_A375_24H_X1_B24,E02,...,-666,tumor,skin,malignant melanoma,adherent,CRL-1619,ATCC,54,F,-666
2,3696,N06AA02,imipramine,CN(C)CCCN1C2=CC=CC=C2CCC3=CC=CC=C31,CN(C)CCCN1c2ccccc2CCc2ccccc21,BRD-K38436528,REP.A013_A375_24H_X1_B24:E03,A375,REP.A013_A375_24H_X1_B24,E03,...,-666,tumor,skin,malignant melanoma,adherent,CRL-1619,ATCC,54,F,-666
3,3696,N06AA02,imipramine,CN(C)CCCN1C2=CC=CC=C2CCC3=CC=CC=C31,CN(C)CCCN1c2ccccc2CCc2ccccc21,BRD-K38436528,REP.A013_A375_24H_X1_B24:E04,A375,REP.A013_A375_24H_X1_B24,E04,...,-666,tumor,skin,malignant melanoma,adherent,CRL-1619,ATCC,54,F,-666
4,3696,N06AA02,imipramine,CN(C)CCCN1C2=CC=CC=C2CCC3=CC=CC=C31,CN(C)CCCN1c2ccccc2CCc2ccccc21,BRD-K38436528,REP.A013_A375_24H_X1_B24:E05,A375,REP.A013_A375_24H_X1_B24,E05,...,-666,tumor,skin,malignant melanoma,adherent,CRL-1619,ATCC,54,F,-666
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4505,19493,N06AF04,tranylcypromine,C1C(C1N)C2=CC=CC=C2,NC1CC1c1ccccc1,BRD-K47029922,LJP008_SKL_24H_X3_B21:N08,SKL,LJP008_SKL_24H_X3_B21,N08,...,-666,primary,muscle,normal primary skeletal muscle cells,adherent,CC-2561,LONZA,-666,-666,-666
4506,19493,N06AF04,tranylcypromine,C1C(C1N)C2=CC=CC=C2,NC1CC1c1ccccc1,BRD-K47029922,LJP008_SKL_24H_X3_B21:N09,SKL,LJP008_SKL_24H_X3_B21,N09,...,-666,primary,muscle,normal primary skeletal muscle cells,adherent,CC-2561,LONZA,-666,-666,-666
4507,19493,N06AF04,tranylcypromine,C1C(C1N)C2=CC=CC=C2,NC1CC1c1ccccc1,BRD-K47029922,LJP008_SKL_24H_X3_B21:N10,SKL,LJP008_SKL_24H_X3_B21,N10,...,-666,primary,muscle,normal primary skeletal muscle cells,adherent,CC-2561,LONZA,-666,-666,-666
4508,19493,N06AF04,tranylcypromine,C1C(C1N)C2=CC=CC=C2,NC1CC1c1ccccc1,BRD-K47029922,LJP008_SKL_24H_X3_B21:N11,SKL,LJP008_SKL_24H_X3_B21,N11,...,-666,primary,muscle,normal primary skeletal muscle cells,adherent,CC-2561,LONZA,-666,-666,-666
