# Playing with the data structure

In [1]:
%load_ext autoreload
%autoreload 2

import malt
import torch
import seaborn as sns
from malt.molecule import Molecule

Using backend: pytorch


In [2]:
import pandas as pd
from pathlib import Path

# Titration-curve CSV, addressed relative to the current user's home directory.
f = f'{Path.home()}/dev/choderalab/data/data/moonshot_fluorescence_titration_curves.csv'

# Read with the first column as the index, drop rows with missing values, and
# shorten the two measurement column names, all in one chained expression.
df = (
    pd.read_csv(f, index_col=0)
    .dropna()
    .rename(columns={'concentration': 'c', 'inhibition': 'y'})
)

In [3]:
from dgllife.utils import smiles_to_bigraph, CanonicalAtomFeaturizer, CanonicalBondFeaturizer

def parse_graph(smiles):
    """Build a featurized graph for ``smiles`` via ``smiles_to_bigraph``.

    A new ``CanonicalAtomFeaturizer`` and ``CanonicalBondFeaturizer`` are
    created on every call and passed as the node and edge featurizers.

    Parameters
    ----------
    smiles : str
        SMILES string of the molecule to convert.

    Returns
    -------
    The object returned by ``smiles_to_bigraph`` for this SMILES string.
    """
    atom_featurizer = CanonicalAtomFeaturizer()
    bond_featurizer = CanonicalBondFeaturizer()
    return smiles_to_bigraph(
        smiles=smiles,
        node_featurizer=atom_featurizer,
        edge_featurizer=bond_featurizer,
    )

In [11]:
from malt import Dataset, AssayedMolecule
from tqdm import tqdm

# Group the measurements by compound: each distinct SMILES string becomes one
# AssayedMolecule that carries all of its rows as 'fluorescence' metadata.
molecules = []
for smiles, mol_metadata in tqdm(df.groupby('SMILES')):
    # One dict per measurement row; the SMILES column is dropped because it is
    # already the group key.
    fluorescence_records = mol_metadata.drop('SMILES', axis=1).to_dict('records')
    molecule = AssayedMolecule(
        smiles=smiles,
        # g = parse_graph(smiles),
        metadata={'fluorescence': fluorescence_records},
    )
    molecules.append(molecule)

# create dataset
data = Dataset(molecules)

100%|████████████████████████████████████████████████████████████████████████████| 2281/2281 [00:01<00:00, 1364.54it/s]


In [6]:
# Repeats the autoreload setup from the notebook's first cell; the saved output
# of this cell shows IPython reporting that the extension is already loaded.
# NOTE(review): `%reload_ext autoreload` (as that message suggests), or removing
# this duplicated setup, would avoid the notice — confirm the cell is still needed.
%load_ext autoreload
%autoreload 2
import malt
# Bare last expression: displays the helper's repr (its signature appears in
# the saved output).
malt.data.utils.batch_metadata

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


<function malt.data.utils.batch_metadata(molecule, key, **kwargs)>

In [40]:
import copy

# Deep-copy the first dataset entry; as the cell's final expression, the copy
# is what gets displayed.
first_entry = data[0]
copy.deepcopy(first_entry)

Brc1ccc(CN2C(=O)C(=O)c3cccc(Br)c23)cc1

In [30]:
# View the dataset in batches of size one, passing malt's
# `batch_assay_metadata` helper as the metadata batching function.
assay_batcher = malt.data.utils.batch_assay_metadata
dl = data.view(batch_size=1, batch_metadata=assay_batcher)

In [45]:
# Demonstrates that an AssayedMolecule can be read by key but does not support
# item assignment. The expected TypeError is caught and printed so the cell
# still shows the error without aborting a Restart & Run All.
try:
    data[0]['fluorescence'] = 0
except TypeError as err:
    print(f'{type(err).__name__}: {err}')

TypeError: 'AssayedMolecule' object does not support item assignment

In [37]:
# Pull only the first batch from the loader and report the length of its
# second element; nothing is printed when the loader yields no batches.
d = next(iter(dl), None)
if d is not None:
    print(len(d[1]))

24


In [None]:
# `key` was not assigned in any earlier cell, so this loop previously ran only
# because of leftover kernel state. Define it explicitly: the loading cell
# renames the measurement columns to 'c' (concentration) and 'y' (inhibition).
# Switch to 'c' to tensorize the concentrations instead.
key = 'y'

# Build one tensor per molecule from the chosen field of its fluorescence
# records. `t` is rebound on every iteration, so only the last molecule's
# tensor remains afterwards; the progress bar shows how fast the dataset is
# traversed.
for idx in tqdm(range(len(data))):
    t = torch.Tensor([d[key] for d in data[idx]['fluorescence']])

100%|███████████████████████████████████████████████████████████████████████████| 2281/2281 [00:00<00:00, 81468.80it/s]


In [None]:
# Look up the first molecule's 'fluorescence' metadata by key and display it.
# NOTE(review): the saved output lists 'concentration'/'inhibition' keys, but
# the loading cell renames those columns to 'c'/'y' — the output looks stale;
# re-run to confirm.
mol = molecules[0]
mol['fluorescence']

[{'concentration': 7.72e-07, 'inhibition': -0.0486},
 {'concentration': 3.86e-07, 'inhibition': 0.0359},
 {'concentration': 1.2e-08, 'inhibition': -0.0295},
 {'concentration': 9.9e-08, 'inhibition': 0.0575},
 {'concentration': 0.000198, 'inhibition': -0.256},
 {'concentration': 9.9e-05, 'inhibition': -0.114},
 {'concentration': 4.95e-05, 'inhibition': -0.0913},
 {'concentration': 2.48e-05, 'inhibition': -0.0418},
 {'concentration': 0.000198, 'inhibition': -0.195},
 {'concentration': 1.24e-05, 'inhibition': 0.0501},
 {'concentration': 9.9e-05, 'inhibition': -0.059},
 {'concentration': 7.429999999999999e-06, 'inhibition': 0.127},
 {'concentration': 4.95e-05, 'inhibition': -0.163},
 {'concentration': 1.93e-07, 'inhibition': -0.106},
 {'concentration': 2.48e-05, 'inhibition': -0.0705},
 {'concentration': 1.55e-06, 'inhibition': -0.0493},
 {'concentration': 1.24e-05, 'inhibition': 0.0573},
 {'concentration': 7.72e-07, 'inhibition': 0.0654},
 {'concentration': 7.429999999999999e-06, 'inhibit

In [670]:
# Show the titration curve as a list of per-row dicts.
# NOTE(review): `titration_curve` is not defined in any cell visible before
# this one — it comes from earlier kernel state; confirm where it is built.
titration_curve.to_dict(orient='records')

[{'concentration': 9.95e-05, 'inhibition': 0.7020000000000001},
 {'concentration': 7.4629998207092285e-06, 'inhibition': 0.0881},
 {'concentration': 4.975e-05, 'inhibition': 0.583},
 {'concentration': 2.48799991607666e-06, 'inhibition': 0.0404},
 {'concentration': 2.48799991607666e-05, 'inhibition': 0.358},
 {'concentration': 1.552000045776367e-06, 'inhibition': -0.0335},
 {'concentration': 1.24399995803833e-05, 'inhibition': 0.238},
 {'concentration': 7.760000228881835e-07, 'inhibition': -0.00229},
 {'concentration': 7.4629998207092285e-06, 'inhibition': 0.0596999999999999},
 {'concentration': 3.8800001144409177e-07, 'inhibition': -0.0748},
 {'concentration': 2.48799991607666e-06, 'inhibition': 0.0227999999999999},
 {'concentration': 1.9400000572204589e-07, 'inhibition': -0.0219},
 {'concentration': 1.552000045776367e-06, 'inhibition': -0.0351},
 {'concentration': 1.0000000149011613e-07, 'inhibition': -0.0605999999999999},
 {'concentration': 7.760000228881835e-07, 'inhibition': -0.013

In [669]:
# Same titration curve, shown as one tuple per row of the underlying values.
# NOTE(review): `titration_curve` is not defined in any cell visible before
# this one — it comes from earlier kernel state; confirm where it is built.
[tuple(row) for row in titration_curve.values]

[(9.95e-05, 0.7020000000000001),
 (7.4629998207092285e-06, 0.0881),
 (4.975e-05, 0.583),
 (2.48799991607666e-06, 0.0404),
 (2.48799991607666e-05, 0.358),
 (1.552000045776367e-06, -0.0335),
 (1.24399995803833e-05, 0.238),
 (7.760000228881835e-07, -0.00229),
 (7.4629998207092285e-06, 0.0596999999999999),
 (3.8800001144409177e-07, -0.0748),
 (2.48799991607666e-06, 0.0227999999999999),
 (1.9400000572204589e-07, -0.0219),
 (1.552000045776367e-06, -0.0351),
 (1.0000000149011613e-07, -0.0605999999999999),
 (7.760000228881835e-07, -0.0134),
 (5.0000000745058066e-08, -0.0263),
 (3.8800001144409177e-07, -0.0658),
 (1.9400000572204589e-07, -0.034),
 (1.0000000149011613e-07, -0.0836),
 (5.0000000745058066e-08, -0.0286),
 (9.95e-05, 0.617),
 (4.975e-05, 0.432),
 (2.48799991607666e-05, 0.349),
 (1.24399995803833e-05, 0.252),
 (3.8800001144409177e-07, -0.045),
 (1.9400000572204589e-07, -0.048),
 (1.0000000149011613e-07, 0.462),
 (9.95e-05, 1.03),
 (2.489999961853027e-05, 1.01),
 (9.95e-05, 0.919),
 (

In [None]:
# (Intentionally empty: this cell previously held only a stray ")", which is a SyntaxError.)

In [608]:
# m2[4].metadata