# SIDER

In [1]:
import numpy as np
import pandas as pd
import deepchem as dc
from sklearn.ensemble import RandomForestClassifier
from deepchem.models.multitask import SingletaskToMultitask
from deepchem import metrics
from deepchem.metrics import Metric
from deepchem.models.sklearn_models import SklearnModel
from deepchem.splits import RandomStratifiedSplitter, RandomSplitter

In [2]:
from deepchem.molnet.load_function.sider_datasets import load_sider

In [41]:
tasks, datasets, transformers = dc.molnet.load_sider(featurizer='ECFP')

In [42]:
train_dataset, valid_dataset, test_dataset = datasets

In [43]:
test_dataset

<DiskDataset X.shape: (143, 1024), y.shape: (143, 27), w.shape: (143, 27), ids: ['C1=CC=C(C=C1)COC(=O)C2=CC=CC=C2'
 'CN1C2CCC1CC(C2)OC(C3=CC=CC=C3)C4=CC=CC=C4'
 'CC(CC1=CC=CC=C1)N(C)CC2=CC=CC=C2' ...
 'CCC12CC(=C)C3C(C1CC[C@]2(C#C)O)CCC4=CC(=O)CCC34'
 'CC[C@]12CC(=C)[C@H]3[C@H]([C@@H]1CC[C@]2(C#C)O)CCC4=CCCC[C@H]34'
 'CC(C)(C)C1=CC(=C(C=C1NC(=O)C2=CNC3=CC=CC=C3C2=O)O)C(C)(C)C'], task_names: ['Hepatobiliary disorders' 'Metabolism and nutrition disorders'
 'Product issues' ... 'Cardiac disorders' 'Nervous system disorders'
 'Injury, poisoning and procedural complications']>

In [44]:
train_dataset

<DiskDataset X.shape: (1141, 1024), y.shape: (1141, 27), w.shape: (1141, 27), task_names: ['Hepatobiliary disorders' 'Metabolism and nutrition disorders'
 'Product issues' ... 'Cardiac disorders' 'Nervous system disorders'
 'Injury, poisoning and procedural complications']>

MultitaskClassifier

In [6]:
model = dc.models.MultitaskClassifier(n_tasks=27, n_features=1024, layer_sizes=[1000])

In [7]:
model.fit(train_dataset, nb_epoch=10)

0.9188870429992676

In [8]:
metric_model = dc.metrics.Metric(dc.metrics.roc_auc_score)

In [9]:
print('train set score:', model.evaluate(train_dataset, [metric_model], transformers))

train set score: {'roc_auc_score': 0.9290745447301076}


In [10]:
print('test set score:', model.evaluate(test_dataset, [metric_model], transformers))

test set score: {'roc_auc_score': 0.6151992763689574}


MultitaskFitTransformRegressor

In [11]:
model = dc.models.MultitaskFitTransformRegressor(n_tasks=27, n_features=1024)

In [12]:
model.fit(train_dataset, nb_epoch=10)

0.37460130055745444

In [13]:
print('train set score:', model.evaluate(train_dataset, [metric_model], transformers))

train set score: {'roc_auc_score': 0.0576700418843663}


In [14]:
print('test set score:', model.evaluate(test_dataset, [metric_model], transformers))

test set score: {'roc_auc_score': 0.3925719621749391}


GraphConvModel

In [15]:
import warnings
warnings.filterwarnings("ignore") 

In [16]:
tasks, datasets, transformers = dc.molnet.load_sider(featurizer=dc.feat.ConvMolFeaturizer())
train_dataset, valid_dataset, test_dataset = datasets

In [17]:
model = dc.models.GraphConvModel(n_tasks=27, mode='classification')
model.fit(train_dataset, nb_epoch=10)

1.0731877326965331

In [18]:
metric = dc.metrics.Metric(dc.metrics.roc_auc_score)
print('Training set score:', model.evaluate(train_dataset, [metric], transformers))
print('Test set score:', model.evaluate(test_dataset, [metric], transformers))

Training set score: {'roc_auc_score': 0.690278254355499}
Test set score: {'roc_auc_score': 0.5840829973639654}


GATmodel

In [28]:
tasks, datasets, transformers = dc.molnet.load_sider(featurizer=dc.feat.MolGraphConvFeaturizer())
train_dataset, valid_dataset, test_dataset = datasets

In [22]:
from deepchem.models import GATModel
model = dc.models.GATModel(mode='classification', n_tasks=27,batch_size=16, learning_rate=0.001)

In [23]:
model.fit(train_dataset, nb_epoch=10)

1.344274444580078

In [24]:
metric = dc.metrics.Metric(dc.metrics.roc_auc_score)
print('Train set score:', model.evaluate(train_dataset, [metric], transformers))
print('Test set score:', model.evaluate(test_dataset, [metric], transformers))

Train set score: {'roc_auc_score': 0.6382897487279217}
Test set score: {'roc_auc_score': 0.5554644454514539}


GCNModel

In [29]:
from deepchem.models import GCNModel
model = dc.models.GCNModel(mode='classification', n_tasks=27,batch_size=16, learning_rate=0.001)

In [30]:
metric = dc.metrics.Metric(dc.metrics.roc_auc_score)
print('Train set score:', model.evaluate(train_dataset, [metric], transformers))
print('Test set score:', model.evaluate(test_dataset, [metric], transformers))

Train set score: {'roc_auc_score': 0.4794227821960575}
Test set score: {'roc_auc_score': 0.4963033158728781}
