In [1]:
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import ExponentialLR, StepLR
import torch.nn.functional as F

In [2]:
# Detect once whether PyTorch can see a CUDA device; later cells hand this flag to
# the StackAugmentedRNN constructors through their `use_cuda=` argument.
use_cuda = torch.cuda.is_available()

In [3]:
# Fetch the project sources (a later cell changes into their `release` directory)
# and install mol2vec directly from GitHub.
# NOTE(review): neither source is pinned to a commit or tag, so a fresh run may pull
# different code than the run that produced the outputs recorded in this notebook.
# NOTE(review): re-running this cell once the clone directory exists makes
# `git clone` fail; the notebook keeps going because `!` does not raise.
!git clone https://github.com/virenvarma007/drug_repurposing_release
!pip install git+https://github.com/samoturk/mol2vec

Cloning into 'drug_repurposing_release'...
remote: Enumerating objects: 29, done.[K
remote: Counting objects: 100% (29/29), done.[K
remote: Compressing objects: 100% (28/28), done.[K
remote: Total 29 (delta 0), reused 29 (delta 0), pack-reused 0[K
Unpacking objects: 100% (29/29), done.
Collecting git+https://github.com/samoturk/mol2vec
  Cloning https://github.com/samoturk/mol2vec to /tmp/pip-req-build-nxz629px
  Running command git clone -q https://github.com/samoturk/mol2vec /tmp/pip-req-build-nxz629px
Building wheels for collected packages: mol2vec
  Building wheel for mol2vec (setup.py) ... [?25l[?25hdone
  Created wheel for mol2vec: filename=mol2vec-0.1-cp37-none-any.whl size=14028 sha256=4eb9802f75dbd45cd2d42b690a9afe8385f44f7c51e9ba2a7d8e42e5af4cf51a
  Stored in directory: /tmp/pip-ephem-wheel-cache-2s2vfve1/wheels/96/0f/2d/a1092b9677c96453dc244b209544cac61bc8b974cbffb50063
Successfully built mol2vec
Installing collected packages: mol2vec
Successfully installed mol2vec-0.1

In [4]:
import sys
import os
import requests
import subprocess
import shutil
from logging import getLogger, StreamHandler, INFO


# Module-level logger used by install() to report its progress.
logger = getLogger(__name__)
# getLogger() returns the same object on every call, so an unconditional
# addHandler() would stack one more StreamHandler each time this cell is re-run
# and every message would then be printed once per execution of the cell.
if not any(isinstance(handler, StreamHandler) for handler in logger.handlers):
    logger.addHandler(StreamHandler())
logger.setLevel(INFO)


def install(
        chunk_size=4096,
        file_name="Miniconda3-latest-Linux-x86_64.sh",
        url_base="https://repo.continuum.io/miniconda/",
        conda_path=os.path.expanduser(os.path.join("~", "miniconda")),
        rdkit_version=None,
        add_python_path=True,
        force=False,
        timeout=60):
    """Install RDKit through a private Miniconda tree and import it.

    Usage::

        import rdkit_installer
        rdkit_installer.install()

    The function
      1. derives ``<conda_path>/lib/python<major>.<minor>/site-packages`` and,
         when ``add_python_path`` is true, appends it to ``sys.path``;
      2. returns immediately if that directory already contains ``rdkit``
         (unless ``force`` is true);
      3. deletes whatever currently exists at ``conda_path``;
      4. downloads ``url_base + file_name`` into ``file_name`` in the current
         working directory and runs it with ``bash <file> -b -p <conda_path>``;
      5. runs ``conda install --yes -c rdkit python==<running version> rdkit``;
      6. imports ``rdkit`` and logs its version.

    Parameters
    ----------
    chunk_size : int
        Number of bytes requested per iteration while streaming the installer.
    file_name : str
        Installer file name; appended to ``url_base`` and used as the local
        download target.
    url_base : str
        URL prefix the installer is fetched from.
    conda_path : str
        Installation prefix. Any existing directory or file at this path is
        removed before installing.
    rdkit_version : str or None
        When given, ``rdkit==<rdkit_version>`` is requested instead of the
        unpinned ``rdkit`` package.
    add_python_path : bool
        Append the conda site-packages directory to ``sys.path``.
        NOTE(review): with ``False`` the final ``import rdkit`` only succeeds
        if the caller made that directory importable some other way.
    force : bool
        Re-install even when an ``rdkit`` directory is already present.
    timeout : float
        Timeout in seconds handed to ``requests.get`` so that a stalled
        connection raises instead of blocking the notebook indefinitely.

    Raises
    ------
    requests.HTTPError
        If the download answers with an error status (``raise_for_status``).
    subprocess.CalledProcessError
        If the Miniconda installer or ``conda install`` exits non-zero.
    """
    # Function-scope import: only needed for the finder-cache reset at the end.
    import importlib

    python_path = os.path.join(
        conda_path,
        "lib",
        "python{0}.{1}".format(*sys.version_info),
        "site-packages",
    )

    if add_python_path and python_path not in sys.path:
        logger.info("add {} to PYTHONPATH".format(python_path))
        sys.path.append(python_path)

    if os.path.isdir(os.path.join(python_path, "rdkit")):
        logger.info("rdkit is already installed")
        if not force:
            return

        logger.info("force re-install")

    url = url_base + file_name
    python_version = "{0}.{1}.{2}".format(*sys.version_info)

    logger.info("python version: {}".format(python_version))

    # Start from a clean prefix: the installer is run in batch mode against
    # conda_path, so anything already there is removed first.
    if os.path.isdir(conda_path):
        logger.warning("remove current miniconda")
        shutil.rmtree(conda_path)
    elif os.path.isfile(conda_path):
        logger.warning("remove {}".format(conda_path))
        os.remove(conda_path)

    logger.info('fetching installer from {}'.format(url))
    # A streamed response keeps its connection open until it is closed, so it is
    # used as a context manager; this also releases it if writing the file fails.
    with requests.get(url, stream=True, timeout=timeout) as res:
        res.raise_for_status()
        with open(file_name, 'wb') as f:
            for chunk in res.iter_content(chunk_size):
                f.write(chunk)
    logger.info('done')

    logger.info('installing miniconda to {}'.format(conda_path))
    subprocess.check_call(["bash", file_name, "-b", "-p", conda_path])
    logger.info('done')

    logger.info("installing rdkit")
    # The environment is pinned to the exact version of the running interpreter
    # so the packages conda installs match the kernel that will import them.
    subprocess.check_call([
        os.path.join(conda_path, "bin", "conda"),
        "install",
        "--yes",
        "-c", "rdkit",
        "python=={}".format(python_version),
        "rdkit" if rdkit_version is None else "rdkit=={}".format(rdkit_version)])
    logger.info("done")

    # python_path may have been put on sys.path before the directory existed;
    # drop cached finder state so the freshly installed package is discovered.
    importlib.invalidate_caches()
    import rdkit
    logger.info("rdkit-{} installation finished!".format(rdkit.__version__))


if __name__ == "__main__":
    install()

%matplotlib inline
import matplotlib.pyplot as plt
import sys
import os
sys.path.append('/usr/local/lib/python3.7/site-packages/')
!pip install keras-tqdm



add /root/miniconda/lib/python3.7/site-packages to PYTHONPATH
python version: 3.7.10
fetching installer from https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
done
installing miniconda to /root/miniconda
done
installing rdkit
done
rdkit-2020.09.1 installation finished!


Collecting keras-tqdm
  Downloading https://files.pythonhosted.org/packages/16/5c/ac63c65b79a895b8994474de2ad4d5b66ac0796b8903d60cfea3f8308d5c/keras_tqdm-2.0.1-py2.py3-none-any.whl
Installing collected packages: keras-tqdm
Successfully installed keras-tqdm-2.0.1


In [6]:
# Move into the cloned repository's `release` directory; presumably this is what
# lets the project-local modules imported below (stackRNN, data, utils) resolve
# from the working directory — verify.
# NOTE(review): both paths are relative, so a second execution of this cell would
# start inside `release`, where `./drug_repurposing_release` is not expected to exist.
%cd ./drug_repurposing_release
%cd ./release
import numpy as np
from tqdm import tqdm, trange
import pickle
from rdkit import Chem, DataStructs
from stackRNN import StackAugmentedRNN
from data import GeneratorData
from utils import canonical_smiles

import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns

/content/drug_repurposing_release
/content/drug_repurposing_release/release


In [9]:
# Mount Google Drive at /content/drive. The training-data path and the generator
# checkpoint used by later cells both live under /content/drive/MyDrive.
# force_remount=True asks Colab to mount again even if the drive is already mounted.
from google.colab import drive
drive.mount('/content/drive',force_remount=True)

Mounted at /content/drive


In [10]:
# Location on the mounted Drive of the SMILES file handed to GeneratorData below.
# NOTE(review): absolute, account-specific path — it only resolves after the Drive
# mount cell has run and only for a Drive with this exact folder layout.
gen_data_path = '/content/drive/MyDrive/Drug_Repurposing/ReLeaSE-master/ReLeaSE-master/data/chembl_22_clean_1576904_sorted_std_final.smi'

In [11]:
# Character vocabulary passed to GeneratorData through `tokens=`.
# NOTE(review): '<' and '>' look like start/end markers (generated strings are
# trimmed with [1:-1] in estimate_and_update) — confirm against data.GeneratorData.
# The order is left untouched because it presumably fixes each token's index.
tokens = ['<', '>', '#', '%', ')', '(', '+', '-', '/', '.', '1', '0', '3', '2', '5', '4', '7',
          '6', '9', '8', '=', 'A', '@', 'C', 'B', 'F', 'I', 'H', 'O', 'N', 'P', 'S', '[', ']',
          '\\', 'c', 'e', 'i', 'l', 'o', 'n', 'p', 's', 'r', '\n']
# Build the generator's data object from the tab-delimited file at gen_data_path,
# reading column 0 only. keep_header=True is passed through unchanged; presumably
# the .smi file has no header row to skip — verify.
gen_data = GeneratorData(training_data_path=gen_data_path, delimiter='\t', 
                         cols_to_read=[0], keep_header=True, tokens=tokens)

In [12]:

def plot_hist(prediction, n_to_generate):
    """Print summary statistics for ``prediction`` and show its density plot.

    Parameters
    ----------
    prediction : object with ``.mean()`` and ``len()``
        Predicted property values, one per scored molecule.
    n_to_generate : int
        Number of molecules that were requested; used as the denominator of
        the printed proportion.
    """
    mean_prediction = prediction.mean()
    print("Mean value of predictions:", mean_prediction)

    # Share of the requested molecules that ended up with a prediction.
    scored_fraction = len(prediction) / n_to_generate
    print("Proportion of valid SMILES:", scored_fraction)

    density_axes = sns.kdeplot(prediction, shade=True)
    density_axes.set(
        xlabel='Predicted vina',
        title='Distribution of predicted vina for generated molecules',
    )
    plt.show()
def estimate_and_update(generator, predictor, n_to_generate, **kwargs):
    """Sample molecules from ``generator``, score them and plot the scores.

    Parameters
    ----------
    generator : object
        Must expose ``evaluate(data, predict_len=...)`` returning a string; the
        first and last characters of each result are stripped.
    predictor : object
        Must expose ``predict(smiles, get_features=...)`` returning a
        ``(smiles, prediction, nan_smiles)`` triple.
    n_to_generate : int
        Number of strings to sample from the generator.
    **kwargs
        Optional overrides (these were previously accepted but ignored):

        ``data``
            Object handed to ``generator.evaluate``; defaults to the
            notebook-level ``gen_data``.
        ``predict_len``
            Forwarded to ``generator.evaluate``; defaults to ``120``.
        ``get_features``
            Forwarded to ``predictor.predict``; defaults to the notebook-level
            ``get_fp``.

    Returns
    -------
    tuple
        ``(smiles, prediction)`` as returned by ``predictor.predict``.
    """
    # Fall back to the notebook globals only when no override was supplied, so
    # the function can also be driven without them.
    data = kwargs['data'] if 'data' in kwargs else gen_data
    predict_len = kwargs.get('predict_len', 120)
    get_features = kwargs['get_features'] if 'get_features' in kwargs else get_fp

    # The description never changes, so it is set once on the bar instead of
    # being re-applied (and forcing a redraw) on every iteration.
    generated = [
        generator.evaluate(data, predict_len=predict_len)[1:-1]
        for _ in tqdm(range(n_to_generate), desc="Generating molecules...")
    ]

    # NOTE(review): [:-1] discards the last canonicalised entry, and [1:] discards
    # the smallest value of the sorted unique set. Presumably the latter removes an
    # empty-string placeholder for SMILES that failed to parse — confirm against
    # utils.canonical_smiles, because a valid molecule is dropped whenever no
    # such placeholder is present.
    sanitized = canonical_smiles(generated, sanitize=False, throw_warning=False)[:-1]
    unique_smiles = list(np.unique(sanitized))[1:]
    smiles, prediction, nan_smiles = predictor.predict(unique_smiles, get_features=get_features)

    plot_hist(prediction, n_to_generate)

    return smiles, prediction



In [13]:
# Hyper-parameters shared by both StackAugmentedRNN instances built in this notebook.
hidden_size = 1500
stack_width = 1500
stack_depth = 200
layer_type = 'GRU'
lr = 0.001
# The optimizer class (not an instance) is handed to the model constructor together with lr.
optimizer_instance = torch.optim.Adadelta

# NOTE(review): my_generator is not referenced by any later cell visible here; the
# checkpoint is loaded into a second instance, my_generator_min, instead.
my_generator = StackAugmentedRNN(input_size=gen_data.n_characters, hidden_size=hidden_size,
                                 output_size=gen_data.n_characters, layer_type=layer_type,
                                 n_layers=1, is_bidirectional=False, has_stack=True,
                                 stack_width=stack_width, stack_depth=stack_depth, 
                                 use_cuda=use_cuda, 
                                 optimizer_instance=optimizer_instance, lr=lr)

# Two abandoned checkpoint-loading attempts are kept below as commented-out code.
#model = torch.load('model/pytorch_resnet50.pth',map_location ='cpu')
#my_generator.torch.load('/content/drive/MyDrive/Drug_Repurposing/checkpoint_biggest_rnn',map_location ='tpu')
# NOTE(review): unpinned install placed mid-notebook; a later run may resolve a different release.
!pip install mordred
from data import PredictorData
from utils import get_desc, get_fp
from mordred import Calculator, descriptors
# Mordred descriptor calculator restricted to non-3D descriptors.
# NOTE(review): calc and get_desc are not used by any cell visible here.
calc = Calculator(descriptors, ignore_3D=True)

Collecting mordred
[?25l  Downloading https://files.pythonhosted.org/packages/93/3d/26c908ece761adafcea06320bf8fe73f4de69979273fb164226dc6038c39/mordred-1.2.0.tar.gz (128kB)
[K     |████████████████████████████████| 133kB 6.4MB/s 
Building wheels for collected packages: mordred
  Building wheel for mordred (setup.py) ... [?25l[?25hdone
  Created wheel for mordred: filename=mordred-1.2.0-cp37-none-any.whl size=176721 sha256=493edd7f6861cc38ef65f935f8cd2327c05c2597069a8b68b7f01f47a23ffa8f
  Stored in directory: /root/.cache/pip/wheels/ac/74/3f/2fd81b1187013f2eadb15620434813f1824c4c03b7bd1f94f6
Successfully built mordred
Installing collected packages: mordred
Successfully installed mordred-1.2.0


In [15]:
# Load the predictor's training set from the CSV shipped in the cloned repository,
# passing get_fp as the feature extractor.
# NOTE(review): absolute /content path — only valid when the clone cell ran from /content.
pred_data = PredictorData(path='/content/drug_repurposing_release/data/supercompcut.csv', get_features=get_fp)

In [16]:
from predictor import VanillaQSAR
from sklearn.ensemble import RandomForestRegressor as RFR

# The estimator class and its constructor arguments are passed separately;
# presumably VanillaQSAR instantiates RFR(**model_params) itself — verify in predictor.py.
model_instance = RFR
# 500 trees per forest; n_jobs=20 requests 20 parallel workers from scikit-learn.
model_params = {'n_estimators': 500, 'n_jobs': 20}
my_predictor = VanillaQSAR(model_instance=model_instance,
                           model_params=model_params,
                           model_type='regressor')



In [17]:
# Fit the QSAR model on pred_data with cv_split='random'. The value displayed for
# the recorded run is a tuple of five scores plus the label 'R^2 score'.
my_predictor.fit_model(pred_data, cv_split='random')

([0.7839107935383828,
  0.8059966935007675,
  0.7623576399517196,
  0.7845366070365472,
  0.774905047351422],
 'R^2 score')

In [18]:

def get_reward_min(smiles, predictor, invalid_reward=0.0, get_features=get_fp):
    """Return the reward for one SMILES string, favouring low predicted values.

    ``predictor.predict`` is called on the single-element list ``[smiles]``.
    If it reports exactly one failed string, ``invalid_reward`` is returned.
    Otherwise the reward is ``exp(-2 * p / 3 + 3)`` where ``p`` is the first
    predicted value, so smaller predictions give larger rewards.
    """
    _, predicted, failed = predictor.predict([smiles], get_features=get_features)
    could_not_score = len(failed) == 1
    return invalid_reward if could_not_score else np.exp(-2*predicted[0]/3 + 3)

In [19]:
from reinforcement import Reinforcement

# Second generator instance, built with the same hyper-parameters as my_generator;
# this is the one that receives the checkpoint weights and is wrapped for RL.
my_generator_min = StackAugmentedRNN(input_size=gen_data.n_characters, hidden_size=hidden_size,
                                 output_size=gen_data.n_characters, layer_type=layer_type,
                                 n_layers=1, is_bidirectional=False, has_stack=True,
                                 stack_width=stack_width, stack_depth=stack_depth, 
                                 use_cuda=use_cuda, 
                                 optimizer_instance=optimizer_instance, lr=lr)
model_path = '/content/drive/MyDrive/Drug_Repurposing/checkpoint_biggest_rnn'
# SECURITY NOTE: torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints from a trusted source.
# The tensors are mapped to CPU on load and then copied into the model's parameters.
weights = torch.load(model_path, map_location ='cpu')
my_generator_min.load_state_dict(weights)

# Couple generator, predictor and reward function. get_reward_min resolves to the
# definition from the previous cell at this point; it is redefined identically
# further down in this cell.
RL_min = Reinforcement(my_generator_min, my_predictor, get_reward_min)

# NOTE(review): none of these four constants is read by the cells visible here —
# the sampling cell below passes a literal n_to_generate=10000 instead.
n_to_generate = 200
n_policy_replay = 10
n_policy = 15
n_iterations = 50

def simple_moving_average(previous_values, new_value, ma_window_size=10):
    """Return the mean of ``new_value`` and the most recent previous values.

    The window holds at most ``ma_window_size`` samples: ``new_value`` plus up
    to ``ma_window_size - 1`` trailing entries of ``previous_values``. When
    fewer previous values exist, the mean is taken over what is available.

    Parameters
    ----------
    previous_values : sequence of numbers (list or NumPy array)
        History of earlier values, oldest first.
    new_value : number
        The newest sample; always part of the window.
    ma_window_size : int
        Total window length, including ``new_value``. Must be at least 1.

    Returns
    -------
    float
        The windowed average.

    Raises
    ------
    ValueError
        If ``ma_window_size`` is smaller than 1.
    """
    if ma_window_size < 1:
        raise ValueError("ma_window_size must be >= 1")

    n_previous = ma_window_size - 1
    # A slice written as [-0:] selects the WHOLE sequence rather than nothing, so
    # a window of size 1 (no history at all) has to be handled explicitly.
    recent = previous_values[-n_previous:] if n_previous > 0 else previous_values[:0]
    return (np.sum(recent) + new_value) / (len(recent) + 1)

def get_reward_min(smiles, predictor, invalid_reward=0.0, get_features=get_fp):
    """Score a single SMILES string so that lower predictions earn more reward.

    NOTE(review): this repeats, line for line, the definition from the
    previous cell and rebinds the same name.

    Returns ``invalid_reward`` when the predictor lists exactly one failed
    string for the one-element input, otherwise ``exp(-2 * p / 3 + 3)`` with
    ``p`` the first predicted value.
    """
    outcome = predictor.predict([smiles], get_features=get_features)
    mol, prop, nan_smiles = outcome
    if len(nan_smiles) != 1:
        exponent = -2*prop[0]/3 + 3
        return np.exp(exponent)
    return invalid_reward

In [20]:
# Sample 10000 molecules from the generator held by RL_min and score them with the
# trained predictor. The count is a literal here, not the `n_to_generate = 200`
# constant defined next to RL_min.
# NOTE(review): the recorded run of this cell was interrupted (KeyboardInterrupt at
# roughly 6 s per molecule), so the names assigned here were not produced by it.
smiles_biased_min, prediction_biased_min = estimate_and_update(RL_min.generator, 
                                                           my_predictor,
                                                           n_to_generate=10000)



  return torch.tensor(tensor)
Generating molecules...:   0%|          | 2/10000 [00:12<17:27:59,  6.29s/it]

KeyboardInterrupt: ignored

In [None]:
# Report how many scored SMILES are available, then persist them with numpy.save
# (which appends the .npy extension to the given file name).
# NOTE(review): smiles_biased_min comes from the sampling cell, whose recorded run
# was interrupted; the count shown in this cell's output presumably stems from a
# different execution — re-run the sampling cell to completion before saving.
print(len(smiles_biased_min))
import os
from numpy import save
# numpy.save does not create missing parent directories, and this notebook only
# creates /content/drug_repurposing_release, so make sure the target folder exists.
os.makedirs('/content/ReLeaSE', exist_ok=True)
save('/content/ReLeaSE/output2', smiles_biased_min)

6321
