In [None]:
import math
import scipy
import numpy as np
import pandas as pd

# Import relevant modSAR classes 
import modSAR
from modSAR.network_algorithms import ModSAR
from modSAR.dataset import QSARDataset, QSARDatasetIO

# plotnine is the python version of ggplot2
from plotnine import *

import warnings
warnings.filterwarnings("ignore")

from rdkit import Chem
from rdkit.Chem import AllChem, Draw

from rdkit import RDLogger

import rdkit.Geometry
from rdkit.Chem import rdFMCS, PandasTools
from rdkit.Chem.Draw import IPythonConsole 
from rdkit.Chem import PandasTools

from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error
from sklearn.model_selection import RandomizedSearchCV

# Activity cutoff: a concentration of 2.5 uM (2.5e-06 M) expressed on the
# negative log10 scale, i.e. -log10(2.5e-06) ~= 5.60.
CUTOFF_ACTIVITY = -np.log10(2.5e-6)

# SMARTS query for a 9-atom fused aromatic bicycle: a six-membered ring
# (C, N, C, C, N, C) sharing its N-C edge with a five-membered ring that
# contains three nitrogens (one of them the shared bridgehead N).
# NOTE(review): presumably the core scaffold of the OSM series modelled in
# this notebook -- confirm.
s4_template = Chem.MolFromSmarts('[#6]1:[#7]:[#6]:[#6]:[#7]2:[#6]:1:[#7]:[#7]:[#6]:2')
# Compute 2D coordinates for the template molecule (done in place on
# s4_template; the return value is discarded).
# NOTE(review): presumably used later as a drawing/alignment reference -- confirm.
AllChem.Compute2DCoords(s4_template)

# Turn off RDKit's 'rdApp.info' log channel to keep cell output quiet.
RDLogger.DisableLog('rdApp.info')

from rdkit.Chem import PandasTools

%matplotlib inline

# Load Data

In [None]:
# Load the 'OSM4' QSAR dataset from the morgan2 spreadsheet.
# Activities are read from the 'activity' sheet; molecules are identified by
# the 'OSM_ID' column and described by the 'Canonical_Smiles' column.
# NOTE(review): the file name suggests Morgan fingerprint descriptors of
# radius 2 -- confirm against the data preparation step.
dataset_morgan2 = QSARDatasetIO.load(
    dataset_name='OSM4',
    activity_sheetname='activity',
    smiles_column='Canonical_Smiles',
    id_column='OSM_ID',
    filepath='../data/osm_qsar_dataset_morgan2.xlsx',
    calculate_similarity=False,
)

# Bare last expression: show the loaded dataset as the cell output.
dataset_morgan2

In [None]:
# Load the 'OSM4' QSAR dataset from the morgan4 spreadsheet, using the same
# sheet and column settings as the morgan2 load.
# NOTE(review): the file name suggests Morgan fingerprint descriptors of
# radius 4 -- confirm against the data preparation step.
dataset_morgan4 = QSARDatasetIO.load(
    dataset_name='OSM4',
    activity_sheetname='activity',
    smiles_column='Canonical_Smiles',
    id_column='OSM_ID',
    filepath='../data/osm_qsar_dataset_morgan4.xlsx',
    calculate_similarity=False,
)

# Bare last expression: show the loaded dataset as the cell output.
dataset_morgan4

## Generate Randomized Datasets (Y-Randomization Controls)

In [None]:
import random
import matplotlib.pyplot as plt
import joblib
from sklearn.model_selection import ShuffleSplit
from sklearn import model_selection

In [None]:
# Build three randomized counterparts of dataset_morgan2 and persist them:
#   rx -- random binary (0.0 / 1.0) feature matrix with the shape, columns and
#         index of dataset_morgan2.X
#   ry -- activities drawn uniformly between the observed min and max of
#         dataset_morgan2.y
#   py -- the observed activities of dataset_morgan2.y, shuffled across rows
#
# A single seeded NumPy generator drives all three draws so that
# "Restart Kernel -> Run All" regenerates exactly the same control datasets.
import os

Y_RANDOM_SEED = 42
# NOTE(review): absolute, machine-specific output location kept unchanged so
# that any later cell loading these files still finds them.
Y_RANDOM_DIR = "/mnt/data/results/y-random"

y_random_rng = np.random.default_rng(Y_RANDOM_SEED)

# Observed activities; min/max are computed once instead of once per sample.
y_observed = dataset_morgan2.y.values
y_low, y_high = y_observed.min(), y_observed.max()

# Generate ry: one uniform draw per entry of y.
ry = y_random_rng.uniform(y_low, y_high, size=dataset_morgan2.y.shape)

# Generate rx: vectorized 0/1 draws (upper bound of `integers` is exclusive);
# cast to float to keep the dtype of the original zeros-initialised matrix.
rx = y_random_rng.integers(0, 2, size=dataset_morgan2.X.shape).astype(float)

rx = pd.DataFrame(rx, columns=dataset_morgan2.X.columns, index=dataset_morgan2.X.index)
ry = pd.DataFrame(ry, columns=dataset_morgan2.y.columns, index=dataset_morgan2.y.index)

# Generate py: permutation of the observed activities along the sample axis
# (returns a shuffled copy; dataset_morgan2.y itself is left untouched).
py = y_random_rng.permutation(y_observed)
py = pd.DataFrame(py, columns=dataset_morgan2.y.columns, index=dataset_morgan2.y.index)

# joblib.dump does not create missing parent directories.
os.makedirs(Y_RANDOM_DIR, exist_ok=True)
joblib.dump(rx, f"{Y_RANDOM_DIR}/rx.joblib")
joblib.dump(ry, f"{Y_RANDOM_DIR}/ry.joblib")
joblib.dump(py, f"{Y_RANDOM_DIR}/py.joblib")

In [None]:
# Names of the evaluation metrics to report.
# NOTE(review): these look like scikit-learn scorer strings, presumably passed
# as `scoring=` to a cross-validation / search routine further down -- confirm.
scoring = [
    "neg_root_mean_squared_error",
    "neg_mean_absolute_error",
    "neg_mean_squared_error",
    "r2",
]