## Predicting Docking Success from Molecular Fingerprints

In [None]:
import os
import random
import warnings

# Report which conda environment the kernel runs in. The variable is read with
# a fallback because indexing os.environ directly raises KeyError when
# CONDA_DEFAULT_ENV is not set (e.g. when the kernel is not started from conda).
conda_env = os.environ.get('CONDA_DEFAULT_ENV', '<not set>')
print('Current conda environment:', conda_env)

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation warnings from the libraries used below.
warnings.filterwarnings('ignore')

# Seed Python's built-in RNG. This does not seed NumPy; the scikit-learn calls
# in this notebook pass their own random_state explicitly.
random.seed(42)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, roc_auc_score, classification_report

from rdkit import Chem

# Apply seaborn's theme and make 15x8 the default figure size for every plot.
sns.set(rc={'figure.figsize': (15, 8)})

In [None]:
# Load the dataset from the current working directory. Later cells read its
# 'SMILES' and 'Success' columns.
df = pd.read_csv(filepath_or_buffer='dataframe.csv')

In [None]:
# Parse every SMILES string. RDKit returns None (instead of raising) for a
# string it cannot parse, and non-string cells such as NaN cannot be parsed at
# all, so both cases are mapped to None here and filtered out below rather than
# being passed on to RDKFingerprint, which would fail on them.
parsed_mols = [
    Chem.MolFromSmiles(smile) if isinstance(smile, str) else None
    for smile in df['SMILES']
]
is_valid = np.array([mol is not None for mol in parsed_mols], dtype=bool)

if not is_valid.all():
    # print() rather than warnings.warn(): warnings are globally silenced in
    # this notebook, and dropped rows should not go unnoticed.
    print(f'Dropping {int((~is_valid).sum())} row(s) with missing or unparseable SMILES')
    # Filter df itself so that labels read from it later stay aligned
    # row-for-row with the fingerprint matrix.
    df = df.loc[is_valid]

smiles = df['SMILES'].to_numpy()
mols = [mol for mol in parsed_mols if mol is not None]

# One row per molecule; each row is the RDKit fingerprint expanded to a list of bits.
fingerprints = np.array([Chem.RDKFingerprint(mol).ToList() for mol in mols])

In [None]:
# Features are the fingerprint rows; labels come from the 'Success' column.
X, y = fingerprints, df['Success'].to_numpy()

# Hold out 20% of the samples for evaluation, with a fixed seed so the split
# is reproducible.
# NOTE(review): the split is not stratified; if 'Success' is imbalanced,
# consider passing stratify=y — confirm against the class distribution.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Random forest of 100 Gini-split trees. Each split considers sqrt(n_features)
# candidate features, and class weights are balanced by class frequency.
rf = RandomForestClassifier(
    n_estimators=100,
    criterion='gini',
    max_features='sqrt',
    class_weight='balanced',
    random_state=1984,
)

# fit() trains in place and returns the estimator itself, so `rf` is the
# fitted model either way.
rf.fit(X_train, y_train)

In [None]:
# Predict hard class labels for the held-out samples.
rf_pred = rf.predict(X_test)

# Text summary of per-class precision, recall, F1 and support.
report = classification_report(y_test, rf_pred)
print(report)