# Imports

In [1]:
import tmap as tm
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib.colors import LinearSegmentedColormap

import pandas as pd
from faerun import Faerun

# Load fingerprints and encode them

In [None]:
# LSH forest that will index the encoded fingerprints. The positional
# arguments (256, 128) are passed through unchanged.
# NOTE(review): presumably these are the hash dimensionality and the number of
# prefix trees -- confirm against the tmap API reference.
# NOTE(review): the fingerprints are encoded with `from_weight_array` in the
# next cell; check whether the forest should be built with `weighted=True`.
lf = tm.LSHForest(256, 128)

# MinHash encoder with library defaults; used to turn each fingerprint row
# into a MinHash vector.
mh_encoder = tm.Minhash()

In [3]:
# Read the fingerprint table; the CSV has no header row, so every line is data.
fps = pd.read_csv("ibm_rxnfp.csv", header=None)

# Encode each row (as a plain Python list of weights) with the "I2CWS" method.
mhfps = [
    mh_encoder.from_weight_array(weights, method="I2CWS")
    for weights in fps.values.tolist()
]

# Load data and create groups

Nucleophiles and leaving groups

In [4]:
# Reaction table; later cells read its "nu_symbol", "lg_symbol" and
# "reaction_smiles" columns.
processed_data = pd.read_csv(filepath_or_buffer="processed_data.csv")


In [5]:
# Nucleophile symbols -> legend labels and per-point category values.
nu_symbols = processed_data["nu_symbol"].values
labels_groups_nu, groups_nu = Faerun.create_categories(nu_symbols)

# Leaving-group symbols -> legend labels and per-point category values.
lg_symbols = processed_data["lg_symbol"].values
labels_groups_lg, groups_lg = Faerun.create_categories(lg_symbols)

Reaction SMILES labels

In [6]:
# Reaction SMILES strings, used as the point labels in the scatter plot.
# `labels` is a second name bound to the very same array.
labels = reaction_smiles = processed_data["reaction_smiles"].values

# Create tree

In [9]:
# Colormaps for the two colourings (nucleophile / leaving group); both use
# matplotlib's "tab10" palette.
custom_cm_nu = plt.get_cmap("tab10")
custom_cm_lg = plt.get_cmap("tab10")

# Add every MinHash fingerprint to the forest, then build its index.
# This is the slow part of the cell.
lf.batch_add(mhfps)
lf.index()

# Layout settings: `k` and `kc` are both set to 100.
cfg = tm.LayoutConfiguration()
cfg.k = cfg.kc = 100

# Compute the tree layout: x/y are the point coordinates, s/t the endpoints
# of the tree edges (passed to Faerun as "from"/"to"); the fifth return value
# is discarded.
x, y, s, t, _ = tm.layout_from_lsh_forest(
    lf,
    config=cfg,
    create_mst=True,
)

# Display tree 

In [11]:
# White canvas, no coordinate axes, viewed from the front.
f = Faerun(
    view="front",
    coords=False,
    clear_color="#ffffff",
)

# One scatter layer with two selectable colourings: index 0 is the
# nucleophile grouping, index 1 the leaving-group grouping. The per-series
# lists below (categorical, legend_labels, colormap, series_title) follow
# that same order.
f.add_scatter(
    "SNAr",
    {
        "x": x,
        "y": y,
        "c": [groups_nu, groups_lg],
        "labels": reaction_smiles,
    },
    shader="smoothCircle",
    point_scale=10,
    colormap=[custom_cm_nu, custom_cm_lg],
    categorical=[True, True],
    has_legend=True,
    legend_labels=[labels_groups_nu, labels_groups_lg],
    series_title=["Nu symbols", "Lg symbols"],
)

# Draw the tree edges on top of the "SNAr" scatter layer's points.
f.add_tree(
    "SNAr_tree",
    {"from": s, "to": t},
    point_helper="SNAr",
)

# Render using the "reaction_smiles" template.
f.plot(template="reaction_smiles")

# Save tree 

In [15]:
import pickle

In [19]:
# Save each layout component as a NumPy array in its own pickle file.
for pickle_path, layout_part in (
    ("x.pickle", x),
    ("y.pickle", y),
    ("s.pickle", s),
    ("t.pickle", t),
):
    with open(pickle_path, "wb") as file:
        pickle.dump(np.array(layout_part), file)