# FFF step 1: scaffold selection

In [None]:
%load_ext autoreload
%autoreload 2
import hippo
from os import environ
from pathlib import Path
import json
from mrich import print
import mrich
import shutil
import pandas as pd

## Get BulkDock HIPPO database

In [None]:
# name of the target; also used below to locate its HIPPO database under $BULK/TARGETS
target_name = "ZIKA_NS3_helicase"
# merging hypothesis to process; must be a key in merging_hypotheses.json
merging_hypothesis = "RNA-Cleft"

In [None]:
# Open the target's HIPPO database: $BULK/TARGETS/<target_name>/<target_name>.sqlite
bulk_root = Path(environ["BULK"])
db_path = bulk_root / "TARGETS" / target_name / f"{target_name}.sqlite"
animal = hippo.HIPPO(target_name, db_path)

In [None]:
# Load the merging hypotheses and check that the selected one is defined.
# The context manager closes the file handle as soon as the JSON is parsed.
with open("merging_hypotheses.json", "rt", encoding="utf-8") as f:
    merging_hypotheses = json.load(f)

assert merging_hypothesis in merging_hypotheses, (
    f"merging hypothesis {merging_hypothesis!r} not found in merging_hypotheses.json"
)

## Enter manual designs

These manual designs will be used as scaffolds regardless of any curation.

In [None]:
manual_designs = [
    # populate tuples in this format:
    # (smiles: str, alias: str, inspirations: list[str], tags: list[str]),
    #   - smiles and alias are passed to animal.register_compound
    #   - inspirations are keys looked up in animal.poses
    #   - tags are added to the registered compound
    ("n2nc1n(nc(Cl)cc1)n2", "8um3_soaked", ["8um3-a"], []),
]

In [None]:
# Register each manual design as a compound once all of its inspirations resolve.
for design_smiles, design_alias, inspiration_keys, design_tags in manual_designs:
    # stop before registering anything if an inspiration pose cannot be looked up
    for key in inspiration_keys:
        pose = animal.poses[key]
        assert pose, f"inspiration not found {key}"

    compound = animal.register_compound(smiles=design_smiles, alias=design_alias)

    for label in design_tags:
        compound.add_tag(label)

    compound.summary()

## Parse Fragalysis curation CSVs

In [None]:
# Collect every CSV file found in ../curation and print the resulting list.
curation_csvs = list(Path("../curation").glob("*.csv"))
mrich.print(curation_csvs)

## Parse Syndirella manual input from chemistry review

In [None]:
# placeholder: nothing is parsed in this cell yet
pass

## Select and Tag scaffolds

Create a tag for each merging hypothesis's selected scaffolds. Consider using multiple criteria to select your scaffolds:

- BulkDock export tag / metadata
- Human-curation from Fragalysis
- Human-review of chemistry

In [None]:
# start with all BulkDock exported poses
# (selected by matching the substring "_fragalysis.sdf" against pose metadata)
scaffold_poses = animal.poses.get_by_metadata_substring_match("_fragalysis.sdf")
# bare expression: shown as the cell output
scaffold_poses

In [None]:
# select the best pose (by RMSD to fragments) for each compound
# NOTE: this rebinds scaffold_poses, replacing the full set selected in the previous cell
scaffold_poses = scaffold_poses.compounds.best_placed_poses
# bare expression: shown as the cell output
scaffold_poses

In [None]:
# inspect candidate poses for manual addition: this calls .interactive() on the
# poses tagged "[Other] Covalent to Cys262"; the result is not assigned, so
# scaffold_poses is not modified by this cell
animal.poses(tag="[Other] Covalent to Cys262").interactive()

In [None]:
# Veto poses by ID: each listed ID is subtracted from the scaffold selection in turn.
vetoed_pose_ids = (159, 1210, 1386, 1388, 1405)
for pose_id in vetoed_pose_ids:
    scaffold_poses -= pose_id

In [None]:
# look at the poses currently held in scaffold_poses
scaffold_poses.interactive()

In [None]:
# Rebuild the scaffold tag for this merging hypothesis from scratch:
# delete the existing tag, then tag the selected poses and their compounds.
tag = f"{merging_hypothesis} scaffolds"
animal.tags.delete(tag)
scaffold_poses.add_tag(tag)
scaffold_poses.compounds.add_tag(tag)

# The manual designs get the same tag; each is looked up by its SMILES (first tuple field).
for design_smiles, *_ in manual_designs:
    animal.compounds(smiles=design_smiles).add_tag(tag)

## Create Syndirella inputs

### Scaffold poses

In [None]:
# directory that receives the Syndirella inputs for this merging hypothesis
elab_path = Path(f"../syndirella/{merging_hypothesis}")

In [None]:
# export the selected scaffold poses as Syndirella inputs into elab_path
# NOTE(review): separate=True presumably writes one input set per pose — confirm against the HIPPO docs
df = scaffold_poses.to_syndirella(elab_path, separate=True)

In [None]:
# display the value returned by to_syndirella
df

### Manual designs

In [None]:
# Write the Syndirella inputs for every manual design:
#   - <compound name>_syndirella_input.csv             one-row input table
#   - <compound name>_syndirella_inspiration_hits.sdf  the inspiration poses
#   - templates/<apo file>                             copy of the reference pose's apo structure
# The cell output is the concatenation of all one-row input tables.

template_dir = elab_path / "templates"
# create the output directories up front so the CSV/SDF writes and the template
# copy do not depend on them already existing
template_dir.mkdir(parents=True, exist_ok=True)

dfs = []
for smiles, alias, inspirations, tags in manual_designs:

    # output files are keyed by the compound's name in the database
    out_key = animal.compounds(smiles=smiles).name

    # resolve the inspiration keys to poses; the first one serves as the template
    inspiration_poses = animal.poses[inspirations]
    reference = inspiration_poses[0]

    d = dict(
        smiles=smiles,
        template=reference.name,
        compound_set=out_key,
    )

    # one column per inspiration: hit1, hit2, ...
    for i, p in enumerate(inspiration_poses, start=1):
        d[f"hit{i}"] = p.name

    # separate name so the scaffold-pose table in `df` is not overwritten
    design_df = pd.DataFrame([d])

    csv_name = elab_path / f"{out_key}_syndirella_input.csv"
    mrich.writing(csv_name)
    design_df.to_csv(csv_name, index=False)

    sdf_name = elab_path / f"{out_key}_syndirella_inspiration_hits.sdf"
    inspiration_poses.write_sdf(
        sdf_name,
        inspirations=False,
        tags=False,
        metadata=False,
        reference=False,
        name_col="name",
    )

    template_path = template_dir / reference.apo_path.name
    mrich.writing(template_path)
    shutil.copy(reference.apo_path, template_path)

    dfs.append(design_df)

# pd.concat raises ValueError on an empty list, so show an empty table when
# there are no manual designs
pd.concat(dfs) if dfs else pd.DataFrame()