# Lab 1.1 ESMFold NIM Playground

In [6]:
import os, requests, time, shutil
from pathlib import Path
# python-dotenv: used below to load variables from a local .env file
from dotenv import load_dotenv
from loguru import logger
# Populate os.environ from .env (if one is found) so os.getenv("NGC_API_KEY") works later in the notebook
load_dotenv()

True

## Helper functions

### Directory setup

In [7]:
def preprare_directory(temp):
    """
    Ensure `temp` is a freshly created, empty directory.

    Anything already present at that path is removed recursively first, so
    callers must not point this at a directory whose contents they want to keep.

    :param temp: str: path to the directory
    """
    stale_copy_present = os.path.exists(temp)
    if stale_copy_present:
        # Wipe the previous directory tree before starting over
        shutil.rmtree(temp)
    # Create the directory (including any missing parents)
    os.makedirs(temp)

### Interact with the hosted API endpoint at ESMFold Playground 

The ESMFold playground can be accessed [here](https://build.stg.ngc.nvidia.com/meta/esmfold?snippet_tab=Python).


In [12]:
class ESMFoldPlayground:
    """
    Small client for the ESMFold NIM endpoint.

    Stores the API key and endpoint URL, and exposes `predict`, which POSTs a
    single amino-acid sequence and optionally writes the returned PDB text to disk.
    """

    def __init__(self, NGC_API_KEY, query_url=None):
        """
        Initialize the ESMFoldPlayground class
        NGC_API_KEY: str, the API key; sent as a Bearer token with every request
        query_url: str, the url to send the request to, default is the ESMFold NIM endpoint
        """
        self.NGC_API_KEY = NGC_API_KEY
        self.query_url = query_url if query_url is not None else "https://health.api.nvidia.com/v1/biology/nvidia/esmfold"

    def predict(self, sequence, output_dir=None, output_file_name="predicted_protein.pdb"):
        """
        Request a structure prediction for one amino-acid sequence.

        sequence: str, single aa sequence
        output_dir: str, the directory to save the output to. If there are existing contents, it will be
            deleted and recreated (this happens before the request is sent, even if the request later fails).
            Defaults to None, and it will not save the output PDB file.
        output_file_name: str, the name of the output PDB file. Defaults to "predicted_protein.pdb".
            Only used when output_dir is not None.
        return: the parsed JSON response. On success the first entry of its "pdbs" list is the PDB text
            that gets written to disk; on failure it is the parsed JSON body of the error response.
        raises: whatever `response.json()` raises if the response body is not valid JSON.
        """
        # prepare output directory (only when the caller asked for the PDB to be saved)
        save_to_disk = output_dir is not None
        if save_to_disk:
            preprare_directory(output_dir)

        # prepare data
        data = {
            "sequence": sequence,
        }

        # prepare headers
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.NGC_API_KEY}"
        }

        # send request
        response = requests.post(self.query_url, headers=headers, json=data)

        # check response
        if response.status_code == 200:
            logger.success("Request successful")
            # parse once and reuse the result for both saving and returning
            result = response.json()
            if save_to_disk:
                # Write PDB file; skipped when output_dir is None so the documented
                # "do not save" default no longer fails in os.path.join(None, ...)
                fp = os.path.join(output_dir, output_file_name)
                with open(fp, "w") as f:
                    f.write(result["pdbs"][0])
            return result

        logger.error(f"Request failed with status code {response.status_code}. Output file will not be saved.")
        # loguru formats messages str.format-style: without a "{}" placeholder the
        # positional argument (the response body) would be silently dropped
        logger.error("Response: {}", response.text)
        return response.json()

## Try out the hosted API endpoint

### Set up the inputs

In [14]:
# get NGC API key from the environment (load_dotenv() above may have populated it from .env)
NGC_API_KEY = os.getenv("NGC_API_KEY")
# Fail fast with an actionable message: otherwise a missing key is sent as "Bearer None"
# and the problem only shows up later as a failed request
if not NGC_API_KEY:
    raise RuntimeError(
        "NGC_API_KEY is not set. Export it in your shell or add it to your .env file, then re-run this cell."
    )

# source of VHH sequence: sdAb_5763_Ca from SdAb-Db: https://www.sdab-db.ca/?Display&ID=sdAb_5763_Ca
VHH_seq = "QVQLQESGGGLVQAGGSLRLSCAASGTISPLPAMGWYRQAPGKEREFVAGIDTGAITNYADSVKGRFTISRDNAKNTVYLQMNSLKPEDTAVYYCAVFPAAYDYYERYYTYWGQGTQVTVSS"

# output directory (predict() deletes and recreates it on every run)
output = "output/esmfold_result"


# initialize the ESMFoldPlayground class against the default hosted endpoint
esmfold_playground = ESMFoldPlayground(
    NGC_API_KEY=NGC_API_KEY
)

### Running the prediction

In [15]:
%%time 
# run prediction
result = esmfold_playground.predict(
    sequence=VHH_seq,
    output_dir=output
)


[32m2024-11-28 16:40:22.088[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mpredict[0m:[36m39[0m - [32m[1mRequest successful[0m


CPU times: user 7.9 ms, sys: 5.31 ms, total: 13.2 ms
Wall time: 1.38 s


### Analyze the result

In [17]:
# List the top-level fields of the JSON response returned by predict()
result.keys()

dict_keys(['pdbs'])

In [18]:
# Only one sequence is allowed per request, so exactly one PDB string is expected back
assert len(result["pdbs"]) == 1, f"expected exactly 1 predicted structure, got {len(result['pdbs'])}"