# CMR Resource Configurator

In [None]:
from setvar import *

In [None]:
import re, os, sys

In [None]:
# Replace PATH with a fixed directory list that starts with /usr/local/cli/bin
# (presumably where the Agave CLI commands used below are installed — TODO confirm).
setvar("PATH=/usr/local/cli/bin:/usr/bin:/bin:/usr/sbin:/usr/local/sbin:/sbin")

## Step 1: Select your tenant

In [None]:
# Show the available tenants so that one can be chosen in the next cell.
!tenants-list

In [None]:
# Initialise the CLI for the chosen tenant; replace "tacc.prod" to use another one.
!tenants-init -t tacc.prod

## Step 2: Set your Agave Username and Password

In [None]:
# Agave account name used by the client, token and naming steps below.
# Edit this to your own username before running.
setvar("AGAVE_USERNAME=tg457049")

In [None]:
# Obtain AGAVE_PASSWD through readpass (not defined in this notebook; presumably
# supplied by the `setvar` star-import).
# Uncomment the next line to delete AGAVE_PASSWD.txt first — presumably the file
# in which readpass keeps a previously entered password (TODO confirm).
#os.remove("AGAVE_PASSWD.txt")
readpass("AGAVE_PASSWD")

## Step 3: Provide Information about the Resource
Here you should supply the fully qualified domain name
of the machine, the port it will be accessed on, the
user that will be accessing the machine, etc.

In [None]:
# Describe the target machine. Later cells refer to these values as ${NAME}
# inside file templates and shell commands.
# WORK_DIR and SCRATCH_DIR are both set to the same /work/<user> path here, and
# the directory values reference MACHINE_USERNAME, which is set in the same call.
setvar("""
MACHINE_USERNAME=funwave
MACHINE_FULL=shelob.hpc.lsu.edu
PORT=22
ALLOCATION=hpc_startup_funwave
BASE_APP_NAME=crcollaboratory
WORK_DIR=/work/${MACHINE_USERNAME}
HOME_DIR=/home/${MACHINE_USERNAME}
SCRATCH_DIR=/work/${MACHINE_USERNAME}
DEPLOYMENT_PATH=agave-deployment
AGAVE_JSON_PARSER=jq
""")

## Step 4: Provide the login credentials

In [None]:
# Obtain MACHINE_PASSWD, the login password for MACHINE_USERNAME on the target
# machine; it is embedded in the system definitions written below.
# Uncomment the next line to delete MACHINE_PASSWD.txt first — presumably the
# file in which readpass keeps a previously entered password (TODO confirm).
#os.remove("MACHINE_PASSWD.txt")
readpass("MACHINE_PASSWD")

In [None]:
# Parse out the name of the machine from its domain: everything before the
# first dot becomes MACHINE and everything after it becomes DOMAIN.
# The host label is matched with [^.]+ rather than \w+ so that host names
# containing a hyphen (e.g. "qb-2.example.org") are accepted as well.
g = re.match(r'([^.]+)\.(.*)',os.environ["MACHINE_FULL"])
if g is None:
    # Fail with an explicit message instead of an AttributeError on None.
    raise ValueError(
        "MACHINE_FULL must be a fully qualified domain name, got {!r}".format(
            os.environ["MACHINE_FULL"]))
setvar("""
MACHINE={}
DOMAIN={}
""".format(g.group(1),g.group(2)))

In [None]:
# Derive the names used below from the machine name and the Agave username:
# BASE_APP_NAME2 names the Agave client and the app, STORAGE_MACHINE and
# EXEC_MACHINE are the ids of the storage and execution systems.
setvar("""
BASE_APP_NAME2=${BASE_APP_NAME}-${MACHINE}-${AGAVE_USERNAME}
STORAGE_MACHINE=${MACHINE}-storage-${AGAVE_USERNAME}
EXEC_MACHINE=${MACHINE}-exec-${AGAVE_USERNAME}
""")

In [None]:
!clients-delete -u $AGAVE_USERNAME -p $AGAVE_PASSWD $BASE_APP_NAME2
!clients-create -p $AGAVE_PASSWD -S -N $BASE_APP_NAME2 -u $AGAVE_USERNAME
!auth-tokens-create -u $AGAVE_USERNAME -p $AGAVE_PASSWD

In [None]:
# Write the JSON definition of the storage system (SFTP, password
# authentication) to ${STORAGE_MACHINE}.txt, filling in the values set above.
# NOTE(review): the resulting file contains MACHINE_PASSWD in plain text and is
# not removed by this notebook.
writefile("${STORAGE_MACHINE}.txt","""{
    "id": "${STORAGE_MACHINE}",
    "name": "${MACHINE} storage (${MACHINE_USERNAME})",
    "description": "The ${MACHINE} computer",
    "site": "${DOMAIN}",
    "type": "STORAGE",
    "storage": {
       "host": "${MACHINE_FULL}",
       "port": ${PORT},
       "protocol": "SFTP",
       "rootDir": "/",
       "homeDir": "${HOME_DIR}",
       "auth": {
           "username" : "${MACHINE_USERNAME}",
           "password" : "${MACHINE_PASSWD}",
           "type" : "PASSWORD"
        }
    }
}
""")

In [None]:
# Submit the storage system definition file to Agave (adds or updates the system).
!systems-addupdate -F ${STORAGE_MACHINE}.txt

### List files on the STORAGE_MACHINE to ensure access is correctly configured.

In [None]:
# List ./ on the storage system and keep only the first five lines of output.
!files-list -S ${STORAGE_MACHINE} ./ | head -5

## Step 5: More Machine Details
In order to properly use the execution machine, you will
need to provide Agave with details about its queuing system
as well as the default queue that jobs will run in. Please
edit the next section carefully.

In [None]:
# Configure information about the machine/queue.
# QUEUE, MAX_TIME, NODES, PROCS and MAX_JOBS are substituted into the default
# queue entry of the execution system definition written further down in this
# cell (name, maxRequestedTime, maxNodes, maxProcessorsPerNode, maxJobs).
setvar("""
QUEUE=checkpt
MAX_TIME=72:00:00
NODES=128
PROCS=16
MAX_JOBS=30
""")

# Build the value of the "customDirectives" field of the queue definition.
# The PBS directive lines are collapsed into a single line: every newline
# (together with any whitespace that follows it) is replaced by the two
# characters backslash + "n", i.e. a JSON-escaped newline.
# Raw string literals are used so that "\$" and the regex escapes are taken
# verbatim instead of relying on Python passing unrecognised escape sequences
# through (which triggers a Deprecation/SyntaxWarning). The resulting value is
# unchanged.
os.environ["DIRECTIVES"] = re.sub(r"\n\s*", r"\\n", r"""
#PBS -A ${ALLOCATION}
#PBS -l cput=\${AGAVE_JOB_MAX_RUNTIME}
#PBS -l walltime=\${AGAVE_JOB_MAX_RUNTIME}
#PBS -q \${AGAVE_JOB_BATCH_QUEUE}
#PBS -l nodes=\${AGAVE_JOB_NODE_COUNT}:ppn=16
""".strip())
# Write the JSON definition of the execution system (CUSTOM_TORQUE scheduler,
# SSH login, SFTP storage, one default queue) to ${EXEC_MACHINE}.txt.
# The "storage" object is closed exactly once, so "workDir" is a member of the
# top-level object. Previously a stray "}" closed the top-level object before
# "workDir", which made the document malformed JSON.
# NOTE(review): the resulting file contains MACHINE_PASSWD in plain text and is
# not removed by this notebook.
writefile("${EXEC_MACHINE}.txt","""
{
    "id": "${EXEC_MACHINE}",
    "name": "${MACHINE} (${MACHINE_USERNAME})",
    "description": "The ${MACHINE} computer",
    "site": "${DOMAIN}",
    "public": false,
    "status": "UP",
    "type": "EXECUTION",
    "executionType": "HPC",
    "scheduler" : "CUSTOM_TORQUE",
    "environment": null,
    "scratchDir" : "${SCRATCH_DIR}",
    "queues": [
       {
            "customDirectives" : "${DIRECTIVES}",
            "name": "${QUEUE}",
            "default": true,
            "maxJobs": ${MAX_JOBS},
            "maxUserJobs": ${MAX_JOBS},
            "maxNodes": ${NODES},
            "maxProcessorsPerNode": ${PROCS},
            "minProcessorsPerNode": 1,
            "maxRequestedTime": "${MAX_TIME}"
        }
    ],
    "login": {
        "auth": {
         "username" : "${MACHINE_USERNAME}",
         "password" : "${MACHINE_PASSWD}",
         "type" : "PASSWORD"
        },
        "host": "${MACHINE_FULL}",
        "port": ${PORT},
        "protocol": "SSH"
    },
    "maxSystemJobs": 50,
    "maxSystemJobsPerUser": 50,
    "storage": {
        "host": "${MACHINE_FULL}",
        "port": ${PORT},
        "protocol": "SFTP",
        "rootDir": "/",
        "homeDir": "${HOME_DIR}",
        "auth": {
         "username" : "${MACHINE_USERNAME}",
         "password" : "${MACHINE_PASSWD}",
         "type" : "PASSWORD"
        }
    },
    "workDir": "${WORK_DIR}"
}""")

# Submit the execution system definition file to Agave (adds or updates the system).
!systems-addupdate -F ${EXEC_MACHINE}.txt

### List files on the EXEC_MACHINE to ensure access is correctly configured.

In [None]:
# List ./ on the execution system and keep only the first five lines of output.
!files-list -S ${EXEC_MACHINE} ./ | head -5

### Create the batch script used to run jobs. This should not need editing.

In [None]:
# Write the job wrapper script template to ${BASE_APP_NAME2}-wrapper.txt; it is
# uploaded below and referenced as the app's "templatePath".
# The template is a raw string so that every "\$" is taken verbatim instead of
# relying on Python passing an unrecognised escape sequence through (which
# triggers a Deprecation/SyntaxWarning). The text handed to writefile is
# unchanged. Presumably writefile treats "\$" as a literal dollar sign that must
# not be substituted now — TODO confirm against the setvar module.
writefile("${BASE_APP_NAME2}-wrapper.txt",r"""
#!/bin/bash
handle_trap() {
  rc=\$?
  if [ "\$rc" != 0 ]
  then
    \$(\${AGAVE_JOB_CALLBACK_FAILURE})
  fi
}

trap 'handle_trap' ERR EXIT
echo 'running \${simagename} model'
# Setting the x flag will echo every
# command onto stderr. This is
# for debugging, so we can see what's
# going on.
set -x
set -e
echo ==PWD=============
# We also print out the execution
# directory. Again, for debugging purposes.
pwd
echo ==JOB=============

if [ "\${PBS_NODEFILE}" = "" ]
then
 # When running on a system managed by Torque
 # this variable should be set. If it's not,
 # that's a problem.
 echo "The PBS_NODEFILE was not set"
 exit 2
fi

# By default, the PBS_NODEFILE lists nodes multiple
# times, once for each MPI process that should run
# there. We only want one MPI process per node, so
# we create a new file with "sort -u".
LOCAL_NODEFILE=nodefile.txt
sort -u < \${PBS_NODEFILE} > \${LOCAL_NODEFILE}
PROCS=\$(wc -l < \${LOCAL_NODEFILE})

if [ "\${PROCS}" = "" ]
then
 echo "PROCS was not set"
 exit 3
fi

# Prepare the nodes to run the image
export SING_OPTS="--bind \$PWD:/workdir \$SING_OPTS"
for host in \$(cat nodefile.txt)
do
    hostfile="\$HOME/.bash.\${host}.sh"
    echo "export SING_IMAGE=/project/sbrandt/chemora/images/\${simagename}.simg" > \$hostfile
    echo "export SING_OPTS='\$SING_OPTS'" >> \$hostfile
done

# Create a nodefile that matches our choices at submit time
touch nodes.txt
for i in \$(seq 1 \${AGAVE_JOB_PROCESSORS_PER_NODE})
do
    cat nodefile.txt >> nodes.txt
done

export NP=\$(wc -l < nodes.txt)

tar xzvf input.tgz

mkdir -p output

/project/singularity/bin/singularity exec \$SING_OPTS /project/sbrandt/chemora/images/\${simagename}.simg bash /usr/local/bin/runapp.sh
mv input/* output/
rm -f output/PRINT*
tar cvzf output.tar.gz output
""")

# Create the deployment and "inputs" directories on the storage system, then
# upload the wrapper template into the deployment directory.
!files-mkdir -S ${STORAGE_MACHINE} -N ${DEPLOYMENT_PATH}
!files-mkdir -S ${STORAGE_MACHINE} -N inputs
!files-upload -F ${BASE_APP_NAME2}-wrapper.txt -S ${STORAGE_MACHINE} ${DEPLOYMENT_PATH}/

# test.txt is the file named by "testPath" in the app definition below; it sets
# parfile and then names the wrapper file.
writefile("test.txt","""
parfile="input.txt"
${BASE_APP_NAME2}-wrapper.txt
""")

# NOTE(review): this files-mkdir repeats the first command of this cell — it
# looks redundant; confirm before removing.
!files-mkdir -S ${STORAGE_MACHINE} -N ${DEPLOYMENT_PATH}
!files-upload -F test.txt -S ${STORAGE_MACHINE} ${DEPLOYMENT_PATH}/

# Write the JSON app definition to ${BASE_APP_NAME2}.txt. It ties together the
# storage system (deploymentSystem), the uploaded wrapper (templatePath),
# test.txt (testPath) and the execution system, and declares one input, one
# string parameter ("simagename", which the wrapper template references) and
# one output.
# The version given here ("2.0") is repeated in APP_NAME further down; keep the
# two in sync when changing it.
writefile("${BASE_APP_NAME2}.txt","""
{  
    "name":"${BASE_APP_NAME2}",
    "version":"2.0",
    "label":"${BASE_APP_NAME2}",
    "shortDescription":"Run ISAAC app",
    "longDescription":"",
    "deploymentSystem":"${STORAGE_MACHINE}",
    "deploymentPath":"${DEPLOYMENT_PATH}",
    "templatePath":"${BASE_APP_NAME2}-wrapper.txt",
    "testPath":"test.txt",
    "executionSystem":"${EXEC_MACHINE}",
    "executionType":"HPC",
    "parallelism":"PARALLEL",
    "allocation":"${ALLOCATION}",
    "modules":[],
    "inputs":[
        {   
        "id":"input tarball",
        "details":{  
            "label":"input tarball",
            "description":"",
            "argument":null,
            "showArgument":false
        },
        "value":{  
            "default":"",
            "order":0,
            "required":false,
            "validator":"",
            "visible":true
        }
    }   

],
"parameters":[
{
  "id": "simagename",
  "value": {
    "visible": true,
    "required": false,
    "type": "string",
    "order": 0,
    "enquote": false,
    "default": "python",
    "validator": null
  },
  "details": {
    "label": "Singularity Image",
    "description": "The Singularity image to run: swan, funwave",
    "argument": null,
    "showArgument": false,
    "repeatArgument": false
  },
  "semantics": {
    "minCardinality": 0,
    "maxCardinality": 1,
    "ontology": []
  }
}
],
"outputs":[  
    {  
        "id":"Output",
        "details":{  
            "description":"The output",
            "label":"tables"
        },
        "value":{  
            "default":"",
            "validator":""
        }
    }
  ]
}
""")


# Submit the app definition file to Agave (adds or updates the app).
!apps-addupdate -F ${BASE_APP_NAME2}.txt

# APP_NAME is the app name followed by the "2.0" version from the app
# definition; the grant/revoke helpers below refer to it as $APP_NAME.
setvar("APP_NAME=${BASE_APP_NAME2}-2.0")

# NOTE(review): this message is printed unconditionally; none of the commands
# above are checked for failure in this cell.
print ("Successfully configured Agave")

In [None]:
import json, os
from command import cmd
# Store the default queue name as a metadata record named "<EXEC_MACHINE>::queue".
# metadata-list is queried for records with that name; the first non-blank line
# of its output (presumably the identifier of an existing record — TODO confirm)
# is appended to the metadata-addupdate command so that record is updated.
# When no such line exists the command is issued without it, creating a record.
metadata = {
    "name": os.environ["EXEC_MACHINE"] + "::queue",
    "value": os.environ["QUEUE"],
}
payload = json.dumps(metadata)

listing = cmd(["metadata-list", "-Q", '{"name":"${EXEC_MACHINE}::queue"}'])
existing = next(
    (entry.strip() for entry in listing["stdout"] if entry.strip() != ''),
    None,
)

addupdate = ["metadata-addupdate", "-F", "-"]
if existing is not None:
    addupdate.append(existing)
cmd(addupdate, inputs=payload)

## Step 6: Grant Access
By default, no one will be able to use the resource you configure unless you specifically grant them access.
The following two functions can be used to grant access to, or revoke access from, an Agave user.

In [None]:
# The following two functions grant or revoke a given user's ability
# to use the application and the two systems it depends on
from command import cmd
def _set_user_access(user, app_perm, system_role, metadata_perm):
    """Apply one access level for `user` to the app, both systems and the queue metadata.

    Args:
        user: Agave username whose access is being changed.
        app_perm: value passed to ``apps-pems-update -p`` for $APP_NAME.
        system_role: value passed to ``systems-roles-addupdate -r`` for
            $STORAGE_MACHINE and $EXEC_MACHINE.
        metadata_perm: value passed to ``metadata-pems-addupdate -p`` for each
            record listed under the name "${EXEC_MACHINE}::queue".
    """
    cmd("apps-pems-update -u {u} -p {p} $APP_NAME".format(u=user, p=app_perm))
    for system in ("$STORAGE_MACHINE", "$EXEC_MACHINE"):
        cmd("systems-roles-addupdate -r {r} -u {u} {s}".format(
            r=system_role, u=user, s=system))
    listing = cmd(["metadata-list","-Q",'{"name":"${EXEC_MACHINE}::queue"}'])
    # Each non-blank output line is passed on as the record to update.
    for line in listing["stdout"]:
        line = line.strip()
        if line != '':
            cmd(["metadata-pems-addupdate","-u",user,"-p",metadata_perm,line])

def grant_user(user):
    """Give `user` READ_EXECUTE on the app, the USER role on both systems and READ on the queue metadata."""
    _set_user_access(user, "READ_EXECUTE", "USER", "READ")

def revoke_user(user):
    """Set `user`'s app permission, system roles and queue-metadata permission to NONE."""
    _set_user_access(user, "NONE", "NONE", "NONE")

In [None]:
# Refresh the Agave access token before issuing further commands.
!auth-tokens-refresh

In [None]:
# Example: grant access to the Agave user "nanw"; replace with the user you want to authorise.
grant_user("nanw")