In [5]:
import json
from unskript import nbparams
from unskript.fwk.workflow import Task, Workflow
from unskript.secrets import ENV_MODE, ENV_MODE_LOCAL

# Environment and secret-store selectors. Both dicts are merged into the
# parameter dictionary below and are also handed to Workflow().
env = {"ENV_MODE": "ENV_MODE_LOCAL"}
secret_store_cfg = {"SECRET_STORE_TYPE": "SECRET_STORE_TYPE_LOCAL"}

# Runbook input parameters with their default values.
# NOTE: the key "dirs_to_anaylze" is misspelled (sic) but later cells refer to
# that exact name, so it must not be corrected here alone.
paramDict = {"Bucket": None, "Threshold": float(100), "channel": None, "dirs_to_anaylze": "/home", "instance_id": None, "prefix": "test/", "region": None}
unSkriptOutputParamDict = {}
paramDict.update(env)
paramDict.update(secret_store_cfg)
# The parameters are serialised to JSON and read back through NBParams;
# presumably this is where externally supplied values override the defaults
# above -- TODO confirm against unskript.nbparams.
paramsJson = json.dumps(paramDict)
nbParamsObj = nbparams.NBParams(paramsJson)
# Expose every parameter as a notebook-level variable so that later cells (and
# the task input expressions) can reference it by name.
Bucket = nbParamsObj.get('Bucket')
Threshold = nbParamsObj.get('Threshold')
channel = nbParamsObj.get('channel')
dirs_to_anaylze = nbParamsObj.get('dirs_to_anaylze')
instance_id = nbParamsObj.get('instance_id')
prefix = nbParamsObj.get('prefix')
region = nbParamsObj.get('region')
# Workflow object used by the following cells to record each task's output
# (see the `w.tasks[...]` assignments).
w = Workflow(env, secret_store_cfg, None, global_vars=globals())

<center><img src="https://unskript.com/assets/favicon.png" alt="unSkript.com" width="100" height="100">
<h1 id="unSkript-Runbooks">unSkript Runbooks<a class="jp-InternalAnchorLink" href="#unSkript-Runbooks" target="_self">&para;</a></h1>
<div class="alert alert-block alert-success">
<h3 id="Objective"><strong>Objective</strong><a class="jp-InternalAnchorLink" href="#Objective" target="_self">&para;</a></h3>
<strong>Archive large files to S3 to free up EC2 disk space.</strong></div>
</center><center>
<h2 id="EC2-Disk-Cleanup">EC2 Disk Cleanup<a class="jp-InternalAnchorLink" href="#EC2-Disk-Cleanup" target="_self">&para;</a></h2>
</center>
<h1 id="Steps-Overview">Steps Overview<a class="jp-InternalAnchorLink" href="#Steps-Overview" target="_self">&para;</a></h1>
<ol>
<li>Find the IP address of the instance</li>
<li>Find large files in the specified path</li>
<li>Map remote file names to S3 object names</li>
<li>Back up files to S3</li>
<li>Delete files from the instance</li>
<li>Send a message to Slack</li>
</ol>

<h3 id="Get-AWS-Instance-Details">Get AWS Instance Details: Find SSH IP</h3>
<p>Here we will use unSkript&nbsp;<strong>Get AWS Instance Details</strong> action. This action is used to find out all the details of the EC2 instance.</p>
<blockquote>
<p><strong>Input parameters:</strong> <code>instance_id, region</code></p>
</blockquote>
<blockquote>
<p><strong>Output variable:</strong> <code>InstanceDetails</code></p>
</blockquote>

In [3]:
##
# Copyright (c) 2021 unSkript, Inc
# All rights reserved.
##
from pydantic import BaseModel, Field


from beartype import beartype
@beartype
def aws_get_instance_details(
    handle,
    instance_id: str,
    region: str,
):
    """Return the EC2 description of a single instance.

    :param handle: Object exposing ``client(service, region_name=...)`` whose
        EC2 client provides ``describe_instances`` (presumably a boto3
        session -- confirm against the unSkript AWS connector).
    :param instance_id: ID of the instance to look up.
    :param region: Region the EC2 client is created for.
    :return: The first instance entry found in the ``describe_instances``
        response.
    :raises IndexError: If the response contains no instance for
        ``instance_id``.
    """
    ec2client = handle.client('ec2', region_name=region)
    response = ec2client.describe_instances(
        Filters=[{"Name": "instance-id", "Values": [instance_id]}])

    # Only the first match is ever returned, so stop at the first instance
    # instead of collecting every entry into a list.
    for reservation in response["Reservations"]:
        for instance in reservation["Instances"]:
            return instance

    # IndexError is the exception an empty lookup has always produced here;
    # keep the type for existing callers but explain what actually went wrong.
    raise IndexError(
        f"No EC2 instance found with id {instance_id!r} in region {region!r}")


# Configure and run aws_get_instance_details through the unSkript Task wrapper.
task = Task(Workflow())
task.configure(printOutput=True)
# Each value below is the name of a notebook variable; presumably
# task.validate() resolves it against the vars() passed further down --
# TODO confirm in unskript.fwk.workflow.
task.configure(inputParamsJson='''{
    "instance_id": "instance_id",
    "region": "region"
    }''')
task.configure(outputName="InstanceDetails")

# Validate the inputs; the action is only executed when validation reports no
# error.
(err, hdl, args) = task.validate(vars=vars())
if err is None:
    task.output = task.execute(aws_get_instance_details, hdl=hdl, args=args)
    if task.output_name != None:
        # Publish the first element of the task output as a notebook global
        # named after outputName ("InstanceDetails").
        globals().update({task.output_name: task.output[0]})

# Echo whatever the task produced and record it on the workflow object `w`.
if hasattr(task, 'output'):
    if isinstance(task.output, (list, tuple)):
        for item in task.output:
            print(f'item: {item}')
    elif isinstance(task.output, dict):
        for item in task.output.items():
            print(f'item: {item}')
    else:
        print(f'Output for {task.name}')
        print(task.output)
    w.tasks[task.name]= task.output
    # NOTE(review): ssh_ip is only assigned when the task has an output, so
    # later cells that use ssh_ip raise NameError if this step did not run.
    ssh_ip = InstanceDetails["PrivateIpAddress"]

<h3 id="SSH:-Execute-Remote-Command">SSH: Execute Remote Command: Locate large files with du</h3>
<p>Here we will use unSkript&nbsp;<strong>SSH: Locate large files on host</strong> action. This action scans the file system on a given host and returns a dict of large files, mapping each file name to its size in megabytes. The command used to perform the scan is <code>find inspect_folder -type f -exec du -sm '{}' + | sort -rh | head -n count</code>.</p>
<blockquote>
<p><strong>Input parameters:</strong> <code>host, inspect_folder,&nbsp;threshold, sudo,&nbsp;count</code></p>
</blockquote>
<blockquote>
<p><strong>Output variable:</strong> <code>FileLocation</code></p>
</blockquote>
<p>&nbsp;</p>

In [30]:
##
# Copyright (c) 2021 unSkript, Inc
# All rights reserved.
##
import json
import tempfile
import os
from pydantic import BaseModel, Field
from pssh.clients import ParallelSSHClient
from typing import List, Optional
from unskript.connectors import ssh

from unskript.fwk.cellparams import CellParams
from unskript import connectors


from beartype import beartype
@beartype
def ssh_find_large_files(
    sshClient,
    host: str,
    inspect_folder: str,
    threshold: int = 0,
    sudo: bool = False,
    count: int = 10) -> dict:
    """Find the largest files under a folder on a remote host.

    :param sshClient: Callable invoked as ``sshClient([host], None)``; the
        object it returns must provide ``run_command(command=..., sudo=...)``
        and ``join()``.
    :param host: Host the scan is run on.
    :param inspect_folder: Folder handed to ``find`` on the remote host.
    :param threshold: Only files strictly larger than this many megabytes are
        reported.
    :param sudo: Passed through to ``run_command``.
    :param count: Maximum number of lines kept by ``head``.
    :return: Dict mapping each reported file name to its size in megabytes.
    """
    client = sshClient([host], None)

    # `du -sm` reports sizes in megabytes, so `threshold` is in MB as well.
    # NOTE(review): inspect_folder is interpolated into the shell command
    # unquoted; it is left that way because callers may pass several
    # space-separated folders -- confirm before adding quoting.
    command = "find " + inspect_folder + \
        " -type f -exec du -sm '{}' + | sort -rh | head -n " + str(count)
    runCommandOutput = client.run_command(command=command, sudo=sudo)
    client.join()
    res = {}

    for host_output in runCommandOutput:
        for line in host_output.stdout:
            # Each line has the form "{size} {fullfilename}". Split only once
            # so that file names containing whitespace stay in one piece.
            fields = line.split(None, 1)
            # Skip blank lines or anything that does not start with a size
            # instead of aborting the whole scan.
            if len(fields) != 2 or not fields[0].isdecimal():
                continue
            size = int(fields[0])
            if size > threshold:
                res[fields[1]] = size

    return res


# Configure and run ssh_find_large_files through the unSkript Task wrapper.
task = Task(Workflow())
task.configure(printOutput=True)
# Each value is an expression over notebook variables (ssh_ip comes from the
# instance-details cell; Threshold is converted to int because the action's
# `threshold` parameter is annotated as int). Presumably task.validate()
# evaluates these against the vars() passed below -- TODO confirm.
task.configure(inputParamsJson='''{
    "count": "10",
    "host": "ssh_ip",
    "inspect_folder": "dirs_to_anaylze",
    "sudo": "False",
    "threshold": "int(Threshold)"
    }''')
task.configure(outputName="FileLocation")

# Validate the inputs; the action is only executed when validation reports no
# error.
(err, hdl, args) = task.validate(vars=vars())
if err is None:
    task.output = task.execute(ssh_find_large_files, hdl=hdl, args=args)
    if task.output_name != None:
        # Publish the first element of the task output as the notebook global
        # "FileLocation", which the next cell reads.
        globals().update({task.output_name: task.output[0]})

# Echo whatever the task produced and record it on the workflow object `w`.
if hasattr(task, 'output'):
    if isinstance(task.output, (list, tuple)):
        for item in task.output:
            print(f'item: {item}')
    elif isinstance(task.output, dict):
        for item in task.output.items():
            print(f'item: {item}')
    else:
        print(f'Output for {task.name}')
        print(task.output)
    w.tasks[task.name]= task.output

<h3 id="Create-local-filenames-from-remote-filenames">Custom Step: Create local filenames from remote filenames</h3>
<p>This step takes the files found in step 2 and builds a mapping from each remote file path to a local file name under <code>/tmp</code>.</p>

In [8]:
# Remote paths reported by the large-file scan (the keys of FileLocation).
remote_files = list(FileLocation.keys())
if not remote_files:
    print("No files to process, exiting")
    # Done() is only called when this Workflow build provides it.
    if hasattr(Workflow(), "Done"):
        Workflow().Done()

# Flatten each remote path into a single file name under /tmp: the leading
# slash is dropped and every remaining slash becomes an underscore.
local_files = ["/tmp/" + path.lstrip("/").replace("/", "_") for path in remote_files]

# Pair each remote path with its local counterpart; later cells iterate over
# this list of {'remote': ..., 'local': ...} dicts.
mapping = [
    {'remote': remote_name, 'local': local_name}
    for remote_name, local_name in zip(remote_files, local_files)
]
print(json.dumps(mapping, indent=2))


<h3 id="SCP:-Remote-file-transfer-over-SSH">SCP: Remote file transfer over SSH</h3>
<p>Here we will use unSkript&nbsp;<strong>SCP: Remote file transfer over SSH</strong> action. This action is used to Copy files from or to the remote host. Files are copied over SCP.</p>
<blockquote>
<p><strong>Input parameters:</strong> <code>host, remote_file, local_file, direction</code></p>
</blockquote>
<blockquote>
<p><strong>Output variable:</strong> <code>transfer_files</code></p>
</blockquote>

In [None]:
##
# Copyright (c) 2021 unSkript, Inc
# All rights reserved.
##

from pydantic import BaseModel, Field
from gevent import joinall


from beartype import beartype
@beartype
def ssh_scp(
        sshClient,
        host: str,
        remote_file: str,
        local_file: str,
        direction: bool = True) -> bool:
    """Copy one file between the local machine and a remote host.

    :param sshClient: Callable invoked as ``sshClient([host], None)``; the
        object it returns must provide ``copy_remote_file`` and ``copy_file``.
    :param host: Remote host taking part in the transfer.
    :param remote_file: Path of the file on the remote host.
    :param local_file: Path of the file on the local machine.
    :param direction: ``True`` copies from the remote host to the local
        machine; any other value copies from local to remote.
    :return: ``True`` when waiting on the transfer raised nothing, ``False``
        otherwise (the error is printed).
    """
    client = sshClient([host], None)
    from_remote = direction is True

    if from_remote:
        pending = client.copy_remote_file(
            remote_file=remote_file,
            local_file=local_file,
            recurse=False,
            suffix_separator="",
            copy_args=[{'local_file': local_file, 'remote_file': remote_file}],
            encoding='utf-8')
    else:
        pending = client.copy_file(
            local_file=local_file,
            remote_file=remote_file,
            recurse=False,
            copy_args=None)

    # Wait for the transfer; any failure is reported and turned into False.
    try:
        joinall(pending, raise_error=True)
        if from_remote:
            print(f"Successfully copied file {host}://{remote_file} to {local_file}")
        else:
            print(f"Successfully copied file {local_file} to {host}://{remote_file}")
    except Exception as e:
        print(f"Error encountered while copying files {e}")
        return False

    return True


# Configure and run ssh_scp through the unSkript Task wrapper, once per entry
# of `mapping` (each entry is a {'remote': ..., 'local': ...} dict).
task = Task(Workflow())
task.configure(printOutput=True)
task.configure(inputParamsJson='''{
    "direction": "True",
    "host": "ssh_ip",
    "local_file": "iter.get(\\"local\\")",
    "remote_file": "iter.get(\\"remote\\")"
    }''')

# The inputs above read the current item through `iter`, so iteration over
# `mapping` has to be enabled exactly as in the S3 upload cell. Without it
# `iter` is just the Python builtin, which has no `.get`.
task.configure(iterJson='''{
    "iter_enabled": true,
    "iter_list_is_const": false,
    "iter_list": "mapping",
    "iter_parameter": ""
    }''')
task.configure(outputName="transfer_files")

# Validate the inputs; the action is only executed when validation reports no
# error.
(err, hdl, args) = task.validate(vars=vars())
if err is None:
    task.output = task.execute(ssh_scp, hdl=hdl, args=args)
    if task.output_name != None:
        # Publish the first element of the task output as the notebook global
        # "transfer_files".
        globals().update({task.output_name: task.output[0]})

# Echo whatever the task produced and record it on the workflow object `w`.
if hasattr(task, 'output'):
    if isinstance(task.output, (list, tuple)):
        for item in task.output:
            print(f'item: {item}')
    elif isinstance(task.output, dict):
        for item in task.output.items():
            print(f'item: {item}')
    else:
        print(f'Output for {task.name}')
        print(task.output)
    w.tasks[task.name]= task.output

<h3 id="Upload-file-to-S3">Upload file to S3</h3>
<p>Here we will use the unSkript <strong>Upload file to S3</strong> action. This action is used to Upload a local file to an S3 bucket.</p>
<blockquote>
<p><strong>Input parameters:</strong> <code>bucketName, file, prefix</code></p>
</blockquote>
<blockquote>
<p><strong>Output variable:</strong> <code>upload_output</code></p>
</blockquote>

In [None]:
##
# Copyright (c) 2021 unSkript, Inc
# All rights reserved.
##


from pydantic import BaseModel, Field


from beartype import beartype
@beartype
def aws_upload_file_to_s3(handle, bucketName: str, file: str, prefix: str = ""):
    """Upload a local file to an S3 bucket.

    The object name is ``prefix`` followed by the last ``/``-separated
    component of ``file``.

    :param handle: Object exposing ``client('s3')`` whose client provides
        ``upload_fileobj``.
    :param bucketName: Name of the destination bucket.
    :param file: Path of the local file to upload.
    :param prefix: Text prepended to the file's base name to form the object
        name.
    :return: ``None``.
    :raises Exception: Any error from opening or uploading the file is printed
        and re-raised.
    """
    s3_client = handle.client('s3')
    objName = prefix + file.rsplit("/", 1)[-1]

    try:
        with open(file, "rb") as source:
            s3_client.upload_fileobj(source, bucketName, objName)
    except Exception as e:
        print(f"Error: {e}")
        raise

    print(f"Successfully copied {file} to bucket:{bucketName} object:{objName}")
    return None


# The "prefix" input below falls back to an expression that uses
# datetime.date.today(); nothing else in the notebook imports datetime, so it
# is imported here to keep that fallback from failing with NameError when
# `prefix` is empty.
import datetime

# Configure and run aws_upload_file_to_s3 through the unSkript Task wrapper,
# once per entry of `mapping`.
task = Task(Workflow())
task.configure(printOutput=True)
task.configure(inputParamsJson='''{
    "bucketName": "Bucket",
    "file": "iter.get(\\"local\\")",
    "prefix": "prefix or f\\"{instance_id}/{str(datetime.date.today())}/\\""
    }''')

# Iterate over `mapping`; the inputs above read the current item via `iter`.
task.configure(iterJson='''{
    "iter_enabled": true,
    "iter_list_is_const": false,
    "iter_list": "mapping",
    "iter_parameter": ""
    }''')
task.configure(outputName="upload_output")

# Validate the inputs; the action is only executed when validation reports no
# error.
(err, hdl, args) = task.validate(vars=vars())
if err is None:
    task.output = task.execute(aws_upload_file_to_s3, hdl=hdl, args=args)
    if task.output_name != None:
        # Publish the first element of the task output as the notebook global
        # "upload_output".
        globals().update({task.output_name: task.output[0]})

# Echo whatever the task produced and record it on the workflow object `w`.
if hasattr(task, 'output'):
    if isinstance(task.output, (list, tuple)):
        for item in task.output:
            print(f'item: {item}')
    elif isinstance(task.output, dict):
        for item in task.output.items():
            print(f'item: {item}')
    else:
        print(f'Output for {task.name}')
        print(task.output)
    w.tasks[task.name]= task.output

<h3 id="SSH-Execute-Remote-Command">SSH Execute Remote Command: Remove Files</h3>
<p>Here we will use unSkript&nbsp;<strong>SSH Execute Remote Command</strong> action. This action runs a command on the remote host over SSH; here it removes the files that were backed up.</p>
<blockquote>
<p><strong>Input parameters:</strong> <code>hosts, command, sudo</code></p>
</blockquote>
<blockquote>
<p><strong>Output variable:</strong> <code>remove_output</code></p>
</blockquote>
<p>&nbsp;</p>

In [None]:
##
# Copyright (c) 2021 unSkript, Inc
# All rights reserved.
##
import json
import tempfile
import os
from pydantic import BaseModel, Field
from pssh.clients import ParallelSSHClient
from typing import List, Optional
from unskript.connectors import ssh

from unskript.legos.cellparams import CellParams
from unskript import connectors


from beartype import beartype
@beartype
def ssh_execute_remote_command(sshClient, hosts: List[str], command: str, sudo: bool = False):
    """Run a command on one or more hosts and collect their standard output.

    :param sshClient: Callable invoked as ``sshClient(hosts, None)``; the
        object it returns must provide ``run_command(command=..., sudo=...)``
        and ``join()``.
    :param hosts: Hosts the command is run on.
    :param command: Command line to execute.
    :param sudo: Passed through to ``run_command``.
    :return: Dict mapping each host to the list of its stdout lines.
    """
    client = sshClient(hosts, None)
    per_host_results = client.run_command(command=command, sudo=sudo)
    client.join()

    collected = {}
    for entry in per_host_results:
        host_name = entry.host
        stdout_lines = list(entry.stdout)
        collected[host_name] = stdout_lines

        # Echo the captured output so it shows up in the notebook cell.
        joined = "\n".join(stdout_lines)
        print(f"Output from host {host_name}\n{joined}\n")

    return collected


import shlex

# Build the delete command up front so that every path is shell-quoted: file
# names containing spaces or shell metacharacters would otherwise be split or
# interpreted by the remote shell. "--" stops rm from treating a name that
# starts with "-" as an option.
# NOTE(review): files are removed regardless of whether the preceding copy and
# upload steps succeeded -- consider gating this cell on their results.
rm_command = "rm -v -- " + " ".join(shlex.quote(path) for path in remote_files)

# Configure and run ssh_execute_remote_command through the unSkript Task
# wrapper.
task = Task(Workflow())
task.configure(printOutput=True)
task.configure(inputParamsJson='''{
    "command": "rm_command",
    "hosts": "[ ssh_ip ]",
    "sudo": "False"
    }''')
task.configure(outputName="remove_output")

# Validate the inputs; the action is only executed when validation reports no
# error.
(err, hdl, args) = task.validate(vars=vars())
if err is None:
    task.output = task.execute(ssh_execute_remote_command, hdl=hdl, args=args)
    if task.output_name != None:
        # Publish the first element of the task output as the notebook global
        # "remove_output".
        globals().update({task.output_name: task.output[0]})

# Echo whatever the task produced and record it on the workflow object `w`.
if hasattr(task, 'output'):
    if isinstance(task.output, (list, tuple)):
        for item in task.output:
            print(f'item: {item}')
    elif isinstance(task.output, dict):
        for item in task.output.items():
            print(f'item: {item}')
    else:
        print(f'Output for {task.name}')
        print(task.output)
    w.tasks[task.name]= task.output

<h3 id="Clean-up-local-files">Clean up local files</h3>
<p>This action is an extension of Step 5 where we will clean up the files locally.</p>

In [8]:
from subprocess import PIPE, run

# Remove the temporary local copies. The file names are derived from paths
# found on the remote host, so they are passed as an argument list (no shell)
# to keep spaces or shell metacharacters in a name from being interpreted.
# "--" stops rm from treating a name that starts with "-" as an option.
o = run(["rm", "-fv", "--", *local_files], stdout=PIPE, stderr=PIPE, universal_newlines=True)
print(o.stdout)

<h3 id="Post-Slack-Message">Post Slack Message</h3>
<p>Here we will use unSkript&nbsp;<strong>Post Slack Message</strong> action. This action is used to post the message to the slack channel.</p>
<blockquote>
<p><strong>Input parameters:</strong> <code>channel, message</code></p>
</blockquote>
<blockquote>
<p><strong>Output variable:</strong>&nbsp;<code>slack_status</code></p>
</blockquote>
<p>&nbsp;</p>

In [None]:
##
# Copyright (c) 2021 unSkript, Inc
# All rights reserved.
##

import pprint

from pydantic import BaseModel, Field
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError

pp = pprint.PrettyPrinter(indent=2)


from beartype import beartype
def legoPrinter(func):
    """Decorate a Slack action so that a success line is printed.

    The wrapped function is called unchanged; when its result is truthy a
    confirmation naming the channel is printed.

    :param func: Function to wrap. The channel is taken from the ``channel``
        keyword argument, or from the second positional argument when it was
        not passed by keyword.
    :return: The wrapping function, which returns whatever ``func`` returns.
    """
    def Printer(*args, **kwargs):
        output = func(*args, **kwargs)
        if output:
            # The channel may arrive by keyword or positionally (it is the
            # second parameter of slack_post_message); never fail the call
            # just because the confirmation cannot name it.
            if "channel" in kwargs:
                channel = kwargs["channel"]
            elif len(args) > 1:
                channel = args[1]
            else:
                channel = None
            # Print once; wrapping print() in pprint() would also emit the
            # "None" that print() returns.
            print(f"Message sent to Slack channel {channel}")
        return output
    return Printer


@legoPrinter
@beartype
def slack_post_message(
        handle: WebClient,
        channel: str,
        message: str) -> bool:
    """Post a text message to a Slack channel.

    :param handle: Slack ``WebClient`` used to call ``chat_postMessage``.
    :param channel: Channel the message is posted to.
    :param message: Text of the message.
    :return: ``True`` when the call raised nothing, ``False`` when it raised
        (the failure is printed instead of propagated).
    """
    try:
        handle.chat_postMessage(channel=channel, text=message)
    except SlackApiError as api_error:
        # Slack rejected the request; report the error field of its response.
        print("\n\n")
        pp.pprint(
            f"Failed sending message to slack channel {channel}, Error: {api_error.response['error']}")
        sent = False
    except Exception as other_error:
        # Any other failure is reported with the exception text.
        print("\n\n")
        pp.pprint(
            f"Failed sending message to slack channel {channel}, Error: {other_error.__str__()}")
        sent = False
    else:
        sent = True

    return sent


# Configure and run slack_post_message through the unSkript Task wrapper.
task = Task(Workflow())
task.configure(printOutput=True)
# "channel" names the notebook parameter of the same name; "message" is an
# f-string expression over remote_files and ssh_ip from earlier cells.
# Presumably task.validate() evaluates these against the vars() passed below
# -- TODO confirm.
task.configure(inputParamsJson='''{
    "channel": "channel",
    "message": "f\\"Deleted {len(remote_files)} files from host {ssh_ip}\\""
    }''')
task.configure(outputName="slack_status")

# Validate the inputs; the action is only executed when validation reports no
# error.
(err, hdl, args) = task.validate(vars=vars())
if err is None:
    task.output = task.execute(slack_post_message, hdl=hdl, args=args)
    if task.output_name != None:
        # Publish the first element of the task output as the notebook global
        # "slack_status".
        globals().update({task.output_name: task.output[0]})

<h3 id="Conclusion">Conclusion</h3>
<p>In this Runbook, we demonstrated the use of unSkript's AWS and SSH legos to locate large files in a given path on an EC2 instance and back them up to a given S3 bucket. Afterwards, the runbook deletes the backed-up files from the instance and sends a message to Slack. To view the full platform capabilities of unSkript, please visit&nbsp;<a href="https://us.app.unskript.io" target="_blank" rel="noopener">https://us.app.unskript.io</a>.</p>