In [8]:
import boto3
import subprocess
import shlex
import json

def run_command(cmd):
    """Run a command line and return its standard output as text.

    The command string is tokenised with ``shlex.split`` and executed directly
    via ``subprocess.run`` (no shell is involved, so pipes and redirections in
    ``cmd`` are not interpreted).

    Args:
        cmd: The command line as a single string, e.g. ``'aws s3 ls'``.

    Returns:
        The captured stdout of the process, decoded as UTF-8.

    Raises:
        ValueError: If ``cmd`` contains no tokens (empty or whitespace only).
        RuntimeError: If the process exits with a non-zero return code. The
            message contains the exit code and the captured stderr (or stdout
            when stderr is empty).
    """
    args = shlex.split(cmd)
    if not args:
        raise ValueError('Cannot run an empty command')

    p = subprocess.run(args, capture_output=True)

    # A non-zero return code means failure -> raise with as much detail as possible.
    if 0 != p.returncode:
        # Decode leniently: a stray non-UTF-8 byte in the error stream must not
        # replace the real failure with a UnicodeDecodeError.
        details = p.stderr.decode(errors='replace')
        if not details.strip():
            # Some tools report their errors on stdout instead of stderr.
            details = p.stdout.decode(errors='replace')
        raise RuntimeError(
            'Process failed (exit code {}): {}'.format(p.returncode, details)
        )

    # Success: hand back stdout as text.
    return p.stdout.decode()

def run_awscli(cmd):
    """Execute an AWS CLI command line and return its output parsed from JSON.

    ``cmd`` is passed unchanged to ``run_command``; the text that comes back
    is handed to ``json.loads`` and the resulting Python object is returned.
    """
    raw_output = run_command(cmd)
    parsed = json.loads(raw_output)
    return parsed

In [9]:
# List the EMR Serverless applications; the parsed JSON response is shown as the cell output.
run_awscli("aws emr-serverless list-applications")

{'applications': [{'id': '00eublscuhvnu609',
   'name': 'emr-zillow',
   'arn': 'arn:aws:emr-serverless:us-east-1:587583095482:/applications/00eublscuhvnu609',
   'releaseLabel': 'emr-6.5.0-preview',
   'type': 'SPARK',
   'state': 'CREATED',
   'stateDetails': '',
   'createdAt': 1643650052.768,
   'updatedAt': 1643650053.761}]}

Reference: EMR Serverless getting-started guide — https://docs.aws.amazon.com/emr/latest/EMR-Serverless-UserGuide/getting-started.html

In [10]:
# create bucket

```
aws emr-serverless start-job-run \
    --application-id <application_id> \
    --execution-role-arn <execution_role_arn> \
    --job-driver '{
        "sparkSubmit": {
            "entryPoint": "s3://us-east-1.elasticmapreduce/emr-containers/samples/wordcount/scripts/wordcount.py",
            "entryPointArguments": ["s3://DOC-EXAMPLE-BUCKET/output"],
            "sparkSubmitParameters": "--conf spark.executor.cores=1 --conf spark.executor.memory=4g --conf spark.driver.cores=1 --conf spark.driver.memory=4g --conf spark.executor.instances=1"
        }
    }' \
    --configuration-overrides '{
        "monitoringConfiguration": {
           "s3MonitoringConfiguration": {
             "logUri": "s3://DOC-EXAMPLE-BUCKET/logs"
           }
        }
    }'
```
    

```
aws emr-serverless get-job-run \
--application-id <application_id> \
--job-run-id <job_run_id>
```

In [12]:
# Low-level boto3 client used for the S3 calls in the following cells.
s3 = boto3.client("s3")

In [13]:
# Show the full list_buckets response (response metadata plus the 'Buckets' list).
s3.list_buckets()

{'ResponseMetadata': {'RequestId': 'N1Y4MANRQ0EYP0GB',
  'HostId': 'We4oYC5JWNlfo+Cd5c9QqgcFi00ONPPF0hmXLRX367w9xGyvGQ6uo9Z57PeVsVe2c9ZL4xcCxMU=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'We4oYC5JWNlfo+Cd5c9QqgcFi00ONPPF0hmXLRX367w9xGyvGQ6uo9Z57PeVsVe2c9ZL4xcCxMU=',
   'x-amz-request-id': 'N1Y4MANRQ0EYP0GB',
   'date': 'Mon, 31 Jan 2022 20:34:40 GMT',
   'content-type': 'application/xml',
   'transfer-encoding': 'chunked',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'Buckets': [{'Name': 'aws-deepracer-3f4fbafa-e09c-412c-8491-baeb4b0bffb7',
   'CreationDate': datetime.datetime(2020, 3, 2, 22, 55, 55, tzinfo=tzutc())},
  {'Name': 'bbsn00',
   'CreationDate': datetime.datetime(2020, 3, 16, 14, 12, 58, tzinfo=tzutc())},
  {'Name': 'bmwcpo',
   'CreationDate': datetime.datetime(2019, 3, 4, 17, 32, 42, tzinfo=tzutc())},
  {'Name': 'pywren-leonhard',
   'CreationDate': datetime.datetime(2021, 11, 22, 19, 54, 24, tzinfo=tzutc())},
  {'Name': 'results-leonhard',
   'Creat

In [18]:
# Name of the S3 bucket that appears in the job's output and log URIs (s3://serverless-emr/...).
emr_bucket = "serverless-emr"

In [25]:
# Ask EMR Serverless to start the application.
!aws emr-serverless start-application --application-id '00eublscuhvnu609'

# The application is not usable immediately: fetch its description and wait
# until it has started before submitting a job.
!aws emr-serverless get-application --application-id '00eublscuhvnu609'

usage: Note: AWS CLI version 2, the latest major version of the AWS CLI, is now stable and recommended for general use. For more information, see the AWS CLI version 2 installation instructions at: https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html

usage: aws [options] <command> <subcommand> [<subcommand> ...] [parameters]
To see help text, you can run:

  aws help
  aws <command> help
  aws <command> <subcommand> help
aws: error: the following arguments are required: --application-id


In [26]:
# start of application takes a while...
aws emr-serverless get-application --application-id '00eublscuhvnu609' | jq '."application"."state"'

SyntaxError: invalid syntax (<ipython-input-26-8a738b112381>, line 2)

In [23]:
!aws emr-serverless start-job-run \
    --application-id '00eublscuhvnu609' \
    --execution-role-arn 'arn:aws:iam::587583095482:role/emrExecutionRole' \
    --job-driver '{
        "sparkSubmit": {
            "entryPoint": "s3://us-east-1.elasticmapreduce/emr-containers/samples/wordcount/scripts/wordcount.py",
            "entryPointArguments": ["s3://serverless-emr/wordcount/output"],
            "sparkSubmitParameters": "--conf spark.executor.cores=1 --conf spark.executor.memory=4g --conf spark.driver.cores=1 --conf spark.driver.memory=4g --conf spark.executor.instances=1"
        }
    }' \
    --configuration-overrides '{
        "monitoringConfiguration": {
           "s3MonitoringConfiguration": {
             "logUri": "s3://serverless-emr/wordcount/logs"
           }
        }
    }'

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 7)

In [27]:
# Job-run ID kept for reference (presumably the one returned by start-job-run — confirm);
# the same value is passed as --job-run-id to get-job-run.
"00eubpcad7ceub01"

'00eubpcad7ceub01'

In [29]:
# Fetch the description of the submitted job run; the "state" field of the
# response shows how the run ended.
!aws emr-serverless get-job-run \
    --application-id '00eublscuhvnu609' \
    --job-run-id "00eubpcad7ceub01"

{
    "jobRun": {
        "applicationId": "00eublscuhvnu609",
        "jobRunId": "00eubpcad7ceub01",
        "arn": "arn:aws:emr-serverless:us-east-1:587583095482:/applications/00eublscuhvnu609/jobruns/00eubpcad7ceub01",
        "createdBy": "arn:aws:iam::587583095482:user/Leonhard",
        "createdAt": 1643661788.49,
        "updatedAt": 1643661794.339,
        "executionRole": "arn:aws:iam::587583095482:role/emrExecutionRole",
        "state": "FAILED",
        "stateDetails": "",
        "releaseLabel": "emr-6.5.0-preview",
        "configurationOverrides": {
            "monitoringConfiguration": {
                "s3MonitoringConfiguration": {
                    "logUri": "s3://serverless-emr/wordcount/logs"
                }
            }
        },
        "jobDriver": {
            "sparkSubmit": {
                "entryPoint": "s3://us-east-1.elasticmapreduce/emr-containers/samples/wordcount/scripts/wordcount.py",
                "entryPointArguments"

In [21]:
# Look up the ARN of the emrExecutionRole IAM role; jq extracts Role.Arn and
# -r prints it as a raw string without JSON quotes.
!aws iam get-role --role-name emrExecutionRole | jq -r '."Role"."Arn"'

arn:aws:iam::587583095482:role/emrExecutionRole
