In [2]:
import os
import traceback
import yaml
from pathlib import Path
import pandas as pd

total_count = 0

def parse_provider(config, permissions):
    if 'provider' not in config:
        return

    provider = config['provider']

    if not isinstance(provider, dict):
        return

    if 'iam' in provider:
        if 'role' in provider['iam']:
            role = provider['iam']['role']

            if isinstance(role, str):
                return

            if isinstance(role, dict) and 'statements' in role:
                for stmt in role['statements']:
                    if isinstance(stmt, dict):
                        actions = stmt['Action']
                        if isinstance(actions, list):
                            for action in actions:
                                permissions.add(action)

            if 'managedPolicies' in role:
                managedPolicies = role['managedPolicies']
                if isinstance(managedPolicies, list):
                    for policy in managedPolicies:
                        permissions.add(policy)

    elif 'iamRoleStatements' in provider:
        stmts = provider['iamRoleStatements']
        if isinstance(stmts, list):
            for stmt in stmts:
                if isinstance(stmt, dict):
                    actions = stmt['Action']
                    if isinstance(actions, list):
                        for action in actions:
                            permissions.add(action)

def parse_functions(config, permissions):
    if 'functions' not in config:
        return

    functions = config['functions']

    if isinstance(functions, dict):
        for function_name, function in functions.items():
            if isinstance(function, dict):
                if 'iamRoleStatements' in function:
                    stmts = function['iamRoleStatements']
                    if isinstance(stmts, list):
                        for stmt in stmts:
                            if isinstance(stmt, dict):
                                actions = stmt['Action']
                                if isinstance(actions, list):
                                    for action in actions:
                                        permissions.add(action)
    elif isinstance(functions, list):
        # TODO resolve additional YML
        pass

def parse_roles(config, permissions):
    if 'resources' not in config:
        return

    resources = config['resources']
    if not isinstance(resources, dict):
        return

    if 'Resources' not in resources:
        return

    resources = resources['Resources']
    if not isinstance(resources, dict):
        return

    for resource_name, resource in resources.items():
        if not isinstance(resource, dict):
            continue

        if 'Type' not in resource:
            continue

        if resource['Type'] == 'AWS::IAM::Role':
            if 'Policies' in resource:
                for policy in resource['Policies']:
                    try:
                        stmts = policy['PolicyDocument']['Statement']
                        if isinstance(stmts, list):
                            for stmt in stmts:
                                actions = stmt['Action']
                                if isinstance(actions, list):
                                    for action in actions:
                                        permissions.add(action)
                    except KeyError:
                        pass


def process_project(project_path):
    global total_count
    permissions = set()

    for path in Path(project_path).rglob('*serverless*.y*ml'):
        total_count += 1

        with open(path, 'r') as file:
            try:
                config = yaml.safe_load(file)
                if not isinstance(config, dict):
                    continue

                parse_provider(config, permissions)
                parse_functions(config, permissions)
                parse_roles(config, permissions)
            except yaml.constructor.ConstructorError:
                continue
            except yaml.scanner.ScannerError:
                continue
            except yaml.composer.ComposerError:
                continue
            except yaml.parser.ParserError:
                continue
            except Exception as e:
                print(traceback.format_exc())
                continue

    return permissions

if __name__ == '__main__':
    users = [f.path for f in os.scandir('/data/serverless-dataset') if f.is_dir()]
    projects = []
    for user in users:
        projects += [f.path for f in os.scandir(user) if f.is_dir()]

    # print('{} Projects found.'.format(len(projects)))

    permissions = {}

    for project in projects:
        project_permissions = process_project(project)

        for permission in project_permissions:
            if permission not in permissions:
                permissions[permission] = 0

            permissions[permission] += 1

    # print('{} candidate ymls found.'.format(total_count))

    df = pd.DataFrame(list(permissions.items()), columns=['Permission', 'Count'])

# Count of Permissions Used in Projects

In [6]:
pd.set_option('display.max_rows', 600)
df.sort_values('Count', ascending=False)

Unnamed: 0,Permission,Count
25,dynamodb:PutItem,220
6,dynamodb:GetItem,197
27,dynamodb:Scan,171
2,dynamodb:Query,169
1,dynamodb:UpdateItem,166
19,dynamodb:DeleteItem,160
4,s3:PutObject,96
21,s3:GetObject,91
58,dynamodb:DescribeTable,75
17,lambda:InvokeFunction,75


# Service Permissions Used

In [7]:
tmp = df['Permission'].str.split(':', n=1, expand=True)
tmp = tmp.rename(columns={0: 'Service', 1: 'Action'})
tmp['Service'] = tmp['Service'].str.lower()
tmp['Action'] = tmp['Action'].str.lower()
pd.set_option('display.max_rows', 100)
tmp.groupby('Service').size().sort_values(ascending=False)

Service
s3                         44
dynamodb                   40
ec2                        36
iam                        30
ecs                        23
rds                        21
sns                        20
cloudformation             19
lambda                     18
cognito-idp                14
sqs                        13
iot                        13
kms                        11
cloudwatch                 11
logs                       10
rekognition                10
cloudfront                 10
ses                        10
elasticloadbalancing       10
kinesis                     9
ecr                         8
xray                        8
route53                     7
autoscaling                 7
ssm                         7
states                      7
sagemaker                   6
events                      6
es                          5
firehose                    5
comprehend                  5
elastictranscoder           4
apigateway                  4
co

In [8]:
df[df['Permission'].str.contains('arn:')]#.groupby('Action').size().sort_values(ascending=False)

Unnamed: 0,Permission,Count
467,arn:aws:iam::aws:policy/service-role/AWSLambda...,1
468,arn:aws:iam::aws:policy/service-role/AWSLambda...,1
