# AWS CloudFormation to Terraform Resources

In [1]:
import functools
import json
import os
import re
import urllib.parse
from typing import List, Any

import requests
from bs4 import BeautifulSoup
from lxml import etree

In [2]:
def dump_json(filename: str, data: Any):
    """Write ``data`` to ``filename`` as JSON indented by two spaces.

    The parent directory of ``filename`` is created when it does not exist,
    so the ``dumps/...`` targets used throughout this notebook also work on a
    fresh checkout where that folder has not been created yet.

    Args:
        filename: Destination path; an existing file is overwritten.
        data: Any value that ``json.dumps`` can serialize.

    Raises:
        TypeError: If ``data`` contains values that are not JSON serializable.
    """
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        # A bare file name has no directory component; nothing to create then.
        os.makedirs(parent_dir, exist_ok=True)

    with open(filename, "w") as outfile:
        outfile.write(json.dumps(data, indent=2))

## Terraform AWS Provider Docs

[Reference Documentation](https://registry.terraform.io/providers/hashicorp/aws/latest/docs)

In [3]:
# Provider-version id used in the registry URL below; it corresponds to
# terraform-provider-aws v3.63.0.
AWS_PROVIDER_VERSION = "16565"
AWS_PROVIDER_DOCS_URL = (
    "https://registry.terraform.io/v2/provider-versions/"
    f"{AWS_PROVIDER_VERSION}?include=provider-docs"
)

# Download the provider-version document together with its included docs.
aws_provider_docs = requests.get(AWS_PROVIDER_DOCS_URL).json()

# Keep only the included entries whose category is "resources".
aws_provider_resources = [
    doc
    for doc in aws_provider_docs["included"]
    if doc["attributes"]["category"] == "resources"
]

In [4]:
# Resource type names: the docs title of every resource, prefixed with "aws_".
aws_tf_resources = {
    "aws_" + doc["attributes"]["title"] for doc in aws_provider_resources
}

In [5]:
# Persist the raw registry response for offline inspection.
dump_json(
    filename="dumps/aws_terraform_provider_docs.json",
    data=aws_provider_docs,
)

In [6]:
# A set is not JSON serializable; sorted() turns it into an ordered list.
dump_json(
    filename="dumps/aws_tf_resources.json",
    data=sorted(aws_tf_resources),
)

---

## AWS CloudFormation Resources

[Reference Documentation](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-template-resource-type-ref.html)

In [7]:
# Index page that lists every CloudFormation service namespace.
AWS_REF_URL = (
    "https://docs.aws.amazon.com/AWSCloudFormation/latest/"
    "UserGuide/aws-template-resource-type-ref.html"
)
# Per-service table-of-contents document; fill in with .format(service=...).
AWS_TOC_URL = (
    "https://docs.aws.amazon.com/AWSCloudFormation/latest/"
    "UserGuide/toc-{service}.json"
)

In [8]:
# Raw HTML of the CloudFormation resource-type reference index page.
ref_docs_body = requests.get(url=AWS_REF_URL).text

In [9]:
# Parse with BeautifulSoup, then re-parse its serialized markup with lxml so
# the page can be queried through XPath.
soup = BeautifulSoup(ref_docs_body, features="html.parser")
dom = etree.HTML(str(soup))

In [10]:
def parse_toc(contents: List[Any]):
    """Collect every title containing ``"AWS::"`` from a nested TOC tree.

    The tree is walked depth-first in document order: an entry's own title
    comes before the titles found in its ``"contents"`` children.

    Args:
        contents: List of TOC entries (dicts) that may carry a ``"title"``
            and a nested ``"contents"`` list.

    Returns:
        A flat list of the matching titles.
    """
    found = []
    for entry in contents:
        title = entry.get("title", "")
        if "AWS::" in title:
            found.append(title)

        children = entry.get("contents", [])
        if len(children) > 0:
            found += parse_toc(children)
    return found

In [11]:
# service name -> raw TOC document, and the flat list of "AWS::..." titles.
aws_cloud_formation_resource_docs = {}
aws_cloud_formation_resources = []

for link in dom.xpath('//*[@class="highlights"]/ul/li/a'):
    # "./AWS_ACMPCA.html" -> "AWS_ACMPCA"
    service = link.get("href").replace("./", "").split(".")[0]
    try:
        resource_toc = requests.get(AWS_TOC_URL.format(service=service)).json()
    except ValueError:
        # Depending on the requests version and on whether simplejson is
        # installed, the decode error is not always a json.JSONDecodeError,
        # but every variant derives from ValueError. Pages without a TOC
        # document are reported and skipped (best effort).
        print(f"Unable to parse the following: {service}")
        continue

    aws_cloud_formation_resource_docs[service] = resource_toc
    aws_cloud_formation_resources.extend(parse_toc(resource_toc["contents"]))

Unable to parse the following: cfn-reference-shared


In [12]:
# Display the TOC documents collected per service, for inspection.
aws_cloud_formation_resource_docs

{'AWS_ACMPCA': {'contents': [{'title': 'ACM PCA',
    'href': 'AWS_ACMPCA.html',
    'contents': [{'title': 'AWS::ACMPCA::Certificate',
      'href': 'aws-resource-acmpca-certificate.html',
      'contents': [{'title': 'ApiPassthrough',
        'href': 'aws-properties-acmpca-certificate-apipassthrough.html'},
       {'title': 'EdiPartyName',
        'href': 'aws-properties-acmpca-certificate-edipartyname.html'},
       {'title': 'ExtendedKeyUsage',
        'href': 'aws-properties-acmpca-certificate-extendedkeyusage.html'},
       {'title': 'Extensions',
        'href': 'aws-properties-acmpca-certificate-extensions.html'},
       {'title': 'GeneralName',
        'href': 'aws-properties-acmpca-certificate-generalname.html'},
       {'title': 'KeyUsage',
        'href': 'aws-properties-acmpca-certificate-keyusage.html'},
       {'title': 'OtherName',
        'href': 'aws-properties-acmpca-certificate-othername.html'},
       {'title': 'PolicyInformation',
        'href': 'aws-properties-a

In [13]:
# Persist the raw per-service TOC documents.
dump_json(
    filename="dumps/aws_cf_resources_docs.json",
    data=aws_cloud_formation_resource_docs,
)

In [14]:
# Persist the flat list of CloudFormation resource type names.
dump_json(
    filename="dumps/aws_cf_resources.json",
    data=aws_cloud_formation_resources,
)

---

## AWS Service Authorization Reference

In [15]:
# Base URL that the relative service links on the list page are resolved against.
AWS_REFERENCES_ROOT_URL = "https://docs.aws.amazon.com/service-authorization/latest/reference/"
# Page that lists every service of the Service Authorization Reference.
AWS_SERVICE_AUTHZ_LIST_URL = (
    AWS_REFERENCES_ROOT_URL
    + "reference_policies_actions-resources-contextkeys.html"
)

In [16]:
# Raw HTML of the service list page.
service_list_body = requests.get(url=AWS_SERVICE_AUTHZ_LIST_URL).text

In [17]:
# Parse with BeautifulSoup, then re-parse its serialized markup with lxml so
# the page can be queried through XPath.
services_soup = BeautifulSoup(service_list_body, features="html.parser")
service_list_dom = etree.HTML(str(services_soup))

In [18]:
# Map each listed service's link text to the absolute URL of its page.
services_links = {}
for link in service_list_dom.xpath('//*[@class="highlights"]/ul/li/a'):
    services_links[link.text] = urllib.parse.urljoin(
        AWS_REFERENCES_ROOT_URL,
        link.get("href"),
    )

len(services_links)

299

In [19]:
def parse_service_action(url: str):
    """Scrape the service prefix and the action list from one service page.

    Args:
        url: URL of a service page of the Service Authorization Reference.

    Returns:
        A ``(prefix, actions)`` tuple.

        * ``prefix`` is the text of the first ``<code>`` element in the first
          paragraph of ``#main-col-body``; when that element is missing,
          ``url`` itself is returned instead.
        * ``actions`` is a list of single-entry dicts
          ``{action name: description}``. It is empty when no actions table
          is found on the page.
    """
    body = requests.get(url).text
    # Re-parse BeautifulSoup's serialized markup with lxml to get XPath queries.
    soup = BeautifulSoup(body, "html.parser")
    dom = etree.HTML(str(soup))

    try:
        prefix = dom.xpath('//*[@id="main-col-body"]/p[1]/code')[0].text
    except IndexError:
        prefix = url

    # The table sits in the 3rd, 4th or 5th <div> of the main column; take
    # the first one that exists.
    table = None
    for div_index in range(3, 6):
        tables = dom.xpath(f'//*[@id="main-col-body"]/div[{div_index}]/div/table')
        if tables:
            table = tables[0]
            break

    # Compare with None explicitly: the truth value of an lxml element
    # depends on whether it has children, not on whether it exists.
    if table is None:
        # Previously `table` stayed unbound here and the lookup below raised
        # UnboundLocalError, aborting the whole crawl for one odd page.
        return prefix, []

    actions = []
    for tr in table.findall("tr"):
        tds = tr.findall("td")
        if len(tds) < 5:
            # Rows with fewer than five cells are not parsed as action rows.
            continue

        action_td, description_td, *_ = tds

        try:
            action_text = action_td.text.strip()
            if len(action_td.text) < 3:
                # Hardly any direct text in the cell: use the text of the
                # second <a> element inside it instead.
                _, action_link, *_ = action_td.findall("a")
                action_text = action_link.text.strip()
        except (ValueError, AttributeError):
            # Individual actions are not critical for this use case.
            # Needs to be tuned in order to have 100% accuracy.
            continue

        try:
            # Collapse every whitespace run (newlines included) to one space.
            description = re.sub(r"\s+", " ", description_td.text.strip())
        except AttributeError:
            # The description cell has no direct text.
            continue

        actions.append({action_text: description})

    return prefix, actions
            
# parse_service_action(services_links["Amazon EC2"])

In [20]:
# One record per service: display name, prefix, docs URL and scraped actions.
aws_service_authz_mapping = []

for service_name, service_link in services_links.items():
    print(service_name)
    prefix, actions = parse_service_action(service_link)

    aws_service_authz_mapping.append(
        dict(
            name=service_name,
            prefix=prefix,
            docs=service_link,
            actions=actions,
        )
    )

AWS Account Management
AWS Activate
Alexa for Business
AmazonMediaImport
AWS Amplify
AWS Amplify Admin
Apache Kafka APIs for Amazon MSK clusters
Amazon API Gateway
Amazon API Gateway Management
Amazon API Gateway Management V2
AWS App Mesh
AWS App Mesh Preview
AWS App Runner
AWS AppConfig
Amazon AppFlow
Amazon AppIntegrations
Application Auto Scaling
AWS Application Cost Profiler Service
Application Discovery
Application Discovery Arsenal
AWS Application Migration Service
Amazon AppStream 2.0
AWS AppSync
AWS Artifact
Amazon Athena
AWS Audit Manager
AWS Auto Scaling
AWSDataSync
AWS Backup
AWS Backup storage
AWS Batch
AWS Billing and Cost Management
Amazon Braket
AWS Budget Service
AWS BugBust
AWS Certificate Manager
AWS Certificate Manager Private Certificate Authority
AWS Chatbot
Amazon Chime
AWS Cloud Control API
Amazon Cloud Directory
AWS Cloud Map
AWS Cloud9
AWS CloudFormation
Amazon CloudFront
AWS CloudHSM
Amazon CloudSearch
AWS CloudShell
AWS CloudTrail
Amazon CloudWatch
CloudWatc

In [21]:
# Persist the scraped per-service action reference.
dump_json(
    filename="dumps/aws_actions_reference.json",
    data=aws_service_authz_mapping,
)

In [22]:

def name_to_prefix(out, item):
    """Reducer step: record ``item``'s prefix under its name in ``out``.

    Args:
        out: Accumulator dict; it is modified in place.
        item: Mapping with at least the keys ``"name"`` and ``"prefix"``.

    Returns:
        The same ``out`` object, so the function can be used with
        ``functools.reduce``.
    """
    service_prefix = item["prefix"]
    service_name = item["name"]
    out[service_name] = service_prefix
    return out
    
# Fold the per-service records into one {service name: prefix} dict.
action_name_to_prefix_mapping = functools.reduce(
    name_to_prefix,
    aws_service_authz_mapping,
    {},
)
action_name_to_prefix_mapping

{'AWS Account Management': 'account',
 'AWS Activate': 'activate',
 'Alexa for Business': 'a4b',
 'AmazonMediaImport': 'mediaimport',
 'AWS Amplify': 'amplify',
 'AWS Amplify Admin': 'amplifybackend',
 'Apache Kafka APIs for Amazon MSK clusters': 'kafka-cluster',
 'Amazon API Gateway': 'execute-api',
 'Amazon API Gateway Management': 'apigateway',
 'Amazon API Gateway Management V2': 'apigateway',
 'AWS App Mesh': 'appmesh',
 'AWS App Mesh Preview': 'appmesh-preview',
 'AWS App Runner': 'apprunner',
 'AWS AppConfig': 'appconfig',
 'Amazon AppFlow': 'appflow',
 'Amazon AppIntegrations': 'app-integrations',
 'Application Auto Scaling': 'application-autoscaling',
 'AWS Application Cost Profiler Service': 'application-cost-profiler',
 'Application Discovery': 'discovery',
 'Application Discovery Arsenal': 'arsenal',
 'AWS Application Migration Service': 'mgn',
 'Amazon AppStream 2.0': 'appstream',
 'AWS AppSync': 'appsync',
 'AWS Artifact': 'artifact',
 'Amazon Athena': 'athena',
 'AWS Aud

In [23]:
# Number of distinct service names in the mapping.
len(action_name_to_prefix_mapping)

299

In [24]:
# Persist the service name -> prefix lookup table.
dump_json(
    filename="dumps/aws_actions_name_to_prefix.json",
    data=action_name_to_prefix_mapping,
)