In [1]:
from typing import Mapping, List, Any

import json
import re

from difflib import SequenceMatcher
from collections import defaultdict

# Analyze Gathered Data

## Load Data

In [2]:
def load(filename: str) -> Mapping[Any, Any]:
    """Read the JSON document stored at ``filename`` and return the decoded value.

    The file is opened explicitly as UTF-8 (the standard JSON encoding) so the
    result does not depend on the platform's default locale encoding.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON value (the callers in this notebook expect a mapping).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(filename, "r", encoding="utf-8") as infile:
        # json.load parses straight from the file object.
        return json.load(infile)
    
def dump(filename: str, data: Any):
    """Write ``data`` to ``filename`` as JSON indented by two spaces.

    An existing file at ``filename`` is overwritten.
    """
    with open(filename, "w") as outfile:
        serialized = json.dumps(data, indent=2)
        outfile.write(serialized)

In [3]:
# Previously gathered Terraform AWS provider documentation dump; the cells
# below read its "included" entries.
aws_tf_provider_docs = load(filename="dumps/aws_terraform_provider_docs.json")

In [4]:
# Mapping of AWS service name -> action prefix (per the dump's file name);
# later cells use its keys as match targets and its values as prefixes.
aws_actions_mapping = load(filename="dumps/aws_actions_name_to_prefix.json")

## Terraform Provider Mapping

In [5]:
# Group Terraform resource names by the documentation subcategory they are
# listed under: {subcategory: ["aws_<title>", ...]}.
tf_mapping = defaultdict(list)

for item in aws_tf_provider_docs["included"]:
    attr = item["attributes"]

    # Only entries categorised as "resources" are collected.
    if attr["category"] == "resources":
        tf_mapping[attr["subcategory"]].append("aws_" + attr["title"])

# Number of distinct subcategories found.
len(tf_mapping)

138

## Match Automatically

In [6]:
# (regex, replacement) pairs applied in order to a Terraform category name
# before it is fuzzy-matched against the AWS service names.
# Patterns are raw strings so that regex escapes such as ``\(`` are not treated
# as (invalid) Python string escapes, and alternations use proper non-capturing
# groups ``(?:...)``.
replace_patterns = [
    # 100 asterisks share no characters with a real service name, so this
    # category ends up without a match.
    (r"DocumentDB", "*" * 100),  # skip
    (r"ElasticSearch", "Amazon OpenSearch Service (successor to Amazon Elasticsearch Service)"),
    (r"^(?:VPC|EC2)$", "Amazon EC2"),
    (r"SSO Admin", "AWS SSO"),
    (r"ACM PCA", "AWS Certificate Manager Private Certificate Authority"),
    (r"File System \(FSx\)", "Amazon FSx"),
    (r"EventBridge \(CloudWatch Events\)", "Amazon EventBridge"),
    (r"Service Discovery", "AWS Cloud Map"),
    (r"^EFS$", "Amazon Elastic File System"),
    (r"^(?:S3 Control|S3 Outposts)$", "Amazon S3 on Outposts"),
    (r"Autoscaling Plans", "AWS Auto Scaling"),
    (r"^Autoscaling$", "Amazon EC2 Auto Scaling"),
    (r"Quantum Ledger Database \(QLDB\)", "Amazon QLDB"),
]

# Add automatic mappings for services whose action prefix has three letters:
# a category name that is exactly the upper-cased prefix is rewritten to the
# full AWS service name.

for name, prefix in aws_actions_mapping.items():
    if len(prefix) == 3:
        replace_patterns.append(
            # re.escape keeps the prefix literal even if it ever contains a
            # character that is special in a regular expression.
            (f"^{re.escape(prefix.upper())}$", name)
        )

def match_cf(tf_service_name:str) -> str:
    """Rewrite a Terraform category name using ``replace_patterns``.

    Every (regex, replacement) pair is applied in list order, each one
    operating on the result of the previous substitution.
    """
    translated = tf_service_name
    for pattern, substitute in replace_patterns:
        translated = re.compile(pattern).sub(substitute, translated)
    return translated


def find_match(search:str, targets: List[str]):
    """Return ``(score, target)`` for the target most similar to ``search``.

    Similarity is ``difflib.SequenceMatcher(None, search, target).ratio()``.
    On ties the earliest target wins. When ``targets`` is empty or no target
    scores above zero, ``(0, "")`` is returned.
    """
    winner = (0, "")

    scored = (
        (SequenceMatcher(None, search, candidate).ratio(), candidate)
        for candidate in targets
    )
    for contender in scored:
        # Strict comparison: a later candidate must beat, not equal, the leader.
        if contender[0] > winner[0]:
            winner = contender
    return winner

In [7]:
# Terraform category -> best fuzzy-matched AWS service name.
tf_to_services_mapping = {}

# Candidate names are the keys of the name -> prefix mapping.
aws_actions = list(aws_actions_mapping)

for tf_service in tf_mapping:
    score, match = find_match(match_cf(tf_service), aws_actions)
    # Categories whose best similarity does not exceed 0.05 are left unmapped.
    if score <= 0.05:
        continue
    tf_to_services_mapping[tf_service] = match

dict(tf_to_services_mapping)

{'SQS': 'Amazon SQS',
 'CodeBuild': 'AWS CodeBuild',
 'Lightsail': 'Amazon Lightsail',
 'Sagemaker': 'Amazon SageMaker',
 'Service Catalog': 'AWS Service Catalog',
 'Security Hub': 'AWS Security Hub',
 'SSM': 'AWS Systems Manager',
 'Neptune': 'Amazon Neptune',
 'OpsWorks': 'AWS OpsWorks',
 'Pinpoint': 'Amazon Pinpoint',
 'Route53 Recovery Control Config': 'Amazon Route 53 Recovery Controls',
 'CodeArtifact': 'AWS CodeArtifact',
 'Storage Gateway': 'Amazon Storage Gateway',
 'API Gateway (REST APIs)': 'Amazon API Gateway',
 'ElastiCache': 'Amazon ElastiCache',
 'Cognito': 'Amazon Cognito Sync',
 'EC2': 'Amazon EC2',
 'GuardDuty': 'Amazon GuardDuty',
 'Elastic Load Balancing (ELB Classic)': 'Elastic Load Balancing',
 'Directory Service': 'AWS Directory Service',
 'IAM': 'Identity And Access Management',
 'WAF Regional': 'AWS WAF Regional',
 'Config': 'AWS Config',
 'Kinesis Video': 'Amazon Kinesis Video Streams',
 'QuickSight': 'Amazon QuickSight',
 'VPC': 'Amazon EC2',
 'RDS': 'Amazon 

## AWS Service Prefix to Terraform Resources

In [8]:
# AWS action prefix -> every Terraform resource whose category was matched to
# the service owning that prefix.
aws_service_prefix_to_tf_resources = defaultdict(list)

for tf_category, resources in tf_mapping.items():
    try:
        aws_service_name = tf_to_services_mapping[tf_category]
        aws_service_prefix = aws_actions_mapping[aws_service_name]
    except KeyError:
        # Either lookup failing means the category cannot be attributed.
        print(f"Missing TF category: {tf_category}")
    else:
        aws_service_prefix_to_tf_resources[aws_service_prefix].extend(resources)
aws_service_prefix_to_tf_resources

Missing TF category: DocumentDB


defaultdict(list,
            {'sqs': ['aws_sqs_queue_policy', 'aws_sqs_queue'],
             'codebuild': ['aws_codebuild_webhook',
              'aws_codebuild_source_credential',
              'aws_codebuild_project',
              'aws_codebuild_report_group'],
             'lightsail': ['aws_lightsail_domain',
              'aws_lightsail_instance',
              'aws_lightsail_instance_public_ports',
              'aws_lightsail_key_pair',
              'aws_lightsail_static_ip_attachment',
              'aws_lightsail_static_ip'],
             'sagemaker': ['aws_sagemaker_flow_definition',
              'aws_sagemaker_workforce',
              'aws_sagemaker_endpoint_configuration',
              'aws_sagemaker_app_image_config',
              'aws_sagemaker_workteam',
              'aws_sagemaker_model',
              'aws_sagemaker_model_package_group',
              'aws_sagemaker_notebook_instance_lifecycle_configuration',
              'aws_sagemaker_human_task_ui',
       

In [9]:
# Persist the prefix -> Terraform resources mapping as JSON.
dump(filename="dumps/aws_action_to_tf_resource.json", data=aws_service_prefix_to_tf_resources)