In [1]:
from typing import Mapping, List, Any

import json
import re

from difflib import SequenceMatcher
from collections import defaultdict

# Analyze Gathered Data

## Load Data

In [2]:
def load(filename: str) -> Mapping[Any, Any]:
    """Read a JSON document from ``filename`` and return the parsed value.

    The file is decoded as UTF-8 explicitly, so the result does not depend on
    the platform's default text encoding.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(filename, "r", encoding="utf-8") as infile:
        return json.load(infile)
    
def dump(filename: str, data: Any) -> None:
    """Serialize ``data`` as indented JSON and write it to ``filename``.

    The data is serialized *before* the file is opened: opening in "w" mode
    truncates the target, so serializing first means a value that cannot be
    encoded raises without destroying a previously written file.

    Raises:
        TypeError: if ``data`` contains a value json cannot serialize.
        OSError: if the file cannot be opened for writing.
    """
    serialized = json.dumps(data, indent=2)
    with open(filename, "w", encoding="utf-8") as outfile:
        outfile.write(serialized)

In [3]:
# Parsed JSON dump of the provider docs. Later cells read its "included" list,
# whose items carry an "attributes" mapping with "category", "subcategory" and
# "title" keys.
aws_tf_provider_docs = load("dumps/aws_terraform_provider_docs.json")

In [4]:
# Parsed JSON mapping that later cells iterate as (service name, prefix) pairs
# and index by service name to obtain the prefix.
aws_actions_mapping = load("dumps/aws_actions_name_to_prefix.json")

## Terraform Provider Mapping

In [5]:
# Group Terraform resource names by the documentation subcategory they are
# listed under: subcategory -> ["aws_<title>", ...].
tf_mapping = defaultdict(list)

for item in aws_tf_provider_docs["included"]:
    attr = item["attributes"]

    # Only entries categorised as "resources" are collected.
    if attr["category"] == "resources":
        tf_mapping[attr["subcategory"]].append("aws_" + attr["title"])

# Number of distinct subcategories found.
len(tf_mapping)

146

## Match Automatically

In [6]:
# Ordered (regex, replacement) pairs used to rewrite a Terraform subcategory
# name into an AWS service name before fuzzy matching. The first element is a
# regular expression, so patterns are written as raw strings and literal
# parentheses are escaped.
replace_patterns = [
    # Marked "skip" by the author: the name is replaced by 100 asterisks,
    # presumably so it cannot score against any real service name.
    (r"DocumentDB", "*"*100), # skip
    (r"ElasticSearch", "Amazon OpenSearch Service (successor to Amazon Elasticsearch Service)"),
    (r"^(?:VPC|EC2)$", 'Amazon EC2'),
    (r"SSO Admin", "AWS SSO"),
    (r"ACM PCA", "AWS Certificate Manager Private Certificate Authority"),
    (r"File System \(FSx\)", "Amazon FSx"),
    (r"EventBridge \(CloudWatch Events\)", "Amazon EventBridge"),
    (r"Service Discovery","AWS Cloud Map"),
    (r"^EFS$", "Amazon Elastic File System"),
    (r"^(?:S3 Control|S3 Outposts)$", "Amazon S3 on Outposts"),
    (r"Autoscaling Plans", "AWS Auto Scaling"),
    (r"^Autoscaling$", "Amazon EC2 Auto Scaling"),
    (r"Quantum Ledger Database \(QLDB\)", "Amazon QLDB")
]

# Add an automatic mapping for services with a three-letter prefix: a name
# that is exactly the upper-cased prefix is replaced by the full service name.

for name, prefix in aws_actions_mapping.items():
    if len(prefix) == 3:
        replace_patterns.append(
            # re.escape keeps the prefix literal even if it ever contains a
            # regex metacharacter.
            (f"^{re.escape(prefix.upper())}$", name)
        )

def match_cf(tf_service_name:str) -> str:
    """Rewrite a Terraform service name using ``replace_patterns``.

    Every (pattern, replacement) pair is applied in list order via regex
    substitution; each pair operates on the result of the previous one.
    A name that matches no pattern is returned unchanged.
    """
    rewritten = tf_service_name
    for pattern, substitute in replace_patterns:
        rewritten = re.compile(pattern).sub(substitute, rewritten)
    return rewritten


def find_match(search:str, targets: List[str]):
    """Return ``(score, target)`` for the target most similar to ``search``.

    Similarity is ``SequenceMatcher(None, search, target).ratio()``. Only a
    strictly higher score replaces the current best, so the earliest target
    wins ties. If ``targets`` is empty or nothing scores above zero, the
    result is ``(0, "")``.
    """
    winner = (0, "")

    for candidate in targets:
        ratio = SequenceMatcher(None, search, candidate).ratio()
        if ratio > winner[0]:
            winner = (ratio, candidate)
    return winner

In [7]:
# Terraform subcategory -> best-matching AWS service name.
tf_to_services_mapping = {}

# Candidate AWS service names are the keys of the actions mapping.
aws_actions = list(aws_actions_mapping)

for tf_service in tf_mapping:
    score, match = find_match(match_cf(tf_service), aws_actions)
    # Keep a match only when its similarity score is above 0.05.
    if score <= 0.05:
        continue
    tf_to_services_mapping[tf_service] = match

dict(tf_to_services_mapping)

{'SSM': 'AWS Systems Manager',
 'Timestream Write': 'Amazon Timestream',
 'VPC': 'Amazon EC2',
 'Config': 'AWS Config',
 'Gamelift': 'Amazon GameLift',
 'Image Builder': 'Amazon EC2 Image Builder',
 'EC2': 'Amazon EC2',
 'Pinpoint': 'Amazon Pinpoint',
 'AppMesh': 'AWS App Mesh',
 'CloudFront': 'Amazon CloudFront',
 'ElastiCache': 'Amazon ElastiCache',
 'IAM': 'Identity And Access Management',
 'MQ': 'Amazon MQ',
 'SSO Admin': 'AWS SSO',
 'Connect': 'Amazon Connect',
 'WAF': 'AWS WAF',
 'Route53 Recovery Readiness': 'Amazon Route 53 Recovery Readiness',
 'CodeCommit': 'AWS CodeCommit',
 'Glue': 'AWS Glue',
 'Route53': 'Amazon Route 53',
 'API Gateway (REST APIs)': 'Amazon API Gateway',
 'Database Migration Service (DMS)': 'AWS Database Migration Service',
 'KMS': 'AWS Key Management Service',
 'RAM': 'AWS Resource Access Manager',
 'S3 Control': 'Amazon S3 on Outposts',
 'Sagemaker': 'Amazon SageMaker',
 'Security Hub': 'AWS Security Hub',
 'Service Discovery': 'AWS Cloud Map',
 'SES': 

## AWS Service Prefix to Terraform Resources

In [8]:
# AWS service prefix -> Terraform resource names from every subcategory that
# was matched to that service.
aws_service_prefix_to_tf_resources = defaultdict(list)

for tf_category, resources in tf_mapping.items():
    try:
        aws_service_name = tf_to_services_mapping[tf_category]
        aws_service_prefix = aws_actions_mapping[aws_service_name]
    except KeyError:
        # A failed lookup is reported and the category is left out.
        print(f"Missing TF category: {tf_category}")
    else:
        aws_service_prefix_to_tf_resources[aws_service_prefix].extend(resources)
aws_service_prefix_to_tf_resources

Missing TF category: DocumentDB


defaultdict(list,
            {'ssm': ['aws_ssm_maintenance_window',
              'aws_ssm_maintenance_window_target',
              'aws_ssm_maintenance_window_task',
              'aws_ssm_patch_group',
              'aws_ssm_resource_data_sync',
              'aws_ssm_activation',
              'aws_ssm_document',
              'aws_ssm_association',
              'aws_ssm_parameter',
              'aws_ssm_patch_baseline'],
             'timestream': ['aws_timestreamwrite_database',
              'aws_timestreamwrite_table'],
             'ec2': ['aws_vpc',
              'aws_ec2_managed_prefix_list',
              'aws_default_network_acl',
              'aws_internet_gateway_attachment',
              'aws_vpc_ipam_pool',
              'aws_ec2_subnet_cidr_reservation',
              'aws_internet_gateway',
              'aws_network_acl_rule',
              'aws_vpc_ipam_preview_next_cidr',
              'aws_network_acl_association',
              'aws_vpc_peering_connection_o

In [9]:
# Persist the service-prefix -> Terraform-resources mapping as JSON.
dump("dumps/aws_action_to_tf_resource.json", aws_service_prefix_to_tf_resources)