In [1]:
from typing import Mapping, List, Any

import json
import re

from difflib import SequenceMatcher
from collections import defaultdict

# Analyze Gathered Data

## Load Data

In [2]:
def load(filename: str) -> Mapping[Any, Any]:
    """Read the file at ``filename`` and return its parsed JSON content.

    The file is decoded explicitly as UTF-8 (the standard encoding for JSON)
    instead of the platform default, so the same dump parses identically on
    every operating system and locale.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(filename, "r", encoding="utf-8") as infile:
        return json.load(infile)
    
def dump(filename: str, data: Any):
    """Serialize ``data`` as JSON (2-space indent) and write it to ``filename``.

    The target file is opened in write mode, so any existing content is
    replaced.
    """
    with open(filename, "w") as outfile:
        # Serialize after the file is opened, then write the text in one go.
        serialized = json.dumps(data, indent=2)
        outfile.write(serialized)

In [3]:
# Parsed JSON dump; the cells below read its "included" list.
aws_tf_provider_docs = load(filename="dumps/aws_terraform_provider_docs.json")

In [4]:
# Parsed JSON dump; used below as a mapping from AWS service name to action prefix.
aws_actions_mapping = load(filename="dumps/aws_actions_name_to_prefix.json")

## Terraform Provider Mapping

In [6]:
# Group Terraform resource names ("aws_" + title) by the subcategory of their
# documentation entry. Entries whose category is not "resources" are ignored.
tf_mapping = defaultdict(list)

for item in aws_tf_provider_docs["included"]:
    attr = item["attributes"]
    if attr["category"] == "resources":
        tf_mapping[attr["subcategory"]].append("aws_" + attr["title"])

# Number of distinct subcategories collected.
len(tf_mapping)

146

## Match Automatically

In [9]:
# Ordered (regex, replacement) pairs that rewrite a Terraform subcategory name
# into an AWS service name before fuzzy matching. match_cf applies them with
# re.sub in list order, each one operating on the output of the previous one.
# Patterns are raw strings so that regex escapes such as \( are not treated as
# (invalid) Python string escapes.
replace_patterns = [
    # skip: the name becomes 100 asterisks, presumably so that no service name
    # is similar enough to be accepted as a match.
    ("DocumentDB", "*"*100),
    ("Elasticsearch", "Amazon OpenSearch Service (successor to Amazon Elasticsearch Service)"),
    (r"^(?:VPC|EC2)$", 'Amazon EC2'),
    ("SSO Admin", "AWS SSO"),
    ("ACM PCA", "AWS Certificate Manager Private Certificate Authority"),
    (r"File System \(FSx\)", "Amazon FSx"),
    (r"EventBridge \(CloudWatch Events\)", "Amazon EventBridge"),
    ("Service Discovery","AWS Cloud Map"),
    ("^EFS$", "Amazon Elastic File System"),
    (r"^(?:S3 Control|S3 Outposts)$", "Amazon S3 on Outposts"),
    ("Autoscaling Plans", "AWS Auto Scaling"),
    ("^Autoscaling$", "Amazon EC2 Auto Scaling"),
    (r"Quantum Ledger Database \(QLDB\)", "Amazon QLDB")
]

# Add an automatic mapping for services whose action prefix is three letters
# long: a name that is exactly the upper-cased prefix (e.g. "ABC" for "abc")
# is replaced by the full service name.
for name, prefix in aws_actions_mapping.items():
    if len(prefix) == 3:
        replace_patterns.append(
            # re.escape keeps any regex metacharacter in the prefix literal.
            (f"^{re.escape(prefix.upper())}$", name)
        )

def match_cf(tf_service_name: str) -> str:
    """Rewrite ``tf_service_name`` by applying every entry of ``replace_patterns``.

    Each (regex, replacement) pair is applied with ``re.sub`` in list order,
    and every substitution works on the result of the previous one.
    """
    rewritten = tf_service_name
    for pattern, replacement in replace_patterns:
        rewritten = re.sub(pattern=pattern, repl=replacement, string=rewritten)
    return rewritten


def find_match(search: str, targets: List[str]):
    """Return ``(score, target)`` for the target most similar to ``search``.

    Similarity is ``difflib.SequenceMatcher(None, search, target).ratio()``.
    The first target with the highest score wins ties. When ``targets`` is
    empty, or no target scores above zero, the result is ``(0, "")``.
    """
    winner = (0, "")
    for candidate in targets:
        similarity = SequenceMatcher(a=search, b=candidate).ratio()
        # Strict comparison: a later candidate must beat, not equal, the leader.
        if similarity > winner[0]:
            winner = (similarity, candidate)
    return winner

In [12]:
# Map each Terraform subcategory to the most similar AWS service name.
tf_to_services_mapping = {}

aws_actions = list(aws_actions_mapping)

for tf_service in tf_mapping:
    score, match = find_match(match_cf(tf_service), aws_actions)
    # Keep only matches scoring above 0.05; anything at or below is dropped.
    if score <= 0.05:
        continue
    tf_to_services_mapping[tf_service] = match

tf_to_services_mapping.keys()

dict_keys(['SSM', 'Timestream Write', 'VPC', 'Config', 'Gamelift', 'Image Builder', 'EC2', 'Pinpoint', 'AppMesh', 'CloudFront', 'ElastiCache', 'IAM', 'MQ', 'SSO Admin', 'Connect', 'WAF', 'Route53 Recovery Readiness', 'CodeCommit', 'Glue', 'Route53', 'API Gateway (REST APIs)', 'Database Migration Service (DMS)', 'KMS', 'RAM', 'S3 Control', 'Sagemaker', 'Security Hub', 'Service Discovery', 'SES', 'Lambda', 'Lex', 'Service Catalog', 'File System (FSx)', 'EFS', 'Elastic Load Balancing v2 (ALB/NLB)', 'MemoryDB', 'Step Function (SFN)', 'RDS', 'EventBridge (CloudWatch Events)', 'Route53 Resolver', 'Chime', 'CloudSearch', 'OpsWorks', 'Backup', 'DataPipeline', 'Neptune', 'Athena', 'Cognito', 'Elastic Map Reduce (EMR)', 'ECS', 'DataSync', 'Direct Connect', 'S3', 'Device Farm', 'CodePipeline', 'Global Accelerator', 'AppStream', 'Cost and Usage Report', 'API Gateway v2 (WebSocket and HTTP APIs)', 'CloudWatch', 'Kinesis Firehose', 'Storage Gateway', 'Organizations', 'Kinesis', 'Redshift', 'Resource

## AWS Service Prefix to Terraform Resources

In [16]:
# Collect the Terraform resources of every matched subcategory under the
# action prefix of the AWS service it was matched to.
aws_service_prefix_to_tf_resources = defaultdict(list)

for tf_category, resources in tf_mapping.items():
    try:
        aws_service_name = tf_to_services_mapping[tf_category]
        aws_service_prefix = aws_actions_mapping[aws_service_name]
    except KeyError:
        # A failed lookup is reported and the subcategory is skipped.
        print(f"Missing TF category: {tf_category}")
    else:
        aws_service_prefix_to_tf_resources[aws_service_prefix].extend(resources)

aws_service_prefix_to_tf_resources.keys()

Missing TF category: DocumentDB


dict_keys(['ssm', 'timestream', 'ec2', 'config', 'gamelift', 'imagebuilder', 'mobiletargeting', 'appmesh', 'cloudfront', 'elasticache', 'iam', 'mq', 'sso', 'connect', 'waf', 'route53-recovery-readiness', 'codecommit', 'glue', 'route53', 'execute-api', 'dms', 'kms', 'ram', 's3-outposts', 'sagemaker', 'securityhub', 'servicediscovery', 'ses', 'lambda', 'lex', 'servicecatalog', 'fsx', 'elasticfilesystem', 'elasticloadbalancing', 'memorydb', 'states', 'rds', 'events', 'route53resolver', 'chime', 'cloudsearch', 'opsworks', 'backup', 'datapipeline', 'neptune-db', 'athena', 'cognito-sync', 'elasticmapreduce', 'ecs', 'datasync', 'directconnect', 's3', 'devicefarm', 'codepipeline', 'globalaccelerator', 'appstream', 'cur', 'apigateway', 'cloudwatch', 'firehose', 'storagegateway', 'organizations', 'kinesis', 'redshift', 'resource-groups', 'glacier', 'lightsail', 'ecr', 'waf-regional', 'codebuild', 'shield', 'appconfig', 'cloudformation', 'autoscaling', 'lakeformation', 'awsconnector', 'kafkaconne

In [15]:
# Persist the prefix -> Terraform resources mapping as indented JSON.
dump(filename="dumps/aws_action_to_tf_resource.json", data=aws_service_prefix_to_tf_resources)