Skip to content

Commit

Permalink
Add a script for warming/cooling an autoscaling ECS cluster
Browse files Browse the repository at this point in the history
  • Loading branch information
alexwlchan committed Oct 19, 2020
1 parent eaf89ca commit 251e348
Show file tree
Hide file tree
Showing 4 changed files with 242 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
.DS_Store
.releases
*.plan
*.pyc

# Created by https://www.gitignore.io/api/terraform
# Edit at https://www.gitignore.io/?templates=terraform
Expand Down
31 changes: 31 additions & 0 deletions scripts/_aws.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import boto3


ACCOUNT_IDS = {
"platform": "760097843905",
"storage": "975596993436",
"catalogue": "756629837203",
}


sts_client = boto3.client("sts")


def get_aws_client(resource, *, role_arn):
assumed_role_object = sts_client.assume_role(
RoleArn=role_arn, RoleSessionName="AssumeRoleSession1"
)
credentials = assumed_role_object["Credentials"]
return boto3.client(
resource,
aws_access_key_id=credentials["AccessKeyId"],
aws_secret_access_key=credentials["SecretAccessKey"],
aws_session_token=credentials["SessionToken"],
)


def get_s3_object(s3_client, *, bucket, key):
"""
Retrieve the contents of an object from S3.
"""
return s3_client.get_object(Bucket=bucket, Key=key)["Body"].read()
8 changes: 8 additions & 0 deletions scripts/_ecs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
def get_ecs_cluster_arns(ecs_client):
"""
Generates the ARN of every ECS cluster in your AWS account.
"""
paginator = (ecs_client).get_paginator("list_clusters")

for page in paginator.paginate():
yield from page["clusterArns"]
202 changes: 202 additions & 0 deletions scripts/warmup_ecs_cluster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
#!/usr/bin/env python
"""
A lot of the services we run in ECS are started/stopped by Autoscaling, and
scale down to zero when they're not in use.
While good for costs, it's a pain for debugging -- if you've deployed a new
service, you need to wait for it to scale up before you can see the effect of
your changes.
This script allows you to temporarily "warm up" the services in our ECS cluster --
that is, set the min capacity to 1 so services will constantly be running.
The same script can "cool down" a cluster -- turn the capacity back down to the
min defined by Terraform.
"""

import os
import sys

import click
import hcl
import inquirer

from _aws import get_aws_client, get_s3_object, ACCOUNT_IDS
from _ecs import get_ecs_cluster_arns


def choose(*, question, choices):
"""
Ask the user to choose from a series of choices.
"""
answers = inquirer.prompt(
[inquirer.List("choose", message=question, choices=sorted(choices))]
)
return answers["choose"]


def _find_autoscaling_config(tfstate):
"""
Returns the min capacity of every service defined in the Terraform state.
"""
min_capacities = {}

for res in tfstate["resources"]:
if res["type"] == "aws_appautoscaling_target":
assert len(res["instances"]) <= 1, res
try:
attributes = res["instances"][0]["attributes"]
except IndexError:
continue
min_capacities[attributes["id"]] = attributes["min_capacity"]

return min_capacities


def _get_tfstate_location(*, account_name, cluster_name):
if account_name == "platform":
tfstate_bucket = "wellcomecollection-platform-infra"

tf_key_lookup = {
"calm-adapter": "terraform/calm_adapter.tfstate",
"reindexer": "terraform/catalogue/reindexer.tfstate",
"mets-adapter": "terraform/catalogue/mets_adapter.tfstate",
}

if cluster_name.startswith("catalogue-"):
tfstate_key = "terraform/catalogue/pipeline.tfstate"
elif cluster_name.startswith("sierra-adapter-"):
tfstate_key = "terraform/sierra_adapter.tfstate"
elif cluster_name in tf_key_lookup:
tfstate_key = tf_key_lookup[cluster_name]

elif account_name == "storage":
tfstate_bucket = "wellcomecollection-storage-infra"

tfstate_key = {
"storage-staging": "terraform/storage-service/stack_staging.tfstate",
"storage-prod": "terraform/storage-service/stack_prod.tfstate",
}

try:
return {
"bucket": tfstate_bucket,
"key": tfstate_key,
}
except NameError:
sys.exit(f"Don't know tfstate location for {cluster_name}")


def _get_tf_min_capacities(*, account_name, role_arn, cluster_name):
"""
Ask the Terraform: what's the min these services should ever be warmed to?
"""
tfstate_location = _get_tfstate_location(
account_name=account_name, cluster_name=cluster_name
)

s3_client = get_aws_client("s3", role_arn=role_arn)
tfstate = hcl.loads(get_s3_object(s3_client, **tfstate_location))

min_capacities = _find_autoscaling_config(tfstate)

return {
name: capacity
for name, capacity in min_capacities.items()
if name.startswith(f"service/{cluster_name}/")
}


def _get_current_scalable_targets(autoscaling_client, *, service_names):
min_capacities = {}

paginator = autoscaling_client.get_paginator("describe_scalable_targets")

for page in paginator.paginate(
ResourceIds=list(service_names), ServiceNamespace="ecs"
):
for target in page["ScalableTargets"]:
min_capacities[target["ResourceId"]] = target

return min_capacities


def warm_ecs_cluster(*, account_name, role_arn, cluster_name):
"""
Warm or cool an ECS cluster.
"""
tf_min_capacities = _get_tf_min_capacities(
account_name=account_name, role_arn=role_arn, cluster_name=cluster_name
)

print(
f"Detected autoscaling config for {click.style(str(len(tf_min_capacities)), 'blue')} service{'s' if len(tf_min_capacities) > 1 else ''}"
)

autoscaling_client = get_aws_client("application-autoscaling", role_arn=role_arn)
current_scalable_targets = _get_current_scalable_targets(
autoscaling_client, service_names=tf_min_capacities.keys()
)

common_prefix = os.path.commonprefix(list(tf_min_capacities.keys()))

for name, tf_min in sorted(tf_min_capacities.items()):
display_name = (
name[len(common_prefix) :] or name[len(f"service/{cluster_name}/") :]
)

target = current_scalable_targets[name]
curr_min = target["MinCapacity"]

if tf_min == 0 and curr_min == 0:
print(click.style("warming", "green"), display_name)
new_min_capacity = 1
elif tf_min == 0 and curr_min > 0:
print(click.style(f"cooling", "blue"), display_name)
new_min_capacity = 0
else:
print(
click.style("???????", "red"),
f"{name}, tf_min={tf_min}, curr_min={curr_min}",
)
continue

del target["CreationTime"]
target["MinCapacity"] = new_min_capacity
autoscaling_client.register_scalable_target(**target)


@click.command()
@click.option(
"--account",
"account_name",
type=click.Choice(["platform", "storage", "catalogue"]),
prompt="Which account are you working in?",
)
@click.option("--cluster", "cluster_name")
def main(account_name, cluster_name):
account_id = ACCOUNT_IDS[account_name]
role_arn = f"arn:aws:iam::{account_id}:role/{account_name}-developer"

ecs_client = get_aws_client("ecs", role_arn=role_arn)

ecs_cluster_names = {arn.split("/")[-1] for arn in get_ecs_cluster_arns(ecs_client)}

if cluster_name is not None:
if cluster_name not in ecs_cluster_names:
sys.exit(
f"There is no cluster named {cluster_name} in the {account_name} account"
)
else:
cluster_name = choose(
question="Which ECS cluster would you like to warm?",
choices=ecs_cluster_names,
)

warm_ecs_cluster(
account_name=account_name, role_arn=role_arn, cluster_name=cluster_name
)


if __name__ == "__main__":
main()

0 comments on commit 251e348

Please sign in to comment.