Skip to content

Commit

Permalink
- Fixed CapacityProvider provisioning to ensure the cluster has the c…
Browse files Browse the repository at this point in the history
…orrect providers attached.

- Added NewInstancesProtectedFromScaleIn to ASG CF when a capacity provider is enabled and it is configured to manage instance scale in.
  • Loading branch information
gitwater committed Mar 11, 2021
1 parent 3e58d9d commit 52f9c73
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 53 deletions.
67 changes: 46 additions & 21 deletions src/paco/aws_api/ecs/capacityprovider.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,26 +31,37 @@ def provision(self):
"Provision ECS Capacity Provider resource"
# get aws info
response = self.ecs_client.describe_capacity_providers(
capacityProviders=[self.capacity_provider.aws_name],
#capacityProviders=[self.capacity_provider.aws_name],
)
cap_info = response['capacityProviders']
provider_exists = False
for cap_info in response['capacityProviders']:
# Filter out providers that do not belong to the associated ASG
if 'autoScalingGroupProvider' in cap_info and 'autoScalingGroupArn' in cap_info['autoScalingGroupProvider']:
if cap_info['autoScalingGroupProvider']['autoScalingGroupArn'] != self.asg_arn:
continue
# Keep built-in capacity providers
if cap_info['name'] in ['FARGATE', 'FARGATE_SPOT']:
continue
# Delete if the capacity provider does not exist in Paco config
if cap_info['name'] != self.capacity_provider.aws_name:
self.delete(cp_name_to_delete=cap_info['name'])
continue
else:
# delete if exists but is disabled
if self.capacity_provider.is_enabled() == False:
self.delete()
continue
elif cap_info['status'] == 'INACTIVE':
self.create()
elif self.is_changed(cap_info):
# update if its cache is different
self.update(cap_info)
provider_exists = True

# delete if exists but is disabled
if not self.capacity_provider.is_enabled():
if len(cap_info) > 0:
return self.delete()
return

# create if does not yet exist
if len(cap_info) == 0:
return self.create()
elif cap_info[0]['status'] == 'INACTIVE':
# Create if the provider does not exist
if self.capacity_provider.is_enabled() == True and provider_exists == False:
return self.create()

# update if its cache is different
if self.is_changed(cap_info[0]):
self.update(cap_info[0])

def is_changed(self, capacity_provider_info):
local_md5 = md5sum(str_data=f"{self.asg.paco_ref}-{self.capacity_provider.target_capacity}-{self.capacity_provider.minimum_scaling_step_size}-{self.capacity_provider.maximum_scaling_step_size}")
aws_target_capacity = capacity_provider_info['autoScalingGroupProvider']['managedScaling']['targetCapacity']
Expand All @@ -73,8 +84,17 @@ def get_existing_capacity_providers(self):
for cp in response['capacityProviders']:
# check if capacity provider is already associated
# ToDo: a better way to check this?
if cp['status'] == 'ACTIVE':
capacity_providers.append(cp['name'])
if cp['status'] != 'ACTIVE':
continue
# Only include providers associated with the right ASG
if 'autoScalingGroupProvider' not in cp:
continue
if 'autoScalingGroupArn' not in cp['autoScalingGroupProvider']:
continue
if cp['autoScalingGroupProvider']['autoScalingGroupArn'] != self.asg_arn:
continue

capacity_providers.append(cp['name'])
return capacity_providers

def create(self):
Expand Down Expand Up @@ -105,12 +125,17 @@ def create(self):
defaultCapacityProviderStrategy=[],
)

def delete(self):
def delete(self, cp_name_to_delete=None):
"Delete ECS Capacity Provider resource"
# before you can delete, you must disassociate the CP from the ASG
# to do that you need to list ALL other CPs to remain associated
if cp_name_to_delete == None:
cp_name_to_delete = self.capacity_provider.aws_name
existing_cps = self.get_existing_capacity_providers()
new_cps = [cp_name for cp_name in existing_cps if cp_name != self.capacity_provider.aws_name]
new_cps = [cp_name for cp_name in existing_cps if cp_name != cp_name_to_delete]
# default_provider = []
# if len(new_cps) == 0:
# default_provider=[{'capacityProvider': 'FARGATE', 'weight': 1, 'base': 1}
cluster_name = self.get_cluster_name()
try:
response = self.ecs_client.put_cluster_capacity_providers(
Expand All @@ -122,7 +147,7 @@ def delete(self):
if error.response['Error']['Code'] == 'ResourceInUseException':
# capacity provider is in-use, do not attempt to delete
return
response = self.ecs_client.delete_capacity_provider(capacityProvider=self.capacity_provider.aws_name)
response = self.ecs_client.delete_capacity_provider(capacityProvider=cp_name_to_delete)

def update(self, capacity_info):
"Update an ECS Capacity Provider resource"
Expand Down
71 changes: 39 additions & 32 deletions src/paco/cftemplates/asg.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,10 @@ def __init__(
True
)
asg_dict['Tags'].append(asg_tag)
# ECS Cluster Capacity Manager requires NewInstancesProtectedFromScaleIn to be enabled if it is going to manage instance protection
if asg_config.ecs.capacity_provider != None and asg_config.ecs.capacity_provider.is_enabled():
if asg_config.ecs.capacity_provider.managed_instance_protection == True:
asg_dict['NewInstancesProtectedFromScaleIn'] = True

# ECS Release Phase Configuration
policy_statements = []
Expand Down Expand Up @@ -372,7 +376,7 @@ def __init__(
Resource=push_repos
)
)

iam_cluster_cache = []
for command in ecr_deploy.release_phase.ecs:
service_obj = get_model_obj_from_ref(command.service, self.paco_ctx.project)
ecs_services_obj = get_parent_by_interface(service_obj, schemas.IECSServices)
Expand Down Expand Up @@ -407,42 +411,45 @@ def __init__(
asg_dict['Tags'].append(ecs_cluster_asg_tag)
asg_dict['Tags'].append(ecs_service_asg_tag)

policy_statements.append(
Statement(
Sid=f'ECSReleasePhaseSSMSendCommand{idx}',
Effect=Allow,
Action=[
Action('ssm', 'SendCommand'),
],
Resource=[ 'arn:aws:ec2:*:*:instance/*' ],
Condition=Condition(
StringLike({
'ssm:resourceTag/Paco-ECSCluster-Name': troposphere.Ref(ecs_release_phase_cluster_name_param)
})

if ecs_services_obj.cluster not in iam_cluster_cache:
policy_statements.append(
Statement(
Sid=f'ECSReleasePhaseSSMSendCommand{idx}',
Effect=Allow,
Action=[
Action('ssm', 'SendCommand'),
],
Resource=[ 'arn:aws:ec2:*:*:instance/*' ],
Condition=Condition(
StringLike({
'ssm:resourceTag/Paco-ECSCluster-Name': troposphere.Ref(ecs_release_phase_cluster_name_param)
})
)
)
)
)

policy_statements.append(
Statement(
Sid=f'ECSRelasePhaseClusterAccess{idx}',
Effect=Allow,
Action=[
Action('ecs', 'DescribeServices'),
Action('ecs', 'RunTask'),
Action('ecs', 'StopTask'),
Action('ecs', 'DescribeContainerInstances'),
Action('ecs', 'ListTasks'),
Action('ecs', 'DescribeTasks'),
],
Resource=[ '*' ],
Condition=Condition(
StringEquals({
'ecs:cluster': troposphere.Ref(ecs_release_phase_cluster_arn_param)
})
policy_statements.append(
Statement(
Sid=f'ECSRelasePhaseClusterAccess{idx}',
Effect=Allow,
Action=[
Action('ecs', 'DescribeServices'),
Action('ecs', 'RunTask'),
Action('ecs', 'StopTask'),
Action('ecs', 'DescribeContainerInstances'),
Action('ecs', 'ListTasks'),
Action('ecs', 'DescribeTasks'),
],
Resource=[ '*' ],
Condition=Condition(
StringEquals({
'ecs:cluster': troposphere.Ref(ecs_release_phase_cluster_arn_param)
})
)
)
)
)
iam_cluster_cache.append(ecs_services_obj.cluster)

idx += 1

Expand Down

0 comments on commit 52f9c73

Please sign in to comment.