From 6a1b598dc3f4de3ec7ec66f6e70be6bdf40a2461 Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Tue, 1 Jul 2025 10:40:01 -0400 Subject: [PATCH 1/6] Update database instance class to a default variable value --- terraform-unity/main.tf | 1 + .../modules/terraform-unity-sps-database/main.tf | 2 +- .../modules/terraform-unity-sps-database/variables.tf | 5 +++++ terraform-unity/variables.tf | 6 ++++++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/terraform-unity/main.tf b/terraform-unity/main.tf index ee104d3e..b251733f 100644 --- a/terraform-unity/main.tf +++ b/terraform-unity/main.tf @@ -50,6 +50,7 @@ module "unity-sps-database" { venue = var.venue service_area = var.service_area release = var.release + db_instance_class = var.db_instance_class } module "unity-sps-efs" { diff --git a/terraform-unity/modules/terraform-unity-sps-database/main.tf b/terraform-unity/modules/terraform-unity-sps-database/main.tf index e14e2205..f27de316 100644 --- a/terraform-unity/modules/terraform-unity-sps-database/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-database/main.tf @@ -68,7 +68,7 @@ resource "aws_db_instance" "sps_db" { storage_type = "gp3" engine = "postgres" engine_version = "16.8" - instance_class = "db.m5d.2xlarge" + instance_class = var.db_instance_class db_name = "sps_db" username = "db_user" password = aws_secretsmanager_secret_version.db.secret_string diff --git a/terraform-unity/modules/terraform-unity-sps-database/variables.tf b/terraform-unity/modules/terraform-unity-sps-database/variables.tf index 54189e70..e99fb7d0 100644 --- a/terraform-unity/modules/terraform-unity-sps-database/variables.tf +++ b/terraform-unity/modules/terraform-unity-sps-database/variables.tf @@ -17,3 +17,8 @@ variable "release" { description = "The software release version." 
type = string } + +variable "db_instance_class" { + description = "The SPS RDS database instance class" + type = string +} diff --git a/terraform-unity/variables.tf b/terraform-unity/variables.tf index b19414bd..8286540f 100644 --- a/terraform-unity/variables.tf +++ b/terraform-unity/variables.tf @@ -367,3 +367,9 @@ variable "installprefix" { type = string default = "" } + +variable "db_instance_class" { + description = "The SPS RDS database instance class" + type = string + default = "db.m5d.xlarge" +} From 95078dd5229a4cb7819d4131d8fd1346fafd4f21 Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Wed, 9 Jul 2025 16:06:47 -0400 Subject: [PATCH 2/6] Ensure OGC API pods are executed on the same node as other Airflow components --- .../modules/terraform-unity-sps-ogc-processes-api/main.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf index 9153ba39..f415f103 100644 --- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf @@ -83,12 +83,12 @@ resource "kubernetes_deployment" "ogc_processes_api" { match_expressions { key = "karpenter.k8s.aws/instance-family" operator = "In" - values = ["c6i", "c5"] + values = ["r5"] } match_expressions { key = "karpenter.k8s.aws/instance-cpu" operator = "In" - values = ["4"] + values = ["8"] } } } From bdb4234ad649ab3c74f6c9d87eab94c667f6a946 Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Thu, 10 Jul 2025 09:27:26 -0400 Subject: [PATCH 3/6] Update redis deployment to match core components and set top-level affinity to be consistent --- airflow/helm/values.tmpl.yaml | 4 +-- .../main.tf | 28 +++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/airflow/helm/values.tmpl.yaml 
b/airflow/helm/values.tmpl.yaml index 6c6f067f..589faa82 100644 --- a/airflow/helm/values.tmpl.yaml +++ b/airflow/helm/values.tmpl.yaml @@ -56,10 +56,10 @@ affinity: values: ["on-demand"] - key: "karpenter.k8s.aws/instance-family" operator: "In" - values: ["c6i", "c5"] + values: ["r5"] - key: "karpenter.k8s.aws/instance-cpu" operator: "In" - values: ["2", "4"] + values: ["8"] topologySpreadConstraints: - maxSkew: 1 diff --git a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf index f415f103..d17e7842 100644 --- a/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf +++ b/terraform-unity/modules/terraform-unity-sps-ogc-processes-api/main.tf @@ -17,6 +17,34 @@ resource "kubernetes_deployment" "redis" { } } spec { + affinity { + node_affinity { + required_during_scheduling_ignored_during_execution { + node_selector_term { + match_expressions { + key = "karpenter.sh/nodepool" + operator = "In" + values = compact([for pool in var.karpenter_node_pools : pool if pool == "airflow-core-components"]) + } + match_expressions { + key = "karpenter.sh/capacity-type" + operator = "In" + values = ["on-demand"] + } + match_expressions { + key = "karpenter.k8s.aws/instance-family" + operator = "In" + values = ["r5"] + } + match_expressions { + key = "karpenter.k8s.aws/instance-cpu" + operator = "In" + values = ["8"] + } + } + } + } + } container { name = "redis" image = "${var.docker_images.redis.name}:${var.docker_images.redis.tag}" From 9444c56a7711ef8b7c85f1ce5d6268c6051ea433 Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Mon, 14 Jul 2025 16:14:59 -0400 Subject: [PATCH 4/6] Update celery worker replicas and consolidation policy --- airflow/helm/values.tmpl.yaml | 2 +- terraform-unity/variables.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/airflow/helm/values.tmpl.yaml b/airflow/helm/values.tmpl.yaml index 
589faa82..2da41b65 100644 --- a/airflow/helm/values.tmpl.yaml +++ b/airflow/helm/values.tmpl.yaml @@ -248,7 +248,7 @@ workers: keda: enabled: true pollingInterval: 1 - minReplicaCount: 1 + minReplicaCount: 0 maxReplicaCount: 128 # Specify HPA related options # https://github.com/kubernetes/enhancements/blob/master/keps/sig-autoscaling/853-configurable-hpa-scale-velocity/README.md diff --git a/terraform-unity/variables.tf b/terraform-unity/variables.tf index 3b45fc5d..ca840967 100644 --- a/terraform-unity/variables.tf +++ b/terraform-unity/variables.tf @@ -283,7 +283,7 @@ variable "karpenter_node_pools" { memory = "320Gi" } disruption = { - consolidationPolicy = "WhenEmpty" + consolidationPolicy = "WhenEmptyOrUnderutilized" consolidateAfter = "1m" } }, From a924b66e47cf12d6fe8941077c989b3bb67621e8 Mon Sep 17 00:00:00 2001 From: Nikki <17799906+nikki-t@users.noreply.github.com> Date: Tue, 15 Jul 2025 08:21:27 -0400 Subject: [PATCH 5/6] Document minimum replica count for celery workers --- airflow/helm/values.tmpl.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow/helm/values.tmpl.yaml b/airflow/helm/values.tmpl.yaml index 2da41b65..5aece3d1 100644 --- a/airflow/helm/values.tmpl.yaml +++ b/airflow/helm/values.tmpl.yaml @@ -248,7 +248,7 @@ workers: keda: enabled: true pollingInterval: 1 - minReplicaCount: 0 + minReplicaCount: 0 # Minimum node available for celery workers; set to 1 to always have a worker node available to run tasks maxReplicaCount: 128 # Specify HPA related options # https://github.com/kubernetes/enhancements/blob/master/keps/sig-autoscaling/853-configurable-hpa-scale-velocity/README.md From ebf0973eb01580ba23ef2d5ab393f2eb7375efd0 Mon Sep 17 00:00:00 2001 From: Luca Cinquini Date: Tue, 15 Jul 2025 07:07:47 -0600 Subject: [PATCH 6/6] Splitting the comment on 2 lines?
--- airflow/helm/values.tmpl.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/airflow/helm/values.tmpl.yaml b/airflow/helm/values.tmpl.yaml index 5aece3d1..2d6abe31 100644 --- a/airflow/helm/values.tmpl.yaml +++ b/airflow/helm/values.tmpl.yaml @@ -248,7 +248,9 @@ workers: keda: enabled: true pollingInterval: 1 - minReplicaCount: 0 # Minimum node available for celery workers; set to 1 to always have a worker node available to run tasks + # Minimum number of celery worker replicas kept running by KEDA + # Set to 1 to always have a worker (and its node) available to run tasks + minReplicaCount: 0 maxReplicaCount: 128 # Specify HPA related options # https://github.com/kubernetes/enhancements/blob/master/keps/sig-autoscaling/853-configurable-hpa-scale-velocity/README.md