diff --git a/.yamllint b/.yamllint index 669c864..1fd3784 100644 --- a/.yamllint +++ b/.yamllint @@ -3,3 +3,5 @@ extends: default rules: line-length: disable document-start: disable + indentation: + indent-sequences: consistent diff --git a/apis/composition-intelligent.yaml b/apis/composition-intelligent.yaml index 800b6ad..674244d 100644 --- a/apis/composition-intelligent.yaml +++ b/apis/composition-intelligent.yaml @@ -31,7 +31,7 @@ spec: - "FreeStorageSpace" - "ReadIOPS" - "WriteIOPS" - period: 300 + period: 60 target: "status.performanceMetrics" credentials: - name: aws-creds @@ -59,15 +59,21 @@ spec: 6. When scaling needed: ONLY modify the instanceClass or allocatedStorage fields in spec.forProvider 7. PRESERVE all other existing spec fields unchanged - do not recreate the entire spec 4. Make scaling decisions based on these thresholds: - - High CPU (>80%): Consider increasing instance class + - High CPU (>50%): Consider increasing instance class - High Memory usage (FreeableMemory <20% of total): Consider memory-optimized instance - High IOPS (>80% of provisioned): Consider increasing storage or instance class - High connections (>80% of max): Consider increasing instance class SCALING LOGIC: - - For high CPU/Memory/IOPS: Upgrade instance class (e.g., db.t3.micro → db.t3.small → db.t3.medium) + - For high CPU/Memory/IOPS: Upgrade instance class (e.g., db.t3.micro → db.t3.small → db.t3.medium → db.t3.large) + - For low utilization: Downgrade instance class (e.g., db.t3.large → db.t3.medium → db.t3.small → db.t3.micro) - For storage issues: Increase allocatedStorage by 20GB increments - - Only scale up, never scale down automatically for safety + - Scale up when resources are constrained, scale down when consistently over-provisioned + - DOWNSCALING SAFETY: Only scale down if ALL metrics show low utilization for sustained period: + * CPU < 20% for extended time + * Memory usage < 40% + * Connections < 30% of capacity + * No recent scaling events (check 
annotations for last scaling time) CRITICAL UPDATE APPROACH: - PRESERVE the entire existing resource structure from section diff --git a/apis/composition-rds-metrics.yaml b/apis/composition-rds-metrics.yaml index 1eb064a..274d3b0 100644 --- a/apis/composition-rds-metrics.yaml +++ b/apis/composition-rds-metrics.yaml @@ -13,7 +13,7 @@ spec: pipeline: - step: xsqlinstance functionRef: - name: upbound-configuration-aws-databasexsqlinstance + name: upbound-configuration-aws-database-aixsqlinstance - step: fetch-metrics functionRef: name: upbound-function-rds-metrics diff --git a/examples/providerconfig-aws.yaml b/examples/providerconfig-aws.yaml new file mode 100644 index 0000000..63ebc2b --- /dev/null +++ b/examples/providerconfig-aws.yaml @@ -0,0 +1,12 @@ +--- +apiVersion: aws.upbound.io/v1beta1 +kind: ProviderConfig +metadata: + name: default +spec: + credentials: + source: Secret + secretRef: + namespace: crossplane-system + name: aws-creds + key: credentials diff --git a/operations/rds-intelligent-scaling-cron/operation.yaml b/operations/rds-intelligent-scaling-cron/operation.yaml index d13907a..74f49d2 100644 --- a/operations/rds-intelligent-scaling-cron/operation.yaml +++ b/operations/rds-intelligent-scaling-cron/operation.yaml @@ -10,7 +10,7 @@ spec: concurrencyPolicy: Forbid successfulHistoryLimit: 5 failedHistoryLimit: 1 - + operationTemplate: spec: mode: Pipeline @@ -33,31 +33,35 @@ spec: and makes scaling decisions based on CloudWatch performance metrics. userPrompt: | You are analyzing an XSQLInstance resource for potential RDS scaling needs. - + SCALING ANALYSIS CRITERIA: 1. Check performance metrics in status.performanceMetrics 2. Check current instanceClass in spec.parameters.instanceClass 3. 
Check if analysis was done recently (intelligent-scaling annotations) - + RATE LIMITING: - Skip analysis if "intelligent-scaling/last-analyzed" annotation exists and is < 5 minutes old - Only proceed if no recent analysis or if metrics show significant changes - + SCALING TRIGGERS (conservative for cost control): - - CPU > 85%: scale up instance class - - FreeableMemory < 15%: scale up for memory pressure - - DatabaseConnections > 85% of max: scale up for connection pressure - - INSTANCE CLASS PROGRESSION: - db.t3.micro → db.t3.small → db.t3.medium → db.t3.large - + - SCALE UP (any one trigger): CPU > 85%, FreeableMemory < 15%, DatabaseConnections > 85% of max + - SCALE DOWN (all required): CPU < 20% AND FreeableMemory > 60% AND DatabaseConnections < 30% of max for a sustained period + + INSTANCE CLASS PROGRESSION (bidirectional): + db.t3.micro ↔ db.t3.small ↔ db.t3.medium ↔ db.t3.large + + DOWNSCALING SAFETY: + - Only scale down if ALL conditions are met for a sustained period + - Check last-scaled annotation to prevent frequent changes + - Ensure minimum 15-minute cooldown between scaling events + REQUIRED OUTPUT FORMAT: You must output the XSQLInstance resource in JSON format with ONLY the fields that need to be patched.
- + If scaling is needed: { "apiVersion": "aws.platform.upbound.io/v1alpha1", - "kind": "XSQLInstance", + "kind": "XSQLInstance", "metadata": { "name": "", "annotations": { @@ -71,10 +75,10 @@ spec: } } } - + If no scaling is needed: { - "apiVersion": "aws.platform.upbound.io/v1alpha1", + "apiVersion": "aws.platform.upbound.io/v1alpha1", "kind": "XSQLInstance", "metadata": { "name": "", @@ -84,10 +88,10 @@ spec: } } } - + Rules: - Use the exact resource name from the input - - Only include fields that need to be updated + - Only include fields that need to be updated - Use current timestamp in ISO8601 format - Provide brief, clear reasoning in annotations - Output only the JSON, no explanatory text @@ -101,4 +105,4 @@ spec: namespace: crossplane-system name: claude retryLimit: 3 - schedule: '*/10 * * * *' # Run every 10 minute for testing + schedule: '*/2 * * * *' # Run every 2 minutes for demo
diff --git a/perf-scale-demo.sh b/perf-scale-demo.sh
new file mode 100755
index 0000000..9201487
--- /dev/null
+++ b/perf-scale-demo.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+# Demo-optimized load test for fast autoscaling trigger.
+#
+# Starts background mysql clients that run BENCHMARK queries against the
+# database, then checks the XSQLInstance up to 15 times (20 seconds apart)
+# until the managed instance reports a class other than db.t3.micro.
+#
+# DB_PASS must be exported by the caller; it is deliberately not stored here
+# so the credential never lands in version control. DB_ENDPOINT, DB_USER and
+# XR_NAME may be overridden the same way and default to the demo values.
+
+DB_ENDPOINT="${DB_ENDPOINT:-rds-metrics-database-ai-scale.cxal1lomznba.us-west-2.rds.amazonaws.com}"
+DB_USER="${DB_USER:-masteruser}"
+DB_PASS="${DB_PASS:?DB_PASS must be set in the environment}"
+XR_NAME="${XR_NAME:-rds-metrics-database-ai-scale}"
+
+echo "🚀 Starting DEMO load test (optimized for speed)..."
+
+# Maximum intensity load - 20 processes with high benchmark values
+for i in {1..20}; do
+  mysql --host="$DB_ENDPOINT" --user="$DB_USER" --password="$DB_PASS" \
+    --default-auth=mysql_native_password \
+    --execute="SELECT BENCHMARK(3000000000, MD5('demo_intensive_$i'));" &
+done
+
+# Additional CPU-intensive operations
+for i in {1..10}; do
+  mysql --host="$DB_ENDPOINT" --user="$DB_USER" --password="$DB_PASS" \
+    --execute="
+      SELECT BENCHMARK(1000000000, SHA2(CONCAT('demo_', RAND()), 256));
+      SELECT BENCHMARK(1000000000, MD5(CONCAT(CONNECTION_ID(), '_$i')));
+    " &
+done
+
+echo "⏱️ Load test running... Expected timeline:"
+echo " - 30-60 seconds: CPU should hit 50%+"
+echo " - 1-2 minutes: CloudWatch metrics update"
+echo " - 2-3 minutes: Claude analysis and scaling decision"
+echo " - 5-10 minutes: Instance scaling completion"
+
+# Real-time monitoring
+for i in {1..15}; do
+  echo ""
+  echo "=== Demo Check $i ($(date +%H:%M:%S)) ==="
+
+  # Current metrics (each kubectl call stays on one line so the stderr
+  # redirect and the fallback echo apply to kubectl itself)
+  CPU=$(kubectl get xsqlinstance "$XR_NAME" -o jsonpath='{.status.performanceMetrics.metrics.CPUUtilization.value}' 2>/dev/null || echo "collecting...")
+  echo "🔥 CPU: ${CPU}% (threshold: 50%)"
+
+  # Instance class
+  CLASS=$(kubectl get instance.rds -l "crossplane.io/composite=$XR_NAME" -o jsonpath='{.items[0].spec.forProvider.instanceClass}' 2>/dev/null || echo "unknown")
+  echo "💾 Instance: $CLASS"
+
+  # Claude decision
+  REASONING=$(kubectl get xsqlinstance "$XR_NAME" -o jsonpath='{.status.claudeDecision.reasoning}' 2>/dev/null || echo "analyzing...")
+  echo "🤖 Claude: ${REASONING:0:80}..."
+
+  # Check if scaling happened; an empty or "unknown" class means the lookup
+  # failed, which must not be reported as a successful upgrade.
+  if [[ -n "$CLASS" && "$CLASS" != "unknown" && "$CLASS" != "db.t3.micro" ]]; then
+    echo "🎉 SCALING SUCCESSFUL! Instance upgraded to $CLASS"
+    break
+  fi
+
+  sleep 20
+done
+
+echo ""
+echo "🛑 Stopping load test..."
+pkill -f "mysql.*BENCHMARK"
+pkill -f "mysql.*SHA2"
+
+echo "✅ Demo complete!"
diff --git a/simple-demo-scaling-monitor.sh b/simple-demo-scaling-monitor.sh new file mode 100755 index 0000000..9066f1a --- /dev/null +++ b/simple-demo-scaling-monitor.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# simple-demo-scaling-monitor.sh + +XR_NAME="rds-metrics-database-ai-scale" +CONTEXT="kind-up-configuration-aws-database-ai" + +while true; do + echo "=== $(date '+%Y-%m-%d %H:%M:%S') ===" + echo "$ kubectl get xsqlinstance $XR_NAME -o yaml | yq '.metadata.annotations | +del(.[\"kubectl.kubernetes.io/last-applied-configuration\"])'" + + kubectl --context $CONTEXT get xsqlinstance $XR_NAME -o yaml | yq '.metadata.annotations | +del(.["kubectl.kubernetes.io/last-applied-configuration"])' + + echo "" + sleep 45 +done diff --git a/upbound.yaml b/upbound.yaml index 5be3b50..580788b 100644 --- a/upbound.yaml +++ b/upbound.yaml @@ -23,7 +23,7 @@ spec: - apiVersion: pkg.crossplane.io/v1beta1 kind: Function package: xpkg.upbound.io/upbound/function-rds-metrics - version: v0.0.3 + version: v0.0.6 description: This repository provides a foundational configuration to build and operate an AWS database, including handling its dependencies and configurations. The setup ensures a streamlined and repeatable deployment in a cloud environment.