In [None]:
# Copyright 2025 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Deploying GKE HPA Config Recommender

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/aburhan/kubernetes-engine-samples/blob/workloadrecommender/cost-optimization/hpa-config-recommender/docs/notebook.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2Faburhan%2Fkubernetes-engine-samples%2Fworkloadrecommender%2Fcost-optimization%2Fhpa-config-recommender%2Fdocs%2Fnotebook.ipynb">
      <img width="32px" src="https://cloud.google.com/ml-engine/images/colab-enterprise-logo-32px.png" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>    
  <td style="text-align: center">
    <a href="https://raw.githubusercontent.com/aburhan/kubernetes-engine-samples/refs/heads/workloadrecommender/cost-optimization/hpa-config-recommender/docs/notebook.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/aburhan/kubernetes-engine-samples/blob/workloadrecommender/cost-optimization/hpa-config-recommender/docs/notebook.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

## Overview

This notebook guides you through building and running the HPA Config Recommender. This tool analyzes historical metric data from your Google Kubernetes Engine (GKE) workloads to provide optimal Horizontal Pod Autoscaler (HPA) settings. These settings include CPU and memory resource requests, minimum and maximum replica counts, and target utilization, all designed to balance cost efficiency and reliability.

> **Note:** This solution is currently tested only for Kubernetes
> Deployments.

- Key Features

-   Fetch and aggregate workload CPU and memory metrics from Cloud
    Monitoring.
-   Calculate workload startup time by considering pod initialization and
    cluster autoscaler delays.
-   Simulate resource scaling using DMR (Dynamic Minimum Replicas) and DCR
    (Dynamic CPU Requests) algorithms.
-   Generate resource recommendations for both HPA and VPA.


- Required Roles

Ensure you have the following Google Cloud roles:

-   roles/resourcemanager.projectCreator
-   roles/monitoring.viewer
-   roles/bigquery.dataOwner
-   roles/artifactregistry.creator
-   roles/monitoring.admin

### Create a new monitoring project

For monitoring workloads across multiple projects, it's best to set up a separate
monitoring project. Once you've created this project, you'll need to add your
other projects to its metrics scope. This allows you to receive consolidated
recommendations. Use the following instructions to
[add projects to your metrics scope configuration](https://cloud.google.com/monitoring/settings/multiple-projects)

## Get started

This lab will walk you through running the workload recommender. Before you start, make sure you've followed all instructions in [Setup](README.md).

The [Setup](README.md) creates a Python package repository. In this notebook, you install the library from that repository and run a sample simulation on a Kubernetes workload.

In [None]:
# Google Cloud project ID. Used by `gcloud config set project` and as part of
# the Artifact Registry package index URL in the install cell.
PROJECT_ID = "PROJECT_ID"  # @param {type:"string"}
# Artifact Registry location. Forms the `{REGION}-python.pkg.dev` host of the
# package index URL.
REGION = "us-central1"  # @param {type:"string"}
# Name of the Artifact Registry Python repository the library is installed from.
ARTIFACT_REPO = "hpa-config-recommender-repo"


In [None]:

# Sign in to gcloud without launching a local browser, then make PROJECT_ID
# the active gcloud project.
! gcloud auth login --no-launch-browser
! gcloud config set project {PROJECT_ID}

### Install build tool and build the project


In [None]:
import os
from google.auth.transport.requests import Request
from google.auth import default

# Get the access token for authentication
creds, project = default()
creds.refresh(Request())
token = creds.token

# Set the PIP authentication header
os.environ["PIP_EXTRA_INDEX_URL"] = f"https://oauth2accesstoken:{token}@{REGION}-python.pkg.dev/{PROJECT_ID}/{ARTIFACT_REPO}/simple/"

!gcloud artifacts print-settings python \
    --project={PROJECT_ID} \
    --repository={ARTIFACT_REPO} \
    --location={REGION}

# Install the built library
! pip install hpaconfigrecommender --upgrade --quiet
! pip install nest_asyncio --quiet

### Restart runtime (Colab only)

To use the newly installed packages, you must restart the runtime on Google Colab.

In [None]:
import sys

# Only Google Colab needs the restart; in other environments this cell is a no-op.
running_in_colab = "google.colab" in sys.modules
if running_in_colab:
    import IPython

    # Ask the running kernel to shut down; the True argument requests a restart
    # so that the newly installed packages are picked up.
    IPython.Application.instance().kernel.do_shutdown(True)

### Authenticate your notebook environment (Colab only)

Authenticate your environment on Google Colab.

In [None]:
import sys

# Colab-only step: outside Colab the `google.colab` module is not loaded and
# this cell does nothing.
running_in_colab = "google.colab" in sys.modules
if running_in_colab:
    from google.colab import auth

    # Authenticate the current Colab user for this runtime.
    auth.authenticate_user()

## Get GKE workload details

The workload must:

*   be a resource monitored by the monitoring project. To add a project to the monitoring project, see [Add a monitored project](https://cloud.google.com/monitoring/settings/multiple-projects#add-monitored-project)
*   be a Kubernetes Deployment object
*   have at least 14 days of metrics in Cloud Monitoring



In [None]:
# Allow asyncio to work in nested environments like Jupyter Notebooks
import nest_asyncio
nest_asyncio.apply()

# Define input parameters for workload details.
# Replace each placeholder with the values of the workload to analyze; these
# constants are passed to WorkloadDetails in the next code cell.
# PROJECT_ID is declared again here (it also appears in the install cell);
# presumably because the Colab runtime restart clears earlier variables.
PROJECT_ID = 'PROJECT_ID'  #@param {type:"string"}
LOCATION = 'LOCATION'  #@param {type:"string"}
CLUSTER_NAME = 'CLUSTER_NAME'  #@param {type:"string"}
NAMESPACE = 'NAMESPACE'  #@param {type:"string"}
CONTROLLER_NAME = 'CONTROLLER_NAME'  #@param {type:"string"}
# This notebook is documented as tested only with Kubernetes Deployments;
# presumably the value should be 'Deployment' - confirm the accepted spelling.
CONTROLLER_TYPE = 'CONTROLLER_TYPE'  #@param {type:"string"}
CONTAINER_NAME = 'CONTAINER_NAME'  #@param {type:"string"}

### Set workload detail information

*   Set workload detail information
*   Retrieve the time it takes for the pod to move from a `scheduled` to `ready` state. See [pod lifecycle](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/) for more details


In [None]:
import logging
# Import the package utilities; the `config` and `models` submodules are used below
import hpaconfigrecommender.utils as utils

# Import function that retrieves the workload startup time
from hpaconfigrecommender.read_workload_startuptime import get_workload_startup_time

# Module-level logger (not used further in this cell)
logger = logging.getLogger(__name__)


# Instantiate the library's configuration object with no arguments
config = utils.config.Config()

# Create WorkloadDetails object using the input parameters
workload_details = utils.models.WorkloadDetails(
    config=config,
    project_id=PROJECT_ID,
    cluster_name=CLUSTER_NAME,
    location=LOCATION,
    namespace=NAMESPACE,
    controller_name=CONTROLLER_NAME,
    controller_type=CONTROLLER_TYPE,
    container_name=CONTAINER_NAME
)

# Replace workload_details with the object returned by the startup-time lookup.
# Presumably this fills in `scheduled_to_ready_seconds` - verify in the output below.
workload_details = get_workload_startup_time(
    config,
    workload_details)
# Display the resulting workload details as the cell output
workload_details

> **Note:** If you don't have access to Asset Inventory, you can use `kubectl get events` to determine the pod startup time:

```
kubectl get events --field-selector involvedObject.name=<pod-name> \
    --sort-by='.metadata.creationTimestamp' -o jsonpath='{range.items[*]}{.reason}{"\t"}{.lastTimestamp}{"\n"}{end}' | \
    awk '/Scheduled/{t1=$2} /ContainersReady/{t2=$2} END{if (t1 && t2) { "date -d@"t2" +%s" - "date -d@"t1" +%s" | getline diff; print diff"s"} else {print "Timestamps not found"}}'
```

Then set the startup time in the workload

```
workload_details.scheduled_to_ready_seconds = [ENTER SECONDS BETWEEN SCHEDULE AND READY]
```

### Get workload timeseries

Get workload timeseries data from [Cloud Monitoring](https://console.cloud.google.com/monitoring)



In [None]:
import pandas as pd
from datetime import datetime, timedelta

# Import function to retrieve aggregated workload time series data
from hpaconfigrecommender.read_workload_timeseries import get_workload_agg_timeseries

# Optional manual override of the scheduled-to-ready time (in seconds):
# workload_details.scheduled_to_ready_seconds = 20

# Analysis window: the 14 days ending at the current local time
analysis_window = timedelta(days=14)
end_datetime = datetime.now()
start_datetime = end_datetime - analysis_window

# Fetch the aggregated workload time series for that window
workload_df = get_workload_agg_timeseries(config, workload_details, start_datetime, end_datetime)

# Display the workload DataFrame as the cell output
workload_df


### Generate HPA Plans

The HPA configuration planning process analyzes past performance data (time series data) of your workload. This data is used to generate various combinations of resource allocations (CPU and Memory) and scaling limits (minimum and maximum replicas). These combinations are then evaluated in the subsequent simulation steps to determine the most efficient and reliable HPA configuration.

In [None]:
from hpaconfigrecommender.run_workload_simulation_plan import get_simulation_plans

# Build the candidate plans from the workload details and its historical
# time series. The call returns the plans together with a message.
plans, msg = get_simulation_plans(workload_details, workload_df)

# Print one plan per line, followed by the accompanying message.
for candidate_plan in plans:
    print(candidate_plan)
print(msg)

### Run HPA Configuration Simulations

This step simulates the HPA algorithm using the plans generated previously and your workload's historical data. Plans that cause resource underprovisioning (CPU or memory) are discarded. The most reliable and cost-effective HPA configuration is returned in best_option_df (DataFrame) and JSON format, including recommendations for CPU/memory resources, replica counts, and target CPU utilization. All valid simulation results are available in all_dfs for further analysis and configuration tuning.

In [None]:
from hpaconfigrecommender.run_workload_simulation_run import run_simulation_plans

# Simulate every plan against the workload's historical data, then unpack the
# four results: best option, its recommendation, the reasons, and all result frames.
simulation_result = run_simulation_plans(plans, workload_details, workload_df)
best_option_df, recommendation, reasons, all_dfs = simulation_result

# Report the outcome: the recommendation as JSON when a best option exists,
# otherwise the reasons that were returned.
if not best_option_df.empty:
    print("Recommendations Summary:")
    print(recommendation.to_json())
else:
    print("No suitable recommendations found. Summary:")
    print(reasons)

### Store recommendation in BigQuery

Store HPA recommendations in BigQuery for dashboards and alerts. You can store the best recommendation or all reliable ones. If no cost-effective HPA configuration is found, a static VPA is recommended. Storing all recommendations allows for analysis and fine-tuning of configuration settings.

In [None]:
store_all_recommendations_to_bigquery = True #@param {type:"boolean"}

from hpaconfigrecommender.run_workload_simulation_run import write_to_bigquery

# Destination dataset and table in BigQuery
DATASET_ID = "workload_metrics"
TABLE_ID = "hpa_forecast_results"

# Choose what to store: every simulation result, or only the best option
# (the simulation with the best cost savings that still ensures reliability).
if store_all_recommendations_to_bigquery:
    dfs_to_store = all_dfs
else:
    dfs_to_store = [best_option_df]

# Write each selected DataFrame to the BigQuery table
for simulation_df in dfs_to_store:
    write_to_bigquery(simulation_df, workload_details, DATASET_ID, TABLE_ID)

### Display the most cost effective simulation

Plot the recommendation that is the most cost effective while ensuring reliability

In [None]:
# Plot visualizations for the recommendation.
# A DataFrame cannot be used directly as an `if` condition: pandas raises
# "ValueError: The truth value of a DataFrame is ambiguous". Test `.empty`
# instead, as the simulation cell does.
if best_option_df is not None and not best_option_df.empty:
    # (plot title, columns drawn on the y axis); every plot uses window_begin as x.
    plot_specs = [
        ("CPU Recommendation vs Avg Usage",
         ["recommended_cpu_request", "avg_container_cpu_usage"]),
        ("Memory Recommendation vs Avg Usage (MiB)",
         ["recommended_mem_request_mi", "max_containers_mem_usage_mi"]),
        ("CPU Sum Usage vs Recommendation",
         ["hpa_forecast_sum_cpu_up_and_running", "sum_container_cpu_usage"]),
        ("Memory Sum Usage vs Recommendation (MiB)",
         ["hpa_forecast_sum_mem_up_and_running", "sum_containers_mem_usage_mi"]),
    ]
    for plot_title, y_columns in plot_specs:
        best_option_df.plot(title=plot_title, x="window_begin", y=y_columns)
else:
    print("No recommendation available to plot.")

## Next Steps

Your HPA recommendations are now in BigQuery and ready for visualization! You can create dashboards using tools like Looker or any other tool that connects to BigQuery. The data is located in your monitoring project: `PROJECT_ID.DATASET_ID.TABLE_ID`.