In [0]:
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.compute import InstancePoolAwsAttributes,InstancePoolAccessControlRequest,InstancePoolPermissionLevel
from pyspark.sql.functions import sum as _sum, when,col, split
import pandas as pd

# Databricks SDK client shared by the later cells (used for the clusters and
# instance-pools APIs).
w = WorkspaceClient()

In [0]:
# Count nodes per node type for clusters in this workspace that do not already
# draw from an instance pool. Each driver row contributes one node; each worker
# row contributes worker_count, falling back to max_autoscale_workers when
# worker_count is null.
# NOTE(review): every matching row of system.compute.clusters is counted; if the
# table holds several rows per cluster (e.g. history of config changes or deleted
# clusters), the totals are inflated — confirm against the table's semantics.
node_counts_df = spark.sql("""
  with node_counts as
(SELECT
    driver_node_type as node_type, 1 as node_count
  FROM
    system.compute.clusters
WHERE 
workspace_id = dataops_prd.libs.get_workspace_id() AND driver_instance_pool_id is null
UNION ALL
SELECT
    worker_node_type as node_type, coalesce(worker_count,max_autoscale_workers) as node_count
  FROM
    system.compute.clusters
WHERE workspace_id = dataops_prd.libs.get_workspace_id() and worker_instance_pool_id is null
)
select node_type, sum(node_count) as count
FROM
    node_counts
GROUP BY ALL
ORDER BY count DESC
""")

# Split "<family>.<size>" node type ids into their two parts. The pattern is a
# raw string: "\." in a normal string literal is an invalid escape sequence and
# triggers a SyntaxWarning on recent Python versions.
node_type_parts = split(col("node_type"), r"\.")
df = node_counts_df.select(
    node_type_parts.getItem(0).alias("instance_family"),
    node_type_parts.getItem(1).alias("size"),
    col("count"),
)
df.display()

In [0]:
from pyspark.sql.functions import regexp_replace, concat, lit

# Build the "<letters>-fleet" family name for each observed instance family by
# stripping the digits from the family and appending "-fleet".
# df holds one row per (family, size), so the same fleet name would otherwise be
# emitted many times; distinct() keeps a single row per fleet family.
# size/count are typed nulls so the schema lines up with df when the two frames
# are combined by column name.
fleet_instances = (
    df.select(
        concat(regexp_replace("instance_family", r"\d+", ""), lit("-fleet")).alias("instance_family")
    )
    .distinct()
    .withColumn("size", lit(None).cast("string"))
    .withColumn("count", lit(None).cast("long"))
)

display(fleet_instances)

In [0]:
# Stack the observed (instance_family, size, count) rows on top of the derived
# fleet rows, matching columns by name rather than by position.
instances_to_pools = df.unionByName(fleet_instances)

display(instances_to_pools)

In [0]:
# Total node count per instance family (the aggregate column is named "sum(count)").
most_used_instance = df.groupBy("instance_family").agg(_sum("count"))

# Left-join the per-family totals against instances_to_pools and keep only the
# families with no match on the right side (saving_plan = false).
# NOTE(review): instances_to_pools is built from df (plus the fleet rows) and
# most_used_instance is also built from df, so every family on the left appears
# to have a match on the right — this result may always be empty. Confirm what
# the right-hand side of the join is meant to be.
# NOTE(review): both sides derive from df and the condition references columns
# via the parent DataFrames; verify Spark resolves this as intended.
instances_to_change = (
    most_used_instance
    .join(instances_to_pools, instances_to_pools["instance_family"] == most_used_instance["instance_family"], "left")
    .withColumn("saving_plan", when(instances_to_pools["instance_family"].isNull(), False).otherwise(True))
    .where("saving_plan = false")
    .select(most_used_instance["instance_family"],"sum(count)","saving_plan")
)

display(instances_to_change)
  

In [0]:
# Distinct instance sizes seen in df, collected to a Python list on the driver.
most_used_size_list = df.select("size").distinct().selectExpr("collect_list(size)").first()[0]

# Distinct instance families (observed + fleet). instances_to_pools can repeat a
# family once per size row, and collect_list keeps duplicates, which would yield
# repeated node type ids downstream — so de-duplicate before collecting.
instances_to_pools_list = (
    instances_to_pools
    .select("instance_family")
    .distinct()
    .selectExpr("collect_list(instance_family)")
    .first()[0]
)


In [0]:
def _cross_node_type_ids(families, sizes):
    """Return "<family>.<size>" for every family/size pair, families varying slowest."""
    combined = []
    for instance in families:
        for size in sizes:
            combined.append(f"{instance}.{size}")
    return combined


# Candidate node type ids: every instance family paired with every observed size.
node_type_id_list = _cross_node_type_ids(instances_to_pools_list, most_used_size_list)

In [0]:
# Node types reported by the clusters API for this workspace, loaded into a Spark
# DataFrame (via pandas) for lookups.
# The intermediate pandas frame gets its own name: assigning it to `df` would
# replace the Spark DataFrame that earlier cells read from, breaking any re-run
# of those cells in the same session.
node_types = w.clusters.list_node_types().node_types
node_types_pd = pd.DataFrame(node_types)
node_df = spark.createDataFrame(node_types_pd)

In [0]:
# Zones reported by the clusters API; fall back to an empty list if none come back.
zones = w.clusters.list_zones().zones or []

# Resolve every known node type id to its normalised category (lower-cased,
# spaces replaced by underscores) with a single Spark job, instead of running
# count()/first() queries for each candidate id inside the loop.
category_by_node_type = {
    row["node_type_id"]: row["category"]
    for row in node_df.selectExpr(
        "node_type_id",
        "replace(lower(category), ' ', '_') as category",
    ).collect()
}

# Create one pool per (node type, availability, zone) combination.
for node_type_id in node_type_id_list:
    if node_type_id not in category_by_node_type:
        print(f"Node type {node_type_id} does not exist")
        continue

    category = category_by_node_type[node_type_id]

    # Fleet node types get the single "auto" zone. This is a per-iteration local:
    # overwriting `zones` here would make every node type processed after the
    # first fleet type lose the real zone list.
    target_zones = ["auto"] if "fleet" in node_type_id else zones

    for availability in ["SPOT", "ON_DEMAND"]:
        for zone_id in target_zones:
            pool_name = f"{availability.lower()}_{zone_id.replace('-', '_')}_{category}_{node_type_id}_pool"
            # Guard each create on its own so one failure (e.g. a name that
            # already exists) does not skip the remaining zones/availabilities
            # for this node type.
            try:
                aws_attributes = InstancePoolAwsAttributes.from_dict(
                    {
                        "availability": availability,
                        "zone_id": zone_id,
                        # Bid percentage is only supplied for spot pools.
                        "spot_bid_price_percent": 100 if availability == "SPOT" else None,
                    }
                )
                # max_capacity is deliberately not passed (it was commented out
                # in the original call).
                w.instance_pools.create(
                    node_type_id=node_type_id,
                    instance_pool_name=pool_name,
                    idle_instance_autotermination_minutes=10,
                    min_idle_instances=0,
                    enable_elastic_disk=True,
                    aws_attributes=aws_attributes,
                )
            except Exception as e:
                print(f"Error creating instance pool {pool_name} for {node_type_id}: {e}")

In [0]:
# Snapshot of every instance pool returned by the API, one dict per pool,
# loaded into pandas and then into a Spark DataFrame for display and querying.
pool_records = [pool.as_dict() for pool in w.instance_pools.list()]
all_pools_pd = pd.DataFrame(pool_records)
all_pools_df = spark.createDataFrame(all_pools_pd)

all_pools_df.display()

In [0]:
# Ids of every pool in all_pools_df, collected to a Python list on the driver.
# NOTE(review): all_pools_df comes from listing all instance pools, so this
# covers every listed pool, not only pools created by this notebook — confirm
# that is intended.
pools_to_permit = all_pools_df.selectExpr("collect_list(instance_pool_id)").first()[0]

In [0]:
def _pool_access_control_list():
    """Build the ACL applied to each pool: admins CAN_MANAGE, users CAN_ATTACH_TO."""
    admins_rule = InstancePoolAccessControlRequest(
        group_name="admins",
        permission_level=InstancePoolPermissionLevel("CAN_MANAGE"),
    )
    users_rule = InstancePoolAccessControlRequest(
        group_name="users",
        permission_level=InstancePoolPermissionLevel("CAN_ATTACH_TO"),
    )
    return [admins_rule, users_rule]


# Apply the same ACL to each pool; a failure on one pool is reported and the
# loop carries on with the next.
for instance_pool_id in pools_to_permit:
    try:
        w.instance_pools.update_permissions(
            instance_pool_id=instance_pool_id,
            access_control_list=_pool_access_control_list(),
        )
    except Exception as e:
        print(f"Error updating permissions for {instance_pool_id}: {e}")