# GOLD: Team Performance (Materialized)

Computes win rate, total fights, distinct fighter count, and fights per fighter for each team, both overall and by year.


In [None]:
from pyspark.sql import functions as F

# Bootstrap: declare notebook widgets, read the database names, configure the
# ADLS account key, select the hive_metastore catalog and make sure both
# databases exist. Every step before the CREATE DATABASE calls is best-effort:
# a failure is reported and the notebook continues.
_WIDGET_DEFAULTS = {
    "storage_account": "storagetmufc",
    "secret_scope": "kv-scope",
    "key_name": "adls-account-key",
    "silver_db": "ufc_silver",
    "gold_db": "ufc_gold",
}

try:
    for _name, _default in _WIDGET_DEFAULTS.items():
        dbutils.widgets.text(_name, _default)
except Exception as exc:
    # Covers both a missing `dbutils` (NameError) and widget API errors.
    print(f"[bootstrap] widgets unavailable, using defaults ({type(exc).__name__}: {exc})")


def _widget(name):
    """Return the value of widget *name*, or its built-in default.

    The default is used whenever the widget cannot be read, so a failed
    widget declaration above cannot crash the reads below.
    """
    try:
        return dbutils.widgets.get(name)
    except Exception:
        return _WIDGET_DEFAULTS[name]


silver_db = _widget("silver_db")
gold_db = _widget("gold_db")
storage_account = _widget("storage_account")
secret_scope = _widget("secret_scope")
key_name = _widget("key_name")

try:
    account_key = dbutils.secrets.get(secret_scope, key_name)
    spark.conf.set(f"fs.azure.account.key.{storage_account}.dfs.core.windows.net", account_key)
except Exception as exc:
    # Still non-fatal, but no longer silent.
    print(f"[bootstrap] ADLS account key not configured ({type(exc).__name__}: {exc})")

try:
    spark.sql("USE CATALOG hive_metastore")
except Exception:
    # Fall back to the catalog API when the SQL statement is rejected.
    try:
        spark.catalog.setCurrentCatalog("hive_metastore")
    except Exception as exc:
        print(f"[bootstrap] could not select catalog hive_metastore ({type(exc).__name__}: {exc})")

spark.sql(f"CREATE DATABASE IF NOT EXISTS {silver_db}")
spark.sql(f"CREATE DATABASE IF NOT EXISTS {gold_db}")
print("Silver DB:", silver_db, "| Gold DB:", gold_db)


In [None]:
# Load silver
af = spark.table(f"hive_metastore.{silver_db}.espn_fights_silver")
ath = spark.table(f"hive_metastore.{silver_db}.espn_athletes_silver")

# Normalize team name with native column functions instead of a Python UDF.
# The regex strips every kind of leading/trailing whitespace, so NULL, empty
# and whitespace-only teams (tabs and newlines included) all become NULL
# rather than an empty-string team.
_team_clean = F.lower(F.regexp_replace(F.col("team"), r"^\s+|\s+$", ""))
_team_norm = F.when(_team_clean != "", _team_clean)  # no otherwise() -> NULL
ath1 = ath.select("athlete_id", _team_norm.alias("team_norm"))


def _fighter_rows(side):
    """Return one row per fight for the given corner ("A" or "B")."""
    prefix = f"fighter_{side.lower()}"
    return af.select(
        "competition_id",
        "event_id",
        "event_date",
        F.year("event_date").alias("event_year"),
        F.col(f"{prefix}_name").alias("athlete_name"),
        F.col(f"{prefix}_winner").alias("is_winner"),
        F.lit(side).alias("side"),
    )


# Make athlete-fight rows (two rows per fight)
left = _fighter_rows("A")
right = _fighter_rows("B")
rows = left.unionByName(right)

# Map athlete name -> id -> team.
# Build one lookup keyed by the lower-cased name (full_name or display_name).
# This allows a plain equi-join instead of an OR-condition join. distinct()
# means an athlete whose two names are equal, or who appears on repeated
# identical rows, contributes a single match per fight row.
_ath_team = (ath
    .select("athlete_id", "full_name", "display_name", _team_norm.alias("team_norm"))
    .filter(F.col("team_norm").isNotNull()))
_name_lookup = (_ath_team
    .select("athlete_id", "team_norm", F.lower("full_name").alias("name_key"))
    .unionByName(_ath_team.select("athlete_id", "team_norm", F.lower("display_name").alias("name_key")))
    .filter(F.col("name_key").isNotNull())
    .distinct())

# Only rows with a known team are kept, so an inner join against the
# team-filtered lookup replaces "left join, then filter team_norm IS NOT NULL".
# NOTE(review): different athletes sharing a name still each match the same
# fight row; resolving that needs an athlete id on the fights table.
rowsj = (rows
    .withColumn("name_key", F.lower("athlete_name"))
    .join(_name_lookup, "name_key", "inner")
    .drop("name_key"))

def _team_stats(frame, *group_cols):
    """Aggregate athlete-fight rows into per-group team metrics.

    Output columns, in order: the grouping columns, fights, wins, fighters,
    win_rate and fights_per_fighter. Both ratios fall back to 0.0 when their
    denominator is not positive.
    """
    won = F.when(F.col("is_winner") == True, 1).otherwise(0)
    counts = frame.groupBy(*group_cols).agg(
        F.count("*").alias("fights"),
        F.sum(won).alias("wins"),
        F.countDistinct("athlete_id").alias("fighters"),
    )
    fights = F.col("fights")
    fighters = F.col("fighters")
    with_rate = counts.withColumn(
        "win_rate",
        F.when(fights > 0, F.col("wins") / fights).otherwise(F.lit(0.0)),
    )
    return with_rate.withColumn(
        "fights_per_fighter",
        F.when(fighters > 0, fights / fighters).otherwise(F.lit(0.0)),
    )


# Overall team performance
team_perf = _team_stats(rowsj, "team_norm")

# Yearly team performance
team_year = _team_stats(rowsj, "team_norm", "event_year")

# Materialize each result as a Delta table (mv_*) and expose it through a
# pass-through view (v_*).
_gold = f"hive_metastore.{gold_db}"
_outputs = [
    (team_perf, "team_performance"),
    (team_year, "team_performance_year"),
]

for _frame, _suffix in _outputs:
    _writer = _frame.write.format("delta").mode("overwrite").option("overwriteSchema", "true")
    _writer.saveAsTable(f"{_gold}.mv_{_suffix}")

for _, _suffix in _outputs:
    spark.sql(f"CREATE OR REPLACE VIEW {_gold}.v_{_suffix} AS SELECT * FROM {_gold}.mv_{_suffix}")

# Previews (ordered by highest win_rate, then fights)
for _, _suffix in _outputs:
    _preview = (spark.table(f"{_gold}.mv_{_suffix}")
        .orderBy(F.desc("win_rate"), F.desc("fights"))
        .limit(50))
    display(_preview)
