In [None]:
# Read both watermarks set by the "Extract_Max_ts" task in one pass; each lookup
# uses the same debugValue placeholder.
max_match_ts, max_delivery_ts = (
    dbutils.jobs.taskValues.get(
        taskKey="Extract_Max_ts",
        key=watermark_key,
        debugValue="2026-01-01 00:00:00",
    )
    for watermark_key in ("max_match_ts", "max_delivery_ts")
)

# Echo the values so they show up in the cell output / job log.
print(max_match_ts, max_delivery_ts)


In [None]:
from pyspark.sql.functions import col, to_date, current_timestamp
# Bronze match rows whose ingestion_ts is at or before the watermark read from
# the upstream task. The bound is applied through the DataFrame API rather than
# being interpolated into SQL text, so a quote or other stray character in the
# value cannot alter the statement.
# NOTE(review): this keeps everything up to the watermark (no lower bound) and
# the last cell appends to silver.fact_matches — confirm that reruns cannot
# re-append rows that were already loaded.
match_df = (
    spark.read.table("bronze.match_loading")
    .filter(col("ingestion_ts") <= max_match_ts)
)

# Dimension lookups, trimmed to (key, natural name) pairs: the name is the join
# column against the bronze rows, the key is what the fact table keeps.
dim_team = spark.read.table("silver.dim_team").select("team_sk", "team_name")

dim_player = spark.read.table("silver.dim_player").select("player_sk", "player_name")

# The venue dimension exposes venue_id / official_name rather than *_sk / *_name.
dim_venue = spark.read.table("silver.dim_venue").select("venue_id", "official_name")

dim_umpire = spark.read.table("silver.dim_umpire").select("umpire_sk", "umpire_name")


In [None]:
# Resolve every team-valued column of the match rows to its key in dim_team.
# Each pass renames the dimension columns to "<role>_sk" / "<role>" and
# left-joins on the role column, so rows without a matching team are kept with
# a null key. Order matches the fact layout: team1, team2, toss_winner, winner,
# home_team.
df = match_df
for team_role in ("team1", "team2", "toss_winner", "winner", "home_team"):
    team_lookup = (
        dim_team
        .withColumnRenamed("team_sk", f"{team_role}_sk")
        .withColumnRenamed("team_name", team_role)
    )
    df = df.join(team_lookup, team_role, "left")


In [None]:
# Look up the player-of-the-match key: rename the dimension columns so the name
# lines up with the match column, then left-join to keep rows with no match.
player_of_match_lookup = (
    dim_player
    .withColumnRenamed("player_sk", "player_of_match_sk")
    .withColumnRenamed("player_name", "player_of_match")
)
df = df.join(player_of_match_lookup, on="player_of_match", how="left")


In [None]:
# Look up the venue key: dim_venue's venue_id becomes venue_sk and its
# official_name is matched against the match rows' venue column (left join, so
# unmatched venues keep the row with a null key).
venue_lookup = (
    dim_venue
    .withColumnRenamed("venue_id", "venue_sk")
    .withColumnRenamed("official_name", "venue")
)
df = df.join(venue_lookup, on="venue", how="left")


In [None]:
# Resolve the three umpire columns to keys in dim_umpire, one left join per
# slot, in the order umpire1, umpire2, umpire3. A slot with no matching umpire
# keeps the row and gets a null key.
for umpire_slot in ("umpire1", "umpire2", "umpire3"):
    umpire_lookup = (
        dim_umpire
        .withColumnRenamed("umpire_sk", f"{umpire_slot}_sk")
        .withColumnRenamed("umpire_name", umpire_slot)
    )
    df = df.join(umpire_lookup, umpire_slot, "left")


In [None]:
# Project the joined rows down to the fact layout. The groups below are
# concatenated in order, so the resulting column order is exactly as listed.
match_attribute_cols = [
    "match_id",
    "season",
    # "date" is parsed with the yyyy-MM-dd pattern and exposed as match_date.
    to_date(col("date"), "yyyy-MM-dd").alias("match_date"),
    "dl_applied",
]
team_key_cols = [
    "team1_sk",
    "team2_sk",
    "toss_winner_sk",
    "winner_sk",
    "home_team_sk",
]
outcome_cols = [
    "toss_decision",
    "result",
    "win_by_runs",
    "win_by_wickets",
]
other_key_cols = [
    "player_of_match_sk",
    "venue_sk",
    "umpire1_sk",
    "umpire2_sk",
    "umpire3_sk",
]
lineage_cols = [
    "source_file",
    "ingestion_ts",
    # Stamp taken when the query runs, recorded as created_at.
    current_timestamp().alias("created_at"),
]

fact_match_df = df.select(
    *match_attribute_cols,
    *team_key_cols,
    *outcome_cols,
    *other_key_cols,
    *lineage_cols,
)


In [None]:
# Append the projected rows to the silver fact table in Delta format.
fact_matches_writer = fact_match_df.write.format("delta").mode("append")
fact_matches_writer.saveAsTable("silver.fact_matches")
