In [0]:
"""
Central configuration for the traffic ETL pipeline.

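This module defines a single ``Config`` class that centralizes the
catalog name, database name, storage locations and logical table names
used in the ETL process.  A module-level ``conf`` instance, built with
the defaults (or the ``ENV`` / ``STORAGE_ACCOUNT`` / ``METASTORE_ACCOUNT``
environment variables when they are set), is provided for convenience.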
"""
"""
Change note: the only change in this revision is the addition of
``REGION_MAP``, which is used for the dim_region build.
"""
import os
from pyspark.sql import SparkSession

# Region code -> region display name.
# Optional: only needed here if dim_region is not seeded elsewhere.
# Entries keep their original order; one per line so diffs stay small.
# NOTE(review): "Waverly" spelling is kept exactly as found - confirm against
# the source data before changing it.
REGION_MAP = {
    "BBN": "Blackburn",
    "BEN": "Bendigo",
    "BRI": "Brighton",
    "CA1": "Carlton 1",
    "CA2": "Carlton 2",
    "CRN": "Croydon",
    "DIO": "Dialin/Dialout (remote)",
    "DON": "Doncaster",
    "DUP": "Duplicate",
    "ES2": "Essendon 2",
    "ESS": "Essendon",
    "FR2": "Frankston 2",
    "FRA": "Frankston",
    "FT1": "Footscray 1",
    "FT2": "Footscray 2",
    "FT3": "Footscray 3",
    "GE2": "Geelong 2",
    "GEE": "Geelong",
    "GLI": "Glen Iris",
    "GR2": "Greensborough 2",
    "GRE": "Greensborough",
    "KEW": "Kew",
    "MC1": "Melbourne City 1",
    "MC2": "Melbourne City 2",
    "MC3": "Melbourne City 3",
    "MEN": "Mentone",
    "MNP": "Moonee Ponds",
    "PR2": "Preston 2",
    "PRS": "Preston",
    "SK1": "St Kilda 1",
    "SK2": "St Kilda 2",
    "SP2": "Springvale 2",
    "SPR": "Springvale",
    "VI2": "Regional Victoria 2",
    "VIC": "Regional Victoria",
    "WV1": "Waverly 1",
    "WV2": "Waverly 2",
}

# ---------------- Databricks widgets passthrough ----------------
# Declare the three widgets and copy their current values into environment
# variables so that Config() picks them up below.  When ``dbutils`` is not
# defined (e.g. outside a Databricks notebook) the lookup raises NameError and
# the whole step is skipped.
try:
    _widgets = dbutils.widgets

    _widgets.dropdown("ENV", "dev", ["dev", "qa"], "Environment")
    _widgets.text("STORAGE_ACCOUNT", "trafficsa2", "Storage account")
    _widgets.text("METASTORE_ACCOUNT", "trafficsa2", "Metastore account")

    os.environ["ENV"] = _widgets.get("ENV").strip().lower()
    os.environ["STORAGE_ACCOUNT"] = _widgets.get("STORAGE_ACCOUNT").strip()

    # An empty metastore widget falls back to the storage account set just above.
    _metastore_value = _widgets.get("METASTORE_ACCOUNT")
    if not _metastore_value:
        _metastore_value = os.environ["STORAGE_ACCOUNT"]
    os.environ["METASTORE_ACCOUNT"] = _metastore_value.strip()
except NameError:
    pass

import os

class Config:
    """Environment-aware names and storage locations for the traffic ETL.

    Every setting is resolved in this order: explicit constructor argument,
    then the matching environment variable (``ENV``, ``STORAGE_ACCOUNT``,
    ``METASTORE_ACCOUNT``), then a built-in default.  A blank or
    whitespace-only value at any step counts as "not provided", so it can
    never yield an empty catalog name or a storage host with no account.

    Args:
        env: Environment name; lower-cased and also used as the catalog
            name.  Defaults to ``"dev"``.
        storage_account: Account name used to build the managed and
            unmanaged data locations.  Defaults to ``"trafficsa2"``.
        metastore_account: Account name used to build the metastore root.
            Defaults to the resolved storage account.
    """

    def __init__(self, env=None, storage_account=None, metastore_account=None):
        # 1) resolve settings: argument -> environment variable -> default
        self.env = self._first_non_blank(env, os.getenv("ENV"), default="dev").lower()
        sa = self._first_non_blank(
            storage_account, os.getenv("STORAGE_ACCOUNT"), default="trafficsa2"
        )
        ms = self._first_non_blank(
            metastore_account, os.getenv("METASTORE_ACCOUNT"), default=sa
        )

        # 2) catalog and database names (the catalog is named after the env)
        self.catalog = self.env
        self.db_name = "traffic_db"

        # 3) FQDNs
        self.sa_fqdn = f"{sa}.dfs.core.windows.net"
        self.ms_fqdn = f"{ms}.dfs.core.windows.net"

        # 4) storage locations (each ends with "/", so sub-paths are appended directly)
        self.metastore_root  = f"abfss://traffic-metastore-root@{self.ms_fqdn}/"
        self.managed_data    = f"abfss://traffic-managed-{self.env}@{self.sa_fqdn}/"
        self.unmanaged_data  = f"abfss://traffic-unmanaged-{self.env}@{self.sa_fqdn}/"

        # 5) base paths
        self.raw_data_path   = f"{self.unmanaged_data}data_zone"
        self.checkpoint_base = f"{self.unmanaged_data}checkpoint_zone"

        # 6) logical table names (canonical)
        self.bronze_table  = "raw_traffic"
        self.silver_table  = "traffic_silver_events"   # canonical (plural)
        self.region_lookup = "region_lookup"

    @staticmethod
    def _first_non_blank(*candidates, default):
        """Return the first candidate that is non-empty after stripping, else ``default``."""
        for candidate in candidates:
            if not candidate:
                continue
            cleaned = candidate.strip()
            if cleaned:
                return cleaned
        return default

    def table_fqn(self, table: str) -> str:
        """Return ``table`` qualified as ``<catalog>.<db_name>.<table>``."""
        return f"{self.catalog}.{self.db_name}.{table}"

# Module-level default built with no arguments, so it resolves its settings
# from ENV / STORAGE_ACCOUNT / METASTORE_ACCOUNT when those are set.
conf = Config()


# Session tuning: reuse the active Spark session when there is one, otherwise
# get or create one through the builder.
spark = SparkSession.getActiveSession()
if spark is None:
    spark = SparkSession.builder.getOrCreate()

# Runtime settings applied to whichever session was obtained above.
spark.conf.set("spark.sql.shuffle.partitions", "64")
spark.conf.set("spark.sql.files.maxPartitionBytes", "134217728")  # 128 * 1024 * 1024
spark.conf.set("spark.databricks.io.cache.enabled", "true")