Since you are operating in a Session-Based Architecture (without a central Metastore/Catalog), you cannot simply go to the "Catalog" tab or run standard SELECT * FROM table commands in the SQL Editor. The tables "physically" exist in S3, but the Databricks UI doesn't know they exist.

To query them, you must register them as Temporary Views inside your notebook. This bridges the gap between your Python credentials and SQL syntax.

In [0]:
# Three secrets read from the "ticker" secret scope, in the order
# access key, secret key, session key.
ACCESS_KEY, SECRET_KEY, SESSION_TOKEN = [
    dbutils.secrets.get(scope="ticker", key=secret_name)
    for secret_name in ("access_key", "secret_key", "session_key")
]

# Task values set by the "Init_Auth" job task. The debugValue is what
# taskValues.get hands back when the notebook is run outside of a job.
temp_ak, temp_sk, temp_token = [
    dbutils.jobs.taskValues.get(taskKey="Init_Auth", key=value_key, debugValue=fallback)
    for value_key, fallback in (
        ("temp_ak", "debug-key"),
        ("temp_sk", "debug-secret"),
        ("temp_token", "debug-token"),
    )
]

In [0]:
# 1. S3A credential options handed to the Delta reader via .options(**aws_creds).
# The values are the "Init_Auth" task values. Each lookup carries a debugValue
# so that running this cell interactively (outside a job) does not raise
# TypeError from taskValues.get; inside a job the real task values are used.
aws_creds = {
    "fs.s3a.access.key": dbutils.jobs.taskValues.get(
        taskKey="Init_Auth", key="temp_ak", debugValue="debug-key"
    ),
    "fs.s3a.secret.key": dbutils.jobs.taskValues.get(
        taskKey="Init_Auth", key="temp_sk", debugValue="debug-secret"
    ),
    "fs.s3a.session.token": dbutils.jobs.taskValues.get(
        taskKey="Init_Auth", key="temp_token", debugValue="debug-token"
    ),
    # Session-token based credentials need the temporary-credentials provider.
    "fs.s3a.aws.credentials.provider": "org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider",
}

# 2. Table registry (single source of truth)
# One row per table: (managed table name, S3 source path, partition column).
# The partition column is listed explicitly so the copy keeps that layout.
sync_config = [
    {"table_name": target, "s3_path": location, "partition_col": partition}
    for target, location, partition in (
        ("bronze_audit", "s3a://mzon-to-databricks-5482/bronze/source=fmp/", "date"),
        ("silver_audit", "s3a://mzon-to-databricks-5482/silver/income_statement/valid", "date"),
        ("quarantine_audit", "s3a://mzon-to-databricks-5482/quarantine/company_financials/", "date"),
    )
]

# 3. The Optimized Sync Function
def sync_s3_to_managed(table_config):
    """Copy one Delta table from S3 into the table named in the registry entry.

    Args:
        table_config: Mapping with "table_name" (target table name),
            "s3_path" (Delta source location) and, optionally,
            "partition_col" (column to partition the written table by).

    Returns:
        None. Progress and outcome are reported with print().

    Raises:
        KeyError: If "table_name" or "s3_path" is missing from table_config.
            Errors raised while reading or writing are NOT propagated; they
            are caught and printed as a "Skipped" line.
    """
    table_name = table_config["table_name"]
    path = table_config["s3_path"]
    part_col = table_config.get("partition_col")

    try:
        print(f"🔄 Syncing {table_name}...")

        # READ: the S3A credentials travel as reader options (aws_creds).
        df = spark.read.format("delta").options(**aws_creds).load(path)

        # WRITE: overwrite the target table, partitioned like the source
        # when a partition column is configured.
        writer = df.write.mode("overwrite").format("delta")
        if part_col:
            writer = writer.partitionBy(part_col)
        writer.saveAsTable(table_name)

        # Report what was actually done instead of "partitioning on 'None'".
        if part_col:
            print(f"Success: {table_name} synced with partitioning on '{part_col}'.")
        else:
            print(f"Success: {table_name} synced without partitioning.")

    except Exception as e:
        # Deliberate best-effort: report the failure and return so the
        # caller can carry on with the remaining tables.
        print(f"Skipped {table_name}: {e}")

# 4. Execute Loop
# Sync every registry entry in list order. sync_s3_to_managed catches and
# prints read/write errors itself, so a failing table does not stop the loop.
for config in sync_config:
    sync_s3_to_managed(config)

A. Check Bronze (Raw History)

In [0]:
%sql
-- Bronze checks. These read bronze_audit, the table written by the sync cell
-- for the bronze S3 path.

-- 1. How many raw records do we have?
SELECT count(*) AS total_bronze_rows FROM bronze_audit;

-- 2. Do we have duplicates? (Should be YES if you ran the job twice)
SELECT symbol, date, count(*) AS duplicate_count, eps
FROM bronze_audit
GROUP BY symbol, date, eps
HAVING count(*) > 1;

B. Check Silver (The "Truth")

In [0]:
%sql
-- Silver checks. These read silver_audit, the table written by the sync cell
-- for the silver S3 path.

-- 1. Verify Uniqueness (Should return 0 rows)
SELECT symbol, date, count(*)
FROM silver_audit
GROUP BY symbol, date
HAVING count(*) > 1;

-- 2. Check Data Types & Values
SELECT symbol, date, revenue, eps, reportedCurrency
FROM silver_audit
ORDER BY date DESC
LIMIT 10;

In [0]:
%sql
-- Quarantine checks. These read quarantine_audit, the table written by the
-- sync cell for the quarantine S3 path.

-- 1. Why are rows failing?
SELECT date, symbol, _failed_cols, revenue, eps
FROM quarantine_audit
LIMIT 20;

-- 2. Which error is most common? (most frequent first)
SELECT _failed_cols, count(*) AS error_count
FROM quarantine_audit
GROUP BY _failed_cols
ORDER BY error_count DESC;