# In [0]:
from pyspark.sql.functions import col
from pyspark.sql.types import DoubleType, StringType, StructType, StructField

# Location of the raw brewery JSON files in the bronze layer
path_bronze = "abfss://lakehouse@projectabi.dfs.core.windows.net/bronze/open_brewery"

# Three-part name (catalog.schema.table) of the table this script works on
catalog_name = "datricks_airflow_azure"
schema_name = "silver"
table_name = "project_brewery"
full_table_name = ".".join((catalog_name, schema_name, table_name))

# Apply comments to the table and its columns
def _sql_string_literal(text):
    """Escape *text* for use inside a single-quoted Spark SQL string literal."""
    # Backslashes first, then quotes, so the escapes added for quotes are not
    # themselves doubled. Assumes Spark's default literal parsing, where a
    # backslash escapes the following character.
    return str(text).replace("\\", "\\\\").replace("'", "\\'")


def _sql_identifier(name):
    """Backtick-quote a column name, doubling any embedded backticks."""
    return "`" + name.replace("`", "``") + "`"


def adicionaComentariosTabela(catalog, schema, table, table_comment, col_comments):
    """Set the table comment and per-column comments on an existing table.

    Uses the notebook-provided global ``spark`` session.

    Args:
        catalog: Catalog part of the table name.
        schema: Schema part of the table name.
        table: Table part of the table name.
        table_comment: Text stored as the table-level comment.
        col_comments: Mapping of column name to comment text. Table columns
            missing from the mapping are left untouched, and mapping keys that
            match no table column are ignored.
    """
    qualified_name = f"{catalog}.{schema}.{table}"

    spark.sql(
        f"COMMENT ON TABLE {qualified_name} IS '{_sql_string_literal(table_comment)}'"
    )

    # Iterate the table's actual columns so only existing ones are altered.
    for field in spark.table(qualified_name).schema:
        if field.name not in col_comments:
            continue
        column = _sql_identifier(field.name)
        # CHANGE COLUMN restates the column with its current name and type;
        # only the comment is new.
        tipo = field.dataType.simpleString()
        comentario = _sql_string_literal(col_comments[field.name])
        spark.sql(
            f"ALTER TABLE {qualified_name} CHANGE COLUMN {column} {column} {tipo} "
            f"COMMENT '{comentario}'"
        )

# Table-level description
table_comment = (
    "Table containing brewery data extracted from the bronze layer "
    "and partitioned by state_province."
)

# Column descriptions, keyed by column name
column_comments = dict(
    (
        ("id", "Unique identifier for the brewery"),
        ("name", "Name of the brewery"),
        ("brewery_type", "Classification of the brewery (e.g., micro, nano, brewpub)"),
        ("address_1", "Primary street address of the brewery"),
        ("address_2", "Secondary address (optional)"),
        ("address_3", "Tertiary address (optional)"),
        ("city", "City where the brewery is located"),
        ("state_province", "State or province of the brewery"),
        ("postal_code", "Postal or ZIP code of the brewery address"),
        ("country", "Country where the brewery is located"),
        ("longitude", "Longitude coordinate of the brewery location"),
        ("latitude", "Latitude coordinate of the brewery location"),
        ("phone", "Phone number of the brewery (if available)"),
        ("website_url", "Website of the brewery"),
        ("state", "U.S. state abbreviation (can overlap with state_province)"),
        ("street", "Full street name (may duplicate address_1)"),
    )
)

# Load the raw JSON records from the bronze path
df_bronze = spark.read.json(path_bronze)

# Keep one row per brewery id
df_silver = df_bronze.dropDuplicates(["id"])

# Target column types: every column is a nullable string except the two
# coordinates, which are nullable doubles
schema = StructType([
    StructField(column_name, column_type, True)
    for column_name, column_type in (
        ("id", StringType()),
        ("name", StringType()),
        ("brewery_type", StringType()),
        ("address_1", StringType()),
        ("address_2", StringType()),
        ("address_3", StringType()),
        ("city", StringType()),
        ("state_province", StringType()),
        ("postal_code", StringType()),
        ("country", StringType()),
        ("longitude", DoubleType()),
        ("latitude", DoubleType()),
        ("phone", StringType()),
        ("website_url", StringType()),
        ("state", StringType()),
        ("street", StringType()),
    )
])

# Replace each listed column with itself cast to its target type; columns
# not listed in the schema are carried through unchanged
for field in schema:
    df_silver = df_silver.withColumn(
        field.name,
        col(field.name).cast(field.dataType),
    )

# Checks if a table exists
def table_exists(catalog, schema, table):
    """Report whether ``catalog.schema.table`` can be resolved by Spark.

    Uses the notebook-provided global ``spark`` session.

    Args:
        catalog: Catalog part of the table name.
        schema: Schema part of the table name.
        table: Table part of the table name.

    Returns:
        True if ``spark.table`` resolves the name, False if Spark raises an
        ``AnalysisException`` for it.

    Raises:
        Any non-analysis error (interrupts, connectivity problems, ...) is
        propagated instead of being reported as "table does not exist".
    """
    # Imported here so the function carries its own dependency.
    from pyspark.sql.utils import AnalysisException

    try:
        spark.table(f"{catalog}.{schema}.{table}")
    except AnalysisException:
        return False
    return True

if table_exists(catalog_name, schema_name, table_name):
    # Table already exists: replace its contents with the freshly built data
    (
        df_silver.write
        .format("delta")
        .mode("overwrite")
        .partitionBy("state_province")
        .saveAsTable(full_table_name)
    )
    print(f"Overwrite to existing Delta table: {full_table_name}")
else:
    # Create schema if not exists
    spark.sql(f"CREATE SCHEMA IF NOT EXISTS {catalog_name}.{schema_name}")

    # First run: write the data and register the table in the catalog.
    # The format is stated explicitly so the table is Delta regardless of the
    # session's default data source, matching the overwrite branch above.
    (
        df_silver.write
        .format("delta")
        .mode("overwrite")
        .partitionBy("state_province")
        .option("overwriteSchema", "true")
        .saveAsTable(full_table_name)
    )

    # Table and column comments are applied only when the table is created
    adicionaComentariosTabela(catalog_name, schema_name, table_name, table_comment, column_comments)

    print(f"Delta table created and registered: {full_table_name}")
