In [0]:
# -----------------------------------------------
# STEP 1: Configuraciones iniciales
# -----------------------------------------------
from pyspark.sql.types import *
from pyspark.sql.functions import *

# JSON message schema (structure of the Pagos table).
# Built field by field; every field keeps the default nullable=True,
# the same default StructField applies.
schema = (
    StructType()
    .add("Id", IntegerType())
    .add("ClienteId", StringType())
    .add("Monto", DoubleType())
    .add("MetodoPago", StringType())
    .add("FechaPago", TimestampType())
    .add("Estado", StringType())
)

# -----------------------------------------------
# STEP 2: Read from Azure Event Hub
# -----------------------------------------------
# The connection string carries a shared access key, so it is fetched from a
# Databricks secret scope at run time rather than being written in the
# notebook. The stored value must be the full connection string, including
# the trailing "EntityPath=pagos_ces" segment.
# NOTE(review): the scope/key names are placeholders — point them at the real
# secret. The key that used to be hard-coded here has been exposed in this
# file and should be rotated.
event_hub_conn_str = dbutils.secrets.get(
    scope="pagos-streaming",
    key="eventhub-connection-string",
)

# The connector is handed the connection string in encrypted form, produced
# by the JVM-side EventHubsUtils helper.
eh_conf = {
    'eventhubs.connectionString': sc._jvm.org.apache.spark.eventhubs.EventHubsUtils.encrypt(event_hub_conn_str)
}

# Streaming DataFrame over the Event Hub referenced by the connection string.
eventhub_stream = (
    spark.readStream
    .format("eventhubs")
    .options(**eh_conf)
    .load()
)

# Decode the message and extract its fields.
# Stage 1: cast the body to a string, parse it as JSON with `schema`, and keep
# the enqueue time as a timestamp.
_eventhub_parsed = eventhub_stream.select(
    from_json(col("body").cast("string"), schema).alias("json"),
    col("enqueuedTime").cast("timestamp").alias("enqueuedTime"),
)

# Stage 2: flatten the parsed struct, tag the origin of the row, and compute
# the gap between the enqueue time and FechaPago (both cast to long).
eventhub_df = _eventhub_parsed.select(
    "json.*",
    "enqueuedTime",
    lit("eventhub").alias("source"),
    (
        col("enqueuedTime").cast("long") - col("json.FechaPago").cast("long")
    ).alias("tiempo_diferencia_segundos"),
)

# -----------------------------------------------
# STEP 3: Read from Confluent Cloud Kafka
# -----------------------------------------------
kafka_bootstrap_servers = "pkc-921jm.us-east-2.aws.confluent.cloud:9092"
kafka_topic = "pagos_cdc.bdd_cdc.dbo.Pagos"

# The API key/secret pair is fetched from a Databricks secret scope at run
# time rather than being written in the notebook.
# NOTE(review): the scope/key names are placeholders — point them at the real
# secrets. The credentials that used to be hard-coded here have been exposed
# in this file and should be rotated.
kafka_api_key = dbutils.secrets.get(scope="pagos-streaming", key="confluent-api-key")
kafka_api_secret = dbutils.secrets.get(scope="pagos-streaming", key="confluent-api-secret")

# SASL/PLAIN login configuration; the API key is the username and the API
# secret is the password.
kafka_jaas_config = f'kafkashaded.org.apache.kafka.common.security.plain.PlainLoginModule required username="{kafka_api_key}" password="{kafka_api_secret}";'

# Streaming DataFrame subscribed to the topic over SASL_SSL.
kafka_stream = (spark.readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", kafka_bootstrap_servers)
    .option("subscribe", kafka_topic)
    .option("kafka.security.protocol", "SASL_SSL")
    .option("kafka.sasl.mechanism", "PLAIN")
    .option("kafka.sasl.jaas.config", kafka_jaas_config)
    .load()
)

# Process the Kafka messages.
# Stage 1: cast the record value to a string, parse it as JSON with `schema`,
# and keep the record timestamp.
_kafka_parsed = kafka_stream.select(
    from_json(col("value").cast("string"), schema).alias("json"),
    col("timestamp").cast("timestamp").alias("timestamp"),
)

# Stage 2: flatten the parsed struct, tag the origin of the row, and compute
# the gap between the record timestamp and FechaPago (both cast to long).
confluent_df = _kafka_parsed.select(
    "json.*",
    "timestamp",
    lit("confluent").alias("source"),
    (
        col("timestamp").cast("long") - col("json.FechaPago").cast("long")
    ).alias("tiempo_diferencia_segundos"),
)

# -----------------------------------------------
# STEP 4: Union of both streams
# -----------------------------------------------
# unionByName matches columns by name, and the two inputs name their arrival
# timestamp differently ("enqueuedTime" on the Event Hub side, "timestamp" on
# the Kafka side), so a direct union cannot resolve the columns. Both are
# renamed to a shared "event_time" column before the union.
unified_df = (
    eventhub_df.withColumnRenamed("enqueuedTime", "event_time")
    .unionByName(confluent_df.withColumnRenamed("timestamp", "event_time"))
)

# -----------------------------------------------
# STEP 5: Write as a Delta table (streaming)
# -----------------------------------------------
# Sink format and output mode are passed straight to start(); the checkpoint
# location is supplied as a writer option.
(unified_df.writeStream
    .options(checkpointLocation="/mnt/delta/pagos_checkpoint/")
    .start(path="/mnt/delta/pagos_tiempos/", format="delta", outputMode="append")
)
