In [0]:
# Root URIs (abfss scheme) of the landing and bronze storage containers;
# both end with a trailing slash so sub-paths can be appended directly.
LANDING_PATH = "abfss://landing-dev@stcryptomedallion.dfs.core.windows.net/"
BRONZE_PATH = "abfss://bronze-dev@stcryptomedallion.dfs.core.windows.net/"

# Disable ANSI SQL mode for this Spark session.
spark.conf.set("spark.sql.ansi.enabled", "false")

In [0]:
from pyspark.sql.functions import input_file_name, regexp_extract, col, lit

def autoload_interval(exchange: str, interval: str):
    """Start a streaming load of one exchange/interval pair into a Delta table.

    JSON files under ``{LANDING_PATH}{exchange}/`` are read with the
    ``cloudFiles`` streaming source. Each row is tagged with the path of the
    file it came from plus several fields parsed out of that path, and only
    rows whose parsed ``file_interval`` equals ``interval`` are appended to
    the table ``crypto.bronze.{exchange}_{interval}``.

    Args:
        exchange: Sub-directory of the landing path to read; also stored in
            the ``exchange`` column and used in the table/checkpoint names.
        interval: Interval token (for example ``"15m"``) that must match the
            ``_<interval>.json`` part of the source file path.

    Returns:
        The object returned by ``writeStream...toTable(...)`` on success, or
        ``None`` if anything raised while building or starting the stream
        (the error is printed, not re-raised).
    """
    try:
        source_dir = f"{LANDING_PATH}{exchange}/"
        checkpoint_dir = f"{BRONZE_PATH}_checkpoints/{exchange}_{interval}"
        target_table = f"crypto.bronze.{exchange}_{interval}"

        # Reader options, applied in this order. The schema location depends
        # only on the exchange, so every interval of an exchange shares it.
        reader_options = {
            "cloudFiles.format": "json",
            "cloudFiles.schemaLocation": f"{BRONZE_PATH}{exchange}/_schema",
            "cloudFiles.inferColumnTypes": "true",
            "cloudFiles.rescuedDataColumn": "_rescued_data",
            "recursiveFileLookup": "true",
            "multiLine": "true",
        }
        reader = spark.readStream.format("cloudFiles")
        for option_name, option_value in reader_options.items():
            reader = reader.option(option_name, option_value)
        raw = reader.load(source_dir)

        # (column name, regex) pairs; capture group 1 of each regex is pulled
        # out of the source file path into the named column.
        path_fields = (
            ("file_date", r"date=(\d{4}-\d{2}-\d{2})"),
            ("file_hour", r"hour=(\d{2})"),
            ("file_minute", r"\d{8}_\d{2}(\d{2})\d{2}"),
            ("file_second", r"\d{8}_\d{4}(\d{2})"),
            ("file_interval", r"_(\d+[mhd])\.json"),
        )
        enriched = raw.withColumn("_input_file", input_file_name())
        for field_name, pattern in path_fields:
            enriched = enriched.withColumn(
                field_name, regexp_extract(col("_input_file"), pattern, 1)
            )
        enriched = enriched.withColumn("exchange", lit(exchange))

        # Keep only the rows that came from files of the requested interval.
        matching = enriched.filter(col("file_interval") == interval)

        writer = matching.writeStream.format("delta")
        writer = writer.option("checkpointLocation", checkpoint_dir)
        writer = writer.option("mergeSchema", "true")
        writer = writer.outputMode("append").trigger(availableNow=True)
        query = writer.toTable(target_table)

        print(f"✓ Started: {exchange}_{interval}")
        return query

    except Exception as err:
        # Best-effort: report the failure and let the caller skip this pair.
        print(f"✗ Failed: {exchange}_{interval} - {err}")
        return None


# One stream is attempted for every (exchange, interval) combination.
EXCHANGES = ["binance", "bybit"]
INTERVALS = ["15m", "1h", "4h", "1d"]

streams = []
for exchange in EXCHANGES:
    for interval in INTERVALS:
        stream = autoload_interval(exchange, interval)
        if not stream:
            # autoload_interval returned None: this pair failed to start.
            continue
        streams.append(stream)

# Report how many of the attempted pairs produced a stream handle.
expected_total = len(EXCHANGES) * len(INTERVALS)
print(f"\n✓ Successfully started {len(streams)}/{expected_total} streams")


✓ Started: binance_15m
✓ Started: binance_1h
✓ Started: binance_4h
✓ Started: binance_1d
✓ Started: bybit_15m
✓ Started: bybit_1h
✓ Started: bybit_4h
✓ Started: bybit_1d

✓ Successfully started 8/8 streams
