In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import ArrayType , StringType , StructField , StructType , LongType , DoubleType , IntegerType , BooleanType
from pyspark.sql.window import Window



In [0]:
# Location of the bronze ticker data that the streaming read below loads from.
source_path = "/Volumes/workspace/default/storage/bronze/ticker_data_v2"


In [0]:
# Open a streaming read over the Delta table at `source_path`, asking Delta to
# start from table version 0.
df = (
    spark.readStream
    .format("delta")
    .option("startingVersion", "0")
    .load(source_path)
)

In [0]:
# Print the schema of the streaming DataFrame as a quick visual check before
# the columns are cast and parsed.
df.printSchema()

In [0]:
# Layout used to parse each ticker JSON message: `s` is a string (presumably
# the ticker symbol - TODO confirm) and every remaining field is a double.
# All fields are declared nullable.
schema = StructType(
    [StructField("s", StringType(), True)]
    + [
        StructField(field_name, DoubleType(), True)
        for field_name in (
            "currentPrice",
            "change",
            "changePercent",
            "high",
            "low",
            "open",
            "previousClose",
        )
    ]
)

In [0]:
# Turn the incoming rows into typed ticker columns:
#   1. cast `key` and `value` to strings, keeping topic/partition/offset/timestamp;
#   2. decode the JSON string in `value` against `schema` into a `ticker` struct;
#   3. flatten that struct and expose `timestamp` as `event_time`.
# NOTE(review): `columnNameOfCorruptRecord` presumably only captures malformed
# input when `schema` declares a string field with that name; if it does not,
# bad records would just yield null ticker fields - TODO confirm.
json_options = {"mode": "PERMISSIVE", "columnNameOfCorruptRecord": "_corrupt_record"}
df = (
    df.selectExpr(
        "CAST(key AS STRING)",
        "CAST(value AS STRING)",
        "topic",
        "partition",
        "offset",
        "timestamp",
    )
    .withColumn("ticker", F.from_json(F.col("value"), schema, options=json_options))
    .select("ticker.*", F.col("timestamp").alias("event_time"))
)



In [0]:


# Sink and checkpoint locations for the silver ticker stream.
# Volume paths have the form /Volumes/<catalog>/<schema>/<volume>/<folder>;
# the volume segment (`storage` here) must be present in both paths.
destination_path = "/Volumes/workspace/default/storage/silver/ticker_data_v5"
checkpoint_path = "/Volumes/workspace/default/storage/checkpoints/job_silver_ticker_checkpoint_v5"

print("Streaming Strategy:")
print(f"Checkpoint: {checkpoint_path}")
print(f"Data (S3): {destination_path}")

# Append the parsed rows to the Delta table at `destination_path`.
# `availableNow=True` processes the data that is currently available and then
# stops the query on its own.
query = (
    df.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", checkpoint_path)
    .trigger(availableNow=True)
    .start(destination_path)
)

# `start()` returns immediately while the query runs in the background.
# Block until it finishes so that later cells only run once the write is
# complete, and so that a failure inside the stream is raised in this cell
# instead of being lost with a discarded query handle.
query.awaitTermination()