# In [None]:
import os

from pyspark.sql import SparkSession
from pyspark.sql.functions import from_json, col
from pyspark.sql.types import StructType, StringType, DoubleType


# Kafka connection settings.
# Each value may be overridden with an environment variable of the same name,
# so the script can be pointed at another cluster/topic without editing it.
# An unset or empty variable falls back to the placeholder default.
KAFKA_BROKER = os.environ.get("KAFKA_BROKER") or "kafka:9092"  # Kafka bootstrap server (host:port)
KAFKA_TOPIC = os.environ.get("KAFKA_TOPIC") or "your_topic"  # Kafka topic to subscribe to

# Create (or reuse) the SparkSession that drives the streaming job.
# "spark.jars.packages" asks Spark to fetch the Kafka source connector and the
# Kafka client library as Maven coordinates.
# NOTE(review): the connector version (3.3.2 / Scala 2.12) presumably has to
# match the installed Spark build -- confirm against the runtime environment.
spark = (
    SparkSession.builder
    .appName("KafkaSparkStreaming")
    .master("local[*]")
    .config(
        "spark.jars.packages",
        "org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.2,"
        "org.apache.kafka:kafka-clients:3.3.1",
    )
    .getOrCreate()
)

print("Spark session initialized.")

# Expected layout of the JSON payload carried in each Kafka message:
# a string "id", a string "timestamp" and a floating-point "value".
schema = (
    StructType()
    .add("id", StringType())
    .add("timestamp", StringType())  # kept as a plain string, not parsed into a timestamp type
    .add("value", DoubleType())
)

# Open a streaming DataFrame on the Kafka topic, beginning at the earliest
# available offsets. All source options are supplied in a single call.
kafka_stream = (
    spark.readStream
    .format("kafka")
    .options(
        **{
            "kafka.bootstrap.servers": KAFKA_BROKER,
            "subscribe": KAFKA_TOPIC,
            "startingOffsets": "earliest",
        }
    )
    .load()
)

# Turn each Kafka record into typed columns:
#   1. cast the message value to a string,
#   2. parse that string as JSON using `schema` into a struct named "data",
#   3. flatten the struct so its fields become top-level columns.
# NOTE(review): payloads that do not match `schema` presumably come through
# as null fields rather than raising -- confirm if bad input must be surfaced.
parsed_stream = (
    kafka_stream
    .selectExpr("CAST(value AS STRING)")
    .select(from_json(col("value"), schema).alias("data"))
    .select("data.*")
)

# Start a streaming query that appends every newly parsed row to the console sink.
query = (
    parsed_stream.writeStream
    .outputMode("append")
    .format("console")
    .start()
)

try:
    # Block the driver until the query terminates or fails.
    query.awaitTermination()
finally:
    # If the wait is interrupted (Ctrl+C, notebook "interrupt kernel") or
    # raises, the query would otherwise be left running in the background.
    # Always stop it explicitly; the original exception still propagates.
    query.stop()
