In [None]:
%%scalaspark project.spark.fine-grained
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.Trigger
import org.apache.spark.sql.functions._
import com.amazonaws.services.glue.GlueContext
import org.apache.spark.SparkContext

/**
 * Structured Streaming job that copies JSON records from a Kinesis data stream
 * into an Iceberg table.
 *
 * The stream name, endpoint, role ARN, bucket and table identifiers below are
 * placeholders and must be replaced before the job is run.
 */
object KinesisToIcebergStreaming {

  /**
   * Builds the Spark session, wires the Kinesis source to the Iceberg sink and
   * blocks until the streaming query terminates.
   *
   * @param args command-line arguments; not read by this job
   */
  def main(args: Array[String]): Unit = {
    // Spark session with the Iceberg SQL extensions enabled. `spark_catalog` is
    // wrapped by Iceberg's session catalog (Hive-backed); `aws_catalog` is a
    // separate Iceberg catalog implemented by the Glue catalog, with its
    // warehouse on S3.
    val spark = SparkSession.builder()
      .appName("Kinesis-To-Iceberg-Streaming")
      .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
      .config("spark.sql.catalog.spark_catalog", "org.apache.iceberg.spark.SparkSessionCatalog")
      .config("spark.sql.catalog.spark_catalog.type", "hive")
      .config("spark.sql.catalog.aws_catalog", "org.apache.iceberg.spark.SparkCatalog")
      .config("spark.sql.catalog.aws_catalog.catalog-impl", "org.apache.iceberg.aws.glue.GlueCatalog")
      .config("spark.sql.catalog.aws_catalog.warehouse", "s3://your-bucket/warehouse/")
      .getOrCreate()

    // Streaming source: the Kinesis data stream.
    // NOTE(review): the option names are connector-specific, and some Kinesis
    // connectors also expect a session name alongside the STS role ARN —
    // confirm against the connector that is actually on the classpath.
    val streamingDF = spark.readStream
      .format("kinesis")
      .option("streamName", "your-stream-name")
      .option("endpointUrl", "https://kinesis.your-region.amazonaws.com")
      .option("startingPosition", "TRIM_HORIZON") // or LATEST
      .option("awsSTSRoleARN", "arn:aws:iam::account:role/role-name") // if using role-based auth
      .load()

    // The `data` column is cast to a string and parsed as JSON.
    // Adjust the schema string to match the real record structure.
    val recordSchema = "struct<id:string, timestamp:timestamp, value:double>"

    val parsedDF = streamingDF
      .selectExpr("cast (data as STRING) as json_data")
      // The Scala API has no from_json(Column, String) overload: a string schema
      // is only accepted together with an options map, so an empty one is passed.
      .select(from_json(col("json_data"), recordSchema, Map.empty[String, String]).as("data"))
      // Promote the parsed struct's fields to top-level columns.
      .select("data.*")

    // Append each micro-batch to the Iceberg table once per minute; streaming
    // progress is tracked under the checkpoint location.
    val query = parsedDF.writeStream
      .format("iceberg")
      .outputMode("append")
      .option("path", "aws_catalog.your_database.your_table")
      .option("checkpointLocation", "s3://your-bucket/checkpoints/")
      .trigger(Trigger.ProcessingTime("1 minute"))
      .start()

    // Keep the driver alive for as long as the streaming query is running.
    query.awaitTermination()
  }
}