In [None]:
import yaml
import nbimporter
import pyspark
from pyspark.sql import SparkSession

def create_spark_session(config_file, app_name) -> "SparkSession | None":
    """Build (or reuse) a SparkSession wired to an Iceberg catalog on Nessie.

    The ``spark`` section of the YAML file at ``config_file`` supplies the
    catalog settings; everything else (packages, extensions, timeouts) is
    fixed in this function. The catalog is registered under the name
    ``nessie`` and uses ``S3FileIO`` for storage.

    Args:
        config_file: Path to a YAML file with a top-level ``spark`` mapping
            containing ``catalog_uri``, ``warehouse`` and ``storage_uri``.
        app_name: Name passed to ``SparkConf.setAppName``.

    Returns:
        The session from ``SparkSession.builder.config(...).getOrCreate()``,
        or ``None`` when anything fails (missing/invalid config file, missing
        key, Spark start-up error). Failures are printed, not raised, so
        callers must check for ``None`` before using the result.
    """
    try:
        # Explicit encoding so parsing does not depend on the platform locale.
        with open(config_file, "r", encoding="utf-8") as file:
            config = yaml.safe_load(file)

        spark_cfg = config['spark']
        catalog_uri = spark_cfg['catalog_uri']  # Nessie catalog endpoint
        warehouse = spark_cfg['warehouse']      # MinIO location to write to
        # S3 endpoint; per the original note this is the MinIO address
        # obtained from `docker inspect`.
        storage_uri = spark_cfg['storage_uri']

        # Configure Spark with necessary packages and Iceberg/Nessie settings
        conf = (
            pyspark.SparkConf()
                .setAppName(app_name)
                # Include necessary packages.
                # NOTE(review): an earlier comment mentioned awssdk 2.29.42
                # for Spark 3.5.4, but 2.24.8 is what is pinned below --
                # confirm which version is intended.
                .set('spark.jars.packages',
                     'org.postgresql:postgresql:42.7.3,'
                     'org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.5.0,'
                     'org.projectnessie.nessie-integrations:nessie-spark-extensions-3.5_2.12:0.77.1,'
                     'software.amazon.awssdk:bundle:2.24.8,'
                     'software.amazon.awssdk:url-connection-client:2.24.8')
                # Enable Iceberg and Nessie extensions
                .set('spark.sql.extensions',
                     'org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,'
                     'org.projectnessie.spark.extensions.NessieSparkSessionExtensions')
                # Configure Nessie catalog
                .set('spark.sql.catalog.nessie', 'org.apache.iceberg.spark.SparkCatalog')
                .set('spark.sql.catalog.nessie.uri', catalog_uri)
                .set('spark.sql.catalog.nessie.ref', 'main')
                .set('spark.sql.catalog.nessie.authentication.type', 'NONE')
                .set('spark.sql.catalog.nessie.catalog-impl', 'org.apache.iceberg.nessie.NessieCatalog')
                # Set MinIO as the S3 endpoint for Iceberg storage
                .set('spark.sql.catalog.nessie.s3.endpoint', storage_uri)
                .set('spark.sql.catalog.nessie.warehouse', warehouse)
                .set('spark.sql.catalog.nessie.io-impl', 'org.apache.iceberg.aws.s3.S3FileIO')
                # NOTE: 'spark.master' is deliberately not set here, so the
                # config's 'spark_master_uri' entry (if any) is not read.
                .set("spark.network.timeout", "50000s")
                .set("spark.executor.heartbeatInterval", "60s")
                .set("spark.task.maxFailures", "4")
        )

        # Start Spark session (getOrCreate returns an already-active session
        # if one exists).
        return SparkSession.builder.config(conf=conf).getOrCreate()

    except Exception as e:
        # Existing contract: report the problem and hand back None rather
        # than raising.
        print(f"Error: {e}")
        return None