In [None]:
import os
from pyspark.sql import SparkSession
import boto3
from botocore.client import Config

# --- STEP 0: Create the bucket in MinIO programmatically ---
# S3 client pointed at the MinIO endpoint; requests are signed with SigV4.
s3_client = boto3.client(
    's3',
    config=Config(signature_version='s3v4'),
    endpoint_url='http://minio:9000',
    aws_access_key_id='minioadmin',
    aws_secret_access_key='minioadmin',
)

# Bucket used as the Iceberg warehouse location further down.
bucket_name = 'iceberg-warehouse'
try:
    # HEAD succeeds only when the bucket exists and is accessible.
    s3_client.head_bucket(Bucket=bucket_name)
except s3_client.exceptions.ClientError as head_error:
    # Only a 404 means "bucket missing"; every other error is propagated unchanged.
    if head_error.response['Error']['Code'] != '404':
        raise
    s3_client.create_bucket(Bucket=bucket_name)
    print(f"Bucket '{bucket_name}' criado com sucesso.")
else:
    print(f"Bucket '{bucket_name}' já existe.")

# --- Essential Spark settings ---
# Maven artifacts handed to Spark through `spark.jars.packages`, kept as
# (group, artifact, version) triples and joined into "group:artifact:version".
_maven_coordinates = (
    ("org.apache.iceberg", "iceberg-spark-runtime-3.5_2.12", "1.5.0"),
    ("org.apache.hadoop", "hadoop-aws", "3.3.4"),
    ("software.amazon.awssdk", "bundle", "2.17.257"),
    ("org.postgresql", "postgresql", "42.6.0"),
    ("org.mongodb.spark", "mongo-spark-connector_2.12", "10.2.1"),
)
spark_jars_packages = [":".join(coordinate) for coordinate in _maven_coordinates]

# --- SparkSession initialization ---
# Builds (or reuses) a SparkSession whose default catalog, `minio_catalog`, is an
# Iceberg catalog with its warehouse in the MinIO bucket named by `bucket_name`.
# NOTE: getOrCreate() returns an already-running session when one exists; in that
# case start-up-only settings such as spark.jars.packages are not re-applied, so
# restart the kernel after changing them.
spark = (
    SparkSession.builder
    .appName("MBA_DataCollect_Lab")
    .config("spark.jars.packages", ",".join(spark_jars_packages))
    .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
    .config("spark.sql.defaultCatalog", "minio_catalog")
    .config("spark.sql.catalog.minio_catalog", "org.apache.iceberg.spark.SparkCatalog")
    # Without an explicit type (or catalog-impl) Iceberg falls back to its Hive
    # catalog, which needs a metastore this setup does not configure. The
    # warehouse-path-only layout used here corresponds to the Hadoop catalog.
    .config("spark.sql.catalog.minio_catalog.type", "hadoop")
    .config("spark.sql.catalog.minio_catalog.warehouse", f"s3a://{bucket_name}/")
    .config("spark.sql.catalog.minio_catalog.io-impl", "org.apache.iceberg.aws.s3.S3FileIO")
    # S3FileIO talks to S3 through the AWS SDK v2 and does not read the
    # spark.hadoop.fs.s3a.* settings below, so the MinIO endpoint, credentials and
    # path-style addressing are repeated here as catalog properties.
    .config("spark.sql.catalog.minio_catalog.s3.endpoint", "http://minio:9000")
    .config("spark.sql.catalog.minio_catalog.s3.access-key-id", "minioadmin")
    .config("spark.sql.catalog.minio_catalog.s3.secret-access-key", "minioadmin")
    .config("spark.sql.catalog.minio_catalog.s3.path-style-access", "true")
    # The SDK needs some region to build a client; the value is a placeholder
    # (assumes MinIO runs with its default region -- adjust if configured otherwise).
    .config("spark.sql.catalog.minio_catalog.client.region", "us-east-1")
    # Hadoop S3A connector settings, used for any s3a:// access that goes through
    # the Hadoop FileSystem API.
    .config("spark.hadoop.fs.s3a.endpoint", "http://minio:9000")
    .config("spark.hadoop.fs.s3a.access.key", "minioadmin")
    .config("spark.hadoop.fs.s3a.secret.key", "minioadmin")
    .config("spark.hadoop.fs.s3a.path.style.access", "true")
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
    .getOrCreate()
)

print("Spark Session e Catálogo Iceberg (MinIO) configurados com sucesso!")