In [12]:
from pyspark.sql import SparkSession

catalog_name = "rainbow-data-production-iceberg"
spark = SparkSession.builder.appName("Spark-Iceberg") \
    .config("spark.driver.userClassPathFirst", "false") \
    .config("spark.executor.userClassPathFirst", "false") \
    .config(f'spark.sql.catalog.{catalog_name}', 'org.apache.iceberg.spark.SparkCatalog') \
    .config(f'spark.sql.catalog.{catalog_name}.type', 'rest') \
    .config(f'spark.sql.catalog.{catalog_name}.uri', 'https://biglake.googleapis.com/iceberg/v1/restcatalog') \
    .config(f'spark.sql.catalog.{catalog_name}.warehouse', 'bq://projects/rainbow-data-production-483609') \
    .config(f'spark.sql.catalog.{catalog_name}.header.x-goog-user-project', 'rainbow-data-production-483609') \
    .config(f'spark.sql.catalog.{catalog_name}.rest.auth.type', 'org.apache.iceberg.gcp.auth.GoogleAuthManager') \
    .config(f'spark.sql.catalog.{catalog_name}.io-impl', 'org.apache.iceberg.gcp.gcs.GCSFileIO') \
    .config(f'spark.sql.catalog.{catalog_name}.rest-metrics-reporting-enabled', 'false') \
    .config('spark.sql.extensions', 'org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions') \
    .config('spark.sql.defaultCatalog', 'rainbow-data-production-iceberg') \
    .config(
        "spark.jars.packages",
        ",".join([
            "org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.10.1",
            "org.apache.iceberg:iceberg-gcp-bundle:1.10.1",
            "com.google.auth:google-auth-library-oauth2-http:1.41.0",
            "com.google.auth:google-auth-library-credentials:1.41.0",
            "com.google.guava:guava:32.1.2-jre",
            "com.google.cloud:google-cloud-storage:2.61.0",
            "com.google.cloud:libraries-bom:26.73.0",
        ])
    ) \
  .getOrCreate()


In [2]:
spark.sql("SHOW CATALOGS").show()

+--------------------+
|             catalog|
+--------------------+
|rainbow-data-prod...|
|       spark_catalog|
+--------------------+



In [3]:
spark.sql("SELECT current_catalog();").show()

+--------------------+
|   current_catalog()|
+--------------------+
|rainbow-data-prod...|
+--------------------+



In [4]:
# spark.sql("CREATE NAMESPACE IF NOT EXISTS test_namespace ;")
spark.sql("CREATE NAMESPACE IF NOT EXISTS test_namespace1 LOCATION 'gs://rainbow-data-production-iceberg/test_namespace1' WITH DBPROPERTIES ('gcp-region' = 'us-central1');")

DataFrame[]

In [5]:
spark.sql("SHOW NAMESPACES").show()

+---------------+
|      namespace|
+---------------+
|           test|
| test_namespace|
|test_namespace1|
+---------------+



In [9]:
spark.sql("USE test_namespace;")

DataFrame[]

In [13]:
spark.sql("CREATE TABLE IF NOT EXISTS sample_table (id BIGINT, data STRING) USING ICEBERG;")

DataFrame[]

In [14]:
spark.sql("SHOW TABLES").show()

+--------------+------------+-----------+
|     namespace|   tableName|isTemporary|
+--------------+------------+-----------+
|test_namespace|sample_table|      false|
+--------------+------------+-----------+



In [15]:
spark.sql("""
INSERT INTO sample_table VALUES
  (1, 'first'), (2, 'second'), (3, 'third')
""")

                                                                                

DataFrame[]

In [16]:
spark.sql("SELECT * FROM sample_table").show()

[Stage 2:>                                                          (0 + 1) / 1]

+---+------+
| id|  data|
+---+------+
|  1| first|
|  2|second|
|  3| third|
+---+------+



                                                                                

In [None]:
spark.sql("""
INSERT INTO sample_table VALUES
  (4, 'fourth'), (5, 'fifth'), (6, 'sixth');
""")

                                                                                

DataFrame[]

In [12]:
spark.sql("SELECT * FROM sample_table;").show()

[Stage 5:>                                                          (0 + 1) / 1]

+---+------+
| id|  data|
+---+------+
|  4|fourth|
|  5| fifth|
|  6| sixth|
|  1| first|
|  2|second|
|  3| third|
+---+------+



                                                                                