In [None]:
# create the 'warehouse' S3 bucket

import boto3


# S3 resource pointed at the MinIO endpoint instead of AWS.
# NOTE(review): the access key pair is hard-coded; acceptable only for a local sandbox.
s3_resource = boto3.resource(
    's3',
    endpoint_url='http://minio:9000',
    aws_access_key_id='minioadmin',
    aws_secret_access_key='minioadmin',
    aws_session_token=None,
    config=boto3.session.Config(signature_version='s3v4'),
    verify=False,
)

# Modeled S3 error classes are exposed on the low-level client behind the resource.
s3_errors = s3_resource.meta.client.exceptions

# Re-running this cell is expected to hit BucketAlreadyOwnedByYou, which just means
# the bucket is already there. Any other failure (endpoint unreachable, bad
# credentials, ...) is a real problem and is allowed to propagate instead of being
# printed and ignored.
try:
    s3_resource.Bucket("warehouse").create()
except s3_errors.BucketAlreadyOwnedByYou as e:
    print(e)

In [None]:
# bootstrap the catalog

import requests


# Accept the terms of use and bootstrap the Lakekeeper server.
# The timeout keeps the cell from hanging forever if the service is not up yet.
r = requests.post(
    "http://lakekeeper:8181/management/v1/bootstrap",
    json={"accept-terms-of-use": True},
    timeout=30,
)

# The response may carry no body at all, in which case r.json() would raise;
# show the status code together with the decoded body when there is one.
(r.status_code, r.json() if r.content else None)

In [None]:
# initialise the 'iceberg' warehouse in Lakekeeper

import requests

# Request body for creating the warehouse named "iceberg".
payload = {
    "warehouse-name": "iceberg",
    "project-id": "00000000-0000-0000-0000-000000000000",
    # Where table data lives: the "warehouse" bucket on MinIO, under the "iceberg" prefix.
    "storage-profile": {
        "type": "s3",
        "bucket": "warehouse",
        "key-prefix": "iceberg",
        "assume-role-arn": None,
        "endpoint": "http://minio:9000",
        "region": "eu-central-1",
        "path-style-access": True,
        "flavor": "minio",
        "sts-enabled": True,
    },
    # Static access key used to reach the bucket.
    # NOTE(review): hard-coded credentials; acceptable only for a local sandbox.
    "storage-credential": {
        "type": "s3",
        "credential-type": "access-key",
        "aws-access-key-id": "minioadmin",
        "aws-secret-access-key": "minioadmin",
    },
}

# The timeout keeps the cell from hanging forever if Lakekeeper is unreachable.
r = requests.post(
    "http://lakekeeper:8181/management/v1/warehouse",
    json=payload,
    timeout=30,
)
r.json()

In [1]:
# check the config of the created warehouse, in particular, its prefix

import requests

# Fetch the catalog config for the "iceberg" warehouse; the interesting part of the
# response is defaults.prefix. The timeout keeps the cell from hanging forever if
# Lakekeeper is unreachable.
r = requests.get(
    "http://lakekeeper:8181/catalog/v1/config?warehouse=iceberg",
    timeout=30,
)
r.json()

{'overrides': {'uri': 'http://lakekeeper:8181/catalog'},
 'defaults': {'prefix': 'a7bd7196-4a24-11f0-9747-6722da9e9d8e',
  'rest-page-size': '100'},
 'endpoints': ['GET /v1/config',
  'GET /v1/{prefix}/namespaces',
  'HEAD /v1/{prefix}/namespaces/{namespace}',
  'POST /v1/{prefix}/namespaces',
  'GET /v1/{prefix}/namespaces/{namespace}',
  'DELETE /v1/{prefix}/namespaces/{namespace}',
  'POST /v1/{prefix}/namespaces/{namespace}/properties',
  'GET /v1/{prefix}/namespaces/{namespace}/tables',
  'POST /v1/{prefix}/namespaces/{namespace}/tables',
  'GET /v1/{prefix}/namespaces/{namespace}/tables/{table}',
  'POST /v1/{prefix}/namespaces/{namespace}/tables/{table}',
  'DELETE /v1/{prefix}/namespaces/{namespace}/tables/{table}',
  'HEAD /v1/{prefix}/namespaces/{namespace}/tables/{table}',
  'GET /v1/{prefix}/namespaces/{namespace}/tables/{table}/credentials',
  'POST /v1/{prefix}/tables/rename',
  'POST /v1/{prefix}/namespaces/{namespace}/register',
  'POST /v1/{prefix}/namespaces/{namespac

In [32]:
# check that Spark client works

from pyspark.sql import SparkSession


# Session-level settings plus the definition of a catalog registered under the
# name "iceberg": an Iceberg SparkCatalog of type "rest" talking to Lakekeeper,
# with S3FileIO for file access.
# NOTE(review): the Nessie session extension and the "ref" option look like
# leftovers from a Nessie-based setup; a REST catalog presumably does not need
# them - confirm before removing.
spark_settings = {
    "spark.sql.extensions":
        "org.projectnessie.spark.extensions.NessieSparkSessionExtensions, org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions",
    "spark.sql.catalog.iceberg": "org.apache.iceberg.spark.SparkCatalog",
    "spark.sql.catalog.iceberg.io-impl": "org.apache.iceberg.aws.s3.S3FileIO",
    "spark.sql.catalog.iceberg.type": "rest",
    "spark.sql.catalog.iceberg.uri": "http://lakekeeper:8181/catalog/",
    "spark.sql.catalog.iceberg.warehouse": "iceberg",
    "spark.sql.catalog.iceberg.ref": "main",
    "spark.sql.catalog.iceberg.cache-enabled": False,
}

# Apply the settings one by one, in the order listed, then obtain the session.
session_builder = SparkSession.builder
for option_name, option_value in spark_settings.items():
    session_builder = session_builder.config(option_name, option_value)
spark = session_builder.getOrCreate()

# Only show errors in the cell output.
spark.sparkContext.setLogLevel('ERROR')

# Kept for reference (disabled): create a namespace / list namespaces.
# spark.sql("""
#     CREATE NAMESPACE IF NOT EXISTS my_namespace
# """).show()
#
# spark.sql("""
#     SHOW NAMESPACES
# """).show()

# Smoke test: switch to the "default" namespace, then list the schemas.
spark.sql("""
    USE default
""")
spark.sql("""
    SHOW SCHEMAS
""").show()

+------------+
|   namespace|
+------------+
|     default|
|my_namespace|
+------------+



In [None]:
# check that Trino client works

from trino.dbapi import connect


# Connect to the Trino coordinator and grab a cursor for running statements.
trino_connection = connect(host="trino", port=8080, user="trino")
trino = trino_connection.cursor()

# Smoke test: list the schemas visible through Trino's "iceberg" catalog.
trino.execute("SHOW SCHEMAS FROM iceberg")
rows = trino.fetchall()
print(rows)

In [None]:
# Finally, check that the Avro tools jar is installed and that Java can launch it.
# The jar is run without naming a tool; presumably it then just prints its usage
# text (TODO confirm), which is enough to show the jar is present and runnable.

!java -jar /usr/bin/avro.jar