In [None]:
# import polars

import polars as pl

In [None]:
# use pyiceberg to get the location of the table

from pyiceberg.catalog.hive import HiveCatalog

# Connect to the Hive metastore that acts as the Iceberg catalog.
catalog = HiveCatalog(
    "iceberg",
    uri="thrift://hivemetastore:9083",
)

# Load the table handle and keep its metadata location, which the scan cell
# below hands to Polars.
table = catalog.load_table("default.bank_transfers")
table_location = table.metadata_location

In [None]:
# Build a scan over the Iceberg table from its metadata location. The result is
# bound to `bank_transfers`, the name the SQL cells below refer to.

bank_transfers = pl.scan_iceberg(source=table_location)

In [None]:
# Query the scan with SQL, addressing it by the Python variable name
# (`bank_transfers`) that the previous cell created.

select_all_query = """
    SELECT * FROM bank_transfers
    """
pl.sql(select_all_query).collect()

In [None]:
# Run SQL directly on the frame: inside the query the frame is addressed as
# `self` rather than by its variable name.

self_query = """
    SELECT * FROM self
    """
bank_transfers.sql(self_query).collect()

In [None]:
# Row filter written with the expression API instead of SQL: keep only the
# transfers whose sender is "Charles B.".

sent_by_charles = pl.col("transferred_from").eq("Charles B.")
bank_transfers.filter(sent_by_charles).collect()

In [None]:
# Aggregate per recipient: how many transfers each one received and the summed
# amount, computed in SQL and materialised with `.collect()`.

pl.sql(
    query="""
    SELECT
        transferred_to,
        COUNT(*) AS count,
        SUM(amount) AS total_transfers
    FROM bank_transfers
    GROUP BY transferred_to
    """
).collect()

In [None]:
# Is it really evaluated lazily?
# Build the same per-recipient aggregation, but stop before `.collect()` and
# look at the type of the object `pl.sql` hands back.

recipient_totals_query = """
    SELECT
        transferred_to,
        COUNT(*) AS count,
        SUM(amount) AS total_transfers
    FROM bank_transfers
    GROUP BY transferred_to
    """
df = pl.sql(recipient_totals_query)
type(df)

In [None]:
# What does it contain, then?
# `explain()` returns the query plan as a single multi-line string. Left as the
# cell's last expression, the notebook would show its quoted repr with literal
# "\n" escapes, so print it to render the plan line by line.

print(df.explain())

In [None]:
# Let's see the results again!
# `df` was assigned from `pl.sql(...)` without `.collect()`, so calling
# `collect()` here is what runs the query and produces the displayed result.

df.collect()

In [None]:
# What about time travel?
# Pull the metadata log off the table's metadata and display it; the next cell
# picks one of its entries to scan.

table_metadata = table.metadata
table_log = table_metadata.metadata_log
table_log

In [None]:
# Time travel: scan the table through one of the metadata files recorded in
# `table_log` instead of the table's current metadata location.
# NOTE(review): presumably the log lists earlier metadata files oldest first;
# confirm against the `table_log` output before choosing an index.
METADATA_LOG_INDEX = 4  # which metadata-log entry to read

# Fail with an actionable message when the log is shorter than expected
# (same exception type the bare indexing would raise).
if METADATA_LOG_INDEX >= len(table_log):
    raise IndexError(
        f"metadata log has only {len(table_log)} entries; "
        f"cannot read entry {METADATA_LOG_INDEX}"
    )

pl.scan_iceberg(
    table_log[METADATA_LOG_INDEX].metadata_file
).collect()