Description
Apache Iceberg version
1.9.1 (latest release)
Query engine
Spark
Please describe the bug 🐞
zOrder functionality not working with:
(a) either spark APIs
(b) through call function
version used:
org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.9.1
`import org.apache.spark.sql.SparkSession
import org.apache.iceberg.catalog.TableIdentifier
import org.apache.iceberg.hive.HiveCatalog
import org.apache.iceberg.spark.SparkActions
import scala.collection.JavaConverters._
object IcebergRewriteZOrderExample {
def main(args: Array[String]): Unit = {
// Initialize SparkSession with Iceberg extensions enabled
val spark = SparkSession.builder()
.appName("IcebergRewriteZOrder")
.config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
.config("spark.sql.catalog.hive_catalog", "org.apache.iceberg.spark.SparkCatalog")
.config("spark.sql.catalog.hive_catalog.type", "hive")
.config("spark.sql.catalog.hive_catalog.warehouse", "s3://your-bucket/warehouse-path/")
.getOrCreate()
// Initialize HiveCatalog programmatically (optional, can also use SparkSession catalog)
val hiveCatalog = new HiveCatalog()
hiveCatalog.setConf(spark.sparkContext.hadoopConfiguration)
val catalogProperties = Map(
"uri" -> "thrift://your-hive-metastore-host:9083",
"warehouse" -> "s3://your-bucket/warehouse-path/"
).asJava
hiveCatalog.initialize("hive_catalog", catalogProperties)
// Define your Iceberg table identifier
val tableName = "your_database.your_table"
val Array(dbName, tblName) = tableName.split("\\.")
val tableId = TableIdentifier.of(dbName, tblName)
// Load the Iceberg table from the catalog
val table = hiveCatalog.loadTable(tableId)
// Columns to use for Z-Order sorting
val zOrderColumns = Array("p_id", "refunds_created_date")
// Perform rewrite data files with Z-Order strategy and target file size 128 MB
SparkActions.get()
.rewriteDataFiles(table)
.zOrder(zOrderColumns: _*) // Use Z-Order strategy
.option("target-file-size-bytes", "134217728") // 128 MB target file size
.execute()
println("Iceberg data files rewritten successfully with Z-Order sorting.")
// Stop SparkSession
spark.stop()
}
}
`
Error:
An error was encountered: <console>: error: value zOrder is not a member of org.apache.iceberg.actions.RewriteDataFiles SparkActions.get().rewriteDataFiles(table).zOrder(zOrderColumns: _*).option("target-file-size-bytes", "134217728").execute()
code for spark.sql():
`val tableName = "your_database.your_table"
val zorderColumns = Seq("refunds_merchant_id", "refunds_created_date")
val sortOrder = s"zorder(${zorderColumns.mkString(",")})"
val sqlCommand =
s"""
|CALL hive_catalog.system.rewrite_data_files(
| table => '$tableName',
| strategy => 'sort',
| sort_order => '$sortOrder',
| options => map('target-file-size-bytes', '134217728')
|)
|""".stripMargin
spark.sql(sqlCommand)
Error:
An error was encountered:
java.lang.NoSuchMethodError: 'void org.apache.spark.sql.catalyst.trees.Origin.(scala.Option, scala.Option)'
at org.apache.spark.sql.catalyst.parser.extensions.IcebergParserUtils$.position(IcebergSqlExtensionsAstBuilder.scala:278)
at org.apache.spark.sql.catalyst.parser.extensions.IcebergParserUtils$.withOrigin(IcebergSqlExtensionsAstBuilder.scala:268)
at org.apache.spark.sql.catalyst.parser.extensions.IcebergSqlExtensionsAstBuilder.visitSingleStatement(IcebergSqlExtensionsAstBuilder.scala:235)
at org.apache.spark.sql.catalyst.parser.extensions.IcebergSqlExtensionsAstBuilder.visitSingleStatement(IcebergSqlExtensionsAstBuilder.scala:58)
at org.apache.spark.sql.catalyst.parser.extensions.IcebergSqlExtensionsParser$SingleStatementContext.accept(IcebergSqlExtensionsParser.java:129)
at org.apache.iceberg.shaded.org.antlr.v4.runtime.tree.AbstractParseTreeVisitor.visit(AbstractParseTreeVisitor.java:18)
at org.apache.spark.sql.catalyst.parser.extensions.IcebergSparkSqlExtensionsParser.$anonfun$parsePlan$1(IcebergSparkSqlExtensionsParser.scala:104)
at org.apache.spark.sql.catalyst.parser.extensions.IcebergSparkSqlExtensionsParser.parse(IcebergSparkSqlExtensionsParser.scala:140)
at org.apache.spark.sql.catalyst.parser.extensions.IcebergSparkSqlExtensionsParser.parsePlan(IcebergSparkSqlExtensionsParser.scala:104)
at org.apache.spark.sql.SparkSession.$anonfun$sql$5(SparkSession.scala:685)
at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:219)
at org.apache.spark.sql.SparkSession.$anonfun$sql$4(SparkSession.scala:684)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:901)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:683)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:714)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:745)
... 54 elided`
Willingness to contribute
- I can contribute a fix for this bug independently
- I would be willing to contribute a fix for this bug with guidance from the Iceberg community
- I cannot contribute a fix for this bug at this time