Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: apache/arrow-rs
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: main
Choose a base ref
...
head repository: cube-js/arrow-rs
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: cube
Choose a head ref
Can’t automatically merge. Don’t worry, you can still create the pull request.
Loading
Showing with 6,626 additions and 2,041 deletions.
  1. +1 −1 .github/workflows/miri.yaml
  2. +54 −54 .github/workflows/rust.yml
  3. +10 −3 arrow-flight/Cargo.toml
  4. +2 −2 arrow-pyarrow-integration-testing/Cargo.toml
  5. +51 −3 arrow/Cargo.toml
  6. +1 −1 arrow/src/alloc/alignment.rs
  7. +65 −0 arrow/src/array/array.rs
  8. +1 −3 arrow/src/array/array_binary.rs
  9. +13 −0 arrow/src/array/array_boolean.rs
  10. +25 −0 arrow/src/array/array_primitive.rs
  11. +50 −0 arrow/src/array/builder.rs
  12. +25 −1 arrow/src/array/data.rs
  13. +10 −0 arrow/src/array/equal/mod.rs
  14. +44 −0 arrow/src/array/mod.rs
  15. +49 −14 arrow/src/array/ord.rs
  16. +69 −3 arrow/src/array/transform/mod.rs
  17. +4 −0 arrow/src/bitmap.rs
  18. +17 −2 arrow/src/compute/kernels/arithmetic.rs
  19. +890 −55 arrow/src/compute/kernels/cast.rs
  20. +167 −52 arrow/src/compute/kernels/cast_utils.rs
  21. +193 −106 arrow/src/compute/kernels/comparison.rs
  22. +1 −0 arrow/src/compute/kernels/filter.rs
  23. +110 −0 arrow/src/compute/kernels/if_op.rs
  24. +881 −0 arrow/src/compute/kernels/merge.rs
  25. +2 −0 arrow/src/compute/kernels/mod.rs
  26. +48 −2 arrow/src/compute/kernels/sort.rs
  27. +3 −3 arrow/src/compute/kernels/substring.rs
  28. +202 −33 arrow/src/compute/kernels/take.rs
  29. +2 −0 arrow/src/csv/writer.rs
  30. +57 −0 arrow/src/datatypes/datatype.rs
  31. +4 −0 arrow/src/datatypes/field.rs
  32. +1 −0 arrow/src/datatypes/mod.rs
  33. +30 −1 arrow/src/datatypes/native.rs
  34. +15 −0 arrow/src/datatypes/numeric.rs
  35. +2 −1 arrow/src/datatypes/schema.rs
  36. +15 −0 arrow/src/datatypes/types.rs
  37. +2 −2 arrow/src/ffi.rs
  38. +36 −6 arrow/src/ipc/convert.rs
  39. +6 −12 arrow/src/ipc/reader.rs
  40. +80 −0 arrow/src/ipc/writer.rs
  41. +1 −1 arrow/src/json/reader.rs
  42. +1 −1 arrow/src/json/writer.rs
  43. +34 −0 arrow/src/util/bit_util.rs
  44. +5 −1 arrow/src/util/display.rs
  45. +1 −0 arrow/src/util/integration_util.rs
  46. +9 −10 arrow/src/util/pretty.rs
  47. +1 −1 arrow/test/dependency/default-features/Cargo.toml
  48. +1 −1 arrow/test/dependency/no-default-features/Cargo.toml
  49. +1 −1 arrow/test/dependency/simd/Cargo.toml
  50. +2 −0 dev/release/rat_exclude_files.txt
  51. +12 −1 integration-testing/Cargo.toml
  52. +45 −7 parquet/Cargo.toml
  53. +146 −39 parquet/src/arrow/array_reader.rs
  54. +167 −4 parquet/src/arrow/arrow_array_reader.rs
  55. +1 −0 parquet/src/arrow/arrow_reader.rs
  56. +87 −2 parquet/src/arrow/arrow_writer.rs
  57. +44 −6 parquet/src/arrow/converter.rs
  58. +9 −0 parquet/src/arrow/levels.rs
  59. +73 −6 parquet/src/arrow/schema.rs
  60. +11 −0 parquet/src/basic.rs
  61. +13 −6 parquet/src/column/page.rs
  62. +2 −2 parquet/src/column/reader.rs
  63. +81 −15 parquet/src/column/writer.rs
  64. +51 −18 parquet/src/data_type.rs
  65. +2 −1 parquet/src/encodings/encoding.rs
  66. +263 −0 parquet/src/file/encryption.rs
  67. +168 −13 parquet/src/file/footer.rs
  68. +78 −15 parquet/src/file/metadata.rs
  69. +5 −0 parquet/src/file/mod.rs
  70. +14 −0 parquet/src/file/properties.rs
  71. +178 −12 parquet/src/file/serialized_reader.rs
  72. +382 −35 parquet/src/file/writer.rs
  73. +6 −2 parquet/src/record/api.rs
  74. +6 −11 parquet/src/schema/parser.rs
  75. +10 −1 parquet/src/schema/types.rs
  76. +1,440 −1,440 parquet/src/util/bit_packing.rs
  77. +0 −1 parquet/src/util/bit_util.rs
  78. +21 −21 parquet/src/util/hash_util.rs
  79. +10 −2 parquet_derive/Cargo.toml
  80. +2 −2 parquet_derive/README.md
  81. +3 −3 parquet_derive_test/Cargo.toml
  82. +2 −0 rust-toolchain.toml
2 changes: 1 addition & 1 deletion .github/workflows/miri.yaml
Original file line number Diff line number Diff line change
@@ -30,7 +30,7 @@ jobs:
strategy:
matrix:
arch: [amd64]
rust: [nightly-2021-07-04]
rust: [nightly-2022-06-22]
steps:
- uses: actions/checkout@v2
with:
108 changes: 54 additions & 54 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
@@ -31,7 +31,7 @@ jobs:
strategy:
matrix:
arch: [amd64]
rust: [stable]
rust: [nightly-2024-01-29]
container:
image: ${{ matrix.arch }}/rust
env:
@@ -73,7 +73,7 @@ jobs:
strategy:
matrix:
arch: [amd64]
rust: [stable]
rust: [nightly-2024-01-29]
container:
image: ${{ matrix.arch }}/rust
env:
@@ -120,61 +120,61 @@ jobs:
cargo run --example read_csv_infer_schema
# test the --features "simd" of the arrow crate. This requires nightly.
linux-test-simd:
name: Test SIMD on AMD64 Rust ${{ matrix.rust }}
runs-on: ubuntu-latest
strategy:
matrix:
arch: [amd64]
rust: [nightly-2021-07-04]
container:
image: ${{ matrix.arch }}/rust
env:
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
ARROW_TEST_DATA: /__w/arrow-rs/arrow-rs/testing/data
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Cache Cargo
uses: actions/cache@v2
with:
path: /github/home/.cargo
# this key equals the ones on `linux-build-lib` for re-use
key: cargo-cache-
- name: Cache Rust dependencies
uses: actions/cache@v2
with:
path: /github/home/target
# this key equals the ones on `linux-build-lib` for re-use
key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}
- name: Setup Rust toolchain
run: |
rustup toolchain install ${{ matrix.rust }}
rustup default ${{ matrix.rust }}
rustup component add rustfmt
- name: Run tests
run: |
export CARGO_HOME="/github/home/.cargo"
export CARGO_TARGET_DIR="/github/home/target"
cd arrow
cargo test --features "simd"
- name: Check new project build with simd features
run: |
export CARGO_HOME="/github/home/.cargo"
export CARGO_TARGET_DIR="/github/home/target"
cd arrow/test/dependency/simd
cargo check
# linux-test-simd:
# name: Test SIMD on AMD64 Rust ${{ matrix.rust }}
# runs-on: ubuntu-latest
# strategy:
# matrix:
# arch: [amd64]
# rust: []
# container:
# image: ${{ matrix.arch }}/rust
# env:
# # Disable full debug symbol generation to speed up CI build and keep memory down
# # "1" means line tables only, which is useful for panic tracebacks.
# RUSTFLAGS: "-C debuginfo=1"
# ARROW_TEST_DATA: /__w/arrow-rs/arrow-rs/testing/data
# steps:
# - uses: actions/checkout@v2
# with:
# submodules: true
# - name: Cache Cargo
# uses: actions/cache@v2
# with:
# path: /github/home/.cargo
# # this key equals the ones on `linux-build-lib` for re-use
# key: cargo-cache-
# - name: Cache Rust dependencies
# uses: actions/cache@v2
# with:
# path: /github/home/target
# # this key equals the ones on `linux-build-lib` for re-use
# key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }}
# - name: Setup Rust toolchain
# run: |
# rustup toolchain install ${{ matrix.rust }}
# rustup default ${{ matrix.rust }}
# rustup component add rustfmt
# - name: Run tests
# run: |
# export CARGO_HOME="/github/home/.cargo"
# export CARGO_TARGET_DIR="/github/home/target"
# cd arrow
# cargo test --features "simd"
# - name: Check new project build with simd features
# run: |
# export CARGO_HOME="/github/home/.cargo"
# export CARGO_TARGET_DIR="/github/home/target"
# cd arrow/test/dependency/simd
# cargo check

windows-and-macos:
name: Test on ${{ matrix.os }} Rust ${{ matrix.rust }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [windows-latest, macos-latest]
rust: [stable]
rust: [nightly-2024-01-29]
steps:
- uses: actions/checkout@v2
with:
@@ -202,7 +202,7 @@ jobs:
strategy:
matrix:
arch: [amd64]
rust: [stable]
rust: [nightly-2024-01-29]
container:
image: ${{ matrix.arch }}/rust
env:
@@ -257,7 +257,7 @@ jobs:
strategy:
matrix:
arch: [amd64]
rust: [stable]
rust: [nightly-2024-01-29]
steps:
- uses: actions/checkout@v2
with:
@@ -297,7 +297,7 @@ jobs:
strategy:
matrix:
arch: [amd64]
rust: [nightly-2021-07-04]
rust: [nightly-2024-01-29]
container:
image: ${{ matrix.arch }}/rust
env:
@@ -341,7 +341,7 @@ jobs:
strategy:
matrix:
arch: [amd64]
rust: [stable]
rust: [nightly-2024-01-29]
container:
image: ${{ matrix.arch }}/rust
env:
13 changes: 10 additions & 3 deletions arrow-flight/Cargo.toml
Original file line number Diff line number Diff line change
@@ -18,15 +18,15 @@
[package]
name = "arrow-flight"
description = "Apache Arrow Flight"
version = "5.0.0-SNAPSHOT"
version = "5.0.0"
edition = "2018"
authors = ["Apache Arrow <dev@arrow.apache.org>"]
homepage = "https://github.com/apache/arrow-rs"
repository = "https://github.com/apache/arrow-rs"
license = "Apache-2.0"

[dependencies]
arrow = { path = "../arrow", version = "5.0.0-SNAPSHOT" }
arrow = { path = "../arrow", version = "5.0.0" }
base64 = "0.13"
tonic = "0.4"
bytes = "1"
@@ -41,8 +41,15 @@ futures = { version = "0.3", default-features = false, features = ["alloc"]}
tonic-build = "0.4"
# Pin specific version of the tonic-build dependencies to avoid auto-generated
# (and checked in) arrow.flight.protocol.rs from changing
proc-macro2 = "=1.0.27"
proc-macro2 = "1.0.27"

#[lib]
#name = "flight"
#path = "src/lib.rs"

# Just silence all of the problematic rules in the original code
# This list should be updated during rebasing on new release, before our commits, but after toolchain updates
# That way we would disable only the rules that are violated by upstream code, but not by ours

[lints.clippy]
needless_borrow = "allow"
4 changes: 2 additions & 2 deletions arrow-pyarrow-integration-testing/Cargo.toml
Original file line number Diff line number Diff line change
@@ -18,7 +18,7 @@
[package]
name = "arrow-pyarrow-integration-testing"
description = ""
version = "5.0.0-SNAPSHOT"
version = "5.0.0"
homepage = "https://github.com/apache/arrow-rs"
repository = "https://github.com/apache/arrow-rs"
authors = ["Apache Arrow <dev@arrow.apache.org>"]
@@ -31,7 +31,7 @@ name = "arrow_pyarrow_integration_testing"
crate-type = ["cdylib"]

[dependencies]
arrow = { path = "../arrow", version = "5.0.0-SNAPSHOT" }
arrow = { path = "../arrow", version = "5.0.0" }
pyo3 = { version = "0.12.1", features = ["extension-module"] }

[package.metadata.maturin]
54 changes: 51 additions & 3 deletions arrow/Cargo.toml
Original file line number Diff line number Diff line change
@@ -17,7 +17,7 @@

[package]
name = "arrow"
version = "5.0.0-SNAPSHOT"
version = "5.0.0"
description = "Rust implementation of Apache Arrow"
homepage = "https://github.com/apache/arrow-rs"
repository = "https://github.com/apache/arrow-rs"
@@ -52,7 +52,7 @@ packed_simd = { version = "0.3", optional = true, package = "packed_simd_2" }
chrono = "0.4"
flatbuffers = { version = "=2.0.0", optional = true }
hex = "0.4"
prettytable-rs = { version = "0.8.0", optional = true }
comfy-table = { version = "4.0", optional = true, default-features = false }
lexical-core = "^0.7"
multiversion = "0.6.1"
bitflags = "1.2.1"
@@ -63,7 +63,7 @@ avx512 = []
csv = ["csv_crate"]
ipc = ["flatbuffers"]
simd = ["packed_simd"]
prettyprint = ["prettytable-rs"]
prettyprint = ["comfy-table"]
js = ["getrandom/js"]
# The test utils feature enables code used in benchmarks and tests but
# not the core arrow code itself
@@ -76,6 +76,7 @@ memory-check = []
[dev-dependencies]
criterion = "0.3"
flate2 = "1"
rand = "0.8"
tempfile = "3"

[build-dependencies]
@@ -163,3 +164,50 @@ harness = false
[[bench]]
name = "buffer_create"
harness = false


# Just silence all of the problematic rules in the original code
# This list should be updated during rebasing on new release, before our commits, but after toolchain updates
# That way we would disable only the rules that are violated by upstream code, but not by ours

[lints.rust]
deprecated = "allow"
unreachable_patterns = "allow"
unused_imports = "allow"
unused_must_use = "allow"

[lints.clippy]
# Can drop this rule after rebase on commit 55d6073 "Require Send+Sync bounds for Allocation trait (#1945)", first released in 18.0.0
arc_with_non_send_sync = "allow"
borrow_deref_ref = "allow"
derivable_impls = "allow"
get_first = "allow"
err_expect = "allow"
extra_unused_lifetimes = "allow"
extra_unused_type_parameters = "allow"
into_iter_on_ref = "allow"
manual_bits = "allow"
manual_slice_size_calculation = "allow"
map_flatten = "allow"
needless_borrow = "allow"
needless_borrowed_reference = "allow"
needless_borrows_for_generic_args = "allow"
needless_late_init = "allow"
needless_lifetimes = "allow"
needless_question_mark = "allow"
needless_return = "allow"
non_canonical_partial_ord_impl = "allow"
non_minimal_cfg = "allow"
nonminimal_bool = "allow"
only_used_in_recursion = "allow"
partialeq_to_none = "allow"
redundant_closure = "allow"
redundant_closure_call = "allow"
seek_from_current = "allow"
suspicious_doc_comments = "allow"
to_string_in_format_args = "allow"
unnecessary_cast = "allow"
unnecessary_fallible_conversions = "allow"
unwrap_or_default = "allow"
useless_conversion = "allow"
useless_vec = "allow"
2 changes: 1 addition & 1 deletion arrow/src/alloc/alignment.rs
Original file line number Diff line number Diff line change
@@ -62,7 +62,7 @@ pub const ALIGNMENT: usize = 1 << 6;
// - https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/riscv/sifive-l2-cache.txt#L41
// in general all of them are the same.
/// Cache and allocation multiple alignment size
#[cfg(target_arch = "riscv")]
#[cfg(target_arch = "riscv64")]
pub const ALIGNMENT: usize = 1 << 6;

// This size is same across all hardware for this architecture.
Loading