Skip to content

Commit

Permalink
PARQUET-600: Add benchmarks for RLE-Level encoding
Browse files Browse the repository at this point in the history
Author: Uwe L. Korn <uwelk@xhochy.com>

Closes apache#95 from xhochy/parquet-600 and squashes the following commits:

87882fd [Uwe L. Korn] Use MaxBufferSize for size estimation
0fd2a34 [Uwe L. Korn] PARQUET-600: Add benchmarks for RLE-Level encoding

Change-Id: I9fa9bf6bb5f08ac760b5cf6c9f65b797c8bb7444
  • Loading branch information
xhochy authored and wesm committed May 17, 2016
1 parent 449e6da commit 5c12d2b
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 0 deletions.
1 change: 1 addition & 0 deletions cpp/src/parquet/column/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,4 @@ ADD_PARQUET_TEST(properties-test)
ADD_PARQUET_TEST(scanner-test)

ADD_PARQUET_BENCHMARK(column-io-benchmark)
ADD_PARQUET_BENCHMARK(level-benchmark)
69 changes: 69 additions & 0 deletions cpp/src/parquet/column/level-benchmark.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "benchmark/benchmark.h"

#include "parquet/column/levels.h"
#include "parquet/util/buffer.h"

namespace parquet {

namespace benchmark {

// Benchmarks RLE level encoding. Each timed iteration constructs a fresh
// LevelEncoder over a preallocated output buffer and encodes
// state.range_x() levels, all of which are 0.
static void BM_RleEncoding(::benchmark::State& state) {
  constexpr int16_t kMaxLevel = 1;

  // TODO: More than just all 0s
  std::vector<int16_t> input(state.range_x(), 0);
  const auto num_levels = input.size();

  // The output buffer is allocated once, outside the timed loop, with the
  // size reported by LevelEncoder::MaxBufferSize for this input.
  int64_t capacity = LevelEncoder::MaxBufferSize(Encoding::RLE, kMaxLevel, num_levels);
  auto encoded = std::make_shared<OwnedMutableBuffer>(capacity);

  while (state.KeepRunning()) {
    // A new encoder per iteration, so Init is part of the measured work and
    // every iteration writes into the same buffer.
    LevelEncoder encoder;
    encoder.Init(Encoding::RLE, kMaxLevel, num_levels, encoded->mutable_data(),
                 encoded->size());
    encoder.Encode(num_levels, input.data());
  }

  // Report throughput in terms of input bytes (int16_t per level).
  const auto bytes_processed = state.iterations() * state.range_x() * sizeof(int16_t);
  state.SetBytesProcessed(bytes_processed);
}

// Register the encoding benchmark for level counts spanning 1024 to 65536.
BENCHMARK(BM_RleEncoding)->Range(1024, 65536);

// Benchmarks RLE level decoding. state.range_x() levels (all 0) are encoded
// once up front; each timed iteration then constructs a fresh LevelDecoder,
// points it at the encoded buffer and decodes state.range_x() levels.
static void BM_RleDecoding(::benchmark::State& state) {
  LevelEncoder encoder;
  constexpr int16_t kMaxLevel = 1;

  // TODO: More than just all 0s
  std::vector<int16_t> values(state.range_x(), 0);
  const auto num_levels = values.size();

  // Untimed setup: produce the encoded input the decoder will consume.
  int64_t capacity = LevelEncoder::MaxBufferSize(Encoding::RLE, kMaxLevel, num_levels);
  auto encoded = std::make_shared<OwnedMutableBuffer>(capacity);
  encoder.Init(Encoding::RLE, kMaxLevel, num_levels, encoded->mutable_data(),
               encoded->size());
  encoder.Encode(num_levels, values.data());

  // NOTE(review): the raw encoder output is handed straight to SetData.
  // Confirm against levels.h that LevelDecoder::SetData does not expect any
  // framing (e.g. a length prefix) ahead of the RLE payload.
  while (state.KeepRunning()) {
    LevelDecoder decoder;
    decoder.SetData(Encoding::RLE, kMaxLevel, num_levels, encoded->data());
    // The vector that supplied the encoder input is passed to Decode as well
    // (presumably as the output destination), so no extra buffer is needed.
    decoder.Decode(state.range_x(), values.data());
  }

  // Report throughput in terms of decoded bytes (int16_t per level).
  const auto bytes_processed = state.iterations() * state.range_x() * sizeof(int16_t);
  state.SetBytesProcessed(bytes_processed);
}

// Register the decoding benchmark for level counts spanning 1024 to 65536.
BENCHMARK(BM_RleDecoding)->Range(1024, 65536);

} // namespace benchmark

} // namespace parquet

0 comments on commit 5c12d2b

Please sign in to comment.