-
Notifications
You must be signed in to change notification settings - Fork 226
/
Copy pathBenchPadAndStack.cpp
82 lines (64 loc) · 2.89 KB
/
BenchPadAndStack.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "BenchUtils.hpp"
#include <cvcuda/OpPadAndStack.hpp>
#include <nvbench/nvbench.cuh>
/**
 * @brief NVBench body that times cvcuda::PadAndStack for element type T.
 *
 * Reads the "shape", "varShape" and "border" axes from @p state, allocates the
 * output tensor together with the per-image "top" and "left" S32 tensors, and
 * executes the operator on an ImageBatchVarShape input. A negative "varShape"
 * selects the Tensor input path, which is not implemented and raises
 * std::invalid_argument. Any std::exception thrown inside the body is caught
 * and turned into a skipped benchmark state carrying the exception message.
 *
 * @tparam T     Element type of the input images and the output tensor.
 * @param  state NVBench state providing axis values and running the measurement.
 */
template<typename T>
inline void PadAndStack(nvbench::state &state, nvbench::type_list<T>)
try
{
    // Shape axis is parsed into three components; the output tensor below uses them as N, H, W.
    const long3 inShape   = benchutils::GetShape<3>(state.get_string("shape"));
    const long  varShape  = state.get_int64("varShape");
    const long3 outShape  = inShape;

    NVCVBorderType borderType = benchutils::GetBorderType(state.get_string("border"));
    float          borderValue{0.f};

    // Bytes read: every input element plus one int per image for each of the top and left tensors.
    const auto bytesRead = inShape.x * inShape.y * inShape.z * sizeof(T) + inShape.x * sizeof(int) * 2;
    // Bytes written: every output element.
    const auto bytesWritten = outShape.x * outShape.y * outShape.z * sizeof(T);

    state.add_global_memory_reads(bytesRead);
    state.add_global_memory_writes(bytesWritten);

    cvcuda::PadAndStack op;

    // clang-format off

    nvcv::Tensor dst({{outShape.x, outShape.y, outShape.z, 1}, "NHWC"}, benchutils::GetDataType<T>());
    nvcv::Tensor top({{inShape.x, 1, 1, 1}, "NHWC"}, nvcv::TYPE_S32);
    nvcv::Tensor left({{inShape.x, 1, 1, 1}, "NHWC"}, nvcv::TYPE_S32);

    // Every image gets the same offsets: half of the second and third shape components.
    const auto topOffset  = inShape.y / 2;
    const auto leftOffset = inShape.z / 2;

    benchutils::FillTensor<int>(top, [topOffset](const long4 &){ return topOffset; });
    benchutils::FillTensor<int>(left, [leftOffset](const long4 &){ return leftOffset; });

    // negative var shape means use Tensor
    if (varShape < 0)
    {
        throw std::invalid_argument("Tensor not implemented for this operator");
    }

    // zero and positive var shape means use ImageBatchVarShape
    nvcv::ImageBatchVarShape src(inShape.x);

    // NOTE(review): the long2 arguments are presumably (width, height) and the allowed size variation - confirm against benchutils::FillImageBatch.
    benchutils::FillImageBatch<T>(src, long2{inShape.z, inShape.y}, long2{varShape, varShape},
                                  benchutils::RandomValues<T>());

    auto runOnce = [&](nvbench::launch &launch)
    {
        op(launch.get_stream(), src, dst, top, left, borderType, borderValue);
    };

    state.exec(nvbench::exec_tag::sync, runOnce);
}
catch (const std::exception &e)
{
    // Report the failure reason to NVBench instead of propagating the exception.
    state.skip(e.what());
}
// clang-format on
// Element types the PadAndStack benchmark is instantiated for.
using PadAndStackTypes = nvbench::type_list<uint8_t, float>;
// Register PadAndStack with NVBench over PadAndStackTypes. The type axis is
// named "InOutDataType"; the remaining axes supply the values the benchmark
// body reads via state.get_string("shape"), state.get_int64("varShape") and
// state.get_string("border"). A non-negative varShape selects the
// ImageBatchVarShape input path in the benchmark body.
NVBENCH_BENCH_TYPES(PadAndStack, NVBENCH_TYPE_AXES(PadAndStackTypes))
.set_type_axes_names({"InOutDataType"})
.add_string_axis("shape", {"1x1080x1920"})
.add_int64_axis("varShape", {0})
.add_string_axis("border", {"REFLECT101"});