-
Notifications
You must be signed in to change notification settings - Fork 226
/
Copy pathBenchMinMaxLoc.hpp
121 lines (96 loc) · 4.67 KB
/
BenchMinMaxLoc.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef CVCUDA_BENCH_MINMAXLOC_HPP
#define CVCUDA_BENCH_MINMAXLOC_HPP
#include "BenchUtils.hpp"
namespace benchutils {
/**
 * @brief Create a randomizer whose distribution limits exclude the given bounds.
 *
 * The distribution is constructed with (min + 1, max - 1), i.e. each limit is moved one step inward
 * from the corresponding argument.  The overload only participates for integral types for which
 * nvcv::cuda::NumComponents is zero.
 *
 * @tparam VT Value type of the generated numbers.
 * @tparam R  Randomizer type that is returned.
 * @tparam RE Random engine type taken from R.
 * @tparam UD Distribution type taken from R.
 *
 * @param min Lower bound to exclude (defaults to nvcv::cuda::TypeTraits<VT>::min).
 * @param max Upper bound to exclude (defaults to nvcv::cuda::TypeTraits<VT>::max).
 * @param rng Random engine handed to the randomizer (defaults to DefaultGenerator()).
 *
 * @return An R built from the narrowed distribution and @p rng.
 */
template<typename VT, typename R = Randomizer<VT>, typename RE = typename R::RE, typename UD = typename R::UD,
         typename = nvcv::cuda::Require<nvcv::cuda::NumComponents<VT> == 0 && std::is_integral_v<VT>>>
inline auto RandomValuesWithoutMinMax(VT min = nvcv::cuda::TypeTraits<VT>::min,
                                      VT max = nvcv::cuda::TypeTraits<VT>::max, RE rng = DefaultGenerator())
{
    // `auto` keeps whatever type the arithmetic yields (small integral types promote to int).
    const auto lowerLimit = min + 1;
    const auto upperLimit = max - 1;

    return R{UD(lowerLimit, upperLimit), rng};
}
/**
 * @brief Plant minimum and maximum values of VT at shuffled positions of a host buffer.
 *
 * For every index along shape.x, the rows (shape.y) are visited in order.  Each row receives a run of
 * nvcv::cuda::TypeTraits<VT>::min values followed by an equally long run of
 * nvcv::cuda::TypeTraits<VT>::max values, and is then shuffled along shape.z.  The run length is the
 * per-row quota ceil(locs / shape.y), reduced on later rows so that no more than @p locs minima
 * (and @p locs maxima) are written per x index.
 *
 * @tparam VT Element type stored in the buffer.
 * @tparam R  Randomizer type, only used to derive RE.
 * @tparam RE Random engine type.
 *
 * @param srcVec          Byte buffer accessed through ValueAt<VT>.
 * @param shape           Extents iterated as (x, y, z); z is the shuffled dimension.
 * @param strides         Strides forwarded to ValueAt<VT> together with each coordinate.
 * @param locs            Number of minima (and of maxima) to plant per x index.
 * @param randomGenerator Engine used by std::shuffle (defaults to DefaultGenerator()).
 *
 * @throws std::runtime_error if shape.y is not positive, or if twice the per-row quota does not
 *         leave room in a row (locsPerHeight * 2 >= shape.z).
 */
template<typename VT, typename R = Randomizer<VT>, typename RE = typename R::RE>
inline void RandomMinMax(std::vector<uint8_t> &srcVec, const long3 &shape, const long3 &strides, long locs,
                         RE randomGenerator = DefaultGenerator())
{
    // Reject an empty height up front: it is the divisor of the quota computation below.
    if (shape.y <= 0)
    {
        throw std::runtime_error("Shape height must be positive to place min/max locations");
    }

    // Integer ceiling division.  Dividing the two longs first and then calling std::ceil would
    // truncate the quotient, planting fewer than `locs` values whenever locs % shape.y != 0.
    const long locsPerHeight = (locs + shape.y - 1) / shape.y;

    if (locsPerHeight * 2 >= shape.z)
    {
        throw std::runtime_error("Locations is bigger than available pixels");
    }

    for (long x = 0; x < shape.x; ++x)
    {
        long countLocs = 0; // quota consumed so far within this x index

        for (long y = 0; y < shape.y; ++y)
        {
            // Use the full per-row quota unless that would exceed `locs`; once the quota is used up
            // the remainder is zero or negative and the two fill loops below do nothing.
            long numLocs = (countLocs + locsPerHeight > locs) ? (locs - countLocs) : locsPerHeight;

            for (long z = 0; z < numLocs; ++z)
            {
                ValueAt<VT>(srcVec, strides, long3{x, y, z}) = nvcv::cuda::TypeTraits<VT>::min;
            }
            for (long z = numLocs; z < numLocs * 2; ++z)
            {
                ValueAt<VT>(srcVec, strides, long3{x, y, z}) = nvcv::cuda::TypeTraits<VT>::max;
            }

            // Scatter the planted values over the whole row.
            // NOTE(review): the end iterator is obtained by addressing z == shape.z, one element past
            // the row; confirm ValueAt tolerates that index for the last row of the buffer.
            std::shuffle(&ValueAt<VT>(srcVec, strides, long3{x, y, 0}),
                         &ValueAt<VT>(srcVec, strides, long3{x, y, shape.z}), randomGenerator);

            countLocs += locsPerHeight;
        }
    }
}
/**
 * @brief Fill a tensor with random values plus a requested number of min/max entries.
 *
 * A host staging buffer is filled via FillBuffer using RandomValuesWithoutMinMax<VT>(), then
 * RandomMinMax<VT> plants the extreme values, and finally the buffer is copied to the tensor's
 * base pointer with a host-to-device cudaMemcpy.  Only the first three dimensions of the tensor
 * (shape and strides 0..2) are used.
 *
 * @tparam VT Element type of the tensor.
 *
 * @param tensor    Tensor to fill; exported as nvcv::TensorDataStridedCuda.
 * @param locations Forwarded to RandomMinMax as the number of min/max locations.
 *
 * @throws std::invalid_argument if the tensor rank is neither 3 nor 4.
 */
template<typename VT>
inline void FillTensorWithMinMax(const nvcv::Tensor &tensor, long locations)
{
    auto tensorData = tensor.exportData<nvcv::TensorDataStridedCuda>();
    CVCUDA_CHECK_DATA(tensorData);

    const bool rankSupported = (tensor.rank() == 3) || (tensor.rank() == 4);
    if (!rankSupported)
    {
        throw std::invalid_argument("Tensor rank is not 3 or 4");
    }

    long3 leadingStrides{tensorData->stride(0), tensorData->stride(1), tensorData->stride(2)};
    long3 leadingShape{tensorData->shape(0), tensorData->shape(1), tensorData->shape(2)};

    // Staging buffer size: outermost stride times outermost extent.
    long bufSize = nvcv::cuda::GetElement(leadingStrides, 0) * nvcv::cuda::GetElement(leadingShape, 0);

    std::vector<uint8_t> tensorVec(bufSize);

    FillBuffer<VT>(tensorVec, leadingShape, leadingStrides, RandomValuesWithoutMinMax<VT>());
    RandomMinMax<VT>(tensorVec, leadingShape, leadingStrides, locations);

    CUDA_CHECK_ERROR(cudaMemcpy(tensorData->basePtr(), tensorVec.data(), bufSize, cudaMemcpyHostToDevice));
}
/**
 * @brief Populate a var-shape image batch with images containing planted min/max values.
 *
 * One image is created and pushed per unit of the batch capacity.  Each image gets a width drawn
 * from RandomValues<int>(size.x - varSize.x, size.x) and a height drawn from
 * RandomValues<int>(size.y - varSize.y, size.y).  Its first plane is staged in a host buffer that
 * is filled via FillBuffer using RandomValuesWithoutMinMax<VT>(), passed through RandomMinMax<VT>,
 * and uploaded with a host-to-device cudaMemcpy2D.
 *
 * @tparam VT Element type of the images; also selects the format through GetFormat<VT>().
 *
 * @param imageBatch Batch that receives the generated images.
 * @param size       Upper limits (x: width, y: height) given to the size generators.
 * @param varSize    Amount subtracted from @p size to obtain the lower limits.
 * @param locations  Forwarded to RandomMinMax as the number of min/max locations.
 */
template<typename VT>
inline void FillImageBatchWithMinMax(nvcv::ImageBatchVarShape &imageBatch, long2 size, long2 varSize, long locations)
{
    auto widthSource  = RandomValues<int>(static_cast<int>(size.x - varSize.x), static_cast<int>(size.x));
    auto heightSource = RandomValues<int>(static_cast<int>(size.y - varSize.y), static_cast<int>(size.y));

    for (int imageIdx = 0; imageIdx < imageBatch.capacity(); ++imageIdx)
    {
        // Width is drawn before height, matching the order of the size initializer.
        const auto imageWidth  = widthSource();
        const auto imageHeight = heightSource();

        nvcv::Image image(nvcv::Size2D{imageWidth, imageHeight}, GetFormat<VT>());

        auto data = image.exportData<nvcv::ImageDataStridedCuda>();
        CVCUDA_CHECK_DATA(data);

        // x: row stride of plane 0, y: size of one element.
        long2 strides{data->plane(0).rowStride, sizeof(VT)};
        // x: plane height, y: plane width.
        long2 planeDims{data->plane(0).height, data->plane(0).width};

        const long stagingBytes = strides.x * planeDims.x;

        std::vector<uint8_t> imageBuffer(stagingBytes);

        FillBuffer<VT>(imageBuffer, planeDims, strides, RandomValuesWithoutMinMax<VT>());

        // RandomMinMax works on three dimensions: present the plane as a single slice.
        const long3 sliceShape{1, planeDims.x, planeDims.y};
        const long3 sliceStrides{stagingBytes, strides.x, strides.y};
        RandomMinMax<VT>(imageBuffer, sliceShape, sliceStrides, locations);

        CUDA_CHECK_ERROR(cudaMemcpy2D(data->plane(0).basePtr, strides.x, imageBuffer.data(), strides.x, strides.x,
                                      data->plane(0).height, cudaMemcpyHostToDevice));

        imageBatch.pushBack(image);
    }
}
} // namespace benchutils
#endif // CVCUDA_BENCH_MINMAXLOC_HPP