forked from kokkos/kokkos
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Kokkos_HIP.cpp
184 lines (146 loc) · 5.99 KB
/
Kokkos_HIP.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
#define KOKKOS_IMPL_PUBLIC_INCLUDE
#endif
#include <Kokkos_Core.hpp>
#include <HIP/Kokkos_HIP.hpp>
#include <HIP/Kokkos_HIP_Instance.hpp>
#include <impl/Kokkos_DeviceManagement.hpp>
#include <impl/Kokkos_ExecSpaceManager.hpp>
#include <hip/hip_runtime_api.h>
namespace Kokkos {
// Returns the value reported by Impl::HIPInternal::concurrency().
//
// The member function is non-const when KOKKOS_ENABLE_DEPRECATED_CODE_4 is
// defined and const otherwise. Each preprocessor branch holds a complete
// definition so that braces balance on either path.
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
int HIP::concurrency() { return Impl::HIPInternal::concurrency(); }
#else
int HIP::concurrency() const { return Impl::HIPInternal::concurrency(); }
#endif
// Reports whether the singleton HIPInternal instance is initialized, as an
// int (the result of HIPInternal::is_initialized() converted to int).
int HIP::impl_is_initialized() {
  const int singleton_initialized =
      Impl::HIPInternal::singleton().is_initialized();
  return singleton_initialized;
}
// Sets up the HIP backend as a whole: picks the device, caches its properties
// and the limits derived from them in the static members of
// Impl::HIPInternal, creates the constant-memory staging resources, and
// initializes the singleton instance on a newly created stream.
//
// settings: forwarded to Impl::get_gpu() to choose the device id.
//
// NOTE(review): the arguments of KOKKOS_IMPL_HIP_SAFE_CALL are deliberately
// left fully spelled out (no local aliases); the macro presumably reports the
// call expression text on failure - confirm against its definition.
void HIP::impl_initialize(InitializationSettings const& settings) {
  using Internal = Impl::HIPInternal;
  using Traits   = Impl::HIPTraits;

  // Device selection and property query. The properties are read before the
  // device is made current, matching the original call order.
  const int hip_device_id = Impl::get_gpu(settings);
  Internal::m_hipDev      = hip_device_id;
  KOKKOS_IMPL_HIP_SAFE_CALL(
      hipGetDeviceProperties(&Impl::HIPInternal::m_deviceProp, hip_device_id));
  const auto& props = Internal::m_deviceProp;
  KOKKOS_IMPL_HIP_SAFE_CALL(hipSetDevice(hip_device_id));

  // Number of multiprocessors.
  Internal::m_multiProcCount = props.multiProcessorCount;

  //----------------------------------
  // Maximum number of warps: threads per block divided by the warp size,
  // capped at the warp size itself (at most one warp per thread in a warp for
  // reduction).
  Internal::m_maxWarpCount = props.maxThreadsPerBlock / Traits::WarpSize;
  if (Internal::m_maxWarpCount > Traits::WarpSize) {
    Internal::m_maxWarpCount = Traits::WarpSize;
  }

  //----------------------------------
  // Maximum number of blocks along each of the three grid dimensions.
  for (int dim = 0; dim < 3; ++dim) {
    Internal::m_maxBlock[dim] = props.maxGridSize[dim];
  }

  // Theoretically, we can get 40 wavefronts per CU, but only 32 can be
  // sustained; see
  // https://github.com/ROCm-Developer-Tools/HIP/blob/a0b5dfd625d99af7e288629747b40dd057183173/vdi/hip_platform.cpp#L742
  Internal::m_maxWavesPerCU   = 32;
  Internal::m_maxThreadsPerSM = Internal::m_maxWavesPerCU * Traits::WarpSize;

  // Shared-memory limits as reported by the device properties.
  Internal::m_shmemPerSM       = props.maxSharedMemoryPerMultiProcessor;
  Internal::m_maxShmemPerBlock = props.sharedMemPerBlock;

  // Initialize the lock arrays used for arbitrarily sized atomics.
  desul::Impl::init_lock_arrays();  // FIXME

  // Allocate a staging buffer for constant memory in pinned host memory, and
  // an event to avoid overwriting the buffer while previous kernel launches
  // may still be using it.
  KOKKOS_IMPL_HIP_SAFE_CALL(
      hipHostMalloc((void**)&Impl::HIPInternal::constantMemHostStaging,
                    Impl::HIPTraits::ConstantMemoryUsage));
  KOKKOS_IMPL_HIP_SAFE_CALL(
      hipEventCreate(&Impl::HIPInternal::constantMemReusable));

  // The singleton instance runs on its own stream, created here.
  hipStream_t singleton_stream;
  KOKKOS_IMPL_HIP_SAFE_CALL(hipStreamCreate(&singleton_stream));
  Internal::singleton().initialize(singleton_stream);
}
// Tears down the backend-wide HIP state: the global unique-token locks, the
// desul lock arrays, the constant-memory staging event and pinned host buffer,
// and finally the singleton instance together with its stream.
void HIP::impl_finalize() {
// The return value is intentionally discarded.
// NOTE(review): the `true` argument presumably requests deallocation of the
// token locks - confirm against hip_global_unique_token_locks.
(void)Impl::hip_global_unique_token_locks(true);
desul::Impl::finalize_lock_arrays(); // FIXME
// Release the event and the host staging buffer used for constant memory.
KOKKOS_IMPL_HIP_SAFE_CALL(
hipEventDestroy(Impl::HIPInternal::constantMemReusable));
KOKKOS_IMPL_HIP_SAFE_CALL(
hipHostFree(Impl::HIPInternal::constantMemHostStaging));
// The singleton is finalized first; its stream is destroyed afterwards.
Impl::HIPInternal::singleton().finalize();
KOKKOS_IMPL_HIP_SAFE_CALL(
hipStreamDestroy(Impl::HIPInternal::singleton().m_stream));
}
// Default constructor: the new handle refers to the singleton HIPInternal
// instance. The deleter is a no-op, so releasing this handle never deletes
// the singleton.
HIP::HIP()
    : m_space_instance(&Impl::HIPInternal::singleton(),
                       [](Impl::HIPInternal*) { /* singleton: not owned */ }) {
  // NOTE(review): presumably reports an error if the backend has not been
  // initialized yet; the string identifies the caller - confirm.
  auto& global_instance = Impl::HIPInternal::singleton();
  global_instance.verify_is_initialized("HIP instance constructor");
}
// Constructs an execution space instance backed by its own HIPInternal object
// that is initialized with the given stream.
//
// stream:        the HIP stream passed to HIPInternal::initialize().
// manage_stream: when it converts to true, the deleter also destroys the
//                instance's stream; otherwise the stream is left alone.
//
// The deleter finalizes the HIPInternal object, optionally destroys its
// stream, and then deletes the object.
HIP::HIP(hipStream_t const stream, Impl::ManageStream manage_stream)
    : m_space_instance(
          new Impl::HIPInternal,
          [owns_stream = static_cast<bool>(manage_stream)](
              Impl::HIPInternal* ptr) {
            ptr->finalize();
            if (owns_stream) {
              KOKKOS_IMPL_HIP_SAFE_CALL(hipStreamDestroy(ptr->m_stream));
            }
            delete ptr;
          }) {
  // The singleton is checked before the new instance is initialized.
  Impl::HIPInternal::singleton().verify_is_initialized(
      "HIP instance constructor");
  m_space_instance->initialize(stream);
}
// Deprecated overload taking a plain bool. Delegates to the
// Impl::ManageStream constructor, mapping `true` to Impl::ManageStream::yes
// and `false` to Impl::ManageStream::no.
KOKKOS_DEPRECATED HIP::HIP(hipStream_t const stream, bool manage_stream)
: HIP(stream,
manage_stream ? Impl::ManageStream::yes : Impl::ManageStream::no) {}
// Writes a description of the HIP backend configuration to `os`: a fixed
// header, whether relocatable device code was enabled at compile time, and
// then the runtime configuration printed by the underlying instance.
//
// os: destination stream.
// The second (verbose) parameter is unused.
void HIP::print_configuration(std::ostream& os, bool /*verbose*/) const {
  // Compile-time choice of the relocatable-device-code answer.
#ifdef KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE
  char const* const rdc_state = "yes\n";
#else
  char const* const rdc_state = "no\n";
#endif

  os << "Device Execution Space:\n"
     << " KOKKOS_ENABLE_HIP: yes\n"
     << "HIP Options:\n"
     << " KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE: " << rdc_state
     << "\nRuntime Configuration:\n";

  m_space_instance->print_configuration(os);
}
// Returns the id reported by the underlying instance's
// impl_get_instance_id().
uint32_t HIP::impl_instance_id() const noexcept {
  const uint32_t instance_id = m_space_instance->impl_get_instance_id();
  return instance_id;
}
void HIP::impl_static_fence(const std::string& name) {
Kokkos::Tools::Experimental::Impl::profile_fence_event<HIP>(
name,
Kokkos::Tools::Experimental::SpecialSynchronizationCases::
GlobalDeviceSynchronization,
[&]() { KOKKOS_IMPL_HIP_SAFE_CALL(hipDeviceSynchronize()); });
}
// Fences this execution space instance by forwarding to the underlying
// instance's fence().
//
// name: label forwarded unchanged to the underlying fence call.
void HIP::fence(const std::string& name) const {
m_space_instance->fence(name);
}
// Returns the stream stored in the underlying instance (its m_stream member).
hipStream_t HIP::hip_stream() const {
  hipStream_t const instance_stream = m_space_instance->m_stream;
  return instance_stream;
}
// Returns the device id (m_hipDev) read through the internal space instance.
int HIP::hip_device() const {
  int const device_id = impl_internal_space_instance()->m_hipDev;
  return device_id;
}
// Returns a reference to the cached device properties (m_deviceProp),
// accessed through the singleton instance.
hipDeviceProp_t const& HIP::hip_device_prop() {
  auto const& global_instance = Impl::HIPInternal::singleton();
  return global_instance.m_deviceProp;
}
// Returns the name of this execution space: the string literal "HIP".
const char* HIP::name() { return "HIP"; }
namespace Impl {
// Namespace-scope variable whose initializer calls
// initialize_space_factory<HIP> with the key "150_HIP".
// NOTE(review): presumably this exists for the side effect of registering the
// HIP execution space factory during static initialization, with the numeric
// prefix of the key controlling ordering among backends - confirm against
// initialize_space_factory.
int g_hip_space_factory_initialized = initialize_space_factory<HIP>("150_HIP");
} // namespace Impl
} // namespace Kokkos