Port legacy manager #61

Merged
12 commits · Jan 25, 2021
3 changes: 3 additions & 0 deletions CMakeLists.txt
@@ -41,6 +41,7 @@ set(AVA_GEN_DEMO_SPEC OFF CACHE BOOL "Enable demo specification")

set(AVA_MANAGER_GALVANIC OFF CACHE BOOL "Build Galvanic manager")
set(AVA_MANAGER_KATANA OFF CACHE BOOL "Build Katana manager")
set(AVA_MANAGER_LEGACY OFF CACHE BOOL "Build legacy (r1.0) manager")
set(AVA_MANAGER_DEMO OFF CACHE BOOL "Build demo manager")

message(STATUS "Build TensorFlow CUDA specification: ${AVA_GEN_TF_SPEC}")
@@ -58,6 +59,7 @@ message(STATUS "Build test (libtrivial) specification: ${AVA_GEN_TEST_SPEC}")
message(STATUS "Build demo specification: ${AVA_GEN_DEMO_SPEC}")
message(STATUS "Build Galvanic manager: ${AVA_MANAGER_GALVANIC}")
message(STATUS "Build Katana manager: ${AVA_MANAGER_KATANA}")
message(STATUS "Build legacy (r1.0) manager: ${AVA_MANAGER_LEGACY}")
message(STATUS "Build demo manager: ${AVA_MANAGER_DEMO}")

set(AVA_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/install" CACHE PATH "Installation path of AvA")
@@ -136,6 +138,7 @@ ExternalProject_Add(ava-manager
CMAKE_CACHE_ARGS
-DAVA_MANAGER_GALVANIC:BOOL=${AVA_MANAGER_GALVANIC}
-DAVA_MANAGER_KATANA:BOOL=${AVA_MANAGER_KATANA}
-DAVA_MANAGER_LEGACY:BOOL=${AVA_MANAGER_LEGACY}
-DAVA_MANAGER_DEMO:BOOL=${AVA_MANAGER_DEMO}
-DCMAKE_TOOLCHAIN_FILE:STRING=${CMAKE_TOOLCHAIN_FILE}
-DCMAKE_INSTALL_PREFIX:PATH=${AVA_INSTALL_DIR}
7 changes: 5 additions & 2 deletions cava/nightwatch/generator/c/cmakelists.py
@@ -14,7 +14,7 @@ def source(api: API, errors):

list(APPEND CMAKE_MODULE_PATH "${{CMAKE_CURRENT_BINARY_DIR}}/../..")

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD 14)

set(c_flags {api.cflags})
set(cxx_flags {api.cxxflags})
@@ -37,7 +37,8 @@ def source(api: API, errors):
find_package(Boost REQUIRED COMPONENTS system)
find_library(Config++ NAMES libconfig++ config++ REQUIRED)

###### Set generated files ######
set(protobuf_MODULE_COMPATIBLE TRUE)
find_package(Protobuf REQUIRED QUIET)

###### Compile ######

@@ -95,12 +96,14 @@ def source(api: API, errors):
${{CMAKE_SOURCE_DIR}}/../../common/cmd_channel_socket_utilities.cpp
${{CMAKE_SOURCE_DIR}}/../../common/cmd_channel_socket_tcp.cpp
${{CMAKE_SOURCE_DIR}}/../../common/cmd_channel_socket_vsock.cpp
${{CMAKE_SOURCE_DIR}}/../../proto/manager_service.proto.cpp
)
target_link_libraries(guestlib
${{GLIB2_LIBRARIES}}
${{Boost_LIBRARIES}}
Threads::Threads
${{Config++}}
${{Protobuf_LIBRARIES}}
)
target_compile_options(guestlib
PUBLIC -fvisibility=hidden
31 changes: 27 additions & 4 deletions cava/samples/onnxruntime/onnx_dump.c
@@ -7557,8 +7557,13 @@ cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle_t handle,
cudnnConvolutionBwdDataAlgo_t algo,
size_t *sizeInBytes)
{
fprintf(stderr, "%s is not implemented\n", __func__);
abort();
ava_argument(wDesc) ava_handle;
ava_argument(dyDesc) ava_handle;
ava_argument(convDesc) ava_handle;
ava_argument(dxDesc) ava_handle;
ava_argument(sizeInBytes) {
ava_out; ava_buffer(1);
}
}

cudnnStatus_t CUDNNWINAPI
@@ -7576,8 +7581,26 @@ cudnnConvolutionBackwardData(cudnnHandle_t handle,
const cudnnTensorDescriptor_t dxDesc,
void *dx)
{
fprintf(stderr, "%s is not implemented\n", __func__);
abort();
ava_async;
ava_argument(handle) ava_handle;
ava_argument(alpha) {
ava_type_cast(const double *);
ava_in; ava_buffer(1);
}
ava_argument(beta) {
ava_type_cast(const double *);
ava_in; ava_buffer(1);
}
ava_argument(dx) ava_opaque;
ava_argument(dxDesc) ava_handle;
ava_argument(wDesc) ava_handle;
ava_argument(w) ava_opaque;
ava_argument(dyDesc) ava_handle;
ava_argument(dy) ava_opaque;
ava_argument(convDesc) ava_handle;
ava_argument(workSpace) {
ava_in; ava_buffer(workSpaceSizeInBytes);
}
}

cudnnStatus_t CUDNNWINAPI
31 changes: 27 additions & 4 deletions cava/samples/onnxruntime/onnx_opt.c
@@ -8169,8 +8169,13 @@ cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle_t handle,
cudnnConvolutionBwdDataAlgo_t algo,
size_t *sizeInBytes)
{
fprintf(stderr, "%s is not implemented\n", __func__);
abort();
ava_argument(wDesc) ava_handle;
ava_argument(dyDesc) ava_handle;
ava_argument(convDesc) ava_handle;
ava_argument(dxDesc) ava_handle;
ava_argument(sizeInBytes) {
ava_out; ava_buffer(1);
}
}

cudnnStatus_t CUDNNWINAPI
@@ -8188,8 +8193,26 @@ cudnnConvolutionBackwardData(cudnnHandle_t handle,
const cudnnTensorDescriptor_t dxDesc,
void *dx)
{
fprintf(stderr, "%s is not implemented\n", __func__);
abort();
ava_async;
ava_argument(handle) ava_handle;
ava_argument(alpha) {
ava_type_cast(const double *);
ava_in; ava_buffer(1);
}
ava_argument(beta) {
ava_type_cast(const double *);
ava_in; ava_buffer(1);
}
ava_argument(dx) ava_opaque;
ava_argument(dxDesc) ava_handle;
ava_argument(wDesc) ava_handle;
ava_argument(w) ava_opaque;
ava_argument(dyDesc) ava_handle;
ava_argument(dy) ava_opaque;
ava_argument(convDesc) ava_handle;
ava_argument(workSpace) {
ava_in; ava_buffer(workSpaceSizeInBytes);
}
}

cudnnStatus_t CUDNNWINAPI
10 changes: 10 additions & 0 deletions cava/samples/tensorflow/CMakeLists.txt
@@ -32,8 +32,12 @@ ExternalProject_Add_Step(cava tf_dump-link
mkdir -p ${tf_dump_install}/lib &&
ln -f -s ${CMAKE_CURRENT_SOURCE_DIR}/headers ${CMAKE_CURRENT_BINARY_DIR}/headers &&
ln -f -s ${tf_dump_install}/lib/libguestlib.so ${tf_dump_install}/lib/libcuda.so.1 &&
ln -f -s ${tf_dump_install}/lib/libguestlib.so ${tf_dump_install}/lib/libcuda.so &&
ln -f -s ${tf_dump_install}/lib/libguestlib.so ${tf_dump_install}/lib/libcublas.so.10 &&
ln -f -s ${tf_dump_install}/lib/libguestlib.so ${tf_dump_install}/lib/libcublasLt.so.10 &&
ln -f -s ${tf_dump_install}/lib/libguestlib.so ${tf_dump_install}/lib/libcudart.so.10.0 &&
ln -f -s ${tf_dump_install}/lib/libguestlib.so ${tf_dump_install}/lib/libcudart.so.10.1 &&
ln -f -s ${tf_dump_install}/lib/libguestlib.so ${tf_dump_install}/lib/libcudart.so.10.2 &&
ln -f -s ${tf_dump_install}/lib/libguestlib.so ${tf_dump_install}/lib/libcudart.so.10 &&
ln -f -s ${tf_dump_install}/lib/libguestlib.so ${tf_dump_install}/lib/libcudnn.so.7 &&
ln -f -s ${tf_dump_install}/lib/libguestlib.so ${tf_dump_install}/lib/libcufft.so.10 &&
@@ -75,11 +79,17 @@ ExternalProject_Add_Step(cava tf_opt-link
mkdir -p ${tf_opt_install}/lib &&
ln -f -s ${CMAKE_CURRENT_SOURCE_DIR}/headers ${CMAKE_CURRENT_BINARY_DIR}/headers &&
ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcuda.so.1 &&
ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcuda.so &&
ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcublas.so.10 &&
ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcublasLt.so.10 &&
ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcudart.so.10.0 &&
ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcudart.so.10.1 &&
ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcudart.so.10.2 &&
ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcudart.so.10 &&
ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcudart.so &&
ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcudnn.so.7 &&
ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcufft.so.10 &&
ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcufft.so.10 &&
ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcurand.so.10 &&
ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcusolver.so.10 &&
ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcusparse.so.10
42 changes: 42 additions & 0 deletions cava/samples/tensorflow/README.md
@@ -1 +1,43 @@
Install TensorFlow
==================

Install TensorFlow 1.14 from [yuhc/tensorflow-cudart-dynam](https://github.com/yuhc/tensorflow-cudart-dynam).
Check out [BUILD.md](https://github.com/yuhc/tensorflow-cudart-dynam/blob/r1.14/BUILD.md) for build
instructions.
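
For example, a minimal sketch of fetching the fork (the clone location is an
assumption; the `r1.14` branch is the one referenced by the BUILD.md link above,
and the authoritative build steps live in that document):

```shell
# Fetch the TensorFlow 1.14 fork on its r1.14 branch.
git clone -b r1.14 https://github.com/yuhc/tensorflow-cudart-dynam.git
cd tensorflow-cudart-dynam
# Follow BUILD.md in this checkout to configure and build TensorFlow itself.
```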

Build specification
===================

Configure and build AvA in `~/ava-build` with

```shell
cmake -DAVA_GEN_TF_SPEC=ON -DAVA_MANAGER_LEGACY=ON ../ava
make
```

Run benchmark
=============

Dump CUDA binaries for the benchmark (in most cases, the CUDA binaries dumped
from a large TensorFlow benchmark can be reused for other TensorFlow
benchmarks):

```shell
./install/bin/legacy_manager install/tf_dump/bin/worker
LD_LIBRARY_PATH=~/ava-build/install/tf_dump/lib/ python3 your_tensorflow_benchmark.py
sudo mkdir /cuda_dumps
sudo cp /tmp/*.ava /cuda_dumps
```

> CUDA version: search for `10.1` in `tf_dump.c` and `tf_opt.c` to find (and change) the assumed version.
> Dump path: search for `cuda_dumps` in `tf_opt.c` (see the sketch below).
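
For example, a quick way to locate those spots from the repository root (a
sketch; adjust the paths to your checkout):

```shell
# Find the hard-coded CUDA version in the TensorFlow specs.
grep -nF "10.1" cava/samples/tensorflow/tf_dump.c cava/samples/tensorflow/tf_opt.c
# Find where the dump path is set.
grep -nF "cuda_dumps" cava/samples/tensorflow/tf_opt.c
```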

Copy the CUDA dumps to both the local server (where the benchmarks run) and the
remote server (where the API servers run).
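
For example, a sketch of copying the dumps to the remote machine; `remote-host`
is a placeholder for the API server's address, not something defined by this
repository:

```shell
# Stage the dumps on the remote machine, then move them into /cuda_dumps there.
scp /cuda_dumps/*.ava remote-host:/tmp/
ssh remote-host 'sudo mkdir -p /cuda_dumps && sudo cp /tmp/*.ava /cuda_dumps'
```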

Finally, restart the AvA manager and run the benchmark:

```shell
./install/bin/legacy_manager install/tf_opt/bin/worker
LD_LIBRARY_PATH=~/ava-build/install/tf_opt/lib/ python3 your_tensorflow_benchmark.py
```
31 changes: 27 additions & 4 deletions cava/samples/tensorflow/tf_dump.c
@@ -7565,8 +7565,13 @@ cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle_t handle,
cudnnConvolutionBwdDataAlgo_t algo,
size_t *sizeInBytes)
{
fprintf(stderr, "%s is not implemented\n", __func__);
abort();
ava_argument(wDesc) ava_handle;
ava_argument(dyDesc) ava_handle;
ava_argument(convDesc) ava_handle;
ava_argument(dxDesc) ava_handle;
ava_argument(sizeInBytes) {
ava_out; ava_buffer(1);
}
}

cudnnStatus_t CUDNNWINAPI
@@ -7584,8 +7589,26 @@ cudnnConvolutionBackwardData(cudnnHandle_t handle,
const cudnnTensorDescriptor_t dxDesc,
void *dx)
{
fprintf(stderr, "%s is not implemented\n", __func__);
abort();
ava_async;
ava_argument(handle) ava_handle;
ava_argument(alpha) {
ava_type_cast(const double *);
ava_in; ava_buffer(1);
}
ava_argument(beta) {
ava_type_cast(const double *);
ava_in; ava_buffer(1);
}
ava_argument(dx) ava_opaque;
ava_argument(dxDesc) ava_handle;
ava_argument(wDesc) ava_handle;
ava_argument(w) ava_opaque;
ava_argument(dyDesc) ava_handle;
ava_argument(dy) ava_opaque;
ava_argument(convDesc) ava_handle;
ava_argument(workSpace) {
ava_in; ava_buffer(workSpaceSizeInBytes);
}
}

cudnnStatus_t CUDNNWINAPI
33 changes: 28 additions & 5 deletions cava/samples/tensorflow/tf_opt.c
@@ -1168,7 +1168,7 @@ cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args,
size_t sharedMem, cudaStream_t stream)
{
/* Cannot be ava_async, may lead to TensorFlow internal race condition */
ava_async;
// ava_async;
ava_disable_native_call;

ava_implicit_argument
Expand Down Expand Up @@ -8158,8 +8158,13 @@ cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle_t handle,
cudnnConvolutionBwdDataAlgo_t algo,
size_t *sizeInBytes)
{
fprintf(stderr, "%s is not implemented\n", __func__);
abort();
ava_argument(wDesc) ava_handle;
ava_argument(dyDesc) ava_handle;
ava_argument(convDesc) ava_handle;
ava_argument(dxDesc) ava_handle;
ava_argument(sizeInBytes) {
ava_out; ava_buffer(1);
}
}

cudnnStatus_t CUDNNWINAPI
Expand All @@ -8177,8 +8182,26 @@ cudnnConvolutionBackwardData(cudnnHandle_t handle,
const cudnnTensorDescriptor_t dxDesc,
void *dx)
{
fprintf(stderr, "%s is not implemented\n", __func__);
abort();
ava_async;
ava_argument(handle) ava_handle;
ava_argument(alpha) {
ava_type_cast(const double *);
ava_in; ava_buffer(1);
}
ava_argument(beta) {
ava_type_cast(const double *);
ava_in; ava_buffer(1);
}
ava_argument(dx) ava_opaque;
ava_argument(dxDesc) ava_handle;
ava_argument(wDesc) ava_handle;
ava_argument(w) ava_opaque;
ava_argument(dyDesc) ava_handle;
ava_argument(dy) ava_opaque;
ava_argument(convDesc) ava_handle;
ava_argument(workSpace) {
ava_in; ava_buffer(workSpaceSizeInBytes);
}
}

cudnnStatus_t CUDNNWINAPI
Expand Down
21 changes: 12 additions & 9 deletions common/cmd_channel_socket_tcp.cpp
@@ -24,7 +24,7 @@
#include "common/cmd_handler.h"
#include "cmd_channel_socket_utilities.h"
#include "guest_config.h"
#include "manager_service.pb.h"
#include "manager_service.proto.h"

using boost::asio::ip::tcp;

@@ -53,22 +53,25 @@ std::vector<struct command_channel*> command_channel_socket_tcp_guest_new()

// Serialize configurations
ava_proto::WorkerAssignRequest request;
request.set_gpu_count(guestconfig::config->gpu_memory_.size());
request.gpu_count() = guestconfig::config->gpu_memory_.size();
for (auto m : guestconfig::config->gpu_memory_) {
request.add_gpu_mem(m << 20);
request.gpu_mem().push_back(m << 20);
}
std::string request_buf(request.SerializeAsString());
uint32_t request_length = static_cast<uint32_t>(request_buf.length() + 1);
std::vector<unsigned char> request_buf;
zpp::serializer::memory_output_archive out(request_buf);
out(request);
uint32_t request_length = static_cast<uint32_t>(request_buf.size());
boost::asio::write(manager_sock, boost::asio::buffer(&request_length, sizeof(uint32_t)));
boost::asio::write(manager_sock, boost::asio::buffer(request_buf.c_str(), request_length));
boost::asio::write(manager_sock, boost::asio::buffer(request_buf.data(), request_length));

// De-serialize API server addresses
uint32_t reply_length;
boost::asio::read(manager_sock, boost::asio::buffer(&reply_length, sizeof(uint32_t)));
char reply_str[reply_length];
boost::asio::read(manager_sock, boost::asio::buffer(reply_str, reply_length));
std::vector<unsigned char> reply_buf(reply_length);
zpp::serializer::memory_input_archive in(reply_buf);
boost::asio::read(manager_sock, boost::asio::buffer(reply_buf.data(), reply_length));
ava_proto::WorkerAssignReply reply;
reply.ParseFromString(reply_str);
in(reply);
std::vector<std::string> worker_address;
for (auto& wa : reply.worker_address()) {
worker_address.push_back(wa);
13 changes: 7 additions & 6 deletions docs/build_and_setup.md
@@ -53,12 +53,13 @@ old codes into the new build system (v2.0) is still ongoing.

> \* Upstream changes ([ava-serverless](https://github.com/photoszzt/ava-serverless)) have not been merged.

| AvA manager | Status |
| ----------- | ------ |
| Demo | TESTED |
| Galvanic | NO |
| Katana | NO |
| Nvidia GPU | NO |
| AvA manager | Status |
| ------------- | ------ |
| Demo | NO |
| Galvanic | NO |
| Katana | NO |
| Legacy (r1.0) | TESTED |
| Nvidia GPU | NO |

This tutorial shows how to configure and build the minimal AvA demo API
remoting system. To build and use other supported virtualized APIs, please