From 510e13675e73fd6312d1a276b95a6af3b4502d81 Mon Sep 17 00:00:00 2001 From: Hangchen Yu Date: Sun, 24 Jan 2021 23:14:23 +0000 Subject: [PATCH 01/12] Enable subclass to access base members --- worker/include/manager_service.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/worker/include/manager_service.h b/worker/include/manager_service.h index 35a3fa96..87ae77a1 100644 --- a/worker/include/manager_service.h +++ b/worker/include/manager_service.h @@ -27,7 +27,14 @@ class ManagerServiceServerBase { void HandleAccept(std::unique_ptr socket, std::unique_ptr endpoint); + boost::asio::io_service io_service_; + std::unique_ptr acceptor_; + std::unique_ptr socket_; + std::unique_ptr endpoint_; + virtual ava_proto::WorkerAssignReply HandleRequest(const ava_proto::WorkerAssignRequest& request); + +protected: virtual pid_t SpawnWorker(const std::vector& environments, const std::vector& parameters); @@ -37,11 +44,6 @@ class ManagerServiceServerBase { std::atomic worker_id_; std::map> worker_monitor_map_; - - boost::asio::io_service io_service_; - std::unique_ptr acceptor_; - std::unique_ptr socket_; - std::unique_ptr endpoint_; }; } // namespace ava_manager From ce6863c75476b4492fcf181ed33ded880bc38296 Mon Sep 17 00:00:00 2001 From: Hangchen Yu Date: Sun, 24 Jan 2021 23:15:31 +0000 Subject: [PATCH 02/12] Port from r1.0 TCP manager --- CMakeLists.txt | 3 ++ worker/CMakeLists.txt | 4 ++ worker/legacy/CMakeLists.txt | 24 +++++++++ worker/legacy/manager.cpp | 96 ++++++++++++++++++++++++++++++++++++ 4 files changed, 127 insertions(+) create mode 100644 worker/legacy/CMakeLists.txt create mode 100644 worker/legacy/manager.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b6be5279..1bd43d8e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,6 +41,7 @@ set(AVA_GEN_DEMO_SPEC OFF CACHE BOOL "Enable demo specification") set(AVA_MANAGER_GALVANIC OFF CACHE BOOL "Build Galvanic manager") set(AVA_MANAGER_KATANA OFF CACHE BOOL "Build Katana manager") +set(AVA_MANAGER_LEGACY OFF CACHE BOOL "Build legacy (r1.0) manager") set(AVA_MANAGER_DEMO OFF CACHE BOOL "Build demo manager") message(STATUS "Build TensorFlow CUDA specification: ${AVA_GEN_TF_SPEC}") @@ -58,6 +59,7 @@ message(STATUS "Build test (libtrivial) specification: ${AVA_GEN_TEST_SPEC}") message(STATUS "Build demo specification: ${AVA_GEN_DEMO_SPEC}") message(STATUS "Build Galvanic manager: ${AVA_MANAGER_GALVANIC}") message(STATUS "Build Katana manager: ${AVA_MANAGER_KATANA}") +message(STATUS "Build legacy (r1.0) manager: ${AVA_MANAGER_LEGACY}") message(STATUS "Build demo manager: ${AVA_MANAGER_DEMO}") set(AVA_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/install" CACHE PATH "Installation path of AvA") @@ -136,6 +138,7 @@ ExternalProject_Add(ava-manager CMAKE_CACHE_ARGS -DAVA_MANAGER_GALVANIC:BOOL=${AVA_MANAGER_GALVANIC} -DAVA_MANAGER_KATANA:BOOL=${AVA_MANAGER_KATANA} + -DAVA_MANAGER_LEGACY:BOOL=${AVA_MANAGER_LEGACY} -DAVA_MANAGER_DEMO:BOOL=${AVA_MANAGER_DEMO} -DCMAKE_TOOLCHAIN_FILE:STRING=${CMAKE_TOOLCHAIN_FILE} -DCMAKE_INSTALL_PREFIX:PATH=${AVA_INSTALL_DIR} diff --git a/worker/CMakeLists.txt b/worker/CMakeLists.txt index 96a62142..cff28676 100644 --- a/worker/CMakeLists.txt +++ b/worker/CMakeLists.txt @@ -32,6 +32,10 @@ if(AVA_MANAGER_KATANA) add_subdirectory(katana) endif() +if(AVA_MANAGER_LEGACY) + add_subdirectory(legacy) +endif() + if(AVA_MANAGER_DEMO) add_subdirectory(demo) endif() diff --git a/worker/legacy/CMakeLists.txt b/worker/legacy/CMakeLists.txt new file mode 100644 index 00000000..24a91a21 --- /dev/null +++ b/worker/legacy/CMakeLists.txt @@ -0,0 +1,24 @@ +cmake_minimum_required(VERSION 3.13) + +project(legacy-manager C CXX CUDA) + +set(CMAKE_CXX_STANDARD 17) + +add_compile_options(-Wall -Wextra -pedantic -Wno-write-strings -g) + +add_executable(legacy-manager + manager.cpp + ${manager_service_pb_srcs} + ${manager_service_ava_srcs} +) +target_link_libraries(legacy-manager + ${manager_service_libraries} +) +set_target_properties(legacy-manager + PROPERTIES OUTPUT_NAME manager +) + +install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/manager + TYPE BIN + RENAME legacy_manager +) diff --git a/worker/legacy/manager.cpp b/worker/legacy/manager.cpp new file mode 100644 index 00000000..1f41917d --- /dev/null +++ b/worker/legacy/manager.cpp @@ -0,0 +1,96 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "manager_service.h" +#include "manager_service.pb.h" + +using ava_manager::ManagerServiceServerBase; + +const uint32_t kDefaultManagerPort = 3333; +const uint32_t kDefaultWorkerPortBase = 4000; +const bool kDefaultWorkerPoolEnabled = true; +const uint32_t kDefaultWorkerPoolSize = 3; + +class DemoManager : public ManagerServiceServerBase { +public: + DemoManager(uint32_t port, uint32_t worker_port_base, std::string worker_path) : + ManagerServiceServerBase(port, worker_port_base, worker_path) { + // Spawn worker pool with default environment variables + if (kDefaultWorkerPoolEnabled) { + for (uint32_t i = 0; i < kDefaultWorkerPoolSize; i++) { + auto worker_address = SpawnWorkerWrapper(); + worker_pool_.push(worker_address); + } + } + } + +private: + uint32_t SpawnWorkerWrapper() { + // Let API server use TCP channel + std::vector environments; + environments.push_back("AVA_CHANNEL=TCP"); + + // Pass only port to API server + auto port = worker_port_base_ + + worker_id_.fetch_add(1, std::memory_order_relaxed); + std::vector parameters; + parameters.push_back(std::to_string(port)); + + std::cerr << "Spawn API server at 0.0.0.0:" << port << "(cmdline=\\\\" + << boost::algorithm::join(environments, " ") << " " << worker_path_ << " " + << boost::algorithm::join(parameters, " ") << "\\\\)" << std::endl; + + auto child_pid = SpawnWorker(environments, parameters); + + auto child_monitor = std::make_shared( + [](pid_t child_pid, + uint32_t port, + std::map> *worker_monitor_map) { + pid_t ret = waitpid(child_pid, NULL, 0); + std::cerr << "[pid=" << child_pid << "] API server at ::" << port + << " has exit (waitpid=" << ret << ")" << std::endl; + worker_monitor_map->erase(port); + }, + child_pid, port, &worker_monitor_map_); + child_monitor->detach(); + worker_monitor_map_.insert({port, child_monitor}); + + return port; + } + + ava_proto::WorkerAssignReply HandleRequest( + const ava_proto::WorkerAssignRequest& request) { + ava_proto::WorkerAssignReply reply; + uint32_t worker_port; + + if (worker_pool_.pop(worker_port)) { + worker_pool_.push(SpawnWorkerWrapper()); + } + else { + worker_port = SpawnWorkerWrapper(); + } + reply.add_worker_address("0.0.0.0:" + std::to_string(worker_port)); + + return reply; + } + + boost::lockfree::queue> worker_pool_; +}; + +int main(int argc, char* argv[]) { + if (argc <= 1) { + fprintf(stderr, "Usage: %s \n" + "Example: %s generated/cudadrv_nw/worker\n", + argv[0], argv[0]); + exit(0); + } + ava_manager::setupSignalHandlers(); + DemoManager manager(kDefaultManagerPort, kDefaultWorkerPortBase, argv[1]); + manager.RunServer(); + return 0; +} From 531444603463ea92ad4c2d84a6ce08c2c4a30994 Mon Sep 17 00:00:00 2001 From: Hangchen Yu Date: Mon, 25 Jan 2021 06:07:09 +0000 Subject: [PATCH 03/12] Fix protobuf dependency --- cava/nightwatch/generator/c/cmakelists.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cava/nightwatch/generator/c/cmakelists.py b/cava/nightwatch/generator/c/cmakelists.py index f6cd3638..782888d0 100644 --- a/cava/nightwatch/generator/c/cmakelists.py +++ b/cava/nightwatch/generator/c/cmakelists.py @@ -37,7 +37,8 @@ def source(api: API, errors): find_package(Boost REQUIRED COMPONENTS system) find_library(Config++ NAMES libconfig++ config++ REQUIRED) -###### Set generated files ###### +set(protobuf_MODULE_COMPATIBLE TRUE) +find_package(Protobuf REQUIRED QUIET) ###### Compile ###### @@ -95,12 +96,14 @@ def source(api: API, errors): ${{CMAKE_SOURCE_DIR}}/../../common/cmd_channel_socket_utilities.cpp ${{CMAKE_SOURCE_DIR}}/../../common/cmd_channel_socket_tcp.cpp ${{CMAKE_SOURCE_DIR}}/../../common/cmd_channel_socket_vsock.cpp + ${{CMAKE_BINARY_DIR}}/../../proto/manager_service.pb.cc ) target_link_libraries(guestlib ${{GLIB2_LIBRARIES}} ${{Boost_LIBRARIES}} Threads::Threads ${{Config++}} + ${{Protobuf_LIBRARIES}} ) target_compile_options(guestlib PUBLIC -fvisibility=hidden From c33e8855a7a353805778c193b694040c177576db Mon Sep 17 00:00:00 2001 From: Hangchen Yu Date: Mon, 25 Jan 2021 06:07:38 +0000 Subject: [PATCH 04/12] Link to proper CUDA libraries --- cava/samples/tensorflow/CMakeLists.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cava/samples/tensorflow/CMakeLists.txt b/cava/samples/tensorflow/CMakeLists.txt index ac6f15b7..9df81cb3 100644 --- a/cava/samples/tensorflow/CMakeLists.txt +++ b/cava/samples/tensorflow/CMakeLists.txt @@ -32,8 +32,12 @@ ExternalProject_Add_Step(cava tf_dump-link mkdir -p ${tf_dump_install}/lib && ln -f -s ${CMAKE_CURRENT_SOURCE_DIR}/headers ${CMAKE_CURRENT_BINARY_DIR}/headers && ln -f -s ${tf_dump_install}/lib/libguestlib.so ${tf_dump_install}/lib/libcuda.so.1 && + ln -f -s ${tf_dump_install}/lib/libguestlib.so ${tf_dump_install}/lib/libcuda.so && ln -f -s ${tf_dump_install}/lib/libguestlib.so ${tf_dump_install}/lib/libcublas.so.10 && ln -f -s ${tf_dump_install}/lib/libguestlib.so ${tf_dump_install}/lib/libcublasLt.so.10 && + ln -f -s ${tf_dump_install}/lib/libguestlib.so ${tf_dump_install}/lib/libcudart.so.10.0 && + ln -f -s ${tf_dump_install}/lib/libguestlib.so ${tf_dump_install}/lib/libcudart.so.10.1 && + ln -f -s ${tf_dump_install}/lib/libguestlib.so ${tf_dump_install}/lib/libcudart.so.10.2 && ln -f -s ${tf_dump_install}/lib/libguestlib.so ${tf_dump_install}/lib/libcudart.so.10 && ln -f -s ${tf_dump_install}/lib/libguestlib.so ${tf_dump_install}/lib/libcudnn.so.7 && ln -f -s ${tf_dump_install}/lib/libguestlib.so ${tf_dump_install}/lib/libcufft.so.10 && @@ -75,11 +79,17 @@ ExternalProject_Add_Step(cava tf_opt-link mkdir -p ${tf_opt_install}/lib && ln -f -s ${CMAKE_CURRENT_SOURCE_DIR}/headers ${CMAKE_CURRENT_BINARY_DIR}/headers && ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcuda.so.1 && + ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcuda.so && ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcublas.so.10 && ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcublasLt.so.10 && + ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcudart.so.10.0 && + ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcudart.so.10.1 && + ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcudart.so.10.2 && ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcudart.so.10 && + ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcudart.so && ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcudnn.so.7 && ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcufft.so.10 && + ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcufft.so.10 && ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcurand.so.10 && ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcusolver.so.10 && ln -f -s ${tf_opt_install}/lib/libguestlib.so ${tf_opt_install}/lib/libcusparse.so.10 From 9b71ab437dab49dcd599cddc50640accce2b484a Mon Sep 17 00:00:00 2001 From: Hangchen Yu Date: Mon, 25 Jan 2021 08:15:22 +0000 Subject: [PATCH 05/12] Import a serializer library --- include/import/serializer.h | 2297 +++++++++++++++++++++++++++++++++++ 1 file changed, 2297 insertions(+) create mode 100644 include/import/serializer.h diff --git a/include/import/serializer.h b/include/import/serializer.h new file mode 100644 index 00000000..9f4cb93c --- /dev/null +++ b/include/import/serializer.h @@ -0,0 +1,2297 @@ +#ifndef __zpp_serializer_h__ +#define __zpp_serializer_h__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if __cplusplus >= 201703L +#include +#include +#endif + +namespace zpp +{ +/** + * Supports serialization of objects and polymorphic objects. + * Example of non polymorphic serialization: + * ~~~ + * class point + * { + * public: + * point() = default; + * point(int x, int y) noexcept : + * m_x(x), + * m_y(y) + * { + * } + * + * friend zpp::serializer::access; + * template + * static void serialize(Archive & archive, Self & self) + * { + * archive(self.m_x, self.m_y); + * } + * + * int get_x() const noexcept + * { + * return m_x; + * } + * + * int get_y() const noexcept + * { + * return m_y; + * } + * + * private: + * int m_x = 0; + * int m_y = 0; + * }; + * + * static void foo() + * { + * std::vector data; + * zpp::serializer::memory_input_archive in(data); + * zpp::serializer::memory_output_archive out(data); + * + * out(point(1337, 1338)); + * + * point my_point; + * in(my_point); + * + * std::cout << my_point.get_x() << ' ' << my_point.get_y() << '\n'; + * } + * ~~~ + * + * Example of polymorphic serialization: + * ~~~ + * class person : public zpp::serializer::polymorphic + * { + * public: + * person() = default; + * explicit person(std::string name) noexcept : + * m_name(std::move(name)) + * { + * } + * + * friend zpp::serializer::access; + * template + * static void serialize(Archive & archive, Self & self) + * { + * archive(self.m_name); + * } + * + * const std::string & get_name() const noexcept + * { + * return m_name; + * } + * + * virtual void print() const + * { + * std::cout << "person: " << m_name; + * } + * + * private: + * std::string m_name; + * }; + * + * class student : public person + * { + * public: + * student() = default; + * student(std::string name, std::string university) noexcept : + * person(std::move(name)), + * m_university(std::move(university)) + * { + * } + * + * friend zpp::serializer::access; + * template + * static void serialize(Archive & archive, Self & self) + * { + * person::serialize(archive, self); + * archive(self.m_university); + * } + * + * virtual void print() const + * { + * std::cout << "student: " << person::get_name() << ' ' << + * m_university << '\n'; + * } + * + * private: + * std::string m_university; + * }; + * + * namespace + * { + * zpp::serializer::register_types< + * zpp::serializer::make_type, + * zpp::serializer::make_type > _; } // + * + * static void foo() + * { + * std::vector data; + * zpp::serializer::memory_input_archive in(data); + * zpp::serializer::memory_output_archive out(data); + * + * std::unique_ptr my_person = + * std::make_unique("1337", "1337University"); out(my_person); + * + * my_person = nullptr; + * in(my_person); + * + * my_person->print(); + * } + * + * static void bar() + * { + * std::vector data; + * zpp::serializer::memory_input_archive in(data); + * zpp::serializer::memory_output_archive out(data); + * + * out(zpp::serializer::as_polymorphic(student("1337", + * "1337University"))); + * + * std::unique_ptr my_person; + * in(my_person); + * + * my_person->print(); + * } + * ~~~ + */ +namespace serializer +{ +namespace detail +{ +/** + * Map any sequence of types to void. + */ +template +using void_t = void; + +/** + * Tests if all conditions are true, empty means true. + * Example: + * ~~~ + * all_of::value == false + * all_of::value == true + * all_of::value == false + * all_of<>::value == true + * ~~~ + */ +template +struct all_of : std::true_type +{ +}; + +template +struct all_of : std::false_type +{ +}; + +template +struct all_of : all_of +{ +}; + +template <> +struct all_of : std::true_type +{ +}; + +/** + * Remove const of container value_type + */ +template +struct container_nonconst_value_type +{ + using type = std::remove_const_t; +}; + +/** + * Same as above, except in case of std::map and std::unordered_map, and + * similar, we also need to remove the const of the key type. + */ +template