diff --git a/.cirrus.yml b/.cirrus.yml index 2dc18b87..60cf04ad 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -1,5 +1,6 @@ +# The Community Cluster allows up to 8 CPUs and up to 24 GB of memory. cpus: &CPUS 8 -memory: &MEMORY 6GB +memory: &MEMORY 24GB config: &CONFIG --build-type=release --enable-static memcheck_config: &MEMCHECK_CONFIG --build-type=debug --sanitizers=address @@ -173,7 +174,7 @@ freebsd11_task: memcheck_task: container: # Just uses a recent/common distro to run memory error/leak checks. - dockerfile: ci/ubuntu-18.04/Dockerfile + dockerfile: ci/debian-9/Dockerfile cpu: 8 # AddressSanitizer uses more memory than normal config. memory: 8GB diff --git a/CMakeLists.txt b/CMakeLists.txt index 4e80aee9..626b3c22 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -284,6 +284,10 @@ endif() set(BROKER_SRC ${OPTIONAL_SRC} src/address.cc + src/alm/multipath.cc + src/alm/peer.cc + src/alm/routing_table.cc + src/alm/stream_transport.cc src/configuration.cc src/convert.cc src/core_actor.cc @@ -304,17 +308,21 @@ set(BROKER_SRC src/detail/master_actor.cc src/detail/master_resolver.cc src/detail/memory_backend.cc - src/detail/meta_command_writer.cc src/detail/meta_data_writer.cc + src/detail/monotonic_buffer_resource.cc src/detail/network_cache.cc + src/detail/peer_handshake.cc src/detail/prefix_matcher.cc src/detail/sqlite_backend.cc src/detail/store_actor.cc src/detail/unipath_manager.cc + src/domain_options.cc src/endpoint.cc src/endpoint_info.cc + src/entity_id.cc src/error.cc src/filter_type.cc + src/gateway.cc src/internal_command.cc src/mailbox.cc src/message.cc @@ -322,7 +330,7 @@ set(BROKER_SRC src/peer_status.cc src/port.cc src/publisher.cc - src/publisher_id.cc + src/shutdown_options.cc src/status.cc src/status_subscriber.cc src/store.cc @@ -369,8 +377,9 @@ macro(add_tool name) endmacro() if (NOT BROKER_DISABLE_TOOLS) - add_tool(broker-pipe) + add_tool(broker-gateway) add_tool(broker-node) + add_tool(broker-pipe) endif () # -- Bindings 
----------------------------------------------------------------- diff --git a/bindings/python/_broker.cpp b/bindings/python/_broker.cpp index 69bf17e3..bc791147 100644 --- a/bindings/python/_broker.cpp +++ b/bindings/python/_broker.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #pragma GCC diagnostic push @@ -51,6 +52,18 @@ PYBIND11_MAKE_OPAQUE(broker::set) PYBIND11_MAKE_OPAQUE(broker::table) PYBIND11_MAKE_OPAQUE(broker::vector) +namespace { + +broker::endpoint_id node_from_str(const std::string& node_str) { + caf::node_id node; + if (auto err = caf::parse(node_str, node)) + throw std::invalid_argument( + "endpoint::await_peer called with invalid endpoint ID"); + return node; +} + +} // namespace + PYBIND11_MODULE(_broker, m) { m.doc() = "Broker python bindings"; py::module mb = m.def_submodule("zeek", "Zeek-specific bindings"); @@ -251,8 +264,7 @@ PYBIND11_MODULE(_broker, m) { py::class_(m, "BrokerOptions") .def(py::init<>()) .def_readwrite("disable_ssl", &broker::broker_options::disable_ssl) - .def_readwrite("ttl", &broker::broker_options::ttl) - .def_readwrite("forward", &broker::broker_options::forward) + .def_readwrite("disable_forwarding", &broker::broker_options::disable_forwarding) .def_readwrite("ignore_broker_conf", &broker::broker_options::ignore_broker_conf) .def_readwrite("use_real_time", &broker::broker_options::use_real_time); @@ -336,6 +348,14 @@ PYBIND11_MODULE(_broker, m) { [](broker::endpoint& ep, const std::string& name) -> broker::expected { return ep.attach_clone(name); }) + .def("await_peer", + [](broker::endpoint& ep, const std::string& node_str) { + return ep.await_peer(node_from_str(node_str)); + }) + .def("await_peer", + [](broker::endpoint& ep, const std::string& node_str, broker::timespan timeout) { + return ep.await_peer(node_from_str(node_str), timeout); + }) ; } diff --git a/bindings/python/broker/__init__.py b/bindings/python/broker/__init__.py index 95d61282..bad86c44 100644 --- a/bindings/python/broker/__init__.py +++ 
b/bindings/python/broker/__init__.py @@ -268,6 +268,14 @@ def __exit__(self, type, value, traceback): self._parent = None self._store = None + def __enter__(self): + return self + + def __exit__(self, type, value, traceback): + self._store.reset() + self._parent = None + self._store = None + def name(self): return self._store.name() @@ -364,6 +372,16 @@ def pop(self, key, expiry=None): def _to_expiry(self, e): return (_broker.OptionalTimespan(_broker.Timespan(float(e))) if e is not None else _broker.OptionalTimespan()) + def await_idle(self, timeout=None): + if timeout: + return self._store.await_idle(_broker.Timespan(float(timeout))) + else: + return self._store.await_idle() + + # Points to the "owning" Endpoint to make sure Python cleans this object up + # before destroying the endpoint. + _parent = None + class Endpoint(_broker.Endpoint): def make_subscriber(self, topics, qsize = 20): topics = _make_topics(topics) @@ -396,7 +414,6 @@ def attach_master(self, name, type=None, opts={}): bopts = _broker.MapBackendOptions() # Generator expression doesn't work here. 
for (k, v) in opts.items(): bopts[k] = Data.from_py(v) - s = _broker.Endpoint.attach_master(self, name, type, bopts) if not s.is_valid(): return None @@ -416,6 +433,12 @@ def attach_clone(self, name): result._parent = self return result + def await_peer(self, node, timeout=None): + if timeout: + return _broker.Endpoint.await_peer(self, node, _broker.Timespan(float(timeout))) + else: + return _broker.Endpoint.await_peer(self, node) + def __enter__(self): return self @@ -551,48 +574,48 @@ def _try_bytes_decode(b): # class Store: # def __init__(self, handle): # self.store = handle -# +# # def name(self): # return self.store.name() -# +# # class Mailbox: # def __init__(self, handle): # self.mailbox = handle -# +# # def descriptor(self): # return self.mailbox.descriptor() -# +# # def empty(self): # return self.mailbox.empty() -# +# # def count(self, n = -1): # return self.mailbox.count(n) -# -# +# +# # class Message: # def __init__(self, handle): # self.message = handle -# +# # def topic(self): # return self.message.topic().string() -# +# # def data(self): # return self.message.data() # TODO: unwrap properly -# +# # def __str__(self): # return "%s -> %s" % (self.topic(), str(self.data())) -# -# +# +# # class BlockingEndpoint(Endpoint): # def __init__(self, handle): # super(BlockingEndpoint, self).__init__(handle) -# +# # def subscribe(self, topic): # self.endpoint.subscribe(topic) -# +# # def unsubscribe(self, topic): # self.endpoint.unsubscribe(topic) -# +# # def receive(self, x): # if x == Status: # return self.endpoint.receive() @@ -600,7 +623,7 @@ def _try_bytes_decode(b): # return Message(self.endpoint.receive()) # else: # raise BrokerError("invalid receive type") -# +# # #def receive(self): # # if fun1 is None: # # return Message(self.endpoint.receive()) @@ -611,29 +634,29 @@ def _try_bytes_decode(b): # # return self.endpoint.receive_msg(fun1) # # raise BrokerError("invalid receive callback arity; must be 1 or 2") # # return 
self.endpoint.receive_msg_or_status(fun1, fun2) -# +# # def mailbox(self): # return Mailbox(self.endpoint.mailbox()) -# -# +# +# # class NonblockingEndpoint(Endpoint): # def __init__(self, handle): # super(NonblockingEndpoint, self).__init__(handle) -# +# # def subscribe(self, topic, fun): # self.endpoint.subscribe_msg(topic, fun) -# +# # def on_status(fun): # self.endpoint.subscribe_status(fun) -# +# # def unsubscribe(self, topic): # self.endpoint.unsubscribe(topic) -# -# +# +# # class Context: # def __init__(self): # self.context = _broker.Context() -# +# # def spawn(self, api): # if api == Blocking: # return BlockingEndpoint(self.context.spawn_blocking()) @@ -641,4 +664,4 @@ def _try_bytes_decode(b): # return NonblockingEndpoint(self.context.spawn_nonblocking()) # else: # raise BrokerError("invalid API flag: " + str(api)) -# +# diff --git a/bindings/python/store.cpp b/bindings/python/store.cpp index 59361b5e..81262b05 100644 --- a/bindings/python/store.cpp +++ b/bindings/python/store.cpp @@ -48,11 +48,13 @@ void init_store(py::module& m) { .def("increment", &broker::store::increment) .def("decrement", &broker::store::decrement) .def("append", &broker::store::append) - .def("insert_into", (void (broker::store::*)(broker::data, broker::data, broker::optional) const) &broker::store::insert_into) - .def("insert_into", (void (broker::store::*)(broker::data, broker::data, broker::data, broker::optional) const) &broker::store::insert_into) + .def("insert_into", (void (broker::store::*)(broker::data, broker::data, broker::optional)) &broker::store::insert_into) + .def("insert_into", (void (broker::store::*)(broker::data, broker::data, broker::data, broker::optional)) &broker::store::insert_into) .def("remove_from", &broker::store::remove_from) .def("push", &broker::store::push) .def("pop", &broker::store::pop) + .def("await_idle", [](broker::store& st) { return st.await_idle(); }) + .def("await_idle", [](broker::store& st, broker::timespan timeout) { return 
st.await_idle(timeout); }) .def("reset", &broker::store::reset); // Don't need. diff --git a/caf b/caf index 212a9e77..b34f70a1 160000 --- a/caf +++ b/caf @@ -1 +1 @@ -Subproject commit 212a9e77475e181cb281eefe95606f19bc2e27d9 +Subproject commit b34f70a1d012047184b940c3ec06219f5e52b43f diff --git a/configure b/configure index 46abe49c..1446a0ad 100755 --- a/configure +++ b/configure @@ -36,7 +36,7 @@ Usage: $0 [OPTION]... [VAR=VALUE]... --python-prefix=PATH explicit install directory for Python bindings [PATH/lib/python/site-packages] - Optional Features: + Optional Features (on by default): --disable-python don't try to build python bindings --disable-docs don't try to build local documentation --disable-tests don't try to build unit tests @@ -45,6 +45,10 @@ Usage: $0 [OPTION]... [VAR=VALUE]... path to python-config executable --with-zeek=PATH path to Zeek executable for interoperability tests + Optional Features (off by default): + --enable-micro-benchmarks + build micro benchmarks (requires Google Benchmark) + Required Packages in Non-Standard Locations: --with-caf=PATH path to C++ Actor Framework installation --with-openssl=PATH path to OpenSSL install root @@ -120,7 +124,7 @@ while [ $# -ne 0 ]; do append_cache_entry CMAKE_BUILD_TYPE STRING $optarg ;; --enable-debug) - append_cache_entry ENABLE_DEBUG BOOL true + append_cache_entry BROKER_ENABLE_DEBUG BOOL true ;; --enable-static) append_cache_entry ENABLE_STATIC BOOL true @@ -158,6 +162,9 @@ while [ $# -ne 0 ]; do --with-zeek=*) append_cache_entry ZEEK_EXECUTABLE PATH $optarg ;; + --enable-micro-benchmarks) + append_cache_entry BROKER_ENABLE_MICRO_BENCHMARKS BOOL true + ;; *) echo "Invalid option '$1'. Try $0 --help to see available options." 
exit 1 diff --git a/doc/_images/core-actor-uml.graffle b/doc/_images/core-actor-uml.graffle new file mode 100644 index 00000000..0127eb52 Binary files /dev/null and b/doc/_images/core-actor-uml.graffle differ diff --git a/doc/_images/core-actor-uml.svg b/doc/_images/core-actor-uml.svg new file mode 100644 index 00000000..b96b5b65 --- /dev/null +++ b/doc/_images/core-actor-uml.svg @@ -0,0 +1,457 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Produced by OmniGraffle 7.17.1\n2020-07-25 08:43:07 +0000 + + Canvas 1 + + Layer 1 + + + + + + + + + + + Note + + + + + Stores routing + information and + subscriptions. + + + + + + + + + + + Note + + + + + Provides + messaging to/from + remote peers. + + + + + + + + + + + + + + Note + + + + + Connects to + remote peers with + automatic retries. + + + + + + + + + + + + + + Note + + + + + Manages clones + and masters for + data stores. + + + + + + + + + + + + + + Note + + + + + Publishes + statuses/errors to + well-known topics. + + + + + + + + + + + + + + Note + + + + + Records + meta data for + benchmarking. + + + + + + + + + + + + + + Note + + + + + Instantiates + class hierarchy & + creates behavior. + + + + + + + + + + + Note + + + + + Wraps a core + manager and adds + setup logic. + + + + + + + + + + + Note + + + + + Executes the + core logic of an + endpoint + . 
+ + + + + + + + + + + + Class Name + + + + alm::peer + + + + Operations + + + + + tbl(): routing_table_type + + filter(): filter_type + + timestamp(): uint64_t + + peer_filter(id: peer_id_type): filter_type + + subscribe(filter: filter_type) + + publish<T>(msg: T) + + distance_to(peer: peer_id_type): optional<size_t> + + handle_filter_update(path: vector<peer_id>, + filter: filter_type, + timestamp: uint64_t) + + handle_publication(msg: message_type) + + ship(msg: message_type) + + ship(msg: message_type, receiver: peer_id_type) + + ship_locally<T>(msg: T) + + peer_connected(peer: peer_id_type, + hdl: communication_handle_type) + + peer_disconnected(peer: peer_id_type, + hdl: communication_handle_type, + reason: error) + + peer_removed(peer: peer_id_type, + hdl: communication_handle_type) + + cannot_remove_peer<T>(x: T) + + peer_unavailable<T>(x: T) + + + + + + Operations + + + + + self(): caf::event_based_actor + + connected_to(hdl: communication_handle_type): bool + + start_peering(peer: peer_id_type, + hdl: communication_handle_type, + rp: caf::response_promise) + + ship_locally<T>(msg: T) + + peer_connected(peer: peer_id_type, + hdl: communication_handle_type) + + + + Class Name + + + + alm::stream_transport + + + + + + Operations + + + + + try_peering(addr: network_info, + rp: caf::response_promise, + retry_count: uint32_t) + + try_publish(addr: network_info, msg: data_message, + rp: caf::response_promise) + + cache(): detail::network_cache + + + + Class Name + + + + mixin::connector + + + + + + Operations + + + + + has_remote_master(name: string): bool + + masters(): map<string, caf::actor> + + clones(): multimap<string, caf::actor> + + attach_master(name: string, type: backend, + opts: backend_options) + : caf::result<caf::actor> + + attach_clone(name: string, + resync_interval: double, + stale_interval: double, + mutation_buffer_interval: double) + : caf::result<caf::actor> + + get_master(name: string): caf::result<caf::actor> + + snapshot(name: string, 
clone: caf::actor) + + detach_stores() + + + + Class Name + + + + mixin::data_store_manager + + + + + + Operations + + + + + peer_connected(peer: peer_id_type, + hdl: communication_handle_type) + + peer_disconnected(peer: peer_id_type, + hdl: communication_handle_type, + reason: error) + + peer_removed(peer: peer_id_type, + hdl: communication_handle_type) + + cannot_remove_peer<T>(x: T) + + peer_unavailable<T>(x: T) + + + + Class Name + + + + mixin::notifier + + + + + + Operations + + + + + subscribe(filter: filter_type) + + ship(msg: message_type) + + ship(msg: message_type, receiver: peer_id_type) + + ship_locally<T>(msg: T) + + peer_connected(peer: peer_id_type, + hdl: communication_handle_type) + + + + Class Name + + + + mixin::recorder + + + + + + Operations + + + + + make_behavior(): caf::behavior + + id(): peer_id_type + + + + Class Name + + + + core_manager + + + + + + Operations + + + + + mgr: caf::intrusive_ptr<core_manager> + + + + Class Name + + + + core_state + + + + + + Operations + + + + + state: core_state + + + + Class Name + + + + caf::stateful_actor<core_state> + + + + + + diff --git a/doc/_images/endpoint.graffle b/doc/_images/endpoint.graffle new file mode 100644 index 00000000..8be569f0 Binary files /dev/null and b/doc/_images/endpoint.graffle differ diff --git a/doc/_images/endpoint.svg b/doc/_images/endpoint.svg new file mode 100644 index 00000000..019b0d33 --- /dev/null +++ b/doc/_images/endpoint.svg @@ -0,0 +1,139 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Produced by OmniGraffle 7.17.1\n2020-07-25 08:37:06 +0000 + + Canvas 1 + + Layer 1 + + + + + broker::endpoint + + + + + + + caf::actor_system + + + + + + + + + + + Incoming Messages + + + + + Outgoing Messages + + + + + + + Core + Actor + + + + + + + + + + + + + + + + + + + + + + Subscriber + + + + + + + + + + + + + + + + + Publisher + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/_images/gateway.drawio b/doc/_images/gateway.drawio new 
file mode 100644 index 00000000..01cc5de6 --- /dev/null +++ b/doc/_images/gateway.drawio @@ -0,0 +1 @@ +7Vpbc5s4FP41fqwHEGB4bBI7bbY7m2l6ye5LRwEZtMHII+TY7q+vZCRuAsdtcEw9fTI6knX5vqPvII5G4HKxuaZwGf9NQpSMLCPcjMDVyLJM03D5j7BspcUwQG6JKA6lrTTc4e9INZTWFQ5RVmvICEkYXtaNAUlTFLCaDVJK1vVmc5LUR13CCGmGuwAmuvUrDlmsFub6ZcU7hKNYDu1Zk7xiAVVjuZIshiFZV0xgOgKXlBCWPy02lygR6Clc8v/NOmqLiVGUskP+YN8sP4ZfP7ro8T68RgT67P6/N0B28wSTlVzxdMMQTWEiZ822CgreHUedFy74UpbCGCRkxXu/WMeYobslDIRxzT2B22K2SHjJ5I8RhSHms7wkCaG7vsBs5vsA8Lo5ThJlT0kqep/zYd7Jvz4hyjBn422Co5TbGBF9Q1lK0JyJP5CU3cl5Gqqce5Lp8rKOlFo17x1tKiaJHIdngRjd8iaq1pYsSj9WpK5LnwDKFlfcwfKkEUo/jIquS6r4g2TrZ5gDGnPv0xMzp3ihOQAd7L0CXY4xNLomGlt7WHqGFZgtc6mb4w0KG1RwikKIvHkg2GaUPKJKjRt46GHeD8Yc5BrGxRapgGy3YGwfC2JfRzTkWi6L0kUpWaWhAG3nehVQ0Qaze2kWz/+K57EjSimf3b1qJgrVuvCtiDLlANwyw2Liu57+R4xtpXPDFSPcRCiLSUT4Pv1AxH4oRr/aVIa/2qqCGLCs2pVUXSeLGVnRAO3BylSxFNIIsT0NLRnABZR7vYKiBDL8VA+bvXOsXgt6JNnskcZXIGQyLD7094ffXddAI3Z4J5Y10zo7iK0GxKZ/aozBkWSlNXL8xnFjcqBKKacdikzZZ7eHgDm0PeRokD67YczKhjGqG8YYHe1V6wUx+lDvV0eLoXi/24u6mV3qZhxL3V6BKtMbFlXe2QmVaw9NqPo5KNa0ayAKpYTneYVyBuX2at5n7Pan9vo2RI/3ijuU/WAfGgbsYe2H/s8jxhmRZQ6LLOfsxGtiDUy83LODuPkN5OQQ+wdAXNGgcvNPS2tDkloFokVIqtL1jAzlnKhUrNXG0mzmGcbLPnNYBx8fhvU5FhzyHvWHxI7AMxQS9Y9V15ChNdxqXHKtYXXo60CqjHp3qjZAImk80nO1CxyGSZea1j2kuNmwKzwiFsSjo6V27eZx0tCF02sRTutox0k9tfv+pMFJJe2vMOVdYSLYXKOMjfbn83vgxnEHlhK29K00HRw3CL4GN2Bw3BxwJYKsWIJTjqS63CVEJYRZXGiPkq0P8AEltyTDEtQHwhhZtOha/QZRIX9VCZV3ZBabSFxqG8N1BsbBKuP9IfotklKssT9zPIdLt06kVNxulTyGKrbceGlTxT7uu9CVsf7sgpt/bi4+LfCt8SX8y35jalz+1AG218NoI64fOfNlv/DdQf71lmA+xfK44Dtjz5l4rg0mruV6vt3YzfbYtC0TOK4FJqC4d6kGyJcn+yyp1YdpXuqZNBwkX7/W0c5HitX+utvYL3QblZEoL+70nnHdk1ctvrvkw1c/k3XmaXt0UXVKPjA727uPAmO/jzo9+WgzkjVF7Jd9lBfL+7h58/JaM5j+AA== \ No newline at end of file diff --git a/doc/_images/gateway.png b/doc/_images/gateway.png new file mode 100644 index 00000000..ff2b14e8 Binary files /dev/null and b/doc/_images/gateway.png differ diff --git a/doc/comm.rst b/doc/comm.rst index da4844d9..38c73dab 100644 --- 
a/doc/comm.rst +++ b/doc/comm.rst @@ -25,6 +25,8 @@ that have no further semantics attached. It's up to senders and receivers to agree on a specific layout of messages (e.g., a set of doubles for a measurement series). +.. _endpoint: + Endpoints ~~~~~~~~~ @@ -221,26 +223,6 @@ is dependent on the status code enum ``sc``. For example, all ``sc::peer_*`` status codes include an ``endpoint_info`` context as well as a message. -Forwarding ----------- - -In topologies where multiple endpoints are connected, an endpoint -forwards incoming messages to peers by default for topics that it is -itself subscribed to. One can configure additional topics to forward, -independent of the local subscription status, through the method -``endpoint::forward(std::vector)``. One can also disable -forwarding of remote messages altogether through the Broker -configuration option ``forward`` when creating an endpoint. - -When forwarding messages Broker assumes all connected endpoints -form a tree topology without any loops. Still, to avoid messages -circling indefinitely if a loop happens accidentally, Broker's message -forwarding adds a TTL value to messages, and drops any that have -traversed that many hops. The default TTL is 20; it can be changed by -setting the Broker configuration option ``ttl``. Note that it is the -first hop's TTL configuration that determines a message's lifetime -(not the original sender's). - .. _zeek_events_cpp: Exchanging Zeek Events @@ -278,3 +260,129 @@ Zeek: received pong[2] received pong[3] received pong[4] + +Gateways +-------- + +Broker was designed with peer-to-peer communication in mind. All endpoints in +the network form a single publish/subscribe layer. This implies that each +endpoint is aware of every other endpoint in the network as well as what topics +they have subscribed to. + +This level of transparency enables source routing, but it comes at a cost. +Endpoints flood subscriptions and topology changes to the entire network. 
The +volume of flooded messages remains small, as long as primarily endpoints with +high availability and a stable set of subscriptions join the network. However, +short-lived or unstable endpoints may increase the amount of messages in the +network quickly. Furthermore, the more endpoints join the network, the more +state and bookkeeping overhead accumulates. + +The overhead becomes especially prominent on endpoints that join the network +only to publish data but were placed on the edges of the network. Such endpoints +usually end up sending all---or nearly all---of their messages to another, +well-connected endpoint that distributes the messages. Nevertheless, these +producing endpoints still have to flood their subscriptions to the entire +network and get stored in all routing tables. In the Zeek ecosystem, the +`Zeek Agent `_ fits this +exact role. Agents run at the edge of the network and ideally should not consume +more network bandwidth and CPU cycles than necessary. + +Gateways provide a way to separate the well-connected "inner" endpoints from +endpoints at the edges that generally cannot contribute to the overall +connectivity of the network but still incur messaging and bookkeeping overhead. + +Topology +~~~~~~~~ + +Gateways separate the overlay into two domains: *external* and *internal*. The +external domain consists of stable, well-connected endpoints that build the core +of the publish/subscribe layer. The internal domain consists of endpoints that +need no knowledge of the entire overlay, because all ways would pass through the +gateway anyway. This means, the gateway is the only way in or out for endpoints +in the internal domain, as illustrated in the figure below. + +.. figure:: _images/gateway.png + :align: center + +Aside from forwarding messages between the two domains, gateways render all +endpoints of the internal domain *completely opaque* to endpoints in the +external domain and vice versa. 
+ +To endpoints in the external domain, a gateway appears as the regular endpoint +``E``. It subscribes to all topics that were subscribed by any endpoint in the +internal domain and all messages published in the internal domain appear as if +``E`` was the publisher. + +The endpoint in the internal domain, ``I``, is the mirror image of ``E``: it +hides all endpoints from the external domain. + +The two endpoints ``E`` and ``I`` actually exist, i.e., the gateway starts both +endpoints in the same process and creates a "shortcut" between the two. Every +subscription or published event on one gets forwarded to the other. However, +``E`` and ``I`` are not aware of each other and the forwarded events and +subscriptions appear as if they had a local ``subscriber`` or ``publisher``. + +.. warning:: + + The endpoints ``E`` and ``I`` use the *same ID*. When setting up a gateway, + make sure that no other endpoint provides connectivity between the internal + and the external domain. Otherwise, ``E`` could receive messages from ``I`` + and vice versa. Since they share one ID, endpoints in the network would + receive contradictory messages from what appears to be the same endpoint. + +Setup +~~~~~ + +Broker includes the standalone tool ``broker-gateway``. When started, it creates +the two endpoints ``E`` and ``I`` in the same process. Each of the two endpoints +listens to its own port for incoming peerings. A minimal setup would only set +the two ports, as shown below. + +.. code-block:: none + + broker-gateway --internal.port=8080 --external.port=9090 + +Users can also configure the gateway to connect to a list of predefined peers on +startup. For example: + +.. 
code-block:: none + + broker-gateway --internal.port=8080 \ + --internal.peers=[tcp://mars:1234, tcp://venus:2345] \ + --external.port=9090 \ + --external.peers=[tcp://proxima-centauri:3456] + +The invocation above would listen on port 8080 for incoming peerings in the +internal domain and try to connect to ``mars`` on port ``1234`` as well as to +``venus`` on port 2345. In the external domain, the gateway would listen on port +9090 and try to connect to ``proxima-centauri`` on port 3456. + +Instead of using the command line, users could also provide a ``broker.conf`` +file with the following content: + +.. code-block:: none + + internal { + port = 8080 + peers = [ + <tcp://mars:1234>, + <tcp://venus:2345>, + ] + } + external { + port = 9090 + peers = [ + <tcp://proxima-centauri:3456>, + ] + } + +There is also a third parameter for the domains: ``disable-forwarding``. In +particular, setting ``internal.disable-forwarding`` to ``true`` causes the +gateway to not only isolate endpoints in the internal domain from endpoints in +the external domain, but also endpoints *within* the internal domain from each +other. + +In setups where all endpoints of the internal domain connect only to the gateway +and do not need to interact with each other, setting this flag reduces any +messaging to the bare minimum by leading each endpoint in the internal domain to +believe that there is exactly one other endpoint in the network---the gateway. diff --git a/doc/data.rst b/doc/data.rst index ccccf3fc..2f547835 100644 --- a/doc/data.rst +++ b/doc/data.rst @@ -157,7 +157,7 @@ It is a type alias for ``std::set``. Table ~~~~~ -A ``table`` is an associative array with keys and values of type ``data``. That +A ``table`` is an associative array with keys and values of type ``data``. That is, it maps ``data`` to ``data``. It is a type alias for ``std::map<data, data>``. diff --git a/doc/devs.rst b/doc/devs.rst new file mode 100644 index 00000000..745e7a6d --- /dev/null +++ b/doc/devs.rst @@ -0,0 +1,887 @@ +.. 
_devs: + +Developer Guide +=============== + +Broker is based on the `C++ Actor Framework (CAF) +`_. Experience with CAF certainly helps, but a +basic understanding of the actor model as well as publish/subscribe suffices for +understanding (and contributing to) Broker. + +In the code base of Broker, we frequently use templates, lambdas, mixins (static +polymorphism), etc. as well as common C++ idioms such as CRTP and RAII. +Developers should bring *at least* advanced C++ skills. + +Architecture +------------ + +From a user perspective, a Broker endpoint is the primary component in the API +(see :ref:`endpoint`). Internally, an endpoint is a container for an actor system +that hosts the *core actor* plus any number of *subscribers* and *publishers*. +The figure below shows a simplified architecture of Broker in terms of actors. + +.. figure:: _images/endpoint.svg + :align: center + :alt: Simplified architecture of a Broker endpoint in terms of actors. + +A Broker endpoint always contains exactly one core actor. From the perspective +of the implementation, this actor is the primary component. It manages +publishers and subscribers, establishes peering relations, forwards messages to +remote peers, etc. + +Because the core actor has many roles to fill, its implementation spreads across +several classes. The following UML class diagram shows all classes involved in +implementing the core actor with an excerpt of the relevant member functions. + +.. figure:: _images/core-actor-uml.svg + :align: center + :alt: All classes involved in implementing the core actor. + + +In a distributed setting, each core actor represents one *peer*. + +Implementation +-------------- + +Endpoints, master stores, and clone stores (see :ref:`overview`) all map to +actors. Endpoints wrap the `actor system`_ and the main component: the core +actor (see architecture_). The core actor is implemented using the mixins we +discuss in this section. + +Mixins +~~~~~~ + +Mixins (cf. 
`Wikipedia:Mixin `_) allow +Broker to avoid the +`diamond problem `_ by +binding the base class late in order to instantiate an entire class hierarchy +using single inheritance only. + +Mixins usually follow this scaffold: + +.. code-block:: C++ + + template <class Base> + class my_mixin : public Base { + public: + + using super = Base; + + using extended_base = my_mixin; + + // ... implementation ... + }; + +Given two mixins ``M1`` and ``M2``, we can instantiate a class hierarchy +for ``my_class`` as follows: + +.. code-block:: C++ + + class my_class : public M1<M2<my_base>> { + // ... + }; + +By using this technique, we avoid any ambiguity that multiple inheritance could +cause. ``M1`` overrides/hides functions of ``M2``, which in turn can +override/hide functions of ``my_base``. + +Lifting +~~~~~~~ + +Broker has a small utility called ``detail::lift`` (implemented in +``broker/detail/lift.hh``) for lifting member functions into message handlers +(callbacks). Its sole purpose is to avoid writing repeated lambda expressions +that only wrap member function calls. Consider this small example: + +.. code-block:: C++ + + struct calculator { + int add(int x, int y) const noexcept { return x + y; } + + int sub(int x, int y) const noexcept { return x - y; } + + // ... + + caf::behavior make_behavior() { + using detail::lift; + return { + lift<atom::add>(*this, &calculator::add), + lift<atom::sub>(*this, &calculator::sub), + }; + } + }; + +By using ``lift``, we avoid repeating the arguments over and over again in +``make_behavior``. The implementation is equivalent to writing: + +.. code-block:: C++ + + caf::behavior make_behavior() { + return { + [this](atom::add, int x, int y) { return add(x, y); }, + [this](atom::sub, int x, int y) { return sub(x, y); }, + }; + } + +We can pass any number of template parameters to ``lift`` for prefixing the +message with atoms or leave the pack empty to dispatch on the member function +signature only. + +Note: lifting overloaded member functions does not work in this concise syntax. 
+In order to tell the compiler *which* overload to pick, we need to either store +the member function pointer in a properly typed variable first or use +``static_cast``. + +The Core Actor +~~~~~~~~~~~~~~ + +As the name suggests, this actor embodies the central hub for the +publish/subscribe communication. Everything flows through this actor: local +subscriptions, peering requests, local and remote publications, control messages +for the stores, and so on. However, you might be surprised when looking at +``core_actor.cc``, as it contains barely any code. Exactly because the core +actor has so many roles to fill, we have separated it into many functional +blocks. Most blocks are implemented as mixins_ in order to make each functional +block testable and reusable while avoiding runtime costs for the decoupling. + +``alm::peer`` +************* + +This class models a Broker peer in the network. It implements the management of +subscriptions, maintains a routing table for sending data to peers, and provides +a set of virtual functions that subtypes may override to add or change +functionality. + +The virtual functions are: + +- ``subscribe`` +- ``handle_filter_update`` +- ``handle_path_revocation`` +- ``handle_publication`` +- ``peer_discovered`` +- ``peer_connected`` +- ``peer_disconnected`` +- ``peer_removed`` +- ``peer_unreachable`` +- ``cannot_remove_peer`` +- ``peer_unavailable`` +- ``shutdown`` +- ``send`` (pure) +- ``ship_locally`` (pure) + +Please refer to the Doxygen documentation for a detailed explanation as well as +parameters. The important thing to note is that the peer allows extending its +basic functionality by extending the callbacks. Any subtype that wishes to +extend functionality of the peer should always call the function on its super +type at the end. + +For example, the following code shows how the ``notifier`` extends the +``peer_connected`` callback: + +.. 
code-block:: C++ + + void peer_connected(const endpoint_id& peer_id, + const caf::actor& hdl) override { + BROKER_TRACE(BROKER_ARG(peer_id) << BROKER_ARG(hdl)); + emit(peer_id, sc_constant<sc::peer_added>(), "handshake successful"); + super::peer_connected(peer_id, hdl); + } + +The class ``peer`` implements `publishing data`_, but it has no code for +actually sending messages. The peer leaves it to the derived types (in +particular |alm::stream_transport|_) to provide an implementation for ``send``. + +Most functions in the ``peer`` are straightforward, but one member function in +particular is worth discussing: + +.. code-block:: C++ + + template <class... Fs> + caf::behavior make_behavior(Fs... fs) + +This member function returns the behavior for initializing the actor that +implements the peer, i.e., the core actor (leaving unit tests aside). A behavior +is a set of message handlers (callbacks), usually lambda expressions or `lifted +member functions `_. Each class or `mixin `_ in the +inheritance graph can add additional message handlers to the actor. Each mixin +in the "chain" that registers additional message handlers implements +``make_behavior`` with this exact signature. The template parameter pack +``fs...`` holds the message handlers registered by a subtype. Each mixin forwards +this pack to its base type along with its own handlers. + +Each class should document the message handlers it adds to the actor. The sum of +all message handlers defines the messaging interface of the core actor. + +``alm::stream_transport`` +************************* + +This class provides the default communication backend for |alm::peer|_ and +connects peers by using two CAF stream paths (one for each direction, because +paths are unidirectional). + +The stream transport is a CAF `stream manager`_, i.e., it inherits from +``caf::stream_manager``. 
Aside from multiplexing the streaming traffic for data +and command messages, this class also implements a handshake to establish the +CAF streams between two peers as depicted below. + +.. code-block:: none + + +-------------+ +-------------+ + | Originator | | Responder | + +------+------+ +------+------+ + | | + endpoint::peer | | + +-------------->+ | + | | + +---+ | + | | try_peering | + +<--+ | + | | + | (ping, endpoint_id, actor) | + +--------------------------------->+ + | | + | (pong, endpoint_id, actor) | + +<---------------------------------+ + | | + +---+ | + | | start_peering | + +<--+ | + | | + | (peer, init, endpoint_id, actor) | + +--------------------------------->+ + | | + | +---+ + | | | handle_peering_request + | +<--+ + | (caf::open_stream_msg) | + +<---------------------------------+ + | | + +---+ | + | | handle_peering_handshake_1 | + +<--+ | + | | + | (caf::open_stream_msg) | + | (caf::upstream_msg::ack_open) | + +--------------------------------->+ + | | + | +---+ + | | | handle_peering_handshake_2 + | +<--+ + | (caf::upstream_msg::ack_open) | + +<---------------------------------+ + | | + +---+ +---+ + | | peer_added | | peer_added + +<--+ +<--+ + | | + +The diagram above depicts message flow between two peering Broker nodes. +Messaging for resolving network information, establishing connections, etc. +were omitted for brevity. + +After the user calls ``endpoint::peer``, the Originator sends a ping to the +Responder before starting the actual peering process. This step exists only for +populating the network cache on both ends. + +The ``(peer, init, ...)`` message triggers the actual handshake by invoking +``handle_peering_request``. This step creates a CAF stream from the Originator +to the Responder. 
+ +After receiving the ``open_stream_msg``, the Originator calls +``handle_peering_handshake_1`` which emits two messages: (1) another +``open_stream_msg`` to establish a CAF stream from the Responder to the +Originator (this message invokes ``handle_peering_handshake_2`` at the +Responder) and (2) an ``ack_open`` message for the CAF stream. + +Both sides call ``peer_added`` after receiving the final message in the +handshake process. At this point, the Broker endpoints are fully connected. + +.. note:: + + The Responder may process ``open_stream_msg`` and ``upstream_msg::ack_open`` in + any order. Since there is a lot of bookkeeping involved during the handshake, + this is tracked internally with a finite-state machine + (see ``detail::peer_handshake``). + +``mixin::connector`` +******************** + +The ``connector`` augments peers with a ``(atom::peer, network_info) -> void`` +message handler. This allows endpoints to hide the details for connecting to +remote nodes by offering an API that operates on network addresses. Internally, +each ``connector`` caches known network addresses and lazily connects to new +ones. When connection attempts fail, the ``connector`` automatically retries +to connect for a preconfigured number of times. + +``mixin::data_store_manager`` +***************************** + +This mixin adds state and message handlers to peers for attaching +:ref:`data store frontends `, i.e., *masters* and *clones*. + +``mixin::notifier`` +******************* + +This mixin hooks into the callbacks of |alm::peer|_ to publish ``status`` and +``error`` messages to ``topics::statuses`` and ``topics::errors``, respectively. + +``mixin::recorder`` +******************* + +The ``recorder`` augments peers with additional logic for recording meta data at +runtime. Recording is optional and depends on whether the user sets the +configuration parameter ``broker.recording-directory`` to a valid directory.
+ +After recording meta data for a Broker application, users can use the recording +as input to broker-cluster-benchmark_. + +``core_manager`` +**************** + +All mixins as well as the classes living in the namespace ``alm`` are templates. +The core manager instantiates all templates, defines the class hierarchy and +provides an ``id()`` member function that is required by |alm::peer|_. + +``core_state`` +************** + +By deriving from ``alm::stream_transport``, the ``core_manager`` becomes a +`stream manager`_. The manager internally multiplexes all streams necessary. +Hence, the core actor only needs a single instance of ``core_manager``. This +state class simply defines a member ``mgr`` that holds a ``core_manager`` and +provides the necessary glue code we need for using actors of type +``caf::stateful_actor`` (aliased as ``core_actor_type``). + +Application-Layer Multicast (ALM) +--------------------------------- + +Broker follows a peer-to-peer (P2P) approach in order to make setting up and +running a cluster convenient and straightforward for users. At the same time, we +wish to avoid "too much magic". Ultimately, we want the users to decide the +topology that best fits their deployment. + +Theoretically, IP multicast offers the most efficient option for publishing +events. However, IP multicast implements neither reliability nor encryption by +default. Further, IP multicast is not routed in the Internet. Extensions for +reliability and security exist, but we cannot rely on Broker users to provide +this infrastructure. To run "out of the box", we set up an overlay network on +top of TCP connections that minimizes message duplication through a custom +routing protocol implementing application-layer multicast. + +Goals + - Connections are secured/authenticated through TLS & certificates. + - Broker puts no constraints on topologies, i.e., users may establish all + peering relations that make sense in their deployment without having to + form a tree. 
Loops are explicitly allowed. + - Broker detects (and slows down) any publisher that produces data at a faster + rate than the subscribers or the network can handle. + - Each peer can publish to each topic. No central coordinator or root node + exists. + - Broker minimizes messages in the network with (application-layer) multicast + in order to scale effectively. + +Non-Goals + - Dynamic connection management and auto-balancing of a distribution tree. + While Broker generally follows a P2P philosophy, we still wish to give users + full control over Broker's topology. Also, Broker does not target + Internet-wide deployments with very high churn rates and unreliable nodes. + +Peers +~~~~~ + +Each Broker peer in the network has: + +- A globally unique ID. Currently, we use ``caf::node_id`` to identify peers. + CAF computes this ID automatically by combining a 160-bit hash value (based on + a seed plus various node-specific information) with the OS-specific process + ID. +- A filter for incoming messages. The core actor combines the filters of all + subscribers running in the endpoint to a single filter. The core actor removes + all redundant entries. For example, if the user starts subscribers with the + filters ``[/zeek/event/foo]``, ``[/zeek/event/bar]``, and ``[/zeek/event]``, + then core actor combines these three filters to ``[/zeek/event]``. Due to the + prefix matching, this one entry implicitly includes ``/zeek/event/foo`` and + ``/zeek/event/bar``. When distributing incoming messages to subscribers, each + individual subscriber of course only receives messages that match its filter. +- A logical clock (`Lamport timestamps + `_). This 64-bit integer + enables detection of repeated messages and ordering of events. Whenever a peer + sends a message to others, it increments its logical time and includes the + current value in the message. This timestamp is crucial for detecting outdated + or repeated subscriptions in the `Subscription Flooding`_. 
+- A routing table with paths to *all* known peers in the network. +- A ``peer_filters_`` map of type ``map<endpoint_id, filter_type>`` for storing + the current filter of each known peer. + +Timestamps +~~~~~~~~~~ + +Broker has two types for modelling logical clocks: + +#. ``broker::alm::lamport_timestamp`` +#. ``broker::alm::vector_timestamp`` + +The former type is a thin wrapper (AKA *strong typedef*) for a 64-bit unsigned +integer. It provides ``operator++`` as well as the comparison operators. Each +peer keeps its own Lamport timestamp. The peer increments the timestamp whenever +it changes its routing table or its filter. + +The latter type is a list of ``lamport_timestamp``. Broker uses vector +timestamps to version paths. + +Routing Tables +~~~~~~~~~~~~~~ + +A routing table maps peer IDs to versioned paths. Conceptually, the routing +table maps each peer to a set of paths that lead to it. + +.. code-block:: C++ + + using path = std::vector<endpoint_id>; + using versioned_paths = std::map<path, vector_timestamp>; + using routing_table = std::map<endpoint_id, versioned_paths>; + +.. note:: + + The actual implementation of the routing table is slightly more complex, since + it also maps the peer IDs to communication handles (needed by CAF for message + passing). + +Source Routing +~~~~~~~~~~~~~~ + +Broker uses source routing. Messages between peers contain the forwarding path, +encoded as an ``alm::multipath`` object. + +The ``multipath`` class implements a recursive data structure for encoding +branching paths (directed acyclic graphs). For example: + +.. code-block:: text + + A ────> B ─┬──> C + └──> D ────> E + +In this scenario, A sends a message to B, which then forwards to C and D. After +receiving the message, D also forwards to E. This gives senders full control over +the path that a message travels in the network. + +Furthermore, a message also contains IDs of receivers. Not every peer that +receives a message subscribed to its content.
Hence, peers that are not in the +list of receivers only forward the message without inspecting the payload. + +Subscription Flooding +~~~~~~~~~~~~~~~~~~~~~ + +Whenever the filter of a peer changes, it sends a *subscription* message to all +peers it has a direct connection to (neighbors). When establishing a new +peering relation, the handshake also includes the *subscription* message. + +The subscription message consists of: + +#. An ``endpoint_id_list`` for storing the path of this message. Initially, this + list only contains the ID of the sender. +#. The ``filter`` for selecting messages. A node only receives messages for + topics that pass its filter (prefix matching). +#. A 64-bit (unsigned) timestamp. This is the logical time of the sender for + this event. + +Whenever receiving a *subscription* message (this ultimately calls +``handle_filter_update`` in ``include/broker/alm/peer.hh``), a peer first checks +whether the path already contains its ID, in which case it discards the message +since it contains a loop. + +If a peer sees the sender (the first entry in the path) for the first time, it +stores the filter in its ``peer_filters_`` map and the new path in its routing +table. Otherwise, it checks the timestamp of the message: + +- If the timestamp is *less* than the last timestamp, a peer simply drops the + outdated message. +- If the timestamp is *equal* to the last timestamp, a peer checks whether the + message contains a new path and updates its routing table if necessary. Complex + topologies can have multiple paths between two peers. The flooding eventually + reveals all existing paths between two peers. +- If the timestamp is *greater* than the last timestamp, a peer overrides the + subscription of the sender and stores the path in its routing table if + necessary. + +All messages that were not discarded by this point get forwarded to all direct +connections that are not yet in the path.
For that, a peer adds itself to the +path and forwards the message otherwise unchanged (in particular, the timestamp +remains unchanged, since it represents the logical time *of the sender*). + +By flooding the subscriptions in this way, Broker is able to detect all possible +paths between nodes. However, this mechanism can cause a high volume of messages +for topologies with many loops that result in a large number of possible paths +between all nodes. + +The number of messages generated by the flooding depends on the topology. In a +trivial chain topology of :math:`n` nodes (:math:`n_0` peers with :math:`n_1`, +:math:`n_1` peers with :math:`n_2`, and so on), we generate a total of +:math:`n-1` messages. In a full mesh, however, we generate :math:`(n-1)^2` +messages. + +Should we observe severe performance degradations as a result of the flooding, +Broker could limit the maximum path length or select only a limited set of paths +(ideally, this subset should be as distinct as possible). + +Publishing Data +~~~~~~~~~~~~~~~ + +Whenever `the core actor`_ receives data from a local publisher, it scans its +routing table for all peers that subscribed to the topic (using prefix +matching). Afterwards, the core actor computes the shortest paths to all +receivers and combines them into a single `multipath `_ before +sending the data to the first hops. Because the class ``multipath`` models a +directed, tree-like data structure, messages always have a finite number of +hops. In addition to the ``multipath``, the core actor also sends the list of +receivers. + +Whenever a core actor receives published data, it first checks whether the list +of receivers includes its own ID. On a match, the core actor forwards the data +to all local subscribers for the topic. Then, the core actor retrieves the next +hops from the ``multipath`` and forwards the data accordingly, only including +the ``multipath`` branch that is relevant to the next hop.
For example, consider +the core actor with ID ``X`` receives the following ``multipath``: + +.. code-block:: text + + X ─┬──> A + └──> B ────> C + +The next hops are ``A`` and ``B``. Hence, it forwards the data to ``A`` with the +upper branch (``A``) and to ``B`` with the lower branch (``B ────> C``). The +peer ``A`` terminates the upper branch, while ``B`` will forward the data to +``C`` on the lower branch. + +Because Broker separates routing information (stored as ``multipath``) from +recipients (stored as list of ``endpoint_id``), users can also publish data to a +single peer in the network to emulate direct sending (unicast). In this case, +the core actor computes the shortest path to the receiver, converts it to a +(trivial) ``multipath`` and then sends the data with a single ID in the list of +receivers. Because all in-between hops ignore the payload unless the list of +receivers includes their ID, only the single receiver is going to process the +content of the message. + +.. _devs.channels: + +Channels +-------- + +Channels model logical connections between one *producer* and any number of +*consumers* on top of an unreliable transport. Changes in the topology of Broker +at runtime can cause reordering of messages if a faster path appears or loss of +messages if a path disappears. + +In places where Broker requires ordered and reliable communication, e.g., +communication between clone and master actors, the class +``broker::detail::channel`` provides a building block to add ordering and +reliability. + +A channel is unaware of the underlying transport and leaves the rendezvous +process (i.e., how producers learn handles of new consumers) to the user. The +class ``channel`` defines message types as well as interfaces for ``producer`` +and ``consumer`` implementations (both use CRTP to interface with user code). + +Producer +~~~~~~~~ + +The producer requires a ``Backend`` template parameter and expects a pointer of +type ``Backend*`` in its constructor. 
This backend implements a transport layer +for the channel and must provide the following interface (pseudo code): + +.. code-block:: cpp + + interface Backend { + // Sends a unicast message to `hdl`. + void send(producer*, const Handle& hdl, const T& msg); + + // Sends a multicast message to all consumers. + void broadcast(producer*, const T& msg) + + // Called to indicate that a consumer got removed by the producer. + void drop(producer*, const Handle& hdl, ec reason) + + // Called to indicate that the producer received the initial ACK. + void handshake_completed(producer*, const Handle& hdl) + }; + +The first argument is always the ``this`` pointer of the producer. This enables +the backend to multiplex more than one producer at a time. The type ``Handle`` +identifies a single consumer. In the data store actors, this is an +``entity_id``. Finally, ``T`` is one of the following message types: + ++-----------------------+----------------------------------------------------+ +| Type | Semantics | ++=======================+====================================================+ +| ``handshake`` | Transmits the first sequence number to a consumer. | ++-----------------------+----------------------------------------------------+ +| ``event`` | Transmits ordered data to consumers. | ++-----------------------+----------------------------------------------------+ +| ``retransmit_failed`` | Notifies that an event is no longer available. | ++-----------------------+----------------------------------------------------+ +| ``heartbeat`` | Keeps connections to consumers alive. | ++-----------------------+----------------------------------------------------+ + +Consumer +~~~~~~~~ + +Similar to the producer, the consumer also requires a ``Backend`` for providing +a transport and consuming incoming events (pseudo code): + +.. code-block:: cpp + + interface Backend { + // process a single event. + void consume(consumer*, Payload) + + // Sends a control message to the producer. 
+ void send(consumer*, T) + + // Process a lost event. The callback may abort further processing by + // returning a non-default error. In this case, the consumer immediately + // calls `close` with the returned error. + error consume_nil(consumer*) + + // Drops this consumer. After calling this function, no further function + // calls on the consumer are allowed (except calling the destructor). + void close(consumer*, error) + }; + +The first argument is always the ``this`` pointer of the consumer. This enables +the backend to multiplex more than one consumer at a time. The member function +``send`` always implicitly transmits control messages to the single producer. +The type ``Payload`` is a template parameter of ``channel`` and denotes the +content of ``event`` messages of the producer. Finally, ``T`` is one of the +following message types: + ++--------------------+----------------------------------------------------+ +| Type | Semantics | ++====================+====================================================+ +| ``cumulative_ack`` | Notifies the producer which events were processed. | ++--------------------+----------------------------------------------------+ +| ``nack`` | Notifies the producer that events got lost. | ++--------------------+----------------------------------------------------+ + +Consumers send ``cumulative_ack`` messages periodically, even if no messages +were received. This enables the producer to keep track of which consumers are +still alive and reachable. + +Channels in Data Store Actors +----------------------------- + +In general, the master actor broadcasts state updates to its clones. This maps +directly to the one-producer-many-consumers model of ``channel``. However, +clones can also take the role of a producer when forwarding mutating operations to +the master. + +In a nutshell, the master actor (see ``master_actor.hh``) always has a producer +attached to it and any number of consumers: + +..
code-block:: cpp + + using producer_type = channel_type::producer; + + using consumer_type = channel_type::consumer; + + producer_type output; + + std::unordered_map inputs; + +Conversely, the clone actor (see ``clone_actor.hh``) always has a consumer +attached to it and it *may* have a producer: + +.. code-block:: cpp + + using consumer_type = channel_type::consumer; + + using producer_type = channel_type::producer; + + consumer_type input; + + std::unique_ptr output_ptr; + +Clones initialize the field ``output_ptr`` lazily on the first mutating +operation they need to forward to the master. + +Mapping Channel to Command Messages +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The message types defined in ``channel`` are never used for actor-to-actor +communication directly. Instead, masters and clones exchange ``command_message`` +objects, which consist of a ``topic`` and an ``internal_command`` (the +``Payload`` type for the channels). Masters and clones convert between Broker +message types and channel message types on the fly (using a surjective mapping). +The essential interface for ``internal_command`` is defined as follows: + +.. code-block:: cpp + + enum class command_tag { + action, + producer_control, + consumer_control, + }; + + class internal_command { + public: + // ... + using variant_type + = caf::variant; + + detail::sequence_number_type seq; + + entity_id sender; + + variant_type content; + }; + + command_tag tag_of(const internal_command& cmd); + +Furthermore, data store actors define ``channel_type`` as +``channel``. When processing an +``internal_command``, the receiver first looks at the tag. 
+ +Control messages directly map to channel messages: + ++-----------------------------------+-----------------------------------+ +| Internal Command Type | Channel Message Type | ++===================================+===================================+ +| ``attach_writer_command`` | ``channel::handshake`` | ++-----------------------------------+-----------------------------------+ +| ``ack_clone_command`` | ``channel::handshake`` | ++-----------------------------------+-----------------------------------+ +| ``cumulative_ack_command`` | ``channel::cumulative_ack`` | ++-----------------------------------+-----------------------------------+ +| ``nack_command`` | ``channel::nack`` | ++-----------------------------------+-----------------------------------+ +| ``keepalive_command`` | ``channel::heartbeat`` | ++-----------------------------------+-----------------------------------+ +| ``retransmit_failed_command`` | ``channel::retransmit_failed`` | ++-----------------------------------+-----------------------------------+ + +Note that ``attach_clone_command`` does *not* map to any channel message type. +This message is the discovery message used by clones to find the master. When +receiving it, the master initiates the handshake on the channel by sending +``ack_clone_command`` (which contains a snapshot of the state and is thus *not* +broadcasted). + +When a clone adds a writer, it already knows the master and thus skips the +discovery phase by directly sending the ``attach_writer_command`` handshake. + +All internal commands that contain an *action*, +such as ``put_command``, get forwarded to the channel as payload. Either by +calling ``produce`` on a ``producer`` or by calling ``handle_event`` on a +consumer. The latter then calls ``consume`` on the data store actor with the +``internal_command`` messages in the order defined by the sequence number. + +Cluster Setup and Testing +------------------------- + +Peering, path discovery, subscription propagation, etc.
take some unspecified +amount of time when setting up a cluster. If a single manager is responsible for +this setup, the workflow usually relies on some feedback to the manager to +signal when the cluster is fully connected and ready to use. The same applies +when writing high-level integration tests. + +In order to wait for two nodes to add each other to their routing tables and +exchange subscriptions, the class ``endpoint`` provides the member function +``await_peer``: + +.. literalinclude:: ../include/broker/endpoint.hh + :language: cpp + :start-after: --await-peer-start + :end-before: --await-peer-end + +The first overload blocks the caller, until a timeout (or error) occurs or the +awaited peer has connected. The second overload is an asynchronous version that +takes a callback instead. On success, the endpoint calls the callback with +``true`` and otherwise it calls the callback with ``false``. + +To retrieve the ``endpoint_id`` from an ``endpoint`` object, simply call +``node_id()``. For example, if both endpoints belong to the same process: + +.. code-block:: cpp + + endpoint ep0; + endpoint ep1; + // ... call listen and peer ... + ep0.await_peer(ep1.node_id()); + ep1.await_peer(ep0.node_id()); + +Note that ``ep0.await_peer(...)`` only confirms that ``ep0`` has a path to the +other endpoint and received a list of subscribed topics. To confirm a mutual +relation, always call ``await_peer`` on both endpoints. + +The Python bindings also expose the blocking overload of ``await_peer``. For +example, connecting three endpoints with data stores attached to them in a unit +test can follow this recipe: + +.. literalinclude:: ../tests/python/store.py + :language: python + :start-after: --tri-setup-start + :end-before: --tri-setup-end + +.. note:: + + When setting up a cluster, make sure to add subscribers (and data stores) + *before* establishing the peering relations. Otherwise, the subscriptions get + flooded after all connections have been established.
This means any + broadcasted event that arrives before the subscriptions gets lost. + +Data Stores +~~~~~~~~~~~ + +When working with data stores, the member function ``store::await_idle`` allows +establishing a predefined order: + +.. literalinclude:: ../include/broker/store.hh + :language: cpp + :start-after: --await-idle-start + :end-before: --await-idle-end + +What *idle* means depends on the role: + +For a *master*, idle means the following: + - There are no pending handshakes to clones. + - All clones have ACKed the latest command. + - All input buffers are empty, i.e., there exists no buffered command from a + writer. + +For a *clone*, idle means the following: + - The clone successfully connected to the master. + - The input buffer is empty, i.e., there exists no buffered command from the + master. + - All local writes (if any) have been ACKed by the master. + +Just like ``await_peer``, calling ``await_idle`` on only one ``store`` object +usually does not guarantee the desired state. For example, consider a setup with +one master (``m``) and three clones (``c0``, ``c1``, and ``c2``). When calling +``put`` on ``c0``, ``await_idle`` would return after ``m`` has ACKed that it +received the ``put`` command. At this point, ``c1`` and ``c2`` might not yet +have seen the command. Hence, the process must also call ``await_idle`` on the +master before it makes the assumption that all data stores are in sync: + +.. code-block:: cpp + + c0.put("foo", "bar"); + if (!c0.await_idle()) { + // ... handle timeout ... + } + if (!m.await_idle()) { + // ... handle timeout ... + } + +.. note:: + + In the example above, calling ``await_idle`` on ``c1`` and ``c2`` as well is + *not* necessary. The master enters the *idle* mode after all clones have ACKed + the latest command. + +.. _actor system: https://actor-framework.readthedocs.io/en/stable/Actors.html#environment-actor-systems +.. |alm::stream_transport| replace:: ``alm::stream_transport`` +..
|alm::peer| replace:: ``alm::peer`` +.. _stream manager: http://actor-framework.org/doc/classcaf_1_1stream__manager.html +.. _broker-cluster-benchmark: https://github.com/zeek/broker/tree/master/tests/benchmark#clustering-broker-cluster-benchmark diff --git a/doc/glossary.rst b/doc/glossary.rst new file mode 100644 index 00000000..162a7f0f --- /dev/null +++ b/doc/glossary.rst @@ -0,0 +1,22 @@ +Glossary +======== + +.. glossary:: + + Message + + A message consists of a ``broker::topic`` and a ``broker::data``. Broker + stores messages as copy-on-write tuples (``broker::data_message``). This + allows Broker to pass messages to many receivers without having to copy the + content for each subscriber. + + Filter + + Each endpoint (see :ref:`endpoint`) controls the amount of data it receives + from others by providing a list of topic prefixes. Whenever an endpoint + publishes data, this list (the filter) is used to determine which peering + endpoint should receive the data. For example, if the endpoints A and B have + a peering relationship and B has announced the filter ``[/zeek/events/123/, + /zeek/events/234/]`` then A would forward messages for the topic + ``/zeek/events/123/foo`` to B, while not forwarding messages for the topic + ``/zeek/events/456/foo``. diff --git a/doc/index.rst b/doc/index.rst index 183fd329..4d02e23c 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -26,12 +26,17 @@ abstraction operating with the complete :ref:`data model store *frontend*, which is either an authoritative *master* or a *clone* replica. The master can choose to keep its data in various *backends*, currently either in-memory, or persistently through -`SQLite `_. +`SQLite `_, or `RocksDB +`_. :ref:`python` discusses the Broker's Python bindings, which transparently expose all of the library's functionality to Python scripts. +Finally, :ref:`devs` dives deep into the architecture and implementation of +Broker. 
This Section is meant for guiding C++ software developers that wish to +contribute to Broker. + Synopsis -------- @@ -46,3 +51,5 @@ Synopsis data stores python + devs + glossary diff --git a/doc/overview.rst b/doc/overview.rst index c1c6d9ca..6b38cb93 100644 --- a/doc/overview.rst +++ b/doc/overview.rst @@ -104,7 +104,8 @@ figure below illustrates how one can deploy a master with several clones. Each data store has a name that identifies the master. This name must be unique among the endpoint's peers. The master can choose to keep its data in various -backends, which are currently: in-memory and `SQLite `_. +backends, which are currently: in-memory, `SQLite `_, and `RocksDB +`_. :ref:`data-stores` illustrates how to use data stores in different settings. @@ -122,7 +123,7 @@ Broker). In order to get a high-level view of what Broker is doing internally, we recommend setting: -.. code-block:: none +:: BROKER_CONSOLE_VERBOSITY=info @@ -135,7 +136,7 @@ Troubleshooting a Broker application (or Zeek_ scripts that communicate over Broker) sometimes requires tapping into the exchanged messages directly. Setting the verbosity to debug instead will provide such details: -.. code-block:: none +:: BROKER_CONSOLE_VERBOSITY=debug @@ -158,7 +159,7 @@ configuring Broker. A minimal configuration file that sets console and file verbosity looks like this: -.. code-block:: none +:: logger { ; note the single quotes! diff --git a/doc/python.rst b/doc/python.rst index ac967dda..c655ace8 100644 --- a/doc/python.rst +++ b/doc/python.rst @@ -148,28 +148,6 @@ indexed by vectors, tables are mapped to Python dictionaries, Python dictionaries only allow immutable index types, and so we must use a ``tuple`` to represent a ``vector``. -Most types should map to `Zeek Types -`_ intuitively, -but to explain how to represent `Zeek records -`_: -use a Python ``tuple`` with uninitialized ``&optional`` record fields being a -``None`` value. For example, this Zeek record type: - -.. 
code-block:: zeek - - type MyRec: record { - n: count; - o: addr &optional; - s: string; - t: table[int] of string; - }; - -Might be represented by this Python value: - -.. code-block:: python - - (broker.Count(42), None, "hi", {1: "one", 2: "two"}) - Status and Error Messages ------------------------- diff --git a/doc/stores.rst b/doc/stores.rst index 995be366..d21d2d90 100644 --- a/doc/stores.rst +++ b/doc/stores.rst @@ -6,7 +6,7 @@ Data Stores In addition to transmitting :ref:`data ` via publish/subscribe communication, Broker also offers a mechanism to store this very data. Data stores provide a distributed key-value interface that leverages the -existing :ref:`peer communication channels `. +existing :ref:`peer communication `. Aspects ------- @@ -21,10 +21,10 @@ Users interact with a data store through the frontend, which is either a *master* or a *clone*. A master is authoritative for the store, whereas a clone represents a local cache that is connected to the master. A clone cannot exist without a master. Only the master can perform mutating operations on the store, -which it pushes out to all its clones. A clone has a full copy of the data for -faster access, but sends any modifying operations to its master first. Only -when the master propagates back the change, the result of the operation becomes -visible at the clone. +which it pushes out to all its clones. A clone has a full copy of the data in +memory for faster access, but sends any modifying operations to its master +first. Only when the master propagates back the change, the result of the +operation becomes visible at the clone. It is possible to attach one or more data stores to an endpoint, but each store must have a unique master name. For example, two peers cannot both have a @@ -54,6 +54,11 @@ The master can choose to keep its data in various backends: SQLite3 format on disk. While offering persistence, it does not scale well to large volumes. +3. `RocksDB `_. 
This backend relies on an + industrial-strength, high-performance database with a variety of tuning + knobs. If your application requires persistence and also needs to scale, + this backend is your best choice. + Operations ---------- @@ -79,7 +84,7 @@ The factory function ``endpoint::attach_master`` has the following signature: The function takes as first argument the global name of the store, as second argument the type of store -(``broker::{memory,sqlite}``), and as third argument +(``broker::backend::{memory,sqlite,rocksdb}``), and as third argument optionally a set of backend options, such as the path where to keep the backend on the filesystem. The function returns a ``expected`` which encapsulates a type-erased reference to the @@ -87,8 +92,8 @@ data store. .. note:: - The type ``expected`` encapsulates an instance of type ``T`` or a - ``status``, with an interface that has "pointer semantics" for syntactic + The type ``expected`` encapsulates an instance of type ``T`` or an + ``error``, with an interface that has "pointer semantics" for syntactic convenience: .. code-block:: cpp @@ -101,7 +106,7 @@ data store. else std::cout << to_string(x.error()) << std::endl; -In the failure case, the ``expected::error()`` holds an ``error``. +In the failure case, the ``expected::error()`` returns the ``error``. Modification ~~~~~~~~~~~~ @@ -227,3 +232,33 @@ ID that is hauled through the response: The proxy provides the same set of retrieval methods as the direct interface, with all of them returning the corresponding ID to retrieve the result once it has come in. + + +Intra-store Communication +------------------------- + +Broker uses two reserved topics to model communication between masters and +clones: *M* and *C*. Masters subscribe to *M* when attached to an endpoint and +publish to *C* to broadcast state transitions to its clones. Clones subscribe to +*C* when attached to an endpoint and publish to *M* for propagating mutating +operations to the master. 
+ +These topics also enable clones to find the master without knowing which +endpoint it was attached to. When starting a clone, it periodically publishes a +handshake to *M* until the master responds. This rendezvous process also makes +it easier to set up cluster instances, because users can attach clones while +the connection to the master has not been established yet. + +The publish/subscribe layer of Broker generally targets loosely coupled +deployments and neither ensures ordering nor guarantees delivery. Without +these two properties, masters and clones could quickly run out of sync when +messages arrive out of order or get lost. Hence, masters and clones use "virtual +channels" to implement reliable and ordered communication on top of the +publish/subscribe layer. The master includes a sequence number when publishing +to *C* in order to enable clones to detect out-of-order delivery and ask the +master to retransmit lost messages. Cumulative ACKs and per-clone state allow +the master to delete transmitted messages as well as to detect unresponsive +clones using timeouts. + +For the low-level details of the channel abstraction, see the +:ref:`channels section in the developer guide `. diff --git a/include/broker/address.hh b/include/broker/address.hh index 9e7224c6..84c6f92d 100644 --- a/include/broker/address.hh +++ b/include/broker/address.hh @@ -73,6 +73,8 @@ public: size_t hash() const; + // -- inspection support ----------------------------------------------------- + template friend bool inspect(Inspector& f, address& x) { // We transparently expose the member variable. 
Hence, broker::address and @@ -80,6 +82,8 @@ public: return inspect(f, x.addr_); } + // -- conversion support ----------------------------------------------------- + friend bool convert(const address& a, std::string& str) { str = to_string(a.addr_); return true; @@ -95,12 +99,6 @@ private: caf::ip_address addr_; }; -/// @relates address -bool convert(const std::string& str, address& a); - -/// @relates address -bool convert(const address& a, std::string& str); - } // namespace broker namespace std { diff --git a/include/broker/alm/async_transport.hh b/include/broker/alm/async_transport.hh new file mode 100644 index 00000000..e543d740 --- /dev/null +++ b/include/broker/alm/async_transport.hh @@ -0,0 +1,99 @@ +#pragma once + +#include +#include +#include +#include + +#include "broker/alm/peer.hh" +#include "broker/alm/routing_table.hh" +#include "broker/detail/lift.hh" +#include "broker/filter_type.hh" +#include "broker/message.hh" + +namespace broker::alm { + +/// A transport based on asynchronous messages. For testing only. +class async_transport : public peer { +public: + using super = peer; + + using peer_id_type = PeerId; + + void start_peering(const peer_id_type& remote_peer, caf::actor hdl) { + BROKER_TRACE(BROKER_ARG(remote_peer) << BROKER_ARG(hdl)); + auto& d = dref(); + if (!d.tbl().emplace(remote_peer, hdl).second) { // copy (not move) hdl: send() below still needs it + BROKER_INFO("start_peering ignored: already peering with " + << remote_peer); + return; + } + send(hdl, atom::peer_v, d.id(), d.filter(), d.timestamp()); + } + + /// Starts the handshake process for a new peering (step #1 in core_actor.cc). + /// @param peer_hdl Handle to the peering (remote) core actor. + /// @param peer_filter Filter of our peer. + /// @param send_own_filter Sends a `(filter, self)` handshake if `true`, + /// `('ok', self)` otherwise. 
+ /// @pre `current_sender() != nullptr` + auto handle_peering(const peer_id_type& remote_id, const filter_type& filter, + lamport_timestamp timestamp) { + BROKER_TRACE(BROKER_ARG(remote_id)); + // Check whether we already send outbound traffic to the peer. Could use + // `BROKER_ASSERT` instead, because this mustn't get called for known peers. + auto& d = dref(); + auto src = caf::actor_cast(d.self()->current_sender()); + if (!d.tbl().emplace(remote_id, src).second) + BROKER_INFO("received repeated peering request"); + // Propagate filter to peers. + std::vector path{remote_id}; + vector_timestamp path_ts{timestamp}; + d.handle_filter_update(path, path_ts, filter); + // Reply with our own filter. + return caf::make_message(atom::peer_v, atom::ok_v, d.id(), d.filter(), + d.timestamp()); + } + + auto handle_peering_response(const peer_id_type& remote_id, + const filter_type& filter, + lamport_timestamp timestamp) { + auto& d = dref(); + auto src = caf::actor_cast(d.self()->current_sender()); + if (!d.tbl().emplace(remote_id, src).second) + BROKER_INFO("received repeated peering response"); + // Propagate filter to peers. + std::vector path{remote_id}; + vector_timestamp path_ts{timestamp}; + d.handle_filter_update(path, path_ts, filter); + } + + template + void send(const caf::actor& receiver, Ts&&... xs) { + dref().self()->send(receiver, std::forward(xs)...); + } + + template + caf::behavior make_behavior(Fs... 
fs) { + using detail::lift; + auto& d = dref(); + return { + std::move(fs)..., + lift(d, &Derived::start_peering), + lift(d, &Derived::handle_peering), + lift(d, &Derived::handle_peering_response), + lift(d, &Derived::publish_data), + lift(d, &Derived::publish_command), + lift(d, &Derived::subscribe), + lift(d, &Derived::handle_publication), + lift(d, &Derived::handle_filter_update), + }; + } + +private: + auto& dref() { + return static_cast(*this); + } +}; + +} // namespace broker::alm diff --git a/include/broker/alm/lamport_timestamp.hh b/include/broker/alm/lamport_timestamp.hh new file mode 100644 index 00000000..e008bbc2 --- /dev/null +++ b/include/broker/alm/lamport_timestamp.hh @@ -0,0 +1,75 @@ +#pragma once + +#include +#include + +namespace broker::alm { + +/// A logical clock using a 64-bit counter. +struct lamport_timestamp { + uint64_t value = 1; + + lamport_timestamp& operator++() { + ++value; + return *this; + } +}; + +/// @relates lamport_timestamp +constexpr bool operator<(lamport_timestamp x, lamport_timestamp y) { + return x.value < y.value; +} + +/// @relates lamport_timestamp +constexpr bool operator<=(lamport_timestamp x, lamport_timestamp y) { + return x.value <= y.value; +} + +/// @relates lamport_timestamp +constexpr bool operator>(lamport_timestamp x, lamport_timestamp y) { + return x.value > y.value; +} + +/// @relates lamport_timestamp +constexpr bool operator>=(lamport_timestamp x, lamport_timestamp y) { + return x.value >= y.value; +} + +/// @relates lamport_timestamp +constexpr bool operator==(lamport_timestamp x, lamport_timestamp y) { + return x.value == y.value; +} + +/// @relates lamport_timestamp +constexpr bool operator!=(lamport_timestamp x, lamport_timestamp y) { + return x.value != y.value; +} + +/// @relates lamport_timestamp +constexpr lamport_timestamp operator+(lamport_timestamp x, uint64_t y) { + return lamport_timestamp{x.value + y}; +} + +/// @relates lamport_timestamp +constexpr lamport_timestamp operator+(uint64_t x, 
lamport_timestamp y) { + return lamport_timestamp{x + y.value}; +} + +/// @relates lamport_timestamp +template +typename Inspector::result_type inspect(Inspector& f, lamport_timestamp& x) { + return f.apply(x.value); +} + +/// @relates lamport_timestamp +using vector_timestamp = std::vector; + +} // namespace broker::alm + +namespace broker::literals { + +constexpr auto operator""_lt(unsigned long long value) noexcept { + return alm::lamport_timestamp{static_cast(value)}; +} + +} // namespace broker::literals diff --git a/include/broker/alm/multipath.hh b/include/broker/alm/multipath.hh new file mode 100644 index 00000000..7c0200fa --- /dev/null +++ b/include/broker/alm/multipath.hh @@ -0,0 +1,411 @@ +#pragma once + +#include +#include +#include +#include + +#include "caf/meta/omittable_if_empty.hpp" +#include "caf/meta/type_name.hpp" +#include "caf/node_id.hpp" +#include "caf/sec.hpp" + +#include "broker/detail/assert.hh" +#include "broker/detail/monotonic_buffer_resource.hh" +#include "broker/fwd.hh" + +namespace broker::alm { + +struct multipath_tree { + explicit multipath_tree(endpoint_id id); + ~multipath_tree(); + multipath_node* root; + detail::monotonic_buffer_resource mem; +}; + +template +class node_iterator { +public: + using iterator_category = std::forward_iterator_tag; + + using difference_type = ptrdiff_t; + + using value_type = T; + + using pointer = value_type*; + + using reference = value_type&; + + explicit node_iterator(pointer ptr) noexcept : ptr_(ptr) { + // nop + } + + node_iterator(const node_iterator&) noexcept = default; + + node_iterator& operator=(const node_iterator&) noexcept = default; + + node_iterator operator++(int) { + node_iterator cpy{ptr_}; + ptr_ = ptr_->right_; + return cpy; + } + + node_iterator& operator++() { + ptr_ = ptr_->right_; + return *this; + } + + reference operator*() { + return *ptr_; + } + + pointer operator->() { + return ptr_; + } + + pointer get() { + return ptr_; + } + +private: + pointer ptr_; +}; + 
+template +auto operator==(node_iterator x, node_iterator y) + -> decltype(x.get() == y.get()) { + return x.get() == y.get(); +} + +template +auto operator!=(node_iterator x, node_iterator y) + -> decltype(x.get() != y.get()) { + return x.get() != y.get(); +} + +class multipath_group { +public: + friend class multipath; + friend class multipath_node; + + using iterator = node_iterator; + + using const_iterator = node_iterator; + + multipath_group() noexcept = default; + + multipath_group(const multipath_group&) = delete; + + multipath_group& operator=(const multipath_group&) = delete; + + ~multipath_group(); + + size_t size() const noexcept { + return size_; + } + + bool empty() const noexcept { + return size_ == 0; + } + + iterator begin() noexcept { + return iterator{first_}; + } + + const_iterator begin() const noexcept { + return const_iterator{first_}; + } + + iterator end() noexcept { + return iterator{nullptr}; + } + + const_iterator end() const noexcept { + return const_iterator{nullptr}; + } + + bool equals(const multipath_group& other) const noexcept; + + bool contains(const endpoint_id& id) const noexcept; + + std::pair + emplace(detail::monotonic_buffer_resource& mem, const endpoint_id& id); + + bool emplace(multipath_node* node); + +private: + template + std::pair emplace_impl(const endpoint_id& id, + MakeNewNode make_new_node); + + void shallow_delete() noexcept; + + size_t size_ = 0; + multipath_node* first_ = nullptr; +}; + + +class multipath_node { +public: + friend class multipath; + friend class multipath_group; + friend class node_iterator; + friend class node_iterator; + friend struct multipath_tree; + + explicit multipath_node(const endpoint_id& id) noexcept : id_(id) { + // nop + } + + multipath_node() = delete; + + multipath_node(const multipath_node&) = delete; + + multipath_node& operator=(const multipath_node&) = delete; + + ~multipath_node(); + + auto& head() noexcept { + return *this; + } + + const auto& head() const noexcept { + return 
*this; + } + + const auto& id() const noexcept { + return id_; + } + + bool is_receiver() const noexcept { + return is_receiver_; + } + + auto& nodes() noexcept { + return down_; + } + + const auto& nodes() const noexcept { + return down_; + } + + bool equals(const multipath_node& other) const noexcept; + + bool contains(const endpoint_id& id) const noexcept; + + void stringify(std::string& buf) const; + +private: + template + bool save_children(Inspector& f) { + if (f.begin_sequence(down_.size())) + for (auto& child : down_) + if (!child.save(f)) + return false; + return f.end_sequence(); + } + + template + bool save(Inspector& f) { + // We are lying to the inspector about the type, because multipath_node and + // multipath_group are internal implementation details. + return f.begin_object(caf::type_id_v, + caf::type_name_v) + && f.begin_field("id") // + && f.apply(id_) // + && f.end_field() // + && f.begin_field("is_receiver") // + && f.apply(is_receiver_) // + && f.end_field() // + && f.begin_field("nodes") // + && save_children(f) // [...] + && f.end_field() // + && f.end_object(); + } + + template + bool load_children(detail::monotonic_buffer_resource& mem, Inspector& f) { + size_t n = 0; + if (f.begin_sequence(n)) { + for (size_t i = 0; i < n; ++i) { + auto child = detail::new_instance(mem, endpoint_id{}); + if (!child->load(mem, f)) { + child->shallow_delete(); + return false; + } + if (!down_.emplace(child)) { + child->shallow_delete(); + f.emplace_error(caf::sec::field_invariant_check_failed, + "a multipath may not contain duplicates"); + return false; + } + } + } + return f.end_sequence(); + } + + template + bool load(detail::monotonic_buffer_resource& mem, Inspector& f) { + return f.begin_object(caf::type_id_v, + caf::type_name_v) + && f.begin_field("id") // + && f.apply(id_) // + && f.end_field() // + && f.begin_field("is_receiver") // + && f.apply(is_receiver_) // + && f.end_field() // + && f.begin_field("nodes") // + && load_children(mem, f) // [...] 
+ && f.end_field() // + && f.end_object(); + } + + void shallow_delete() noexcept; + + template + void splice_cont(detail::monotonic_buffer_resource& mem, Iterator first, + Sentinel last) { + if (first != last) { + auto child = down_.emplace(mem, *first).first; + child->splice_cont(mem, ++first, last); + } + } + + endpoint_id id_; + bool is_receiver_ = false; + multipath_node* right_ = nullptr; + multipath_group down_; +}; + +/// A recursive data structure for encoding branching paths for source routing. +/// For example: +/// +/// ~~~ +/// A ────> B ─┬──> C +/// └──> D ────> E +/// ~~~ +/// +/// In this topology, the sender A sends a message to B that B then has to +/// forward to C and D. After that, C is the final destination on that branch, +/// but D has to forward the message also to E. +class multipath { +public: + using tree_ptr = std::shared_ptr; + + multipath(); + + explicit multipath(const endpoint_id& id); + + /// Constructs a multipath from the linear path `[first, last)`. + /// @pre `first != last` + template + explicit multipath(Iterator first, Sentinel last) : multipath() { + if (first != last) { + head_->id_ = *first; + auto pos = head_; + for (++first; first != last; ++first) + pos = pos->down_.emplace(tree_->mem, *first).first; + pos->is_receiver_ = true; + } + } + + multipath(const tree_ptr&, multipath_node*); + + explicit multipath(const tree_ptr& tptr) : multipath(tptr, tptr->root) { + // nop + } + + multipath(multipath&& other) noexcept = default; + + multipath(const multipath& other) = default; + + multipath& operator=(multipath&& other) noexcept = default; + + multipath& operator=(const multipath& other) = default; + + const auto& head() const noexcept { + return *head_; + } + + bool equals(const multipath& other) const noexcept { + return head_->equals(*other.head_); + } + + bool contains(const endpoint_id& id) const noexcept { + return head_->contains(id); + } + + size_t num_nodes() const noexcept { + return head_->down_.size(); + } + + 
template + void for_each_node(F fun) const { + for (auto i = head_->down_.begin(); i != head_->down_.end(); ++i) + fun(multipath{tree_, i.get()}); + } + + template + friend bool inspect(Inspector& f, multipath& x) { + if constexpr (Inspector::is_loading) + return x.load(f); + else + return x.save(f); + } + + friend std::string to_string(const alm::multipath& x) { + std::string result; + x.head_->stringify(result); + return result; + } + + /// Fills the `routes` list such that all reachable receivers are included. + /// @param receivers List of nodes that should receive a certain message. + /// @param tbl The routing table for shortest path lookups. + /// @param routes Stores the source routing paths. + /// @param unreachables Stores receivers without routing table entries. + static void generate(const std::vector& receivers, + const routing_table& tbl, std::vector& routes, + std::vector& unreachables); + +private: + std::pair emplace(const endpoint_id& id) { + return head_->down_.emplace(tree_->mem, id); + } + + template + bool load(Inspector& f) { + return head_->load(tree_->mem, f); + } + + template + bool save(Inspector& f) { + return head_->save(f); + } + + // Splices a linear path into this multipath. The last entry in the path is + // automatically tagged as a receiver. + // @pre `!path.empty() && path[0] == id()` + void splice(const std::vector& path); + + /// Provides storage for children. + std::shared_ptr tree_; + + /// Stores information for this object. 
+ multipath_node* head_; +}; + +/// @relates multipath +inline bool operator==(const multipath& x, const multipath& y) { + return x.equals(y); +} + +/// @relates multipath +inline bool operator!=(const multipath& x, const multipath& y) { + return !(x == y); +} + +/// @relates multipath +std::string to_string(const alm::multipath& x); + +} // namespace broker::alm diff --git a/include/broker/alm/peer.hh b/include/broker/alm/peer.hh new file mode 100644 index 00000000..7aa24d79 --- /dev/null +++ b/include/broker/alm/peer.hh @@ -0,0 +1,394 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include "broker/alm/multipath.hh" +#include "broker/alm/routing_table.hh" +#include "broker/atoms.hh" +#include "broker/defaults.hh" +#include "broker/detail/assert.hh" +#include "broker/detail/central_dispatcher.hh" +#include "broker/detail/lift.hh" +#include "broker/detail/prefix_matcher.hh" +#include "broker/error.hh" +#include "broker/filter_type.hh" +#include "broker/logger.hh" +#include "broker/message.hh" +#include "broker/network_info.hh" +#include "broker/shutdown_options.hh" + +namespace broker::alm { + +/// Base class that represents a Broker peer in the network. This class +/// implements subscription and path management for the overlay. Member +/// functions for data transport as well as shipping data to local subscribers +/// remain pure virtual. +/// +/// The derived class *must* provide the following interface: +/// +/// ~~~ +/// class Derived { +/// template +/// void send(const caf::actor& receiver, Ts&&... xs); +/// +/// const PeerId& id() const noexcept; +/// }; +/// ~~~ +/// +/// The derived class *can* extend any of the callback member functions by +/// hiding the implementation of ::peer. 
+/// +/// The peer registers these message handlers: +/// +/// ~~~ +/// (atom::get, atom::id) -> endpoint_id +/// => id() +/// +/// (atom::publish, data_message msg) -> void +/// => publish_data(msg) +/// +/// (atom::publish, command_message msg) -> void +/// => publish_command(msg) +/// +/// (atom::subscribe, filter_type filter) -> void +/// => subscribe(filter) +/// +/// (atom::publish, node_message msg) -> void +/// => handle_publication(msg) +/// +/// (atom::subscribe, endpoint_id_list path, filter_type filter, lamport_timestamp) +/// -> void +/// => handle_filter_update(path, filter, t) +/// +/// (atom::revoke, revoker, ts, hop) -> void +/// => handle_path_revocation(revoker, ts, hop) +/// +/// (atom::await, PeerId) -> void +/// => await_endpoint() +/// ~~~ +class peer : public detail::central_dispatcher { +public: + // -- constants -------------------------------------------------------------- + + /// Configures how many (additional) items the stream transport caches for + /// published events that bypass streaming. + static constexpr size_t item_stash_replenish_size = 32; + + /// Checks whether move-assigning an ID to the peer results in a `noexcept` + /// operation. 
+ static constexpr bool has_nothrow_assignable_id + = std::is_nothrow_move_assignable::value; + + // -- nested types ----------------------------------------------------------- + + struct revocations_type { + alm::revocations entries; + + caf::timespan aging_interval; + + caf::timespan max_age; + + caf::actor_clock::time_point next_aging_cycle; + }; + + // -- constructors, destructors, and assignment operators -------------------- + + explicit peer(caf::event_based_actor* selfptr); + + peer() = delete; + + peer(const peer&) = delete; + + peer& operator=(const peer&) = delete; + + virtual ~peer(); + + // -- properties ------------------------------------------------------------- + + caf::event_based_actor* self() noexcept { + return self_; + } + + const endpoint_id& id() const noexcept { + return id_; + } + + void id(endpoint_id new_id) noexcept(has_nothrow_assignable_id) { + id_ = std::move(new_id); + } + + auto& tbl() noexcept { + return tbl_; + } + + const auto& tbl() const noexcept { + return tbl_; + } + + const auto& filter() const noexcept { + return filter_; + } + + const auto& peer_filters() const noexcept { + return peer_filters_; + } + + auto peer_filter(const endpoint_id& x) const { + auto i = peer_filters_.find(x); + if (i != peer_filters_.end()) + return i->second; + return filter_type{}; + } + + auto timestamp() const noexcept { + return timestamp_; + } + + auto peer_handles() const { + std::vector result; + for (auto& kvp : tbl_) + result.emplace_back(kvp.second.hdl); + return result; + } + + auto peer_ids() const { + endpoint_id_list result; + for (auto& kvp : tbl_) + result.emplace_back(kvp.first); + return result; + } + + auto& revocations() const noexcept { + return revocations_; + } + + bool disable_forwarding() const noexcept { + return disable_forwarding_; + } + + void disable_forwarding(bool value) noexcept { + disable_forwarding_ = value; + } + + // -- convenience functions for subscription information --------------------- + + bool 
has_remote_subscriber(const topic& x) const noexcept; + + static bool contains(const endpoint_id_list& ids, const endpoint_id& id); + + // -- topic management ------------------------------------------------------- + + virtual void subscribe(const filter_type& what); + + // -- detail::central_dispatcher overrides ----------------------------------- + + caf::event_based_actor* this_actor() noexcept final; + + endpoint_id this_endpoint() const final; + + filter_type local_filter() const final; + + alm::lamport_timestamp local_timestamp() const noexcept final; + + // -- additional dispatch overloads ------------------------------------------ + + /// @private + template + bool dispatch_to_impl(T&& msg, endpoint_id&& receiver); + + /// Dispatches `msg` to `receiver`, ignoring subscription filters. + /// @returns `true` on success, `false` if no path to the receiver exists. + bool dispatch_to(data_message msg, endpoint_id receiver); + + /// Dispatches `msg` to `receiver`, ignoring subscription filters. + /// @returns `true` on success, `false` if no path to the receiver exists. + bool dispatch_to(command_message msg, endpoint_id receiver); + + // -- flooding --------------------------------------------------------------- + + /// Floods the subscriptions on this peer to all other peers. + /// @note The function *does not* bump the Lamport timestamp before sending. + void flood_subscriptions(); + + /// Floods a path revocation to all other peers. + /// @note The function does *not* bump the Lamport timestamp before sending. + void flood_path_revocation(const endpoint_id& lost_peer); + + /// Checks whether a path and its associated vector timestamp are non-empty, + /// loop-free and not revoked. + bool valid(endpoint_id_list& path, vector_timestamp path_ts); + + /// Removes all entries from the revocations that exceeded their maximum age. + /// The purpose of this function is to clean up state periodically to avoid + /// unbounded growth of the revocations. 
Calling it at fixed intervals is not + /// required. Triggering aging on peer messages suffices, since only peer + /// messages can grow the revocations in the first place. + void age_revocations(); + + /// Adds the reverse `path` to the routing table and stores the subscription + /// if it is new. + /// @returns A pair containing a list of new peers learned through the update + /// and a boolean that is set to `true` if this update increased the + /// local time. + /// @note increases the local time if the routing table changes. + std::pair handle_update(endpoint_id_list& path, + vector_timestamp path_ts, + const filter_type& filter); + + virtual void handle_filter_update(endpoint_id_list& path, + vector_timestamp& path_ts, + const filter_type& filter); + + virtual void handle_path_revocation(endpoint_id_list& path, + vector_timestamp& path_ts, + const endpoint_id& revoked_hop, + const filter_type& filter); + + // -- interface to the transport --------------------------------------------- + + /// Publishes (floods) a subscription update to @p dst. + /// @param dst Destination (receiver) for the published data. + /// @param path Lists Broker peers that forwarded the subscription on the + /// overlay in chronological order. + /// @param ts Stores the logical time of each peer at the time of forwarding. + /// @param new_filter The new filter to apply to the origin of the update. + virtual void publish(const caf::actor& dst, atom::subscribe, + const endpoint_id_list& path, const vector_timestamp& ts, + const filter_type& new_filter) + = 0; + + /// Publishes (floods) a path revocation update to @p dst. + /// @param dst Destination (receiver) for the published data. + /// @param path Lists Broker peers that forwarded the subscription on the + /// overlay in chronological order. + /// @param ts Stores the logical time of each peer at the time of forwarding. 
+ /// @param lost_peer ID of the affected peer, i.e., the origin of the update + /// lost its communication path to @p lost_peer. + /// @param new_filter The new filter to apply to the origin of the update. + virtual void publish(const caf::actor& dst, atom::revoke, + const endpoint_id_list& path, const vector_timestamp& ts, + const endpoint_id& lost_peer, + const filter_type& new_filter) + = 0; + + /// Publishes @p msg to all local subscribers. + /// @param msg The published data. + virtual void publish_locally(const data_message& msg) = 0; + + /// Publishes @p msg to all local subscribers. + /// @param msg The published command. + virtual void publish_locally(const command_message& msg) = 0; + + /// Publishes @p msg to all local subscribers. + /// @param msg The published message, either data or a command. + void publish_locally(const node_message_content& msg); + + // -- callbacks -------------------------------------------------------------- + + /// Called whenever this peer discovers a new peer in the network. + /// @param peer_id ID of the new peer. + /// @note The new peer gets stored in the routing table *before* calling this + /// member function. + virtual void peer_discovered(const endpoint_id& peer_id); + + /// Called whenever this peer established a new connection. + /// @param peer_id ID of the newly connected peer. + /// @param hdl Communication handle for exchanging messages with the new peer. + /// The handle is default-constructed if no direct connection + /// exists (yet). + /// @note The new peer gets stored in the routing table *before* calling this + /// member function. + virtual void peer_connected(const endpoint_id& peer_id, + const caf::actor& hdl); + + /// Called whenever this peer lost a connection to a remote peer. + /// @param peer_id ID of the disconnected peer. + /// @param hdl Communication handle of the disconnected peer. 
+ /// @param reason None if we closed the connection gracefully, otherwise + /// contains the transport-specific error code. + virtual void peer_disconnected(const endpoint_id& peer_id, + const caf::actor& hdl, const error& reason); + + /// Called whenever this peer removed a direct connection to a remote peer. + /// @param peer_id ID of the removed peer. + /// @param hdl Communication handle of the removed peer. + virtual void peer_removed(const endpoint_id& peer_id, const caf::actor& hdl); + + /// Called after removing the last path to `peer_id` from the routing table. + /// @param peer_id ID of the (now unreachable) peer. + virtual void peer_unreachable(const endpoint_id& peer_id); + + /// Called whenever the user tried to unpeer from an unknown peer. + /// @param x ID of the unknown peer. + virtual void cannot_remove_peer(const endpoint_id& x); + + /// Called whenever the user tried to unpeer from an unknown peer. + /// @param x Actor handle of the unknown peer. + virtual void cannot_remove_peer(const caf::actor& x); + + /// Called whenever the user tried to unpeer from an unknown peer. + /// @param x Network info of the unknown peer. + virtual void cannot_remove_peer(const network_info& x); + + /// Called whenever establishing a connection to a remote peer failed. + /// @param x Network info of the peer we failed to connect to. + virtual void peer_unavailable(const network_info& x); + + /// Called when the @ref endpoint signals system shutdown. + virtual void shutdown(shutdown_options options); + + // -- initialization --------------------------------------------------------- + + /// Creates the default behavior for the actor that remains valid until the + /// system is shutting down. 
+ virtual caf::behavior make_behavior(); + +protected: + // -- implementation details ------------------------------------------------- + + void cleanup(const endpoint_id& peer_id, const caf::actor& hdl); + + // -- member variables ------------------------------------------------------- + + /// Points to the actor owning this object. + caf::event_based_actor* self_; + + /// Identifies this peer in the network. + endpoint_id id_; + + /// Stores routing information for reaching other peers. The *transport* adds + /// new entries to this table (before calling ::peer_connected) and the peer + /// removes entries in its ::peer_disconnected callback implementation. + routing_table tbl_; + + /// A logical timestamp. + lamport_timestamp timestamp_; + + /// Keeps track of the logical timestamps last seen from other peers. + std::unordered_map peer_timestamps_; + + /// Stores prefixes with subscribers on this peer. + filter_type filter_; + + /// Stores all filters from other peers. + std::unordered_map peer_filters_; + + /// Stores revoked paths. + revocations_type revocations_; + + /// Stores whether this peer disabled forwarding, i.e., only appears as leaf + /// node to other peers. + bool disable_forwarding_ = false; + + /// Stores IDs of peers that we have no path to yet but some local actor is + /// already waiting for. Usually for testing purposes. 
+ std::multimap awaited_peers_; +}; + +} // namespace broker::alm diff --git a/include/broker/alm/routing_table.hh b/include/broker/alm/routing_table.hh new file mode 100644 index 00000000..e561eab6 --- /dev/null +++ b/include/broker/alm/routing_table.hh @@ -0,0 +1,388 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "broker/alm/lamport_timestamp.hh" +#include "broker/detail/algorithms.hh" +#include "broker/detail/assert.hh" +#include "broker/detail/iterator_range.hh" +#include "broker/detail/map_index_iterator.hh" +#include "broker/fwd.hh" +#include "broker/optional.hh" + +namespace broker::alm { + +/// Compares two paths by size, falling back to lexicographical comparison on +/// equal sizes. +struct path_less_t { + using path_type = std::vector; + + using versioned_path_type = std::pair; + + /// Returns `true` if `x` is shorter than `y` or both paths have equal length + /// but `x` comes before `y` lexicographically, `false` otherwise. + bool operator()(const path_type& x, const path_type& y) const noexcept { + if (x.size() < y.size()) + return true; + if (x.size() == y.size()) + return x < y; + return false; + } + + bool operator()(const path_type& x, + const versioned_path_type& y) const noexcept { + return (*this)(x, y.first); + } + + bool operator()(const versioned_path_type& x, + const path_type& y) const noexcept { + return (*this)(x.first, y); + } + + bool operator()(const versioned_path_type& x, + const versioned_path_type& y) const noexcept { + return (*this)(x.first, y.first); + } +}; + +constexpr auto path_less = path_less_t{}; + +/// Stores all known paths to a single peer. For direct connections, also stores +/// a communication handle for reaching the peer. +class routing_table_row { +public: + /// Stores a linear path to another peer. + using path_type = std::vector; + + /// Stores a linear path to another peer with logical timestamps for when this + /// route was announced. 
+ using versioned_path_type = std::pair; + + /// Stores an implementation-specific handle for talking to the peer. The + /// handle is null if no direct connection exists. + caf::actor hdl; + + /// Stores all paths leading to this peer, using a vector timestamp for + /// versioning (stores only the latest version). Sorted by path length. + std::vector versioned_paths; + + routing_table_row() = default; + routing_table_row(routing_table_row&&) = default; + routing_table_row(const routing_table_row&) = default; + routing_table_row& operator=(routing_table_row&&) = default; + routing_table_row& operator=(const routing_table_row&) = default; + + explicit routing_table_row(caf::actor hdl) : hdl(std::move(hdl)) { + versioned_paths.reserve(32); + } +}; + +template +bool inspect(Inspector& f, routing_table_row& x) { + return f.object(x).fields(f.field("hdl", x.hdl), + f.field("paths", x.versioned_paths)); +} + +/// Stores direct connections to peers as well as distances to all other peers +/// that we can reach indirectly. +using routing_table = std::unordered_map; + +/// Returns the ID of the peer if `hdl` is a direct connection, `nil` +/// otherwise. +optional get_peer_id(const routing_table& tbl, + const caf::actor& hdl); + +/// Returns all hops to the destination (including `peer` itself) or +/// `nullptr` if the destination is unreachable. +const std::vector* shortest_path(const routing_table& tbl, + const endpoint_id& peer); + +/// Checks whether the routing table `tbl` contains an entry for `peer`. +inline bool reachable(const routing_table& tbl, const endpoint_id& peer) { + return tbl.count(peer) != 0; +} + +/// Returns whether `tbl` contains a direct connection to `peer`. +inline bool is_direct_connection(const routing_table& tbl, + const endpoint_id& peer) { + if (auto i = tbl.find(peer); i != tbl.end()) + return static_cast(i->second.hdl); + return false; +} + +/// Returns the hop count on the shortest path or `nil` if no route to the peer +/// exists. 
+inline optional distance_to(const routing_table& tbl, + const endpoint_id& peer) { + if (auto ptr = shortest_path(tbl, peer)) + return ptr->size(); + else + return nil; +} + +/// Erases all state for `whom` and also removes all paths that include `whom`. +/// Other peers can become unreachable as a result. In this case, the algorithm +/// calls `on_remove` and recurses for all unreachable peers. +template +void erase(routing_table& tbl, const endpoint_id& whom, + OnRemovePeer on_remove) { + std::vector unreachable_peers; + auto impl = [&](const endpoint_id& peer) { + auto stale = [&](const auto& vpath) { + return std::find(vpath.first.begin(), vpath.first.end(), peer) + != vpath.first.end(); + }; + tbl.erase(peer); + for (auto& [id, row] : tbl) { + auto& paths = row.versioned_paths; + auto sep = std::remove_if(paths.begin(), paths.end(), stale); + if (sep != paths.end()) { + paths.erase(sep, paths.end()); + if (paths.empty()) + unreachable_peers.emplace_back(id); + } + } + }; + impl(whom); + while (!unreachable_peers.empty()) { + // Our lambda modifies unreachable_peers, so we can't use iterators here. + endpoint_id peer = std::move(unreachable_peers.back()); + unreachable_peers.pop_back(); + impl(peer); + on_remove(peer); + } +} + +/// Erases connection state for a direct connection to `whom`. Routing paths to +/// `whom` may still remain in the table if `whom` is reachable through others. +/// Other peers can become unreachable as a result. In this case, the algorithm +/// calls `on_remove` for each unreachable peer and erases its row. +/// @returns `true` if `tbl` contains an entry for `whom`, `false` otherwise. +/// @note The callback `on_remove` gets called while changing the routing table. +/// Hence, it must not mutate the routing table and ideally doesn't access +/// it at all. +template +bool erase_direct(routing_table& tbl, const endpoint_id& whom, + OnRemovePeer on_remove) { + // Reset the connection handle. 
+ if (auto i = tbl.find(whom); i == tbl.end()) { + return false; + } else { + i->second.hdl = nullptr; + } + // Drop all paths with `whom` as first hop. + for (auto i = tbl.begin(); i != tbl.end();) { + auto& paths = i->second.versioned_paths; + for (auto j = paths.begin(); j != paths.end();) { + auto& path = j->first; + if (path[0] == whom) + j = paths.erase(j); + else + ++j; + } + if (paths.empty()) { + on_remove(i->first); + i = tbl.erase(i); + } else { + ++i; + } + } + return true; +} + +template +void for_each_direct(const routing_table& tbl, F fun) { + for (auto& [peer, row] : tbl) + if (row.hdl) + fun(peer, row.hdl); +} + +/// Returns a pointer to the row of the remote peer if it exists, `nullptr` +/// otherwise. +const routing_table_row* find_row(const routing_table& tbl, + const endpoint_id& peer); + +/// @copydoc find_row. +routing_table_row* find_row(routing_table& tbl, const endpoint_id& peer); + +/// Adds a path to the peer, inserting a new row for the peer if it does not +/// exist yet. +/// @return `true` if a new entry was added to `tbl`, `false` otherwise. +bool add_or_update_path(routing_table& tbl, const endpoint_id& peer, + std::vector path, vector_timestamp ts); + +/// A 3-tuple for storing a revoked path between two peers with the logical time +/// when the connection was severed. +template +struct revocation { + /// The source of the event. + PeerId revoker; + + /// Time of the connection loss, as seen by `revoker`. + lamport_timestamp ts; + + /// The disconnected hop. + PeerId hop; + + /// Time when this revocation entry got created. 
+ caf::actor_clock::time_point first_seen; +}; + +/// @relates revocation +template +bool operator==(const revocation& x, + const revocation& y) noexcept { + return std::tie(x.revoker, x.ts, x.hop) == std::tie(y.revoker, y.ts, y.hop); +} + +/// @relates revocation +template +bool operator!=(const revocation& x, + const revocation& y) noexcept { + return !(x == y); +} + +/// @relates revocation +template +bool operator<(const revocation& x, + const revocation& y) noexcept { + return std::tie(x.revoker, x.ts, x.hop) < std::tie(y.revoker, y.ts, y.hop); +} + +/// @relates revocation +template +bool operator<(const revocation& x, + const std::tuple& y) noexcept { + return std::tie(x.revoker, x.ts, x.hop) < y; +} + +/// @relates revocation +template +bool operator<(const std::tuple& x, + const revocation& y) noexcept { + return x < std::tie(y.revoker, y.ts, y.hop); +} + +/// @relates revocation +template +typename Inspector::result_type inspect(Inspector& f, revocation& x) { + return f.object(x) + .pretty_name("revocation") + .fields(f.field("revoker", x.revoker), f.field("ts", x.ts), + f.field("hop", x.hop)); +} + +/// A container for storing path revocations, sorted by `revoker` then `ts` then +/// `hop`. +template +using revocations = std::vector>; + +/// Inserts a new entry into the sorted list of revocations, constructed +/// in-place with the given args if this entry does not exist yet. 
+template +auto emplace(revocations& lst, Self* self, Revoker&& revoker, + lamport_timestamp ts, Hop&& hop) { + auto i = std::lower_bound(lst.begin(), lst.end(), std::tie(revoker, ts, hop)); + if (i == lst.end() || i->revoker != revoker || i->ts != ts || i->hop != hop) { + revocation entry{std::forward(revoker), ts, + std::forward(hop), self->clock().now()}; + auto j = lst.emplace(i, std::move(entry)); + return std::make_pair(j, true); + } + return std::make_pair(i, false); +} + +template +auto equal_range(revocations& lst, const Revoker& revoker) { + auto key_less = [](const auto& x, const auto& y) { + if constexpr (std::is_same, Revoker>::value) + return x.revoker < y; + else + return x < y.revoker; + }; + auto i = std::lower_bound(lst.begin(), lst.end(), revoker, key_less); + if (i == lst.end()) + return std::make_pair(i, i); + return std::make_pair(i, std::upper_bound(i, lst.end(), revoker, key_less)); +} + +/// Checks whether `path` routes through either `revoker -> hop` or +/// `hop -> revoker` with a timestamp <= `ts`. +template +bool revoked(const std::vector& path, const vector_timestamp& path_ts, + const PeerId& revoker, lamport_timestamp ts, const PeerId& hop) { + BROKER_ASSERT(path.size() == path_ts.size()); + // Short-circuit trivial cases. + if (path.size() <= 1) + return false; + // Scan for the revoker anywhere in the path and see if it's next to the + // revoked hop. 
+ if (path.front() == revoker) + return path_ts.front() <= ts && path[1] == hop; + for (size_t index = 1; index < path.size() - 1; ++index) + if (path[index] == revoker) + return path_ts[index] <= ts + && (path[index - 1] == hop || path[index + 1] == hop); + if (path.back() == revoker) + return path_ts.back() <= ts && path[path.size() - 2] == hop; + return false; +} + +/// @copydoc revoked +template +bool revoked(const std::vector& path, const vector_timestamp& ts, + const revocation& entry) { + return revoked(path, ts, entry.revoker, entry.ts, entry.hop); +} + +/// Checks whether `path` is revoked by any entry in `entries`. +template +std::enable_if_t< + std::is_same>::value, bool> +revoked(const std::vector& path, const vector_timestamp& ts, + const Container& entries) { + for (const auto& entry : entries) + if (revoked(path, ts, entry)) + return true; + return false; +} + +/// Removes all paths from `tbl` where `revoked` returns true for the given +/// arguments. Calls `callback` for each row left without paths and erases it. +template +void revoke(routing_table& tbl, const endpoint_id& revoker, + lamport_timestamp revoke_time, const endpoint_id& hop, + OnRemovePeer callback) { + auto i = tbl.begin(); + while (i != tbl.end()) { + detail::erase_if(i->second.versioned_paths, [&](auto& kvp) { + return revoked(kvp.first, kvp.second, revoker, revoke_time, hop); + }); + if (i->second.versioned_paths.empty()) { + callback(i->first); + i = tbl.erase(i); + } else { + ++i; + } + } +} + +/// @copydoc revoke +template +void revoke(routing_table& tbl, const revocation& entry, + OnRemovePeer callback) { + return revoke(tbl, entry.revoker, entry.ts, entry.hop, callback); +} + +} // namespace broker::alm diff --git a/include/broker/alm/stream_transport.hh b/include/broker/alm/stream_transport.hh index 893b7f99..c30730cc 100644 --- a/include/broker/alm/stream_transport.hh +++ b/include/broker/alm/stream_transport.hh @@ -1,570 +1,216 @@ #pragma once #include -#include -#include #include 
-#include -#include -#include #include #include #include 
+#include +#include #include -#include -#include -#include -#include -#include -#include - -#include "broker/atoms.hh" -#include "broker/data.hh" -#include "broker/defaults.hh" -#include "broker/detail/assert.hh" -#include "broker/detail/central_dispatcher.hh" -#include "broker/detail/filesystem.hh" -#include "broker/detail/generator_file_writer.hh" + +#include "broker/alm/peer.hh" +#include "broker/alm/routing_table.hh" +#include "broker/detail/lift.hh" +#include "broker/detail/peer_handshake.hh" #include "broker/detail/prefix_matcher.hh" #include "broker/detail/unipath_manager.hh" #include "broker/error.hh" #include "broker/filter_type.hh" -#include "broker/internal_command.hh" +#include "broker/fwd.hh" #include "broker/logger.hh" #include "broker/message.hh" -#include "broker/peer_filter.hh" -#include "broker/status.hh" -#include "broker/topic.hh" +#include "broker/shutdown_options.hh" namespace broker::alm { -/// Sets up a configurable stream manager to act as a distribution tree for -/// Broker. -template -class stream_transport : public detail::unipath_manager::observer { +/// The transport registers these message handlers: +/// +/// ~~~ +/// (atom::peer, endpoint_id, actor) -> void +/// => start_peering(id, hdl) +/// +/// (atom::peer, atom::init, endpoint_id, actor) -> slot +/// => handle_peering_request(...) +/// +/// (stream, actor, endpoint_id, filter_type, lamport_timestamp) -> slot +/// => handle_peering_handshake_1(...) +/// +/// (stream, actor, endpoint_id) -> void +/// => handle_peering_handshake_2(...) +/// +/// (atom::unpeer, actor hdl) -> void +/// => disconnect(hdl) +/// ~~~ +class stream_transport : public peer, public detail::unipath_manager::observer { public: - // -- constants -------------------------------------------------------------- - - /// Configures how many (additional) items the stream transport caches for - /// published events that bypass streaming. 
- static constexpr size_t item_stash_replenish_size = 32; - // -- member types ----------------------------------------------------------- - using peer_id_type = PeerId; - - using communication_handle_type = caf::actor; + using super = peer; - using manager_ptr = detail::unipath_manager_ptr; - - struct pending_connection { - manager_ptr mgr; - caf::response_promise rp; - }; + using endpoint_id_list = std::vector; /// Maps peer actor handles to their stream managers. - using hdl_to_mgr_map = std::unordered_map; + using hdl_to_mgr_map + = std::unordered_map; /// Maps stream managers to peer actor handles. - using mgr_to_hdl_map = std::unordered_map; + using mgr_to_hdl_map + = std::unordered_map; // -- constructors, destructors, and assignment operators -------------------- - stream_transport(caf::event_based_actor* self, const filter_type& filter) - : self_(self), dispatcher_(self) { - using caf::get_or; - auto& cfg = self->system().config(); - auto meta_dir = get_or(cfg, "broker.recording-directory", - defaults::recording_directory); - if (!meta_dir.empty() && detail::is_directory(meta_dir)) { - auto file_name = meta_dir + "/messages.dat"; - recorder_ = detail::make_generator_file_writer(file_name); - if (recorder_ == nullptr) { - BROKER_WARNING("cannot open recording file" << file_name); - } else { - BROKER_DEBUG("opened file for recording:" << file_name); - remaining_records_ = get_or(cfg, "broker.output-generator-file-cap", - defaults::output_generator_file_cap); - } - } - } - - // -- initialization --------------------------------------------------------- - - template - caf::behavior make_behavior(Fs... 
fs) { - return {std::move(fs)...}; - } + explicit stream_transport(caf::event_based_actor* self); // -- properties ------------------------------------------------------------- - caf::event_based_actor* self() noexcept { - return self_; - } - - auto& pending_connections() noexcept { - return pending_connections_; - } - - uint16_t ttl() const noexcept { - return dref().options().ttl; - } - - // -- peer management -------------------------------------------------------- - - /// Queries whether `hdl` is a known peer. - [[nodiscard]] bool connected_to(const caf::actor& hdl) const { - return hdl_to_mgr_.count(hdl) != 0; - } - - /// Drops a peer either due to an error or due to a graceful disconnect. - void drop_peer(const caf::actor& hdl, bool graceful, const error& reason) { - if (auto i = hdl_to_mgr_.find(hdl); i != hdl_to_mgr_.end()) { - auto mgr = i->second; - mgr_to_hdl_.erase(mgr); - hdl_to_mgr_.erase(i); - if (graceful) - BROKER_DEBUG(hdl.node() << "disconnected gracefully"); - else - BROKER_DEBUG(hdl.node() << "disconnected abnormally:" << reason); - dref().peer_disconnected(hdl.node(), hdl, reason); - } else if (auto j = pending_connections_.find(hdl); - j != pending_connections_.end()) { - BROKER_DEBUG("peer failed to connect"); - auto err = make_error(ec::peer_disconnect_during_handshake); - j->second.rp.deliver(err); - pending_connections_.erase(j); - dref().peer_unavailable(hdl.node(), hdl, err); - } - // Shut down when the last peer stops listening. - if (dref().shutting_down() && pending_connections_.empty() - && hdl_to_mgr_.empty()) - self()->quit(caf::exit_reason::user_shutdown); - } + /// Returns whether this manager has connection to `hdl`.` + [[nodiscard]] bool connected_to(const caf::actor& hdl) const noexcept; - /// Disconnects a peer by demand of the user. 
- void unpeer(const peer_id_type& peer_id, const caf::actor& hdl) { - BROKER_TRACE(BROKER_ARG(peer_id) << BROKER_ARG(hdl)); - if (auto i = hdl_to_mgr_.find(hdl); i != hdl_to_mgr_.end()) { - auto mgr = i->second; - mgr->unobserve(); - mgr->stop(); - mgr_to_hdl_.erase(mgr); - hdl_to_mgr_.erase(i); - dref().peer_removed(peer_id, hdl); - } else if (auto j = pending_connections_.find(hdl); - j != pending_connections_.end()) { - auto mgr = j->second.mgr; - mgr->unobserve(); - mgr->stop(); - auto err = make_error(ec::peer_disconnect_during_handshake); - j->second.rp.deliver(err); - pending_connections_.erase(j); - dref().peer_unavailable(peer_id, hdl, err); - } else { - dref().cannot_remove_peer(peer_id, hdl); - } - // Shut down when the last peer stops listening. - if (dref().shutting_down() && pending_connections_.empty() - && mgr_to_hdl_.empty()) - self()->quit(caf::exit_reason::user_shutdown); - } + // -- adding local subscribers ----------------------------------------------- - /// Disconnects a peer by demand of the user. - void unpeer(const caf::actor& hdl) { - BROKER_TRACE(BROKER_ARG(hdl)); - if (hdl) - unpeer(hdl.node(), hdl); - } - - /// Starts the handshake process for a new peering (step #1 in core_actor.cc). - /// @returns `false` if the peer is already connected, `true` otherwise. - /// @param peer_hdl Handle to the peering (remote) core actor. - /// @param peer_filter Filter of our peer. - /// @param send_own_filter Sends a `(filter, self)` handshake if `true`, - /// `('ok', self)` otherwise. + /// Subscribes `current_sender()` to @p data_message events that match + /// @p filter. /// @pre `current_sender() != nullptr` - template - auto start_handshake(const caf::actor& peer_hdl, filter_type peer_filter) { - BROKER_TRACE(BROKER_ARG(peer_hdl) << BROKER_ARG(peer_filter)); - using result_type = std::conditional_t< - SendOwnFilter, - caf::outbound_stream_slot, - caf::outbound_stream_slot>; - // Check whether we already send outbound traffic to the peer. 
Could use - // `CAF_ASSERT` instead, because this must'nt get called for known peers. - if (hdl_to_mgr_.count(peer_hdl) != 0) { - BROKER_ERROR("peer already connected"); - return result_type{}; - } - // Add outbound path to the peer. - auto self_hdl = caf::actor_cast(self()); - if constexpr (SendOwnFilter) { - if (auto i = pending_connections_.count(peer_hdl) == 0) { - auto mgr = detail::make_peer_manager(&dispatcher_, this); - mgr->filter(std::move(peer_filter)); - pending_connections_[peer_hdl].mgr = mgr; - return mgr->template add_unchecked_outbound_path( - peer_hdl, std::make_tuple(dref().filter(), std::move(self_hdl))); - } else { - BROKER_ERROR("already connecting to peer"); - return result_type{}; - } - } else if (auto i = pending_connections_.find(peer_hdl); - i != pending_connections_.end()) { - auto mgr = i->second.mgr; - mgr->filter(std::move(peer_filter)); - return mgr->template add_unchecked_outbound_path( - peer_hdl, std::make_tuple(atom::ok_v, std::move(self_hdl))); - } else { - BROKER_ERROR("received handshake #2 from unknown peer"); - return result_type{}; - } - } - - /// Initiates peering between this peer and `remote_peer`. - void start_peering(const peer_id_type&, caf::actor remote_peer, - caf::response_promise rp) { - BROKER_TRACE(BROKER_ARG(remote_peer)); - // Sanity checking. - if (remote_peer == nullptr) { - rp.deliver(caf::sec::invalid_argument); - return; - } - // Ignore repeated peering requests without error. - if (pending_connections().count(remote_peer) > 0 - || connected_to(remote_peer)) { - rp.deliver(caf::unit); - return; - } - // Create necessary state and send message to remote core. - auto mgr = make_peer_manager(&dispatcher_, this); - pending_connections().emplace(remote_peer, - pending_connection{mgr, std::move(rp)}); - self()->send(self() * remote_peer, atom::peer_v, dref().filter(), self()); - self()->monitor(remote_peer); - } - - /// Acknowledges an incoming peering request (step #2/3 in core_actor.cc). 
- /// @param peer_hdl Handle to the peering (remote) core actor. - /// @returns `false` if the peer is already connected, `true` otherwise. - /// @pre Current message is an `open_stream_msg`. - bool ack_peering(const caf::stream& in, - const caf::actor& peer_hdl) { - BROKER_TRACE(BROKER_ARG(peer_hdl)); - BROKER_ASSERT(hdl_to_mgr_.count(peer_hdl) == 0); - if (auto i = pending_connections_.find(peer_hdl); - i != pending_connections_.end()) { - if (!i->second.mgr->has_inbound_path()) { - i->second.mgr->add_unchecked_inbound_path(in); - return true; - } else { - BROKER_ERROR("ack_peering called, but an inbound path already exists"); - return false; - } - } else { - BROKER_ERROR("ack_peering but no peering started yet"); - return false; - } - } - - /// Updates the filter of an existing peer. - bool update_peer(const caf::actor& hdl, filter_type filter) { - BROKER_TRACE(BROKER_ARG(hdl) << BROKER_ARG(filter)); - if (auto i = hdl_to_mgr_.find(hdl); i != hdl_to_mgr_.end()) { - i->second->filter(std::move(filter)); - return true; - } else { - BROKER_DEBUG("cannot update filter of unknown peer"); - return false; - } - } - - void try_finalize_handshake(const caf::actor& hdl) { - if (auto i = pending_connections_.find(hdl); - i != pending_connections_.end()) { - if (auto mgr = i->second.mgr; mgr->fully_connected()) { - mgr->unblock_inputs(); - dispatcher_.add(mgr); - hdl_to_mgr_.emplace(hdl, mgr); - mgr_to_hdl_.emplace(mgr, hdl); - i->second.rp.deliver(hdl); - pending_connections_.erase(i); - dref().peer_connected(hdl.node(), hdl); - } - } - } - - // -- filter management ------------------------------------------------------ - - void set_filter(caf::stream_slot out_slot, filter_type filter) { - auto i = std::find_if(dispatcher_.managers().begin(), - dispatcher_.managers().end(), - [out_slot](const auto& ptr) { - return ptr->outbound_path_slot() == out_slot; - }); - if (i != dispatcher_.managers().end()) - (*i)->filter(std::move(filter)); - } - - // -- management of worker and 
storage streams ------------------------------- - - /// Adds the sender of the current message as worker by starting an output - /// stream to it. + caf::outbound_stream_slot + add_sending_worker(filter_type filter); + + /// Subscribes @p hdl to @p data_message events that match @p filter. + error add_worker(const caf::actor& hdl, filter_type filter); + + /// Subscribes `current_sender()` to @p command_message events that match + /// @p filter. /// @pre `current_sender() != nullptr` - auto add_worker(filter_type filter) { - BROKER_TRACE(BROKER_ARG(filter)); - dref().subscribe(filter); - auto mgr = make_data_sink(&dispatcher_, std::move(filter)); - auto res = mgr->template add_unchecked_outbound_path(); - BROKER_ASSERT(res != caf::invalid_stream_slot); - return res; - } - - /// Subscribes `self->sender()` to `store_manager()`. - auto add_sending_store(filter_type filter) { - BROKER_TRACE(BROKER_ARG(filter)); - dref().subscribe(filter); - auto mgr = make_command_sink(&dispatcher_, std::move(filter)); - auto res = mgr->template add_unchecked_outbound_path(); - BROKER_ASSERT(res != caf::invalid_stream_slot); - return res; - } - - /// Subscribes `hdl` to `store_manager()`. - caf::error add_store(const caf::actor& hdl, const filter_type& filter) { - BROKER_TRACE(BROKER_ARG(hdl) << BROKER_ARG(filter)); - auto mgr = make_command_sink(&dispatcher_, filter); - auto res = mgr->template add_unchecked_outbound_path(hdl); - if (res != caf::invalid_stream_slot) { - dref().subscribe(filter); - return caf::none; - } else { - return caf::sec::cannot_add_downstream; - } - } - - // -- selectively pushing data into the streams ------------------------------ - - /// Pushes messages to local subscribers without forwarding it to peers. 
- template - void local_push(T msg) { - BROKER_TRACE(BROKER_ARG(msg)); - auto wrapped = make_node_message(std::move(msg), ttl()); - dispatcher_.enqueue(nullptr, detail::item_scope::local, - caf::make_span(&wrapped, 1)); - } - - /// Pushes messages to peers without forwarding it to local subscribers. - void remote_push(node_message msg) { - BROKER_TRACE(BROKER_ARG(msg)); - dispatcher_.enqueue(nullptr, detail::item_scope::remote, - caf::make_span(&msg, 1)); - } - - /// Pushes data to peers. - void push(data_message msg) { - remote_push(make_node_message(std::move(msg), ttl())); - } - - /// Pushes data to peers. - void push(command_message msg) { - remote_push(make_node_message(std::move(msg), ttl())); - } - - /// Pushes data to peers. - void push(node_message msg) { - remote_push(std::move(msg)); - } - - // -- communication that bypasses the streams -------------------------------- - - void ship(data_message& msg, const communication_handle_type& hdl) { - self()->send(hdl, atom::publish_v, atom::local_v, std::move(msg)); - } + caf::outbound_stream_slot + add_sending_store(filter_type filter); - template - void ship(T& msg) { - push(std::move(msg)); - } + /// Subscribes @p hdl to @p command_message events that match @p filter. 
+ error add_store(const caf::actor& hdl, filter_type filter); + + // -- overrides for peer::publish -------------------------------------------- + + void publish(const caf::actor& dst, atom::subscribe, + const endpoint_id_list& path, const vector_timestamp& ts, + const filter_type& new_filter) override; + + void publish(const caf::actor& dst, atom::revoke, + const endpoint_id_list& path, const vector_timestamp& ts, + const endpoint_id& lost_peer, + const filter_type& new_filter) override; + + using super::publish_locally; + + void publish_locally(const data_message& msg) override; + + void publish_locally(const command_message& msg) override; + + // -- peering ---------------------------------------------------------------- + + detail::peer_manager_ptr + get_or_insert_pending(const endpoint_id& remote_peer); + + detail::peer_manager_ptr get_pending(const caf::actor& hdl); + + detail::peer_manager_ptr get_pending(const endpoint_id& remote_peer); + + // Initiates peering between A (this node) and B (remote peer). + void start_peering(const endpoint_id& remote_peer, const caf::actor& hdl, + caf::response_promise rp); + + // Establishes a stream from B to A. + caf::outbound_stream_slot + handle_peering_request(const endpoint_id& remote_peer, const caf::actor& hdl); + + // Acks the stream from B to A and establishes a stream from A to B. + caf::outbound_stream_slot + handle_peering_handshake_1(caf::stream, const caf::actor& hdl, + const endpoint_id& remote_peer, + const filter_type& filter, + lamport_timestamp timestamp); + + // Acks the stream from A to B. 
+ void handle_peering_handshake_2(caf::stream in, atom::ok, + const caf::actor& hdl, + const endpoint_id& remote_peer, + const filter_type& filter, + lamport_timestamp timestamp); + + // void cleanup(detail::peer_handshake_ptr ptr); - void ship(node_message& msg) { - push(std::move(msg)); - } + // void cleanup_and_replay_buffer_if_done(detail::peer_handshake_ptr ptr); + bool finalize(detail::peer_handshake* hs); + + // -- overrides for alm::peer ------------------------------------------------ + + void shutdown(shutdown_options options) override; + + void handle_filter_update(endpoint_id_list& path, vector_timestamp& path_ts, + const filter_type& filter) override; + + void handle_path_revocation(endpoint_id_list& path, vector_timestamp& path_ts, + const endpoint_id& revoked_hop, + const filter_type& filter) override; + + // -- overrides for detail::central_dispatcher ------------------------------- + + /// @private template - void publish(T msg) { - dref().ship(msg); - } + void dispatch_impl(const T& msg); + + void dispatch(const data_message& msg) override; - void publish(node_message_content msg) { - visit([this](auto& x) { dref().ship(x); }, msg); - } + void dispatch(const command_message& msg) override; - // -- unipath manager callbacks ---------------------------------------------- + void dispatch(node_message&& msg) override; + + void flush() override; + + // -- overrides for detail::unipath_manager::observer ------------------------ void closing(detail::unipath_manager* ptr, bool graceful, - const error& reason) override { - drop_peer(ptr->hdl(), graceful, reason); - } - - void downstream_connected(detail::unipath_manager*, - const caf::actor& hdl) override { - try_finalize_handshake(hdl); - } - - // -- overridden member functions of caf::stream_manager --------------------- - - /// Applies `f` to each peer. 
- template - void for_each_peer(F f) { - auto peers = peer_handles(); - std::for_each(peers.begin(), peers.end(), std::move(f)); - } - - /// Returns all known peers. - auto peer_handles() { - std::vector peers; - for (auto& kvp : hdl_to_mgr_) - peers.emplace_back(kvp.first); - return peers; - } - - /// Finds the first peer handle that satisfies the predicate. - template - caf::actor find_output_peer_hdl(Predicate pred) { - for (auto& kvp : hdl_to_mgr_) - if (pred(kvp.first)) - return kvp.first; - return nullptr; - } - - /// Applies `f` to each filter. - template - void for_each_filter(F f) { - for (auto& kvp : mgr_to_hdl_) - if (kvp.first->message_type() == caf::type_id_v) - f(kvp.first->filter()); - } - - /// Checks whether the predicate `f` holds for any @ref unicast_manager object - /// that represents a remote peer. - template - bool any_peer_manager(Predicate f) { - for (auto& kvp : mgr_to_hdl_) - if (f(kvp.first)) - return true; - return false; - } - - // -- fallback implementations to enable forwarding chains ------------------- - - void subscribe(const filter_type&) { - // nop - } - - // -- callbacks -------------------------------------------------------------- - - /// Called whenever new data for local subscribers became available. - /// @param msg Data or command message, either received by peers or generated - /// from a local publisher. - /// @tparam T Either ::data_message or ::command_message. - template - void ship_locally(T msg) { - local_push(std::move(msg)); - } - - /// Called whenever this peer established a new connection. - /// @param peer_id ID of the newly connected peer. - /// @param hdl Communication handle for exchanging messages with the new peer. - /// The handle is default-constructed if no direct connection - /// exists (yet). - /// @note The new peer gets stored in the routing table *before* calling this - /// member function. 
- void peer_connected([[maybe_unused]] const peer_id_type& peer_id, - [[maybe_unused]] const communication_handle_type& hdl) { - // nop - } - - /// Called whenever this peer lost a connection to a remote peer. - /// @param peer_id ID of the disconnected peer. - /// @param hdl Communication handle of the disconnected peer. - /// @param reason None if we closed the connection gracefully, otherwise - /// contains the transport-specific error code. - void peer_disconnected([[maybe_unused]] const peer_id_type& peer_id, - [[maybe_unused]] const communication_handle_type& hdl, - [[maybe_unused]] const error& reason) { - // nop - } - - /// Called whenever this peer removed a direct connection to a remote peer. - /// @param peer_id ID of the removed peer. - /// @param hdl Communication handle of the removed peer. - void peer_removed([[maybe_unused]] const peer_id_type& peer_id, - [[maybe_unused]] const communication_handle_type& hdl) { - // nop - } - - /// Called whenever the user tried to unpeer from an unconnected peer. - /// @param addr Host information for the unconnected peer. - void cannot_remove_peer([[maybe_unused]] const network_info& addr) { - // nop - } - - /// Called whenever the user tried to unpeer from an unconnected peer. - /// @param peer_id ID of the unconnected peer. - /// @param hdl Communication handle of the unconnected peer (may be null). - void - cannot_remove_peer([[maybe_unused]] const peer_id_type& peer_id, - [[maybe_unused]] const communication_handle_type& hdl) { - // nop - } - - /// Called whenever establishing a connection to a remote peer failed. - /// @param addr Host information for the unavailable peer. - void peer_unavailable([[maybe_unused]] const network_info& addr) { - // nop - } - - /// Called whenever we could obtain a connection handle to a remote peer but - /// received a `down_msg` before completing the handshake. - /// @param peer_id ID of the unavailable peer. - /// @param hdl Communication handle of the unavailable peer. 
- /// @param reason Exit reason of the unavailable peer. - void peer_unavailable([[maybe_unused]] const peer_id_type& peer_id, - [[maybe_unused]] const communication_handle_type& hdl, - [[maybe_unused]] const error& reason) { - // nop - } + const error& reason) override; + + void downstream_connected(detail::unipath_manager* ptr, + const caf::actor& hdl) override; + + bool finalize_handshake(detail::peer_manager*) override; + + void abort_handshake(detail::peer_manager*) override; + + // -- initialization --------------------------------------------------------- + + caf::behavior make_behavior() override; protected: - manager_ptr mgr_by_hdl(const caf::actor& hdl) { - if (auto i = hdl_to_mgr_.find(hdl); i != hdl_to_mgr_.end()) - return i->second; - else if (auto j = pending_connections_.find(hdl); - j != pending_connections_.end()) - return j->second.mgr; - else - return nullptr; - } - - caf::actor hdl_by_mgr(const manager_ptr& mgr) { - if (auto i = mgr_to_hdl_.find(mgr); i != mgr_to_hdl_.end()) - return i->second; - else if (auto j = pending_connections_.find(mgr); - j != pending_connections_.end()) - return j->second.mgr; - else - return nullptr; - } - - /// Pointer to the actor that owns this state. - caf::event_based_actor* self_; - - /// Our dispatcher "singleton" that holds the item allocator as well as - /// pointers to all active unipath managers for outbound traffic. - detail::central_dispatcher dispatcher_; + // -- utility ---------------------------------------------------------------- + + /// Updates the filter of a data or command sink. + bool update_filter(caf::stream_slot slot, filter_type&& filter); + + /// Removes state for `hdl` and returns whether any cleanup steps were + /// performed. + /// @param reason When removed by user action `null`, otherwise a pointer to + /// the error that triggered the cleanup. + bool peer_cleanup(const endpoint_id& peer_id, const error* reason = nullptr); + + /// Disconnects a peer as a result of an error. 
+ void drop_peer(const caf::actor& hdl, const error& reason); + + /// Disconnects a peer by demand of the user. + void unpeer(const endpoint_id& peer_id, const caf::actor& hdl); + + /// Disconnects a peer by demand of the user. + void unpeer(const endpoint_id& peer_id); + + /// Disconnects a peer by demand of the user. + void unpeer(const caf::actor& hdl); + + /// Tries to find a peer manager for `peer_id` in pending_ or hdl_to_mgr_. + detail::peer_manager* peer_lookup(const endpoint_id& peer_id); /// Maps peer handles to their respective unipath manager. hdl_to_mgr_map hdl_to_mgr_; @@ -572,27 +218,14 @@ protected: /// Maps unipath managers to their respective peer handle. mgr_to_hdl_map mgr_to_hdl_; - /// Maps pending peer handles to output IDs. An invalid stream ID indicates - /// that only "step #0" was performed so far. An invalid stream ID corresponds - /// to `peer_status::connecting` and a valid stream ID cooresponds to - /// `peer_status::connected`. The status for a given handle `x` is - /// `peer_status::peered` if `governor->has_peer(x)` returns true. - std::unordered_map pending_connections_; - - /// Helper for recording meta data of published messages. - detail::generator_file_writer_ptr recorder_; - - /// Counts down when using a `recorder_` to cap maximum file entries. - size_t remaining_records_ = 0; + /// Stores connections to peers that have yet to finish the handshake. + std::unordered_map pending_; -private: - Derived& dref() { - return static_cast(*this); - } + /// Stores local data message subscribers. + std::vector data_sinks_; - const Derived& dref() const { - return static_cast(*this); - } + /// Stores local command message subscribers. 
+ std::vector command_sinks_; }; } // namespace broker::alm diff --git a/include/broker/configuration.hh b/include/broker/configuration.hh index 98167feb..5025aab3 100644 --- a/include/broker/configuration.hh +++ b/include/broker/configuration.hh @@ -10,14 +10,10 @@ struct broker_options { /// If true, peer connections won't use SSL. bool disable_ssl = false; - /// If true, endpoints will forward incoming messages to peers. - bool forward = true; - - /// TTL to insert into forwarded messages. Messages will be droppped once - /// they have traversed more than this many hops. Note that the 1st - /// receiver inserts the TTL (not the sender!). The 1st receiver does - /// already count against the TTL. - unsigned int ttl = defaults::ttl; + /// If true, endpoints no longer forward incoming subscriptions and other + /// routing-related messages to peers. Setting this flag to `true` turns the + /// endpoint into a leaf node that never offers forwarding paths to others. + bool disable_forwarding = false; /// Whether to use real/wall clock time for data store time-keeping /// tasks or whether the application will simulate time on its own. @@ -106,6 +102,9 @@ protected: void init(int argc, char** argv); private: + /// Makes sure the config content is in-sync with the `options_`. 
+ void sync_options(); + broker_options options_; }; diff --git a/include/broker/core_actor.hh b/include/broker/core_actor.hh index 0e750151..79b9467f 100644 --- a/include/broker/core_actor.hh +++ b/include/broker/core_actor.hh @@ -1,118 +1,40 @@ #pragma once -#include -#include -#include -#include -#include - -#include -#include -#include -#include +#include +#include "broker/alm/peer.hh" #include "broker/alm/stream_transport.hh" -#include "broker/atoms.hh" -#include "broker/configuration.hh" -#include "broker/detail/network_cache.hh" -#include "broker/detail/radix_tree.hh" -#include "broker/endpoint.hh" -#include "broker/endpoint_info.hh" -#include "broker/error.hh" -#include "broker/filter_type.hh" -#include "broker/logger.hh" #include "broker/mixin/connector.hh" #include "broker/mixin/data_store_manager.hh" #include "broker/mixin/notifier.hh" #include "broker/mixin/recorder.hh" -#include "broker/network_info.hh" -#include "broker/optional.hh" -#include "broker/peer_info.hh" -#include "broker/status.hh" namespace broker { -class core_state - : public caf::extend, - core_state>:: // - with { +/// The core registers these message handlers: +/// +/// ~~~ +/// (atom::publish, endpoint_info receiver, data_message msg) -> void +/// => ship(msg, receiver.node) +/// ~~~ +class core_state : public // + mixin::notifier< // + mixin::connector< // + mixin::data_store_manager< // + mixin::recorder< // + alm::stream_transport>>>> { // public: - // --- constants ------------------------------------------------------------- - - /// Gives this actor a recognizable name in log output. - static inline const char* name = "core"; - - // --- member types ---------------------------------------------------------- - using super = extended_base; - /// Identifies the two individual streams forming a bidirectional channel. - /// The first ID denotes the *input* and the second ID denotes the - /// *output*. 
- using stream_id_pair = std::pair; - - // --- construction ---------------------------------------------------------- - - core_state(caf::event_based_actor* ptr, const filter_type& filter, - broker_options opts = broker_options{}, - endpoint::clock* ep_clock = nullptr); - - // --- initialization -------------------------------------------------------- - - caf::behavior make_behavior(); - - // --- properties ------------------------------------------------------------ - - const auto& filter() const noexcept { - return filter_; - } - - const auto& options() const noexcept { - return options_; - } - - bool shutting_down() const noexcept { - return shutting_down_; - } - - // --- filter management ----------------------------------------------------- - - /// Sends the current filter to all peers. - void update_filter_on_peers(); - - /// Adds `xs` to our filter and update all peers on changes. - void subscribe(filter_type xs); - - // --- convenience functions for querying state ------------------------------ - - /// Returns whether `x` is either a pending peer or a connected peer. - bool has_peer(const caf::actor& x); - - /// Returns whether a master for `name` probably exists already on one of - /// our peers. - bool has_remote_subscriber(const topic& x) noexcept; - - // --- callbacks ------------------------------------------------------------- - // - void peer_connected(const peer_id_type& peer_id, - const communication_handle_type& hdl); - -private: - // --- member variables ------------------------------------------------------ - - /// A copy of the current Broker configuration options. - broker_options options_; + static inline const char* name = "broker.core"; - /// Requested topics on this core. - filter_type filter_; + explicit core_state(caf::event_based_actor* self, filter_type initial_filter, + endpoint::clock* clock = nullptr, + const domain_options* adaptation = nullptr); - /// Set to `true` after receiving a shutdown message from the endpoint. 
- bool shutting_down_ = false; + ~core_state() override; - /// Keeps track of all actors that currently wait for handshakes to - /// complete. - std::unordered_map peers_awaiting_status_sync_; + caf::behavior make_behavior() override; }; using core_actor_type = caf::stateful_actor; diff --git a/include/broker/data.hh b/include/broker/data.hh index 742223af..d4c206df 100644 --- a/include/broker/data.hh +++ b/include/broker/data.hh @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -71,22 +72,24 @@ class data { public: using types = typename data_variant::types; + // Warning: *must* have the same order as `data_variant`, because the integer + // value for this tag must be equal to `get_data().index()`. enum class type : uint8_t { - address, + none, boolean, count, - enum_value, integer, - none, - port, real, - set, string, + address, subnet, - table, - timespan, + port, timestamp, - vector + timespan, + enum_value, + set, + table, + vector, }; template @@ -203,11 +206,17 @@ DATA_TAG_ORACLE(vector); /// Returns the `data::type` tag for `T`. /// @relates data template -constexpr data::type data_tag() { +constexpr data::type data_tag() noexcept { return detail::data_tag_oracle::value; } +/// Checks whether `data_tag` is defined for `T`. /// @relates data +template +constexpr bool has_data_tag() { + return detail::is_complete>; +} + template bool inspect(Inspector& f, data::type& x) { auto get = [&] { return static_cast(x); }; diff --git a/include/broker/defaults.hh b/include/broker/defaults.hh index 28bf3df7..37380f8d 100644 --- a/include/broker/defaults.hh +++ b/include/broker/defaults.hh @@ -11,14 +11,36 @@ extern const caf::string_view recording_directory; extern const size_t output_generator_file_cap; -constexpr uint16_t ttl = 20; - -constexpr size_t max_pending_inputs_per_source = 512; +/// Configures the default timeout of @ref endpoint::await_peer. 
+extern const caf::timespan await_peer_timeout; } // namespace broker::defaults namespace broker::defaults::store { +/// Configures the time interval for advancing the local Lamport time. extern const caf::timespan tick_interval; +/// Configures how many ticks pass between sending heartbeat messages. +extern const uint16_t heartbeat_interval; + +/// Configures how many ticks without any progress we wait before sending NACK +/// messages, i.e., requesting retransmits. +extern const uint16_t nack_timeout; + +/// Configures how many missed heartbeats we wait before assuming the remote +/// store actor dead. +extern const uint16_t connection_timeout; + +/// Configures the default timeout of @ref peer::await_idle. +extern const caf::timespan await_idle_timeout; + } // namespace broker::defaults::store + +namespace broker::defaults::path_revocations { + +extern const caf::timespan aging_interval; + +extern const caf::timespan max_age; + +} // namespace broker::defaults::path_revocations diff --git a/include/broker/detail/algorithms.hh b/include/broker/detail/algorithms.hh new file mode 100644 index 00000000..36150fb5 --- /dev/null +++ b/include/broker/detail/algorithms.hh @@ -0,0 +1,20 @@ +#pragma once + +#include + +namespace broker::detail { + +// TODO: switch to the new std::erase_if overloads when switching to C++20. +template +size_t erase_if(Container& xs, Predicate predicate) { + size_t old_size = xs.size(); + auto i = xs.begin(); + while (i != xs.end()) + if (predicate(*i)) + i = xs.erase(i); + else + ++i; + return old_size - xs.size(); +} + +} // namespace broker::detail diff --git a/include/broker/detail/blob.hh b/include/broker/detail/blob.hh new file mode 100644 index 00000000..fd345716 --- /dev/null +++ b/include/broker/detail/blob.hh @@ -0,0 +1,38 @@ +#pragma once + +#include +#include + +#include +#include + +namespace broker::detail { + +template +auto to_blob(T&& x, Ts&&... 
xs) { + typename caf::binary_serializer::container_type buf; + caf::binary_serializer sink{nullptr, buf}; + auto res = sink.apply(std::forward(x)) + && (sink.apply(std::forward(xs)) && ...); + // TODO: maybe throw? No other way to report errors here. + static_cast(res); + return buf; +} + +template +T from_blob(const void* buf, size_t size) { + caf::binary_deserializer source{nullptr, reinterpret_cast(buf), + size}; + auto result = T{}; + auto res = source.apply(result); + // TODO: maybe throw? No other way to report errors here. + static_cast(res); + return result; +} + +template +T from_blob(const Container& buf) { + return from_blob(buf.data(), buf.size()); +} + +} // namespace broker::detail diff --git a/include/broker/detail/central_dispatcher.hh b/include/broker/detail/central_dispatcher.hh index 51243d32..1784e5fc 100644 --- a/include/broker/detail/central_dispatcher.hh +++ b/include/broker/detail/central_dispatcher.hh @@ -1,12 +1,9 @@ #pragma once -#include #include #include -#include "broker/detail/assert.hh" -#include "broker/detail/unipath_manager.hh" #include "broker/fwd.hh" namespace broker::detail { @@ -14,25 +11,43 @@ namespace broker::detail { /// Central point for all `unipath_manager` instances to enqueue items. class central_dispatcher { public: - explicit central_dispatcher(caf::scheduled_actor* self); + virtual ~central_dispatcher(); - void enqueue(const unipath_manager* source, item_scope scope, - caf::span messages); + /// Forwards given item to remote subscribers. + virtual void dispatch(const data_message& msg) = 0; - /// Adds a new output path to the dispatcher. - void add(unipath_manager_ptr sink); + /// Forwards given item to remote subscribers. + virtual void dispatch(const command_message& msg) = 0; - auto self() const noexcept { - return self_; - } + /// Forwards given item to remote subscribers. 
+ void dispatch(const node_message_content& msg); + + /// Forwards given item to remote subscribers and to local subscribers if + /// `this_endpoint()` is in `get_receivers(msg)`. + virtual void dispatch(node_message&& msg) = 0; + + /// Tries to emit batches on all nested output paths. + virtual void flush() = 0; + + /// Returns a pointer to the actor that owns this dispatcher. + virtual caf::event_based_actor* this_actor() noexcept = 0; + + /// Returns the ID for this Broker endpoint. + virtual endpoint_id this_endpoint() const = 0; + + /// Returns the current filter on this Broker endpoint. + virtual filter_type local_filter() const = 0; + + /// Returns the current logical time on this Broker endpoint. + virtual alm::lamport_timestamp local_timestamp() const noexcept = 0; - const auto& managers() const noexcept { - return sinks_; + /// Returns whether the dispatcher is shutting down. + [[nodiscard]] bool tearing_down() const noexcept { + return tearing_down_; } -private: - caf::scheduled_actor* self_; - std::vector sinks_; +protected: + bool tearing_down_ = false; }; } // namespace broker::detail diff --git a/include/broker/detail/channel.hh b/include/broker/detail/channel.hh new file mode 100644 index 00000000..01ed916a --- /dev/null +++ b/include/broker/detail/channel.hh @@ -0,0 +1,773 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +#include "broker/alm/lamport_timestamp.hh" +#include "broker/error.hh" +#include "broker/logger.hh" + +namespace broker::detail { + +/// Integer type for the monotonically increasing counters large enough to +/// neglect wraparounds. At 1000 messages per second, a sequence number of this +/// type overflows after 580 *million* years. +using sequence_number_type = uint64_t; + +/// Integer type for measuring configurable intervals in ticks. 
+using tick_interval_type = uint16_t; + +/// A message-driven channel for ensuring reliable and ordered transport over an +/// unreliable and unordered communication layer. A channel connects a single +/// producer with any number of consumers. +template +class channel { +public: + // -- member types: messages from consumers to the producer ------------------ + + /// Notifies the producer that a consumer received all events up to a certain + /// sequence number (including that number). Consumers send the latest ACK + /// periodically as a keepalive message. + struct cumulative_ack { + sequence_number_type seq; + + template + friend bool inspect(Inspector& f, cumulative_ack& x) { + return f.object(x) + .pretty_name("cumulative_ack") + .fields(f.field("seq", x.seq)); + } + }; + + /// Notifies the producer that a consumer failed to receive some events. + /// Sending a NACK for the sequence number 0 causes the publisher to re-send + /// the handshake. + struct nack { + std::vector seqs; + + template + friend bool inspect(Inspector& f, nack& x) { + return f.object(x).pretty_name("nack").fields(f.field("seqs", x.seqs)); + } + }; + + // -- member types: messages from the producer to consumers ------------------ + + /// Notifies a consumer which is the first sequence number after it started + /// listening to the producer. + struct handshake { + /// The first sequence number a consumer should process and acknowledge. + sequence_number_type offset; + + /// The interval (in ticks) between heartbeat messages. Allows the consumer + /// to adjust its timeouts for detecting failed producers. + tick_interval_type heartbeat_interval; + + /// Maximum number of missed heartbeats before connections time out. 
+ tick_interval_type connection_timeout; + + template + friend bool inspect(Inspector& f, handshake& x) { + return f.object(x) + .pretty_name("handshake") + .fields(f.field("offset", x.offset), + f.field("heartbeat_interval", x.heartbeat_interval)); + } + }; + + /// Transmits ordered data to a consumer. + struct event { + sequence_number_type seq; + Payload content; + + template + friend bool inspect(Inspector& f, event& x) { + return f.object(x) + .pretty_name("event") // + .fields(f.field("seq", x.seq), f.field("content", x.content)); + } + }; + + /// Notifies a consumer that the producer can no longer retransmit an event. + struct retransmit_failed { + sequence_number_type seq; + + template + friend bool inspect(Inspector& f, retransmit_failed& x) { + return f.object(x) + .pretty_name("retransmit_failed") + .fields(f.field("seq", x.seq)); + } + }; + + /// Notifies all consumers that the master is still alive and what is the + /// latest sequence number. + struct heartbeat { + sequence_number_type seq; + + template + friend bool inspect(Inspector& f, heartbeat& x) { + return f.object(x).pretty_name("heartbeat").fields(f.field("seq", x.seq)); + } + }; + + // -- implementation of the producer ----------------------------------------- + + /// Messages sent by the producer. + using producer_message + = caf::variant; + + struct default_producer_base {}; + + /// Produces events (messages) for any number of consumers. + /// @tparam Backend Hides the underlying (unreliable) communication layer. The + /// backend must provide the following member functions: + /// - `void send(producer*, const Handle&, const T&)` sends a + /// unicast message to a single consumer, where `T` is any + /// type in `producer_message`. + /// - `void broadcast(producer*, const T&)` sends a multicast + /// message to all consumers, where `T` is any type in + /// `producer_message`. 
+ /// - `void drop(producer*, const Handle&, ec)` called to + /// indicate that a consumer got removed by the producer. + /// - `void handshake_completed(producer*, const Handle&)` + /// called to indicate that the producer received an ACK + template + class producer : public Base { + public: + // -- member types --------------------------------------------------------- + + /// Bundles bookkeeping state for a consumer. + struct path { + /// Allows the backend to uniquely address this consumer. + Handle hdl; + + /// The sequence number that was active when adding this consumer. + sequence_number_type offset; + + /// The sequence number of the last cumulative ACK. + sequence_number_type acked; + + /// The last time we have received a message on this path. + alm::lamport_timestamp last_seen; + }; + + using buf_type = std::deque; + + using path_list = std::vector; + + // -- constructors, destructors, and assignment operators ------------------ + + explicit producer(Backend* backend) : backend_(backend) { + // nop + } + + // -- message processing --------------------------------------------------- + + void produce(Payload content) { + if (paths_.empty()) + return; + ++seq_; + buf_.emplace_back(event{seq_, std::move(content)}); + last_broadcast_ = tick_; + backend_->broadcast(this, buf_.back()); + } + + error add(const Handle& hdl) { + if (find_path(hdl) != paths_.end()) + return ec::consumer_exists; + BROKER_DEBUG("add" << hdl << "to the channel"); + paths_.emplace_back(path{hdl, seq_, 0, tick_}); + backend_->send(this, hdl, handshake{seq_, heartbeat_interval_}); + return nil; + } + + void trigger_handshakes() { + for (auto& path : paths_) + if (path.offset == 0) + backend_->send(this, path.hdl, + handshake{path.offset, heartbeat_interval_}); + } + + void handle_ack(const Handle& hdl, sequence_number_type seq) { + sequence_number_type acked = seq; + // Iterate all paths once, fetching minimum acknowledged sequence number + // and updating the path belonging to `hdl` in 
one go. + for (auto& x : paths_) { + if (x.hdl == hdl) { + if (x.acked > seq) { + // A blast from the past. Ignore. + return; + } + x.last_seen = tick_; + if (x.acked == 0) { + backend_->handshake_completed(this, hdl); + } else if (x.acked == seq) { + // Old news. Stop processing this event, since it won't allow us to + // clear events from the buffer anyways. + return; + } + x.acked = seq; + } else { + acked = std::min(x.acked, acked); + } + } + // Drop events from the buffer if possible. + auto not_acked = [acked](const event& x) { return x.seq > acked; }; + buf_.erase(buf_.begin(), + std::find_if(buf_.begin(), buf_.end(), not_acked)); + } + + void handle_nack(const Handle& hdl, + const std::vector& seqs) { + // Sanity checks. + if (seqs.empty()) + return; + // Nack 0 implicitly acts as a handshake. + auto p = find_path(hdl); + if (p == paths_.end()) { + if (seqs.size() == 1 && seqs.front() == 0) { + auto err = add(hdl); + static_cast(err); // Discard: always default-constructed. + } + return; + } + // Seqs must be sorted. Everything before the first missing ID is ACKed. + p->last_seen = tick_; + if (seqs.size() > 1 && !std::is_sorted(seqs.begin(), seqs.end())) { + backend_->drop(this, p->hdl, ec::invalid_message); + paths_.erase(p); + return; + } + auto first = seqs.front(); + if (first == 0) { + backend_->send(this, hdl, handshake{p->offset, heartbeat_interval_}); + return; + } + handle_ack(hdl, first - 1); + for (auto seq : seqs) { + if (auto i = find_event(seq); i != buf_.end()) + backend_->send(this, hdl, *i); + else + backend_->send(this, hdl, retransmit_failed{seq}); + } + } + + // -- time-based processing ------------------------------------------------ + + void tick() { + BROKER_TRACE(""); + // Increase local time and send heartbeats. 
+ ++tick_; + if (heartbeat_interval_ == 0) + return; + if (last_broadcast_ + heartbeat_interval_ == tick_) { + last_broadcast_ = tick_; + backend_->broadcast(this, heartbeat{seq_}); + } + // Check whether any consumer timed out. + auto timeout = connection_timeout(); + assert(timeout > 0); + size_t erased_paths = 0; + for (auto i = paths_.begin(); i != paths_.end();) { + if (tick_.value - i->last_seen.value >= timeout) { + BROKER_DEBUG("remove" << i->hdl << "from channel: consumer timeout"); + backend_->drop(this, i->hdl, ec::connection_timeout); + i = paths_.erase(i); + ++erased_paths; + } else { + ++i; + } + } + // Check whether we can clear some items from the buffer. + if (paths_.empty()) { + buf_.clear(); + } else if (erased_paths > 0) { + auto i = paths_.begin(); + auto acked = i->acked; + for (++i; i != paths_.end(); ++i) + if (i->acked < acked) + acked = i->acked; + auto not_acked = [acked](const event& x) { return x.seq > acked; }; + buf_.erase(buf_.begin(), + std::find_if(buf_.begin(), buf_.end(), not_acked)); + } + } + + // -- properties ----------------------------------------------------------- + + auto& backend() noexcept { + return *backend_; + } + + const auto& backend() const noexcept { + return *backend_; + } + + auto seq() const noexcept { + return seq_; + } + + auto next_seq() const noexcept { + return seq_ + 1; + } + + const auto& buf() const noexcept { + return buf_; + } + + const auto& paths() const noexcept { + return paths_; + } + + auto heartbeat_interval() const noexcept { + return heartbeat_interval_; + } + + void heartbeat_interval(tick_interval_type value) noexcept { + heartbeat_interval_ = value; + } + + auto connection_timeout() const noexcept { + return uint64_t{heartbeat_interval_} * connection_timeout_factor_; + } + + auto connection_timeout_factor() const noexcept { + return connection_timeout_factor_; + } + + void connection_timeout_factor(tick_interval_type value) noexcept { + connection_timeout_factor_ = value; + } + + bool 
idle() const noexcept { + auto at_head = [seq{seq_}](const path& x) { return x.acked == seq; }; + return std::all_of(paths_.begin(), paths_.end(), at_head); + } + + /// Checks whether any path was added but not yet acknowledged. + bool has_pending_paths() const noexcept { + auto pending = [](const path& x) { return x.acked == 0; }; + return std::any_of(paths_.begin(), paths_.end(), pending); + } + + // -- path and event lookup ------------------------------------------------ + + auto find_path(const Handle& hdl) noexcept { + auto has_hdl = [&hdl](const path& x) { return x.hdl == hdl; }; + return std::find_if(paths_.begin(), paths_.end(), has_hdl); + } + + auto find_path(const Handle& hdl) const noexcept { + auto has_hdl = [&hdl](const path& x) { return x.hdl == hdl; }; + return std::find_if(paths_.begin(), paths_.end(), has_hdl); + } + + auto find_event(sequence_number_type seq) const noexcept { + auto has_seq = [seq](const event& x) { return x.seq == seq; }; + return std::find_if(buf_.begin(), buf_.end(), has_seq); + } + + private: + // -- member variables ----------------------------------------------------- + + /// Transmits messages to the consumers. + Backend* backend_; + + /// Monotonically increasing counter (starting at 1) to establish ordering + /// of messages on this channel. Since we start at 1, the first message we + /// send is going to have a sequence number of *2*. This enables us to + /// use 0 on a path to mean "added but we never received an ack yet", + /// because an ACK cannot have the sequence number 0. + sequence_number_type seq_ = 1; + + /// Monotonically increasing counter to keep track of time. + alm::lamport_timestamp tick_; + + /// Stores the last time we've broadcasted something. + alm::lamport_timestamp last_broadcast_; + + /// Stores outgoing events with their sequence number. + buf_type buf_; + + /// List of consumers with the last acknowledged sequence number. + path_list paths_; + + /// Maximum time between two broadcasted messages. 
When not sending anything + /// else, insert heartbeats after this amount of time. + tick_interval_type heartbeat_interval_ = 5; + + /// Factor for computing the timeout for consumers, i.e., after how many + /// heartbeats of not receiving any message do we assume the consumer no + /// longer exists. + tick_interval_type connection_timeout_factor_ = 4; + }; + + // -- implementation of the consumer ----------------------------------------- + + /// Messages sent by the consumer. + using consumer_message = caf::variant; + + /// Handles events (messages) from a single producer. + /// @tparam Backend Hides the underlying (unreliable) communication layer. The + /// backend must provide the following member functions: + /// - `void consume(consumer*, Payload)` process a single + /// event. + /// - `void send(consumer*, T)` sends a message to the + /// producer, where `T` is any type in `consumer_message`. + /// - `error consume_nil(consumer*)` process a lost event. The + /// callback may abort further processing by returning a + /// non-default `error`. In this case, the `consumer` + /// immediately calls `close` with the returned error. + /// - `void close(consumer*, error)` drops this consumer. + /// After calling this function, no further function calls + /// on the consumer are allowed (except calling the + /// destructor). 
+ template + class consumer { + public: + // -- member types --------------------------------------------------------- + + struct optional_event { + sequence_number_type seq; + optional content; + + explicit optional_event(sequence_number_type seq) : seq(seq) { + // nop + } + + template + optional_event(sequence_number_type seq, T&& x) + : seq(seq), content(std::forward(x)) { + // nop + } + + template + friend bool inspect(Inspector& f, optional_event& x) { + return f.object(x) + .pretty_name("optional_event") + .fields(f.field("seq", x.seq), f.field("content", x.content)); + } + }; + + using buf_type = std::deque; + + // -- constructors, destructors, and assignment operators ------------------ + + explicit consumer(Backend* backend) : backend_(backend) { + // nop + } + + // -- message processing --------------------------------------------------- + + /// Initializes the consumer from the settings in the handshake. + /// @returns `true` if the consumer was initialized, `false` on a repeated + /// handshake that got dropped by the consumer. + bool handle_handshake(Handle producer_hdl, sequence_number_type offset, + tick_interval_type heartbeat_interval) { + BROKER_TRACE(BROKER_ARG(producer_hdl) + << BROKER_ARG(offset) << BROKER_ARG(heartbeat_interval)); + if (initialized()) + return false; + producer_ = std::move(producer_hdl); + return handle_handshake_impl(offset, heartbeat_interval); + } + + /// @copydoc handle_handshake + bool handle_handshake(sequence_number_type offset, + tick_interval_type heartbeat_interval) { + BROKER_TRACE(BROKER_ARG(offset) << BROKER_ARG(heartbeat_interval)); + if (initialized()) + return false; + return handle_handshake_impl(offset, heartbeat_interval); + } + + bool handle_handshake_impl(sequence_number_type offset, + tick_interval_type heartbeat_interval) { + BROKER_TRACE(BROKER_ARG(offset) << BROKER_ARG(heartbeat_interval)); + // Initialize state. 
+ next_seq_ = offset + 1; + last_seq_ = next_seq_; + heartbeat_interval_ = heartbeat_interval; + // Find the first message in the assigned offset and drop any buffered + // message before that point. + if (!buf_.empty()) { + auto pred = [=](const optional_event& x) { return x.seq > offset; }; + auto i = std::find_if(buf_.begin(), buf_.end(), pred); + buf_.erase(buf_.begin(), i); + } + // Consume buffered messages if possible and send initial ACK. + try_consume_buffer(); + send_ack(); + return true; + } + + void handle_heartbeat(sequence_number_type seq) { + // Do nothing when receiving this before the handshake or if the master + // did not produce any events yet. + if (last_seq_ == 0 || seq == 0) + return; + if (seq + 1 > last_seq_) + last_seq_ = seq + 1; + } + + void handle_event(sequence_number_type seq, Payload payload) { + BROKER_TRACE(BROKER_ARG(seq) << BROKER_ARG(payload)); + if (next_seq_ == seq) { + // Process immediately. + backend_->consume(this, payload); + bump_seq(); + try_consume_buffer(); + } else if (seq > next_seq_) { + if (seq > last_seq_) + last_seq_ = seq; + // Insert event into buf_: sort by the sequence number, drop duplicates. + auto pred = [seq](const optional_event& x) { return x.seq >= seq; }; + auto i = std::find_if(buf_.begin(), buf_.end(), pred); + if (i == buf_.end()) + buf_.emplace_back(seq, std::move(payload)); + else if (i->seq != seq) + buf_.emplace(i, seq, std::move(payload)); + else if (!i->content) + i->content = std::move(payload); + } + } + + void handle_retransmit_failed(sequence_number_type seq) { + if (next_seq_ == seq) { + // Process immediately. + if (auto err = backend_->consume_nil(this)) { + backend_->close(this, std::move(err)); + return; + } + bump_seq(); + try_consume_buffer(); + } else if (seq > next_seq_) { + // Insert event into buf_: sort by the sequence number, drop duplicates. 
+ auto pred = [seq](const optional_event& x) { return x.seq >= seq; }; + auto i = std::find_if(buf_.begin(), buf_.end(), pred); + if (i == buf_.end()) + buf_.emplace_back(seq); + else if (i->seq != seq) + buf_.emplace(i, seq); + } + } + + // -- time-based processing ------------------------------------------------ + + void tick() { + BROKER_TRACE(BROKER_ARG2("next_seq", next_seq_) + << BROKER_ARG2("last_seq", last_seq_) + << BROKER_ARG2("buf.size", buf().size())); + ++tick_; + // Ask for repeated handshake each heartbeat interval when not fully + // initialized yet. + if (!initialized()) { + BROKER_DEBUG("not fully initialized: waiting for producer handshake"); + ++idle_ticks_; + if (idle_ticks_ >= nack_timeout_) { + idle_ticks_ = 0; + backend_->send(this, nack{std::vector{0}}); + } + return; + } + // Update state. + bool progressed = next_seq_ > last_tick_seq_; + last_tick_seq_ = next_seq_; + if (progressed) { + BROKER_DEBUG("made progress since last tick"); + if (idle_ticks_ > 0) + idle_ticks_ = 0; + if (heartbeat_interval_ > 0 && num_ticks() % heartbeat_interval_ == 0) + send_ack(); + return; + } + ++idle_ticks_; + BROKER_DEBUG("made no progress for" << idle_ticks_ << "ticks"); + if (next_seq_ < last_seq_ && idle_ticks_ >= nack_timeout_) { + idle_ticks_ = 0; + auto first = next_seq_; + auto last = last_seq_; + std::vector seqs; + seqs.reserve(last - first); + auto generate = [&, i{first}](sequence_number_type found) mutable { + for (; i < found; ++i) + seqs.emplace_back(i); + ++i; + }; + for (const auto& x : buf_) + generate(x.seq); + generate(last); + backend_->send(this, nack{std::move(seqs)}); + return; + } + if (heartbeat_interval_ > 0 && num_ticks() % heartbeat_interval_ == 0) + send_ack(); + } + + // -- properties ----------------------------------------------------------- + + auto& backend() noexcept { + return *backend_; + } + + const auto& backend() const noexcept { + return *backend_; + } + + const auto& producer() const { + return producer_; + } + + 
void producer(Handle hdl) { + producer_ = std::move(hdl); + } + + const auto& buf() const noexcept { + return buf_; + } + + auto num_ticks() const noexcept { + // Lamport timestamps start at 1. + return tick_.value - 1; + } + + auto idle_ticks() const noexcept { + return idle_ticks_; + } + + auto heartbeat_interval() const noexcept { + return heartbeat_interval_; + } + + auto heartbeat_interval(tick_interval_type value) noexcept { + heartbeat_interval_ = value; + } + + auto connection_timeout() const noexcept { + return uint64_t{heartbeat_interval_} * connection_timeout_factor_; + } + + auto connection_timeout_factor() const noexcept { + return connection_timeout_factor_; + } + + void connection_timeout_factor(tick_interval_type value) noexcept { + connection_timeout_factor_ = value; + } + + auto nack_timeout() const noexcept { + return nack_timeout_; + } + + void nack_timeout(tick_interval_type value) noexcept { + nack_timeout_ = value; + } + + auto next_seq() const noexcept { + return next_seq_; + } + + auto last_seq() const noexcept { + return last_seq_; + } + + bool initialized() const noexcept { + return next_seq_ != 0; + } + + bool idle() const noexcept { + return initialized() && buf_.empty() && next_seq_ == last_seq_; + } + + void reset() { + producer_ = Handle{}; + next_seq_ = 0; + last_seq_ = 0; + buf_.clear(); + tick_ = alm::lamport_timestamp{}; + last_tick_seq_ = 0; + idle_ticks_ = 0; + heartbeat_interval_ = 0; + nack_timeout_ = 5; + } + + private: + // -- helper functions ----------------------------------------------------- + + // Bumps the sequence number for the next expected event. + void bump_seq() { + if (++next_seq_ > last_seq_) + last_seq_ = next_seq_; + } + + // Consumes all events from buf_ until either hitting the end or hitting a + // gap (i.e. events that are neither available yet nor known missing). 
+ void try_consume_buffer() { + auto i = buf_.begin(); + for (; i != buf_.end() && i->seq == next_seq_; ++i) { + if (i->content) { + backend_->consume(this, *i->content); + } else { + if (auto err = backend_->consume_nil(this)) { + buf_.erase(buf_.begin(), i); + backend_->close(this, std::move(err)); + return; + } + } + bump_seq(); + } + buf_.erase(buf_.begin(), i); + } + + void send_ack() { + backend_->send(this, cumulative_ack{next_seq_ > 0 ? next_seq_ - 1 : 0}); + } + + // -- member variables ----------------------------------------------------- + + /// Handles incoming events. + Backend* backend_; + + /// Stores the handle of the producer. + Handle producer_; + + /// Monotonically increasing counter (starting at 1) to establish ordering + /// of messages on this channel. + sequence_number_type next_seq_ = 0; + + /// The currently known end of the event stream. + sequence_number_type last_seq_ = 0; + + /// Stores received out-of-order events, sorted by sequence number. + buf_type buf_; + + /// Monotonically increasing counter to keep track of time. + alm::lamport_timestamp tick_; + + /// Stores the value of `next_seq_` at our last tick. + sequence_number_type last_tick_seq_ = 0; + + /// Number of ticks without progress. + tick_interval_type idle_ticks_ = 0; + + /// Frequency of ACK messages (configured by the master). + tick_interval_type heartbeat_interval_ = 0; + + /// Number of ticks without progress before sending a NACK. + tick_interval_type nack_timeout_ = 5; + + /// Factor for computing the timeout for producers, i.e., after how many + /// heartbeats of not receiving any message do we assume the producer no + /// longer exists. 
+ tick_interval_type connection_timeout_factor_ = 4; + }; +}; + +} // namespace broker::detail diff --git a/include/broker/detail/clone_actor.hh b/include/broker/detail/clone_actor.hh index 594e5769..f4167eec 100644 --- a/include/broker/detail/clone_actor.hh +++ b/include/broker/detail/clone_actor.hh @@ -11,86 +11,129 @@ #include "broker/data.hh" #include "broker/detail/store_actor.hh" #include "broker/endpoint.hh" +#include "broker/entity_id.hh" #include "broker/internal_command.hh" -#include "broker/publisher_id.hh" #include "broker/topic.hh" -namespace broker { -namespace detail { +namespace broker::detail { class clone_state : public store_actor_state { public: + // -- member types ----------------------------------------------------------- + using super = store_actor_state; + using consumer_type = channel_type::consumer; + + struct producer_base { + /// Stores whether writes are currently disabled by the clone. This flag + /// solves a race between the members `input` and `output_ptr` by disabling + /// any output before the master completed the handshake with `input`. + /// Without this stalling, a clone might "miss" its own writes. This becomes + /// particularly problematic for `put_unique` operations if the master + /// performs these operations before attaching the clone as a consumer. + bool stalled = true; + }; + + using producer_type = channel_type::producer; + + // -- initialization --------------------------------------------------------- + + clone_state(); + /// Initializes the state. - void init(caf::event_based_actor* ptr, std::string&& nm, - caf::actor&& parent, endpoint::clock* ep_clock); + void init(caf::event_based_actor* ptr, endpoint_id this_endpoint, + std::string&& nm, caf::actor&& parent, endpoint::clock* ep_clock); /// Sends `x` to the master. void forward(internal_command&& x); - /// Wraps `x` into a `data` object and forwards it to the master. 
+ // -- callbacks for the behavior --------------------------------------------- + + void dispatch(command_message& msg); + + void tick(); + + // -- callbacks for the consumer --------------------------------------------- + + void consume(consumer_type*, command_message& msg); + + void consume(put_command& cmd); + + void consume(put_unique_result_command& cmd); + + void consume(erase_command& cmd); + + void consume(expire_command& cmd); + + void consume(clear_command& cmd); + template - void forward_from(T& x) { - forward(make_internal_command(std::move(x))); + void consume(T& cmd) { + BROKER_ERROR("master got unexpected command:" << cmd); } - void command(internal_command::variant_type& cmd); + error consume_nil(consumer_type* src); - void command(internal_command& cmd); + void close(consumer_type* src, error); - void operator()(none); + void send(consumer_type*, channel_type::cumulative_ack); - void operator()(put_command&); + void send(consumer_type*, channel_type::nack); - void operator()(put_unique_command&); + // -- callbacks for the producer --------------------------------------------- - void operator()(erase_command&); + void send(producer_type*, const entity_id&, const channel_type::event&); - void operator()(expire_command&); + void send(producer_type*, const entity_id&, channel_type::handshake); - void operator()(add_command&); + void send(producer_type*, const entity_id&, channel_type::retransmit_failed); - void operator()(subtract_command&); + void broadcast(producer_type*, channel_type::heartbeat); - void operator()(snapshot_command&); + void broadcast(producer_type*, const channel_type::event&); - void operator()(snapshot_sync_command&); + void drop(producer_type*, const entity_id&, ec); - void operator()(set_command&); + void handshake_completed(producer_type*, const entity_id&); - void operator()(clear_command&); + // -- properties ------------------------------------------------------------- + /// Returns all keys of the store. 
data keys() const; - topic master_topic; + /// Returns the writer instance, lazily creating it if necessary. + producer_type& output(); - caf::actor master; + /// Sets the store content of the clone. + void set_store(std::unordered_map x); - std::unordered_map store; + /// Returns whether the clone received a handshake from the master. + bool has_master() const noexcept; - bool is_stale = true; + bool idle() const noexcept; - double stale_time = -1.0; + // -- member variables ------------------------------------------------------- - double unmutable_time = -1.0; - - std::vector mutation_buffer; + topic master_topic; - std::vector pending_remote_updates; + std::unordered_map store; - bool awaiting_snapshot = true; + consumer_type input; - bool awaiting_snapshot_sync = true; + std::unique_ptr output_ptr; - static inline constexpr const char* name = "clone_actor"; + static inline constexpr const char* name = "broker.clone"; }; -caf::behavior clone_actor(caf::stateful_actor* self, +// -- clone actor -------------------------------------------------------------- + +using clone_actor_type = caf::stateful_actor; + +caf::behavior clone_actor(clone_actor_type* self, endpoint_id this_endpoint, caf::actor core, std::string id, double resync_interval, double stale_interval, double mutation_buffer_interval, endpoint::clock* ep_clock); -} // namespace detail -} // namespace broker +} // namespace broker::detail diff --git a/include/broker/detail/data_generator.hh b/include/broker/detail/data_generator.hh index 2e6f6cc9..b6d6f0f9 100644 --- a/include/broker/detail/data_generator.hh +++ b/include/broker/detail/data_generator.hh @@ -82,12 +82,16 @@ public: void shuffle(data& x); - void shuffle(vector& xs); - void shuffle(set&); void shuffle(table& xs); + template + void shuffle(std::vector& xs) { + for (auto& x : xs) + shuffle(x); + } + private: char next_char(); diff --git a/include/broker/detail/generator_file_writer.hh b/include/broker/detail/generator_file_writer.hh index 
26eabce7..d046ffb6 100644 --- a/include/broker/detail/generator_file_writer.hh +++ b/include/broker/detail/generator_file_writer.hh @@ -23,7 +23,7 @@ public: struct format { static constexpr uint32_t magic = 0x2EECC0DE; - static constexpr uint8_t version = 1; + static constexpr uint8_t version = 2; static constexpr size_t header_size = sizeof(magic) + sizeof(version); diff --git a/include/broker/detail/inspect_objects.hh b/include/broker/detail/inspect_objects.hh new file mode 100644 index 00000000..90d7f92d --- /dev/null +++ b/include/broker/detail/inspect_objects.hh @@ -0,0 +1,22 @@ +#pragma once + +#include "broker/error.hh" + +#include + +namespace broker::detail { + +// Wraps object inspection and always returns an error for backwards +// compatibility with CAF 0.17 versions. +template +error inspect_objects(Inspector& f, Ts&... xs) { +#if CAF_VERSION >= 1800 + if (f.apply_objects(xs...)) + return {}; + return f.get_error(); +#else + return f(xs...); +#endif +} + +} // namespace broker::detail diff --git a/include/broker/detail/is_legacy_inspector.hh b/include/broker/detail/is_legacy_inspector.hh new file mode 100644 index 00000000..8500547a --- /dev/null +++ b/include/broker/detail/is_legacy_inspector.hh @@ -0,0 +1,16 @@ +#pragma once + +// TODO: compatibility header for CAF < 0.18. Drop when setting the minimum +// required CAF version to ≥ 0.18. + +#include + +namespace broker::detail { + +/// Evaluates to `true` if the `Inspector` uses the CAF inspection API prior to +/// CAF 0.18, `false` otherwise. +template +constexpr bool is_legacy_inspector + = !std::is_same::value; + +} // namespace broker::detail diff --git a/include/broker/detail/iterator_range.hh b/include/broker/detail/iterator_range.hh new file mode 100644 index 00000000..5096e7da --- /dev/null +++ b/include/broker/detail/iterator_range.hh @@ -0,0 +1,95 @@ +#pragma once + +#include +#include + +namespace broker::detail { + +/// A lightweight range abstraction using a pair of iterators. 
+template +class iterator_range { +public: + using trait = std::iterator_traits; + + using iterator = Iterator; + + using value_type = typename trait::value_type; + + iterator_range(iterator first, iterator last) : begin_(first), end_(last) { + // nop + } + + iterator_range(const iterator_range&) = default; + + iterator_range& operator=(const iterator_range&) = default; + + iterator begin() const { + return begin_; + } + + iterator end() const { + return end_; + } + + bool empty() const { + return begin_ == end_; + } + +private: + iterator begin_; + iterator end_; +}; + +/// Convenience function for creating an iterator range from a pair of +/// iterators. +/// @relates iterator_range +template +auto make_iterator_range(Iterator begin, Iterator end) { + return iterator_range(begin, end); +} + +/// Convenience function for creating an iterator range that spans all +/// elements of a container. +/// @relates iterator_range +template +auto make_iterator_range(const Container& xs) { + return make_iterator_range(xs.begin(), xs.end()); +} + +/// @relates iterator_range +template +bool operator==(iterator_range xs, iterator_range ys) { + return std::equal(xs.begin(), xs.end(), ys.begin(), ys.end()); +} + +/// @relates iterator_range +template +bool operator==(iterator_range xs, const Container& ys) { + return xs == make_iterator_range(ys); +} + +/// @relates iterator_range +template +bool operator==(const Container& xs, iterator_range ys) { + return make_iterator_range(xs) == ys; +} + +/// @relates iterator_range +template +bool operator!=(iterator_range xs, iterator_range ys) { + return !(xs == ys); +} + +/// @relates iterator_range +template +bool operator!=(iterator_range xs, const Container& ys) { + return !(xs == ys); +} + +/// @relates iterator_range +template +bool operator!=(const Container& xs, iterator_range ys) { + return !(xs == ys); +} + +} // namespace broker::detail diff --git a/include/broker/detail/lift.hh b/include/broker/detail/lift.hh index 3079fb64..13379f41 100644 
--- a/include/broker/detail/lift.hh +++ b/include/broker/detail/lift.hh @@ -2,6 +2,7 @@ #include +#include "broker/error.hh" #include "broker/none.hh" namespace broker::detail { @@ -19,4 +20,31 @@ struct lift_helper { template constexpr lift_helper lift = lift_helper{}; +template +struct drop_helper { + template + auto operator()(R (U::*fun)(Ts...)) const { + return [](AtomPrefix..., Ts... xs) {}; + } +}; + +/// Deduces the signature from a message handler (prefixed with `AtomPrefix`) +/// but returns a lambda with an empty body. +template +constexpr drop_helper drop = drop_helper{}; + +template +struct reject_helper { + template + auto operator()(R (U::*fun)(Ts...)) const { + return [](AtomPrefix..., Ts... xs) -> error { return Code; }; + } +}; + +/// Deduces the signature from a message handler (prefixed with `AtomPrefix`) +/// but returns a lambda that only returns `Code` as an `error`. +template +constexpr reject_helper reject + = reject_helper{}; + } // namespace broker::detail diff --git a/include/broker/detail/map_index_iterator.hh b/include/broker/detail/map_index_iterator.hh new file mode 100644 index 00000000..13896ae4 --- /dev/null +++ b/include/broker/detail/map_index_iterator.hh @@ -0,0 +1,166 @@ +#pragma once + +#include +#include +#include + +namespace broker::detail { + +/// Maps an iterator over `std::pair` or `std::tuple` elements to an iterator +/// over the `N`th type in each element. For example, iterating over a range of +/// `std::pair` elements with `N = 0` would only visit the `int` +/// portion of each element. 
+template +class map_index_iterator { +public: + using base_trait = std::iterator_traits; + + static constexpr bool is_const = std::is_const< + std::remove_reference_t>::value; + + using value_type = std::tuple_element_t; + + using pointer = std::conditional_t; + + using reference + = std::conditional_t; + + using difference_type = typename base_trait::difference_type; + + using iterator_category = typename base_trait::iterator_category; + + static constexpr bool is_random_access + = std::is_same::value; + + explicit map_index_iterator(Iterator iter) : iter_(iter) { + // nop + } + + map_index_iterator(const map_index_iterator&) = default; + + map_index_iterator& operator=(const map_index_iterator&) = default; + + // -- properties ------------------------------------------------------------- + + auto base() const { + return iter_; + } + + // -- operators -------------------------------------------------------------- + + reference operator*() { + return std::get(*iter_); + } + + map_index_iterator& operator++() { + ++iter_; + return *this; + } + + map_index_iterator operator++(int) { + return map_index_iterator{iter_++}; + } + + map_index_iterator& operator--() { + --iter_; + return *this; + } + + map_index_iterator operator--(int) { + return map_index_iterator{iter_--}; + } + + // -- conditional operators -------------------------------------------------- + + template + std::enable_if_t operator-=(difference_type n) { + iter_ -= n; + return *this; + } + + template + std::enable_if_t operator+=(difference_type n) { + iter_ += n; + return *this; + } + + template + std::enable_if_t operator[](size_t index) { + return std::get(iter_[index]); + } + + +private: + Iterator iter_; +}; + +template +using map_first_iterator = map_index_iterator; + +template +using map_second_iterator = map_index_iterator; + +template +auto map_first(Iterator iter) { + return map_index_iterator(iter); +} + +template +auto map_second(Iterator iter) { + return map_index_iterator(iter); +} + +// 
-- free operators ----------------------------------------------------------- + +template +bool operator==(map_index_iterator x, + map_index_iterator y) { + return x.base() == y.base(); +} + +template +bool operator!=(map_index_iterator x, + map_index_iterator y) { + return x.base() != y.base(); +} + +// -- conditional free operators ----------------------------------------------- + +template +std::enable_if_t::is_random_access, + map_index_iterator> +operator+(map_index_iterator x, + typename map_index_iterator::difference_type n) { + auto result = x; + result += n; + return result; +} + +template +std::enable_if_t::is_random_access, + map_index_iterator> +operator+(typename map_index_iterator::difference_type n, + map_index_iterator x) { + auto result = x; + result += n; + return result; +} + +template +std::enable_if_t::is_random_access, + map_index_iterator> +operator-(map_index_iterator x, + typename map_index_iterator::difference_type n) { + auto result = x; + result -= n; + return result; +} + +template +auto operator-(map_index_iterator x, + map_index_iterator y) + -> decltype(x.base() - y.base()) { + return x.base() - y.base(); +} + +} // namespace broker::detail diff --git a/include/broker/detail/master_actor.hh b/include/broker/detail/master_actor.hh index 84e02373..ec1c26d2 100644 --- a/include/broker/detail/master_actor.hh +++ b/include/broker/detail/master_actor.hh @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include @@ -10,9 +10,10 @@ #include "broker/data.hh" #include "broker/detail/store_actor.hh" #include "broker/endpoint.hh" +#include "broker/detail/abstract_backend.hh" +#include "broker/entity_id.hh" #include "broker/fwd.hh" #include "broker/internal_command.hh" -#include "broker/publisher_id.hh" #include "broker/topic.hh" namespace broker { @@ -22,68 +23,126 @@ class abstract_backend; class master_state : public store_actor_state { public: + // -- member types ----------------------------------------------------------- + using 
super = store_actor_state; + using producer_type = channel_type::producer; + + using consumer_type = channel_type::consumer; + /// Owning smart pointer to a backend. using backend_pointer = std::unique_ptr; + template + void broadcast(T&& cmd) { + BROKER_TRACE(BROKER_ARG(cmd)); + // Suppress message if no one is listening. + if (output.paths().empty()) + return; + auto seq = output.next_seq(); + auto msg = make_command_message( + clones_topic, internal_command{seq, id, std::forward(cmd)}); + output.produce(std::move(msg)); + } + + // -- initialization --------------------------------------------------------- + + master_state(); + /// Initializes the object. - void init(caf::event_based_actor* ptr, std::string&& nm, - backend_pointer&& bp, caf::actor&& parent, endpoint::clock* clock); + void init(caf::event_based_actor* ptr, endpoint_id this_endpoint, + std::string&& nm, backend_pointer&& bp, caf::actor&& parent, + endpoint::clock* clock); - /// Sends `x` to all clones. - void broadcast(internal_command&& x); + // -- callbacks for the behavior --------------------------------------------- - template - void broadcast_cmd_to_clones(T cmd) { - BROKER_DEBUG("broadcast" << cmd << "to" << clones.size() << "clones"); - if (!clones.empty()) - broadcast(internal_command{std::move(cmd)}); - } + void dispatch(command_message& msg); + + void tick(); void remind(timespan expiry, const data& key); void expire(data& key); - void command(internal_command& cmd); + // -- callbacks for the consumer --------------------------------------------- + + void consume(consumer_type* src, command_message& cmd); + + void consume(put_command& cmd); + + void consume(put_unique_command& cmd); + + void consume(erase_command& cmd); - void command(internal_command::variant_type& cmd); + void consume(add_command& cmd); - void operator()(none); + void consume(subtract_command& cmd); - void operator()(put_command&); + void consume(clear_command& cmd); - void operator()(put_unique_command&); + template + 
void consume(T& cmd) { + BROKER_ERROR("master got unexpected command:" << cmd); + } + + error consume_nil(consumer_type* src); + + void close(consumer_type* src, error); - void operator()(erase_command&); + void send(consumer_type*, channel_type::cumulative_ack); - void operator()(expire_command&); + void send(consumer_type*, channel_type::nack); - void operator()(add_command&); + // -- callbacks for the producer --------------------------------------------- - void operator()(subtract_command&); + void send(producer_type*, const entity_id&, const channel_type::event&); - void operator()(snapshot_command&); + void send(producer_type*, const entity_id&, channel_type::handshake); - void operator()(snapshot_sync_command&); + void send(producer_type*, const entity_id&, channel_type::retransmit_failed); - void operator()(set_command&); + void broadcast(producer_type*, channel_type::heartbeat); - void operator()(clear_command&); + void broadcast(producer_type*, const channel_type::event&); + void drop(producer_type*, const entity_id&, ec); + + void handshake_completed(producer_type*, const entity_id&); + + // -- properties ------------------------------------------------------------- + + bool exists(const data& key); + + bool idle() const noexcept; + + // -- member variables ------------------------------------------------------- + + /// Caches the topic for broadcasting to all clones. topic clones_topic; + /// Manages the key-value store. backend_pointer backend; - std::unordered_map clones; + /// Manages outgoing commands. + producer_type output; - bool exists(const data& key); + /// Maps senders to manager objects for incoming commands. + std::unordered_map inputs; - static inline constexpr const char* name = "master_actor"; + /// Maps senders to manager objects for incoming commands. + std::unordered_map open_handshakes; + + /// Gives this actor a recognizable name in log files. 
+ static inline constexpr const char* name = "broker.master"; }; -caf::behavior master_actor(caf::stateful_actor* self, - caf::actor core, std::string id, +// -- master actor ------------------------------------------------------------- + +using master_actor_type = caf::stateful_actor; + +caf::behavior master_actor(master_actor_type* self, endpoint_id this_endpoint, + caf::actor core, std::string store_name, master_state::backend_pointer backend, endpoint::clock* clock); diff --git a/include/broker/detail/meta_command_writer.hh b/include/broker/detail/meta_command_writer.hh deleted file mode 100644 index dc44ac9d..00000000 --- a/include/broker/detail/meta_command_writer.hh +++ /dev/null @@ -1,49 +0,0 @@ -#pragma once - -#include - -#include - -#include "broker/detail/meta_data_writer.hh" -#include "broker/fwd.hh" - -namespace broker { -namespace detail { - -/// Writes meta information of Broker commands to a serializer. -class meta_command_writer { -public: - meta_command_writer(caf::binary_serializer& sink); - - caf::error operator()(const internal_command& x); - - caf::error operator()(const none& x); - - caf::error operator()(const put_command& x); - - caf::error operator()(const put_unique_command& x); - - caf::error operator()(const erase_command& x); - - caf::error operator()(const expire_command& x); - - caf::error operator()(const add_command& x); - - caf::error operator()(const subtract_command& x); - - caf::error operator()(const snapshot_command& x); - - caf::error operator()(const snapshot_sync_command& x); - - caf::error operator()(const set_command& x); - - caf::error operator()(const clear_command& x); - -private: - caf::error apply_tag(uint8_t tag); - - detail::meta_data_writer writer_; -}; - -} // namespace detail -} // namespace broker diff --git a/include/broker/detail/meta_data_writer.hh b/include/broker/detail/meta_data_writer.hh index 63cc22ab..58ac3357 100644 --- a/include/broker/detail/meta_data_writer.hh +++ 
b/include/broker/detail/meta_data_writer.hh @@ -1,88 +1,36 @@ #pragma once +#include + +#include #include #include #include +#include #include #include #include "broker/data.hh" +#include "broker/entity_id.hh" #include "broker/error.hh" -#include "broker/publisher_id.hh" +#include "broker/fwd.hh" -namespace broker { -namespace detail { +namespace broker::detail { /// Writes meta information (type and size) of Broker ::data to a serializer. class meta_data_writer { public: - meta_data_writer(caf::binary_serializer& sink); - - template - caf::error operator()(const T&) { - return apply(data_tag()); - } - - caf::error operator()(const std::pair& x) { - // Ignore the publisher ID in recording mode. - return (*this)(x.first); - } - - template - caf::error operator()(const std::pair& x) { - BROKER_TRY((*this)(x.first)); - return (*this)(x.second); - } - - caf::error operator()(const std::string& x) { - BROKER_TRY(apply(data_tag())); - return apply(x.size()); - } - - caf::error operator()(const enum_value& x) { - BROKER_TRY(apply(data_tag())); - return apply(x.name.size()); - } + static constexpr bool is_loading = false; - caf::error operator()(const set& xs) { - BROKER_TRY(apply(data_tag())); - return apply_container(xs); - } + explicit meta_data_writer(caf::binary_serializer& sink); - caf::error operator()(const table& xs) { - BROKER_TRY(apply(data_tag())); - return apply_container(xs); - } + error operator()(const data& x); - caf::error operator()(const vector& xs) { - BROKER_TRY(apply(data_tag())); - return apply_container(xs); - } - - caf::error operator()(const data& x) { - return caf::visit(*this, x); - } - - caf::binary_serializer& sink() { - return sink_; - } - - template - caf::error apply_container(const T& xs) { - BROKER_TRY(apply(xs.size())); - for (const auto& x : xs) - BROKER_TRY((*this)(x)); - return caf::none; - } + error operator()(const internal_command& x); private: - caf::error apply(data::type tag); - - caf::error apply(size_t container_size); - 
caf::binary_serializer& sink_; }; -} // namespace detail -} // namespace broker +} // namespace broker::detail diff --git a/include/broker/detail/monotonic_buffer_resource.hh b/include/broker/detail/monotonic_buffer_resource.hh new file mode 100644 index 00000000..41be5f14 --- /dev/null +++ b/include/broker/detail/monotonic_buffer_resource.hh @@ -0,0 +1,56 @@ +#pragma once + +#include +#include + +namespace broker::detail { + +// Drop-in replacement for std::pmr::monotonic_buffer_resource. +// TODO: drop this class once the PMR API is available on supported platforms. +class monotonic_buffer_resource { +public: + monotonic_buffer_resource() { + allocate_block(nullptr); + } + + monotonic_buffer_resource(const monotonic_buffer_resource&) = delete; + + monotonic_buffer_resource& operator=(const monotonic_buffer_resource&) + = delete; + + ~monotonic_buffer_resource() noexcept { + destroy(); + } + + // Allocates memory. + [[nodiscard]] void* allocate(size_t bytes, + size_t alignment = alignof(max_align_t)); + + // Fancy no-op. + void deallocate(void*, size_t, size_t = alignof(std::max_align_t)) { + // nop + } + +private: + struct block { + block* next; + void* bytes; + }; + + void allocate_block(block* prev_block); + + void destroy() noexcept; + + size_t remaining_ = 0; + block* current_; +}; + +// Non-standard convenience function to avoid having to implement a drop-in +// replacement for polymorphic_allocator. +template +T* new_instance(monotonic_buffer_resource& buf, Args&&... 
args) { + auto ptr = buf.allocate(sizeof(T), alignof(T)); + return new (ptr) T(std::forward(args)...); +} + +} // namespace broker::detail diff --git a/include/broker/detail/network_cache.hh b/include/broker/detail/network_cache.hh index e29c475a..972ac692 100644 --- a/include/broker/detail/network_cache.hh +++ b/include/broker/detail/network_cache.hh @@ -17,8 +17,7 @@ #include "broker/logger.hh" #include "broker/network_info.hh" -namespace broker { -namespace detail { +namespace broker::detail { /// Maps any number of network addresses to remote actor handles. Actors can be /// reachable under several addresses for multiple reasons. For example, @@ -28,7 +27,7 @@ class network_cache { public: network_cache(caf::event_based_actor* selfptr); - void set_use_ssl(bool use_ssl_) { use_ssl = use_ssl_; } + void set_use_ssl(bool use_ssl); /// Either returns an actor handle immediately if the entry is cached or /// queries the middleman actor and responds later via response promise. @@ -36,51 +35,59 @@ public: template void fetch(const network_info& x, OnResult f, OnError g) { + BROKER_TRACE(BROKER_ARG(x)); using namespace caf; - auto y = find(x); - if (y) { - f(*y); + if (auto result = find(x)) { + BROKER_DEBUG("found" << x << "in cache, call OnResult immediately with" + << *result); + f(*result); return; } - BROKER_INFO("initiating connection to" - << (x.address + ":" + std::to_string(x.port)) - << (use_ssl ? "(SSL)" : "(no SSL)")); - auto hdl = (use_ssl ? 
self->home_system().openssl_manager().actor_handle() - : self->home_system().middleman().actor_handle()); - self->request(hdl, infinite, atom::connect_v, x.address, x.port) + BROKER_DEBUG("ask middleman to establish a connection to" << x); + self->request(mm_, infinite, connect_atom_v, x.address, x.port) .then( [=](const node_id&, strong_actor_ptr& res, std::set& ifs) mutable { - if (!ifs.empty()) - g(sec::unexpected_actor_messaging_interface); - else if (res == nullptr) - g(sec::no_actor_published_at_port); - else { + if (!ifs.empty()) { + BROKER_DEBUG( + "unexpected actor messaging interface for remote core"); + error err{sec::unexpected_actor_messaging_interface}; + g(err); + } else if (res == nullptr) { + BROKER_DEBUG( + "connected to CAF node without broker endpoint at given port"); + error err{sec::no_actor_published_at_port}; + g(err); + } else { + BROKER_DEBUG("resolved" << x << "to actor handle" << res); auto hdl = actor_cast(std::move(res)); hdls_.emplace(x, hdl); addrs_.emplace(hdl, x); f(std::move(hdl)); } }, - [=](error& err) mutable { g(std::move(err)); }); + [=](error& err) mutable { + BROKER_DEBUG("middleman was unable to connect to" << x + << BROKER_ARG(err)); + g(err); + }); } template void fetch(const caf::actor& x, OnResult f, OnError g) { + BROKER_TRACE(BROKER_ARG(x)); using namespace caf; - auto y = find(x); - if (y) { - f(*y); + if (auto result = find(x)) { + BROKER_DEBUG("found" << x << "in cache, call OnResult immediately with" + << *result); + f(*result); return; } - BROKER_INFO("retrieving connection for" - << x << (use_ssl ? "(SSL)" : "(no SSL)")); - auto hdl = (use_ssl ? 
self->home_system().openssl_manager().actor_handle() - : self->home_system().middleman().actor_handle()); - self->request(hdl, infinite, atom::get_v, x.node()) + self->request(mm_, infinite, atom::get_v, x.node()) .then( [=](const node_id&, std::string& address, uint16_t port) mutable { network_info result{std::move(address), port}; + BROKER_DEBUG("resolved" << x << "to" << result); hdls_.emplace(result, x); addrs_.emplace(x, result); f(std::move(result)); @@ -103,17 +110,26 @@ public: /// Removes mapping for `x` and the corresponding actor handle. void remove(const network_info& x); + /// @cond PRIVATE + + void mm(caf::actor hdl) { + mm_ = hdl; + } + + /// @endcond + private: - // Parent. + /// Points to the parent. caf::event_based_actor* self; - bool use_ssl = true; - // Maps remote actor handles to network addresses. + /// Type-erased reference to the I/O or OpenSSL middleman actor. + caf::actor mm_; + + /// Maps remote actor handles to network addresses. std::unordered_map addrs_; - // Maps network addresses to remote actor handles. + /// Maps network addresses to remote actor handles. std::unordered_map hdls_; }; -} // namespace detail -} // namespace broker +} // namespace broker::detail diff --git a/include/broker/detail/overload.hh b/include/broker/detail/overload.hh new file mode 100644 index 00000000..0a766c59 --- /dev/null +++ b/include/broker/detail/overload.hh @@ -0,0 +1,32 @@ +#pragma once + +#include + +namespace broker::detail { + +template +struct overload; + +template +struct overload : F { + using F::operator(); + overload(F f) : F(f) { + // nop + } +}; + +template +struct overload : F, overload { + using F::operator(); + using overload::operator(); + overload(F f, Fs... fs) : F(f), overload(fs...) { + // nop + } +}; + +template +overload make_overload(Fs... 
fs) { + return {std::move(fs)...}; +} + +} // namespace broker::detail diff --git a/include/broker/detail/peer_handshake.hh b/include/broker/detail/peer_handshake.hh new file mode 100644 index 00000000..0806af0d --- /dev/null +++ b/include/broker/detail/peer_handshake.hh @@ -0,0 +1,321 @@ +#pragma once + +#include + +#include +#include +#include +#include +#include + +#include "broker/alm/lamport_timestamp.hh" +#include "broker/detail/assert.hh" +#include "broker/detail/overload.hh" +#include "broker/error.hh" +#include "broker/filter_type.hh" +#include "broker/fwd.hh" +#include "broker/logger.hh" + +namespace broker::detail { + +/// A finite-state machine for modeling a peering handshake between two Broker +/// endpoints. +class peer_handshake { +public: + // -- nested types ----------------------------------------------------------- + + /// FSM scaffold for both implementations. + struct fsm { + // -- constants ------------------------------------------------------------ + + /// This flag signals that the originator sent + /// `(peer, init, peer_id, actor)` and is currently waiting for + /// `open_stream_msg`. + static constexpr int started = 0x01; + + /// This flag signals that the originator received the `open_stream_msg` + /// from the responder. + static constexpr int has_open_stream_msg = 0x02; + + /// This flag signals that the originator received the + /// `upstream_msg::ack_open` from the responder. + static constexpr int has_ack_open_msg = 0x04; + + /// The state after constructing the FSM. + static constexpr int init_state = 0x00; + + /// The state after the FSM completed. + static constexpr int done_state = 0x07; + + /// The state after an invalid transition. 
+ static constexpr int fail_state = 0x10; + + // -- constructors, destructors, and assignment operators ------------------ + + explicit fsm(peer_handshake* parent) : parent(parent) { + // nop + } + + fsm(fsm&&) = default; + + fsm& operator=(fsm&&) = default; + + // -- member variables ----------------------------------------------------- + + /// Points to the handshake object. + peer_handshake* parent; + + /// Keeps track of the progress. + int state = init_state; + + // -- properties ----------------------------------------------------------- + + /// Queries whether the all three flags are present. + constexpr bool done() const { + return state == done_state; + } + + constexpr bool has_flag(int flag) const { + return (state & flag) == flag; + } + + std::string pretty_state() const; + }; + + /// Implementation for the Originator of the handshake. This FSM is a simple + /// sequence of steps: + /// + /// ~~~ + /// +---------------------------+ + /// | init | + /// +-+-------------------------+ + /// | + /// | (peer, peer_id, actor) + /// v + /// +-+-------------------------+ + /// | started | + /// +-+-------------------------+ + /// | + /// | (caf::open_stream_msg) + /// v + /// +-+-------------------------+ + /// | has_open_stream_msg | + /// +-+-------------------------+ + /// | + /// | (caf::upstream_msg::ack_open) + /// v + /// +-+-------------------------+ + /// | done | + /// +---------------------------+ + /// ~~~ + struct originator : fsm { + // -- constructors, destructors, and assignment operators ------------------ + + using fsm::fsm; + + // -- state transitions ---------------------------------------------------- + + bool start(); + + bool handle_open_stream_msg(); + + bool handle_ack_open_msg(); + }; + + /// Implementation for the Responder of the handshake. The FSM allows + /// processing of `caf::upstream_msg::ack_open` and `caf::open_stream_msg` in + /// any order. 
+ /// + /// ~~~ + /// +---------------------------+ + /// | init | + /// +-+-------------------------+ + /// | + /// | (peer, init, peer_id, actor) + /// v + /// +-+-------------------------+ + /// | started | + /// +-+--+----------------------+ + /// | | + /// +---------------+ +------------+ + /// | | + /// | (caf::upstream_msg::ack_open) | (caf::open_stream_msg) + /// v v + /// +-+-------------------------+ +-+-------------------------+ + /// | has_ack_open_msg | | has_open_stream_msg | + /// +-+-------------------------+ +-+-------------------------+ + /// | | + /// | (caf::open_stream_msg) | (caf::upstream_msg::ack_open) + /// | | + /// +---------------+ +------------+ + /// | | + /// v v + /// +-+--+----------------------+ + /// | done | + /// +---------------------------+ + /// ~~~ + struct responder : fsm { + // -- constructors, destructors, and assignment operators ------------------ + + using fsm::fsm; + + // -- state transitions ---------------------------------------------------- + + bool start(); + + bool handle_open_stream_msg(); + + bool handle_ack_open_msg(); + + bool post_msg_action(); + }; + + // -- member types (continued) ----------------------------------------------- + + using impl_type = caf::variant; + + using input_msg_type + = caf::variant; + + // -- constructors, destructors, and assignment operators -------------------- + + explicit peer_handshake(peer_manager* owner) : owner(owner) { + // nop + } + + peer_handshake() = delete; + + peer_handshake(const peer_handshake&) = delete; + + peer_handshake& operator=(const peer_handshake&) = delete; + + // -- state transitions ------------------------------------------------------ + + /// Starts the handshake. This FSM takes on the role of the Originator. + [[nodiscard]] bool originator_start_peering(endpoint_id peer_id, + caf::actor peer_hdl, + caf::response_promise rp); + + /// Processes the open_stream_msg addressed at the originator. 
+ [[nodiscard]] bool + originator_handle_open_stream_msg(filter_type filter, + alm::lamport_timestamp timestamp); + + /// Starts the handshake. This FSM takes on the role of the Responder. + [[nodiscard]] bool responder_start_peering(endpoint_id peer_id, + caf::actor peer_hdl); + + /// Processes the open_stream_msg addressed at the responder. + [[nodiscard]] bool + responder_handle_open_stream_msg(filter_type filter, + alm::lamport_timestamp timestamp); + + /// Processes the `ack_open` message. Unlike the other functions, the message + /// is always the same, whether Originator or Responder receive it. Hence, + /// this function internally dispatches on the implementation type of the FSM. + [[nodiscard]] bool handle_ack_open_msg(); + + // -- callbacks for the FSM implementations ---------------------------------- + + bool done_transition(); + + // -- error handling --------------------------------------------------------- + + /// Fulfills all response promises with `reason`, sets `err` to `reason` and + /// sets the FSM state to `fail_state`. + void fail(error reason); + + // -- utilities -------------------------------------------------------------- + + template + auto visit_impl(Fs... 
fs) const { + auto overload_set = detail::make_overload(std::move(fs)...); + return caf::visit(overload_set, impl); + } + + // -- properties ------------------------------------------------------------- + + // caf::intrusive_ptr strong_this() noexcept { + // return this; + // } + + bool is_originator() const noexcept { + return caf::holds_alternative(impl); + } + + bool is_responder() const noexcept { + return caf::holds_alternative(impl); + } + + bool failed() const noexcept { + return static_cast(err); + } + + int state() const noexcept; + + auto started() const noexcept { + return state() != fsm::init_state; + } + + auto done() const noexcept { + return state() == fsm::done_state; + } + + [[nodiscard]] bool has_inbound_path() const noexcept { + return in != caf::invalid_stream_slot; + } + + [[nodiscard]] bool has_outbound_path() const noexcept { + return out != caf::invalid_stream_slot; + } + + caf::actor self_hdl(); + + std::string pretty_impl() const; + + // -- FSM management --------------------------------------------------------- + + /// Forces the implementation to `responder` if the FSM has not started yet. + bool to_responder(); + + // -- member variables ------------------------------------------------------- + + /// Pointer to the object that performs the handshake. + /// @ref broker::alm::stream_transport. + peer_manager* owner; + + /// ID of the remote endpoint. + endpoint_id remote_id; + + /// Handle to the remote core. + caf::actor remote_hdl; + + /// Topic filter of the remote endpoint. + filter_type remote_filter; + + /// Logical time at the remote peer when establishing the connection. 
+ alm::lamport_timestamp remote_timestamp; + + error err; + + std::vector promises; + + caf::stream_slot in = caf::invalid_stream_slot; + + caf::stream_slot out = caf::invalid_stream_slot; + + impl_type impl; + + std::vector input_buffer; +}; + +} // namespace broker::detail diff --git a/include/broker/detail/retry_state.hh b/include/broker/detail/retry_state.hh index 4bae702a..c6659989 100644 --- a/include/broker/detail/retry_state.hh +++ b/include/broker/detail/retry_state.hh @@ -16,5 +16,3 @@ struct retry_state { }; } // namespace broker::detail - -CAF_ALLOW_UNSAFE_MESSAGE_TYPE(broker::detail::retry_state) diff --git a/include/broker/detail/store_actor.hh b/include/broker/detail/store_actor.hh index 724b6c82..d5a3a540 100644 --- a/include/broker/detail/store_actor.hh +++ b/include/broker/detail/store_actor.hh @@ -1,31 +1,114 @@ #pragma once +#include #include +#include +#include #include #include +#include +#include +#include "broker/defaults.hh" +#include "broker/detail/channel.hh" +#include "broker/detail/store_state.hh" #include "broker/endpoint.hh" +#include "broker/fwd.hh" +#include "broker/logger.hh" #include "broker/optional.hh" #include "broker/topic.hh" namespace broker::detail { +using local_request_key = std::pair; + +} // namespace broker::detail + +namespace std { + +template <> +struct hash { + size_t operator()(const broker::detail::local_request_key& x) const noexcept { + return caf::hash::fnv::compute(x.first, x.second); + } +}; + +} // namespace std + +namespace broker::detail { + class store_actor_state { public: + // -- member types ----------------------------------------------------------- + /// Allows us to apply this state as a visitor to internal commands. using result_type = void; + using channel_type = command_channel; + + using sequence_number_type = detail::sequence_number_type; + + using local_request_key = std::pair; + + // -- initialization --------------------------------------------------------- + /// Initializes the state. 
/// @pre `ptr != nullptr` /// @pre `clock != nullptr` - void init(caf::event_based_actor* self, endpoint::clock* clock, - std::string&& id, caf::actor&& core); + void init(caf::event_based_actor* self, endpoint_id this_endpoint, + endpoint::clock* clock, std::string&& id, caf::actor&& core); + + template + void init(channel_type::producer& out) { + using caf::get_or; + auto& cfg = self->config(); + out.heartbeat_interval(get_or(cfg, "broker.store.heartbeat-interval", + defaults::store::heartbeat_interval)); + out.connection_timeout_factor(get_or(cfg, "broker.store.connection-timeout", + defaults::store::connection_timeout)); + } + + template + void init(channel_type::consumer& in) { + using caf::get_or; + auto& cfg = self->config(); + auto heartbeat_interval = get_or(cfg, "broker.store.heartbeat-interval", + defaults::store::heartbeat_interval); + auto connection_timeout = get_or(cfg, "broker.store.connection-timeout", + defaults::store::connection_timeout); + auto nack_timeout = get_or(cfg, "broker.store.nack-timeout", + defaults::store::nack_timeout); + BROKER_DEBUG(BROKER_ARG(heartbeat_interval) + << BROKER_ARG(connection_timeout) << BROKER_ARG(nack_timeout)); + in.heartbeat_interval(heartbeat_interval); + in.connection_timeout_factor(connection_timeout); + in.nack_timeout(nack_timeout); + } + + template + caf::behavior make_behavior(Fs... fs) { + BROKER_TRACE(""); + return { + std::move(fs)..., + [this](atom::increment, store_state_ptr ptr) { + attached_states.emplace(std::move(ptr), size_t{0}).first->second += 1; + }, + [this](atom::decrement, store_state_ptr ptr) { + auto& xs = attached_states; + if (auto i = xs.find(ptr); i != xs.end()) + if (--(i->second) == 0) + xs.erase(i); + }, + }; + } + + // -- event signaling -------------------------------------------------------- /// Emits an `insert` event to topics::store_events subscribers. 
void emit_insert_event(const data& key, const data& value, const optional& expiry, - const publisher_id& publisher); + const entity_id& publisher); /// Convenience function for calling /// `emit_insert_event(msg.key, msg.value, msg.expiry)`. @@ -38,7 +121,7 @@ public: void emit_update_event(const data& key, const data& old_value, const data& new_value, const optional& expiry, - const publisher_id& publisher); + const entity_id& publisher); /// Convenience function for calling /// `emit_update_event(msg.key, old_value, msg.value, msg.expiry, @@ -49,7 +132,7 @@ public: } /// Emits an `erase` event to topics::store_events subscribers. - void emit_erase_event(const data& key, const publisher_id& publisher); + void emit_erase_event(const data& key, const entity_id& publisher); /// Convenience function for calling /// `emit_erase_event(msg.key, msg.publisher)`. @@ -59,7 +142,7 @@ public: } /// Emits an `expire` event to topics::store_events subscribers. - void emit_expire_event(const data& key, const publisher_id& publisher); + void emit_expire_event(const data& key, const entity_id& publisher); /// Convenience function for calling /// `emit_expire_event(msg.key, msg.publisher)`. @@ -68,20 +151,48 @@ public: emit_expire_event(msg.key, msg.publisher); } + // -- callbacks for the behavior --------------------------------------------- + + void on_down_msg(const caf::actor_addr& source, const error& reason); + + // -- member variables ------------------------------------------------------- + /// Points to the actor owning this state. caf::event_based_actor* self = nullptr; /// Points to the endpoint's clock. endpoint::clock* clock = nullptr; - /// Stores the ID of the store. - std::string id; + /// Caches the configuration parameter `broker.store.tick-interval`. + caf::timespan tick_interval; + + /// Stores the name, i.e., the prefix of the topic. + std::string store_name; + + /// Stores the ID of this actor when communication to other store actors. 
+ entity_id id; /// Points the core actor of the endpoint this store belongs to. caf::actor core; /// Destination for emitted events. topic dst; + + /// Stores requests from local actors. + std::unordered_map local_requests; + + /// Stores promises to fulfill when reaching an idle state. + std::vector idle_callbacks; + + /// Strong pointers for all locally attached store objects. The stores + /// themselves only keep weak pointers to their state in order to couple the + /// validity of their state to the lifetime of their (frontend) actor. The + /// `size_t` value reflects the number of `store` objects that currently have + /// access to the stored state. + /// @note the state keeps an actor handle to this actor, but CAF breaks this + /// cycle automatically by destroying this vector when the actor + /// terminates. + std::unordered_map attached_states; }; } // namespace broker::detail diff --git a/include/broker/detail/store_state.hh b/include/broker/detail/store_state.hh new file mode 100644 index 00000000..f9802528 --- /dev/null +++ b/include/broker/detail/store_state.hh @@ -0,0 +1,63 @@ +#pragma once + +#include + +#include +#include + +#include "broker/expected.hh" +#include "broker/fwd.hh" +#include "broker/timeout.hh" + +namespace broker::detail { + +struct store_state { + std::string name; + caf::actor frontend; + caf::scoped_actor self; + request_id req_id = 1; + + store_state(std::string name, caf::actor frontend_hdl) + : name(std::move(name)), + frontend(std::move(frontend_hdl)), + self(frontend->home_system()) { + BROKER_DEBUG("created state for store" << name); + } + + ~store_state() { + BROKER_DEBUG("destroyed state for store" << name); + } + + template + expected request(Ts&&... xs) { + expected res{T{}}; + self->request(frontend, timeout::frontend, std::forward(xs)...) + .receive([&](T& x) { res = std::move(x); }, + [&](caf::error& e) { res = std::move(e); }); + return res; + } + + template + expected request_tagged(request_id tag, Ts&&... 
xs) { + expected res{T{}}; + self->request(frontend, timeout::frontend, std::forward(xs)...) + .receive( + [&, tag](T& x, request_id res_tag) { + if (res_tag == tag) { + res = std::move(x); + } else { + BROKER_ERROR("frontend responded with unexpected tag"); + res = make_error(caf::sec::runtime_error, "tag mismatch"); + } + }, + [&](caf::error& e) { res = std::move(e); }); + return res; + } + + template + void anon_send(Ts&&... xs) { + caf::anon_send(frontend, std::forward(xs)...); + } +}; + +} // namespace broker::detail diff --git a/include/broker/detail/unipath_manager.hh b/include/broker/detail/unipath_manager.hh index 67b04848..16d51cb7 100644 --- a/include/broker/detail/unipath_manager.hh +++ b/include/broker/detail/unipath_manager.hh @@ -2,47 +2,94 @@ #include +#include #include #include -#include "broker/detail/item_scope.hh" +#include "broker/detail/peer_handshake.hh" #include "broker/fwd.hh" +// This file contains the declaration of `unipath_manager` as well as the +// declarations of the derived types: +// +// +// +-----------------------+ +// | unipath_manager | +// +-----------+-----------+ +// ^ +// | +// +-----------------------------------------+ +// | | | +// +-----------+-----------+ | +-----------+-----------+ +// | peer_manager | | | unipath_source | +// +-----------------------+ | +-----------------------+ +// | +// +-------------+-------------+ +// | | +// +-----------+-----------+ +-----------+-----------+ +// | unipath_data_sink | | unipath_command_sink | +// +-----------------------+ +-----------------------+ + namespace broker::detail { +// -- unipath_manager ---------------------------------------------------------- + /// A stream manager with at most one inbound and at most one outbound path. /// /// Unlike CAF's regular stream managers, this manager does *not* forward data /// from its inbound paths to its outbound paths. In this design, all inbound /// paths feed into the central dispatcher. 
The dispatcher then pushes data into -/// the outbound paths of *all* unipath managers. Further, manager implicitly -/// filter out all items created by themselves. This is because our only use -/// case for managers with in- and outbound paths is modeling Broker peers. +/// the outbound paths. +/// /// We keep both paths to a single peer in one stream manager to model a /// bidirectional connection. Hence, forwarding from the in- to the outbound /// path would in our case send messages received from a peer back to itself. class unipath_manager : public caf::stream_manager { public: + // -- friends ---------------------------------------------------------------- + + friend class peer_manager; + friend class unipath_command_sink; + friend class unipath_data_sink; + friend class unipath_source; + + // -- member types ----------------------------------------------------------- + using super = caf::stream_manager; + /// Sum type holding one of three possible derived types of `unipath_manager`. 
+ using derived_pointer = std::variant; + + // -- nested types ----------------------------------------------------------- + struct observer { virtual ~observer(); virtual void closing(unipath_manager*, bool, const caf::error&) = 0; virtual void downstream_connected(unipath_manager*, const caf::actor&) = 0; + virtual bool finalize_handshake(peer_manager*) = 0; + virtual void abort_handshake(peer_manager*) = 0; }; - explicit unipath_manager(central_dispatcher*, observer*); + // -- constructors, destructors, and assignment operators -------------------- + + unipath_manager() = delete; + + unipath_manager(const unipath_manager&) = delete; + + unipath_manager& operator=(const unipath_manager&) = delete; ~unipath_manager() override; - using super::handle; + // -- properties ------------------------------------------------------------- - virtual bool enqueue(const unipath_manager* source, item_scope scope, - caf::span xs) - = 0; + /// Removes the observer, thereby discarding all future events. + void unobserve() noexcept { + observer_ = nullptr; + } /// Returns the filter that this manager applies to enqueued items. - virtual filter_type filter() = 0; + [[nodiscard]] virtual filter_type filter() const = 0; /// Sets the filter that this manager applies to enqueued items. virtual void filter(filter_type) = 0; @@ -51,49 +98,68 @@ public: /// topic. [[nodiscard]] virtual bool accepts(const topic&) const noexcept = 0; - /// Causes the manager to cache incoming batches until `unblock_inputs()` gets - /// called. - virtual void block_inputs(); - - /// Release all currently blocked batches and allow processing of batches - /// again. - virtual void unblock_inputs(); - - /// Returns whether this manager currently blocks incoming batches. - virtual bool blocks_inputs(); - /// Returns the type ID of the message type accepted by this manager. - virtual caf::type_id_t message_type() const noexcept = 0; - - /// Returns whether this manager has exactly one inbound path. 
- bool has_inbound_path() const noexcept; + [[nodiscard]] virtual caf::type_id_t message_type_id() const noexcept = 0; - /// Returns whether this manager has exactly one outbound path. - bool has_outbound_path() const noexcept; + /// Returns the connected actor. + [[nodiscard]] caf::actor hdl() const noexcept; /// Returns the slot for the inbound path or `caf::invalid_stream_slot` if /// none exists. - caf::stream_slot inbound_path_slot() const noexcept; + [[nodiscard]] caf::stream_slot inbound_path_slot() const noexcept; /// Returns the slot for the outbound path or `caf::invalid_stream_slot` if /// none exists. - caf::stream_slot outbound_path_slot() const noexcept; + [[nodiscard]] caf::stream_slot outbound_path_slot() const noexcept; - /// Returns the connected actor. - caf::actor hdl() const noexcept; + /// Returns whether this manager has exactly one inbound path. + [[nodiscard]] bool has_inbound_path() const noexcept { + return inbound_path_slot() != caf::invalid_stream_slot; + } + + /// Returns whether this manager has exactly one outbound path. + [[nodiscard]] bool has_outbound_path() const noexcept { + return outbound_path_slot() != caf::invalid_stream_slot; + } /// Returns whether this manager has exactly one inbound and exactly one /// outbound path. - bool fully_connected() const noexcept { + [[nodiscard]] bool fully_connected() const noexcept { return has_inbound_path() && has_outbound_path(); } - /// Removes the observer, thereby discarding all future events. - void unobserve() { - observer_ = nullptr; + /// Returns whether this manager has neither an inbound nor an outbound path. + [[nodiscard]] bool unconnected() const noexcept { + return !has_inbound_path() && !has_outbound_path(); } - // -- overrides -------------------------------------------------------------- + /// Returns the dispatcher that owns this manager. 
+ [[nodiscard]] auto dispatcher() const noexcept { + return dispatcher_; + } + + /// Returns a pointer to the actor that owns this dispatcher. + [[nodiscard]] caf::event_based_actor* this_actor() noexcept; + + /// Returns the ID for this Broker endpoint. + [[nodiscard]] endpoint_id this_endpoint() const; + + /// Returns the current filter on this Broker endpoint. + [[nodiscard]] filter_type local_filter() const; + + /// Returns the current logical time on this Broker endpoint. + [[nodiscard]] alm::lamport_timestamp local_timestamp() const noexcept; + + /// Returns whether this manager currently blocks incoming batches because the + /// handshake did not complete yet. + [[nodiscard]] virtual bool blocks_inputs() const noexcept; + + /// Returns `this` as one of the three possible derived types. + [[nodiscard]] virtual derived_pointer derived_ptr() noexcept = 0; + + // -- caf::stream_manager overrides ------------------------------------------ + + using super::handle; bool congested(const caf::inbound_path&) const noexcept override; @@ -106,46 +172,222 @@ public: void handle(caf::stream_slots, caf::upstream_msg::forced_drop&) override; protected: - void closing(bool graceful, const caf::error& reason); + virtual void closing(bool graceful, const caf::error& reason); + + void downstream_connected(caf::actor hdl); central_dispatcher* dispatcher_; - observer* observer_ = nullptr; + + observer* observer_; + +private: + unipath_manager(central_dispatcher*, observer*); // accessible to friends only }; +/// @relates unipath_manager using unipath_manager_ptr = caf::intrusive_ptr; -unipath_manager_ptr make_data_source(central_dispatcher* dispatcher); +// -- unipath_data_sink -------------------------------------------------------- + +/// Represents a @ref data_message sink. 
+class unipath_data_sink : public unipath_manager { +public: + // -- member types ----------------------------------------------------------- + + using super = unipath_manager; + + using message_type = data_message; + + // -- constructors, destructors, and assignment operators -------------------- + + unipath_data_sink(central_dispatcher* cd, observer* obs) : super(cd, obs) { + // nop + } + + ~unipath_data_sink() override; + + // -- properties ------------------------------------------------------------- + + [[nodiscard]] derived_pointer derived_ptr() noexcept override; -unipath_manager_ptr make_command_source(central_dispatcher* dispatcher); + // -- item processing -------------------------------------------------------- + + /// Pushes an item downstream. + virtual void enqueue(const data_message& msg) = 0; +}; + +/// @relates unipath_data_sink +using unipath_data_sink_ptr = caf::intrusive_ptr; + +/// @relates unipath_data_sink +unipath_data_sink_ptr make_unipath_data_sink(central_dispatcher* dispatcher, + filter_type filter); + +// -- unipath_command_sink ----------------------------------------------------- + +/// Represents a @ref command_message sink or source. 
+class unipath_command_sink : public unipath_manager { +public: + // -- member types ----------------------------------------------------------- -unipath_manager_ptr make_source(central_dispatcher* dispatcher, - caf::stream in); + using super = unipath_manager; -unipath_manager_ptr make_source(central_dispatcher* dispatcher, - caf::stream in); + using message_type = command_message; + + // -- constructors, destructors, and assignment operators -------------------- + + unipath_command_sink(central_dispatcher* cd, observer* obs) : super(cd, obs) { + // nop + } + + ~unipath_command_sink() override; + + // -- properties ------------------------------------------------------------- + + [[nodiscard]] derived_pointer derived_ptr() noexcept override; + + // -- item processing -------------------------------------------------------- + + /// Pushes an item downstream. + virtual void enqueue(const message_type& msg) = 0; +}; + +/// @relates unipath_command_sink +using unipath_command_sink_ptr = caf::intrusive_ptr; + +/// @relates unipath_command_sink +unipath_command_sink_ptr +make_unipath_command_sink(central_dispatcher* dispatcher, filter_type filter); + +// -- peer_manager ------------------------------------------------------------- + +/// Represents a bidirectional connection to a peer. +class peer_manager : public unipath_manager { +public: + // -- member types ----------------------------------------------------------- + + using super = unipath_manager; + + using message_type = node_message; + + // -- constructors, destructors, and assignment operators -------------------- + + peer_manager(central_dispatcher*, observer*); + + ~peer_manager() override; + + // -- overrides -------------------------------------------------------------- + + void closing(bool graceful, const caf::error& reason) override; + + // -- item processing -------------------------------------------------------- + + /// Pushes an item downstream. 
+ virtual void enqueue(const message_type& msg) = 0; + + // -- properties ------------------------------------------------------------- + + /// Checks whether the Broker handshake fully completed. + [[nodiscard]] bool handshake_completed() const noexcept; + + bool blocks_inputs() const noexcept override; + + /// @pre `!blocks_inputs()` + void release_blocked_inputs(); + + /// @pre `blocks_inputs()` + virtual void add_blocked_input(caf::message msg) = 0; + + [[nodiscard]] detail::peer_handshake& handshake() noexcept { + return handshake_; + } + + [[nodiscard]] derived_pointer derived_ptr() noexcept override; + + // -- peer_handshake callbacks ----------------------------------------------- + + void handshake_failed(error reason); + + bool finalize_handshake(); + +protected: + // -- member variables ------------------------------------------------------- + + peer_handshake handshake_; + alm::lamport_timestamp remote_ts_; + +private: + virtual void unblock_inputs() = 0; +}; + +/// @relates peer_unipath +using peer_manager_ptr = caf::intrusive_ptr; + +/// @relates peer_manager +peer_manager_ptr make_peer_manager(central_dispatcher* dispatcher, + peer_manager::observer* observer); + +/// @relates peer_manager +peer_manager_ptr make_peer_manager(alm::stream_transport* transport); + +// -- unipath_source ----------------------------------------------------------- + +class unipath_source : public unipath_manager { +public: + using super = unipath_manager; + + unipath_source(central_dispatcher* dispatcher, + unipath_manager::observer* observer) + : super(dispatcher, observer), out_(this) { + // nop + } + + virtual ~unipath_source() override; + + [[nodiscard]] derived_pointer derived_ptr() noexcept override; + + filter_type filter() const override; + + void filter(filter_type) override; + + bool accepts(const topic&) const noexcept override; + + caf::downstream_manager& out() override; + + bool done() const override; + + bool idle() const noexcept override; + +protected: + 
caf::downstream_manager out_; + +private: + virtual void unblock_inputs() = 0; + virtual void add_blocked_input(caf::message msg) = 0; +}; -unipath_manager_ptr make_source(central_dispatcher* dispatcher, - caf::stream in); +/// @relates unipath_source +using unipath_source_ptr = caf::intrusive_ptr; -unipath_manager_ptr make_data_sink(central_dispatcher* dispatcher, - filter_type filter); +/// @relates unipath_source +unipath_source_ptr make_unipath_source(central_dispatcher* dispatcher, + caf::stream in); -unipath_manager_ptr make_command_sink(central_dispatcher* dispatcher, - filter_type filter); +/// @relates unipath_source +unipath_source_ptr make_unipath_source(central_dispatcher* dispatcher, + caf::stream in); -/// Peer managers always have one inbound and one outbound path. -/// @note the returned manager returns `true` for `blocks_inputs()` and Broker -/// does *not* automatically add the manager to `dispatcher`. -unipath_manager_ptr make_peer_manager(central_dispatcher* dispatcher, - unipath_manager::observer* observer); +/// @relates unipath_source +unipath_source_ptr make_unipath_source(central_dispatcher* dispatcher, + caf::stream in); } // namespace broker::detail namespace std { template <> -struct hash { - using argument_type = broker::detail::unipath_manager_ptr; +struct hash { + using argument_type = broker::detail::peer_manager_ptr; size_t operator()(const argument_type& x) const noexcept { hash f; return f(x.get()); diff --git a/include/broker/domain_options.hh b/include/broker/domain_options.hh new file mode 100644 index 00000000..5328c2a3 --- /dev/null +++ b/include/broker/domain_options.hh @@ -0,0 +1,20 @@ +#pragma once + +#include + +namespace broker { + +/// Bundles options for a Broker @ref gateway domain. +struct domain_options { + /// If `true`, configures the gateway to appear only as a sink to other + /// peers. + bool disable_forwarding = false; + + /// Stores all options to `sink`. 
+ void save(caf::settings& sink); + + /// Loads all options from `source`. + void load(const caf::settings& source); +}; + +} // namespace broker diff --git a/include/broker/endpoint.hh b/include/broker/endpoint.hh index 3077c084..e178f52a 100644 --- a/include/broker/endpoint.hh +++ b/include/broker/endpoint.hh @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,7 @@ #include "broker/backend.hh" #include "broker/backend_options.hh" #include "broker/configuration.hh" +#include "broker/defaults.hh" #include "broker/endpoint_info.hh" #include "broker/expected.hh" #include "broker/frontend.hh" @@ -29,6 +31,7 @@ #include "broker/message.hh" #include "broker/network_info.hh" #include "broker/peer_info.hh" +#include "broker/shutdown_options.hh" #include "broker/status.hh" #include "broker/status_subscriber.hh" #include "broker/store.hh" @@ -157,6 +160,16 @@ public: return peer(info.address, info.port, info.retry); } + /// Initiates a peering with a remote endpoint. + /// @param locator Denotes the remote endpoint in URI notation. + /// @param retry If non-zero, seconds after which to retry if connection + /// cannot be established, or breaks. + /// @returns True if connection was successfully set up. + /// @note The endpoint will also receive a status message indicating + /// success or failure. + bool peer(const caf::uri& locator, + timeout::seconds retry = timeout::seconds(10)); + /// Initiates a peering with a remote endpoint, without waiting /// for the operation to complete. /// @param address The IP address of the remote endpoint. 
@@ -342,16 +355,44 @@ public: clock_->send_later(std::move(who), after, std::move(msg)); } + // --- setup and testing ----------------------------------------------------- + + // --await-peer-start + /// Blocks execution of the current thread until either `whom` was added to + /// the routing table and its subscription flooding reached this endpoint or a + /// timeout occurs. 
+ /// @param whom ID of another endpoint. + /// @param timeout An optional timeout for configuring the maximum time + /// this function may block. + /// @returns `true` if `whom` was added before the timeout, `false` otherwise. + [[nodiscard]] bool + await_peer(endpoint_id whom, timespan timeout = defaults::await_peer_timeout); + + /// Asynchronously runs `callback()` when `whom` was added to the routing + /// table and its subscription flooding reached this endpoint. + /// @param whom ID of another endpoint. + /// @param callback A function object wrapping code for asynchronous + /// execution. The argument for the callback is `true` if + /// `whom` was added before the timeout, `false` otherwise. + void await_peer(endpoint_id whom, std::function callback, + timespan timeout = defaults::await_peer_timeout); + // --await-peer-end + // --- properties ------------------------------------------------------------ /// Queries whether the endpoint waits for masters and slaves on shutdown. bool await_stores_on_shutdown() const { - return await_stores_on_shutdown_; + constexpr auto flag = shutdown_options::await_stores_on_shutdown; + return shutdown_options_.contains(flag); } /// Sets whether the endpoint waits for masters and slaves on shutdown. 
void await_stores_on_shutdown(bool x) { - await_stores_on_shutdown_ = x; + constexpr auto flag = shutdown_options::await_stores_on_shutdown; + if (x) + shutdown_options_.set(flag); + else + shutdown_options_.unset(flag); } bool is_shutdown() const { @@ -393,7 +434,7 @@ private: mutable caf::actor_system system_; }; caf::actor core_; - bool await_stores_on_shutdown_; + shutdown_options shutdown_options_; std::vector children_; bool destroyed_; clock* clock_; diff --git a/include/broker/endpoint_info.hh b/include/broker/endpoint_info.hh index 77e6630a..2c1b701b 100644 --- a/include/broker/endpoint_info.hh +++ b/include/broker/endpoint_info.hh @@ -1,9 +1,10 @@ #pragma once -#include - +#include #include +#include + #include "broker/convert.hh" #include "broker/fwd.hh" #include "broker/network_info.hh" @@ -11,15 +12,23 @@ namespace broker { -using caf::node_id; - /// Information about an endpoint. /// @relates endpoint struct endpoint_info { - node_id node; ///< A unique context ID per machine/process. + endpoint_id node; ///< A unique context ID per machine/process. optional network; ///< Optional network-level information. }; +/// @relates endpoint_info +inline bool operator==(const endpoint_info& x, const endpoint_info& y) { + return x.node == y.node && x.network == y.network; +} + +/// @relates endpoint_info +inline bool operator!=(const endpoint_info& x, const endpoint_info& y) { + return !(x == y); +} + /// @relates endpoint_info template bool inspect(Inspector& f, endpoint_info& x) { @@ -40,6 +49,9 @@ bool convert(const data& src, endpoint_info& dst); /// @relates endpoint_info bool convert(const endpoint_info& src, data& dst); +/// @relates endpoint_info +std::string to_string(const endpoint_info& x); + // Enable `can_convert` for `endpoint_info`. 
template <> struct can_convert_predicate { diff --git a/include/broker/entity_id.hh b/include/broker/entity_id.hh new file mode 100644 index 00000000..77180ad3 --- /dev/null +++ b/include/broker/entity_id.hh @@ -0,0 +1,82 @@ +#pragma once + +#include +#include +#include + +#include + +#include "broker/fwd.hh" + +namespace broker { + +/// Uniquely identifies a *publisher* in the distributed system. +struct entity_id { + /// Identifies the @ref endpoint instance that hosts the *publisher*. + endpoint_id endpoint; + + /// Identifies the local object that published a message, data store change, + /// or event. Usually, this ID belongs to a @ref publisher or @ref store + /// object. The @ref endpoint sets this ID to 0 when referring to itself, + /// e.g., when using `endpoint::publish`. + uint64_t object = 0; + + /// Returns whether this ID is valid, i.e., whether the `endpoint` member is + /// valid. + explicit operator bool() const noexcept { + return static_cast(endpoint); + } + + /// Returns an invalid ID. + static entity_id nil() noexcept { + return {caf::node_id{}, 0}; + } + + /// Converts the handle type to an entity ID. + template + static entity_id from(const Handle& hdl) { + return hdl ? entity_id{hdl->node(), hdl->id()} : nil(); + } + + /// Computes a hash value for this object. 
+ size_t hash() const noexcept; +}; + +/// @relates entity_id +template +bool inspect(Inspector& f, entity_id& x) { + return f.object(x) + .pretty_name("entity_id") + .fields(f.field("endpoint", x.endpoint), f.field("object", x.object)); +} + +/// @relates entity_id +inline bool operator==(const entity_id& x, const entity_id& y) noexcept { + return std::tie(x.endpoint, x.object) == std::tie(y.endpoint, y.object); +} + +/// @relates entity_id +inline bool operator!=(const entity_id& x, const entity_id& y) noexcept { + return !(x == y); +} + +/// @relates entity_id +inline bool operator<(const entity_id& x, const entity_id& y) noexcept { + return std::tie(x.endpoint, x.object) < std::tie(y.endpoint, y.object); +} + +/// @relates entity_id +std::string to_string(const entity_id& x); + +} // namespace broker + +namespace std { + +template <> +struct hash { + size_t operator()(const broker::entity_id& x) const noexcept { + return x.hash(); + } +}; + +} // namespace std diff --git a/include/broker/error.hh b/include/broker/error.hh index 82bb753f..08ad71cf 100644 --- a/include/broker/error.hh +++ b/include/broker/error.hh @@ -62,8 +62,37 @@ enum class ec : uint8_t { end_of_file, /// Received an unknown type tag value. invalid_tag, + /// Received an invalid message. + invalid_message, /// Deserialized an invalid status. invalid_status, + /// Converting between two data types or formats failed. + conversion_failed, + /// Adding a consumer to a producer failed because the producer already added + /// the consumer. + consumer_exists, + /// A producer or consumer did not receive any message from a consumer within + /// the configured timeout. + connection_timeout, + /// Called a member function without satisfying its preconditions. + bad_member_function_call, + /// Attempted to use the same request_id twice. + repeated_request_id, + /// A clone ran out of sync with the master. + broken_clone, + /// Canceled an operation because the system is shutting down. 
+ shutting_down, + /// Canceled a peering request due to invalid or inconsistent data. + invalid_peering_request, + /// Broker attempted to trigger a second handshake to a peer while the first + /// handshake did not complete. + repeated_peering_handshake_request, + /// Received an unexpected or duplicate message during endpoint handshake. + unexpected_handshake_message, + /// Handshake failed due to invalid state transitions. + invalid_handshake_state, + /// Dispatching a message failed because no path to the receiver exists. + no_path_to_peer, }; // --ec-enum-end diff --git a/include/broker/fwd.hh b/include/broker/fwd.hh index bec71d80..b5b6bfd4 100644 --- a/include/broker/fwd.hh +++ b/include/broker/fwd.hh @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -15,21 +16,27 @@ namespace broker { // -- PODs --------------------------------------------------------------------- +struct ack_clone_command; struct add_command; +struct attach_clone_command; +struct attach_writer_command; struct clear_command; +struct cumulative_ack_command; +struct domain_options; struct endpoint_info; +struct entity_id; struct enum_value; struct erase_command; struct expire_command; +struct keepalive_command; +struct nack_command; struct network_info; -struct node_message; struct none; struct peer_info; struct put_command; struct put_unique_command; -struct set_command; -struct snapshot_command; -struct snapshot_sync_command; +struct put_unique_result_command; +struct retransmit_failed_command; struct subtract_command; // -- classes ------------------------------------------------------------------ @@ -41,6 +48,7 @@ class endpoint; class internal_command; class port; class publisher; +class shutdown_options; class status; class store; class subnet; @@ -73,12 +81,43 @@ using integer = int64_t; using real = double; using request_id = uint64_t; -// -- CAF type aliases --------------------------------------------------------- +} // namespace broker + +// -- CAF type 
aliases (1) ----------------------------------------------------- + +namespace broker { using caf::optional; +using endpoint_id = caf::node_id; + +} // namespace broker + +// -- ALM types ---------------------------------------------------------------- + +namespace broker::alm { + +class multipath; +class multipath_group; +class multipath_node; +class peer; +class routing_table_row; +class stream_transport; + +struct lamport_timestamp; + +using routing_table = std::unordered_map; + +} // namespace broker::alm + +// -- CAF type aliases (2) ----------------------------------------------------- + +namespace broker { + using command_message = caf::cow_tuple; using data_message = caf::cow_tuple; +using endpoint_id_list = std::vector; using node_message_content = caf::variant; +using node_message = caf::cow_tuple; } // namespace broker @@ -100,11 +139,21 @@ class IdentifierUpdate; namespace broker::detail { struct retry_state; +struct store_state; class central_dispatcher; class flare_actor; class mailbox; +class peer_manager; +class unipath_command_sink; +class unipath_data_sink; class unipath_manager; +class unipath_source; + +enum class item_scope : uint8_t; + +using store_state_ptr = std::shared_ptr; +using weak_store_state_ptr = std::weak_ptr; } // namespace broker::detail @@ -145,7 +194,7 @@ static_assert(caf::has_type_id::value, static_assert(caf::has_type_id::value, "broker::timestamp != caf::timestamp"); -#define BROKER_ADD_ATOM(name) CAF_ADD_ATOM(broker, broker::atom, name) +#define BROKER_ADD_ATOM(...) 
CAF_ADD_ATOM(broker, broker::atom, __VA_ARGS__) #define BROKER_ADD_TYPE_ID(type) CAF_ADD_TYPE_ID(broker, type) @@ -154,12 +203,14 @@ CAF_BEGIN_TYPE_ID_BLOCK(broker, caf::first_custom_type_id) // -- atoms for generic communication ---------------------------------------- BROKER_ADD_ATOM(ack) - BROKER_ADD_ATOM(default_) + BROKER_ADD_ATOM(default_, "default") BROKER_ADD_ATOM(id) BROKER_ADD_ATOM(init) BROKER_ADD_ATOM(name) BROKER_ADD_ATOM(network) BROKER_ADD_ATOM(peer) + BROKER_ADD_ATOM(ping) + BROKER_ADD_ATOM(pong) BROKER_ADD_ATOM(read) BROKER_ADD_ATOM(retry) BROKER_ADD_ATOM(run) @@ -175,12 +226,14 @@ CAF_BEGIN_TYPE_ID_BLOCK(broker, caf::first_custom_type_id) // -- atoms for communication with stores ------------------------------------ BROKER_ADD_ATOM(attach) + BROKER_ADD_ATOM(await) BROKER_ADD_ATOM(clear) BROKER_ADD_ATOM(clone) BROKER_ADD_ATOM(decrement) BROKER_ADD_ATOM(erase) BROKER_ADD_ATOM(exists) BROKER_ADD_ATOM(expire) + BROKER_ADD_ATOM(idle) BROKER_ADD_ATOM(increment) BROKER_ADD_ATOM(keys) BROKER_ADD_ATOM(local) @@ -188,6 +241,7 @@ CAF_BEGIN_TYPE_ID_BLOCK(broker, caf::first_custom_type_id) BROKER_ADD_ATOM(mutable_check) BROKER_ADD_ATOM(resolve) BROKER_ADD_ATOM(restart) + BROKER_ADD_ATOM(revoke) BROKER_ADD_ATOM(stale_check) BROKER_ADD_ATOM(store) BROKER_ADD_ATOM(subtract) @@ -201,22 +255,32 @@ CAF_BEGIN_TYPE_ID_BLOCK(broker, caf::first_custom_type_id) // -- Broker type announcements ---------------------------------------------- + BROKER_ADD_TYPE_ID((broker::ack_clone_command)) BROKER_ADD_TYPE_ID((broker::add_command)) BROKER_ADD_TYPE_ID((broker::address)) + BROKER_ADD_TYPE_ID((broker::alm::lamport_timestamp)) + BROKER_ADD_TYPE_ID((broker::alm::multipath)) + BROKER_ADD_TYPE_ID((broker::attach_clone_command)) + BROKER_ADD_TYPE_ID((broker::attach_writer_command)) BROKER_ADD_TYPE_ID((broker::backend)) BROKER_ADD_TYPE_ID((broker::backend_options)) BROKER_ADD_TYPE_ID((broker::clear_command)) BROKER_ADD_TYPE_ID((broker::command_message)) + 
BROKER_ADD_TYPE_ID((broker::cumulative_ack_command)) BROKER_ADD_TYPE_ID((broker::data)) BROKER_ADD_TYPE_ID((broker::data_message)) BROKER_ADD_TYPE_ID((broker::detail::retry_state)) + BROKER_ADD_TYPE_ID((broker::detail::store_state_ptr)) BROKER_ADD_TYPE_ID((broker::ec)) BROKER_ADD_TYPE_ID((broker::endpoint_info)) + BROKER_ADD_TYPE_ID((broker::entity_id)) BROKER_ADD_TYPE_ID((broker::enum_value)) BROKER_ADD_TYPE_ID((broker::erase_command)) BROKER_ADD_TYPE_ID((broker::expire_command)) BROKER_ADD_TYPE_ID((broker::filter_type)) BROKER_ADD_TYPE_ID((broker::internal_command)) + BROKER_ADD_TYPE_ID((broker::keepalive_command)) + BROKER_ADD_TYPE_ID((broker::nack_command)) BROKER_ADD_TYPE_ID((broker::network_info)) BROKER_ADD_TYPE_ID((broker::node_message)) BROKER_ADD_TYPE_ID((broker::node_message_content)) @@ -227,12 +291,12 @@ CAF_BEGIN_TYPE_ID_BLOCK(broker, caf::first_custom_type_id) BROKER_ADD_TYPE_ID((broker::port)) BROKER_ADD_TYPE_ID((broker::put_command)) BROKER_ADD_TYPE_ID((broker::put_unique_command)) + BROKER_ADD_TYPE_ID((broker::put_unique_result_command)) + BROKER_ADD_TYPE_ID((broker::retransmit_failed_command)) BROKER_ADD_TYPE_ID((broker::sc)) BROKER_ADD_TYPE_ID((broker::set)) - BROKER_ADD_TYPE_ID((broker::set_command)) + BROKER_ADD_TYPE_ID((broker::shutdown_options)) BROKER_ADD_TYPE_ID((broker::snapshot)) - BROKER_ADD_TYPE_ID((broker::snapshot_command)) - BROKER_ADD_TYPE_ID((broker::snapshot_sync_command)) BROKER_ADD_TYPE_ID((broker::status)) BROKER_ADD_TYPE_ID((broker::subnet)) BROKER_ADD_TYPE_ID((broker::subtract_command)) @@ -246,13 +310,18 @@ CAF_BEGIN_TYPE_ID_BLOCK(broker, caf::first_custom_type_id) BROKER_ADD_TYPE_ID((caf::stream)) BROKER_ADD_TYPE_ID((caf::stream)) BROKER_ADD_TYPE_ID((caf::stream)) + BROKER_ADD_TYPE_ID((std::vector)) BROKER_ADD_TYPE_ID((std::vector)) BROKER_ADD_TYPE_ID((std::vector)) BROKER_ADD_TYPE_ID((std::vector)) BROKER_ADD_TYPE_ID((std::vector)) BROKER_ADD_TYPE_ID((std::vector)) + BROKER_ADD_TYPE_ID((std::vector)) 
CAF_END_TYPE_ID_BLOCK(broker) #undef BROKER_ADD_ATOM #undef BROKER_ADD_TYPE_ID + +CAF_ALLOW_UNSAFE_MESSAGE_TYPE(broker::detail::retry_state) +CAF_ALLOW_UNSAFE_MESSAGE_TYPE(broker::detail::store_state_ptr) diff --git a/include/broker/gateway.hh b/include/broker/gateway.hh new file mode 100644 index 00000000..2f73cba1 --- /dev/null +++ b/include/broker/gateway.hh @@ -0,0 +1,120 @@ +#pragma once + +#include + +#include + +#include "broker/domain_options.hh" +#include "broker/error.hh" +#include "broker/expected.hh" +#include "broker/fwd.hh" +#include "broker/timeout.hh" + +namespace broker { + +/// Partitions the global publish/subscribe layer into external and internal +/// domain. The gateway acts on behalf of all internal peers by channeling all +/// communication through itself. Peers in the internal domain are hidden in the +/// external domain and vice versa. +/// +/// The gateway forwards all messages published in one domain to the other +/// domain, but hides the original sender. The gateway appears as the source of +/// all messages it forwards from one domain to another. +/// +/// @warning The gateway assumes that peers from the external domain have no +/// peering relations with peers in the internal domain. Putting a gateway into +/// a network with alternative routing paths that bypass the gateway is going to +/// cause undefined behavior. +class gateway { +public: + // -- member types ----------------------------------------------------------- + + // -- constructors, destructors, and assignment operators -------------------- + + ~gateway(); + + gateway(gateway&&) = default; + + /// Tries to instantiate a new gateway with the default configuration. + static expected make(); + + /// Tries to instantiate a new gateway with the given configuration. + /// @param cfg Base configuration. Users can override parameters by providing + /// a `broker.conf`. + /// @param internal_adaptation Additional settings that affect only the + /// internal domain. 
+ /// @param external_adaptation Additional settings that affect only the + /// external domain. + static expected make(configuration cfg, + domain_options internal_adaptation, + domain_options external_adaptation); + // -- setup and teardown ----------------------------------------------------- + + /// @cond PRIVATE + + /// Configures a pair of core actors in disjointed domains to forward + /// published events to each other. + static void setup(const caf::actor& internal, const caf::actor& external); + + /// @endcond + + /// Shuts down all background activity and blocks until all local subscribers + /// and publishers have terminated. + void shutdown(); + + // -- properties ------------------------------------------------------------- + + const caf::actor& internal_core() const noexcept; + + const caf::actor& external_core() const noexcept; + + const configuration& config() const noexcept; + + // -- peer management -------------------------------------------------------- + + /// Listens at a specific port to accept remote peers in the internal domain. + /// @param address The interface to listen at. If empty, listen on all + /// local interfaces. + /// @param port The port to listen locally. If 0, the endpoint selects the + /// next available free port from the OS + /// @returns The port the endpoint bound to or 0 on failure. + uint16_t listen_internal(const std::string& address = {}, uint16_t port = 0); + + /// Listens at a specific port to accept remote peers in the external domain. + /// @param address The interface to listen at. If empty, listen on all + /// local interfaces. + /// @param port The port to listen locally. If 0, the endpoint selects the + /// next available free port from the OS + /// @returns The port the endpoint bound to or 0 on failure. + uint16_t listen_external(const std::string& address = {}, uint16_t port = 0); + + /// Initiates peerings with remote endpoints. + /// @param internal_peers List of endpoints in the internal domain. 
+ /// @param external_peers List of endpoints in the external domain. + /// @param retry If non-zero, seconds after which to retry if connection + /// cannot be established, or breaks. + /// @returns A `map` with all failed connection attempts. + std::map peer(const std::vector& internal_peers, + const std::vector& external_peers, + timeout::seconds retry = timeout::peer); + +private: + // -- member types ----------------------------------------------------------- + + /// Opaque PIMPL type. + struct impl; + + // -- utility and helper functions ------------------------------------------- + + uint16_t listen_impl(const caf::actor& core, const std::string& address, + uint16_t port); + + // -- constructors, destructors, and assignment operators -------------------- + + gateway(std::unique_ptr&&); + + /// Pointer-to-implementation. + std::unique_ptr ptr_; +}; + +} // namespace broker diff --git a/include/broker/internal_command.hh b/include/broker/internal_command.hh index c0b8bdae..f456dbe4 100644 --- a/include/broker/internal_command.hh +++ b/include/broker/internal_command.hh @@ -8,233 +8,284 @@ #include #include "broker/data.hh" +#include "broker/detail/channel.hh" +#include "broker/entity_id.hh" #include "broker/fwd.hh" -#include "broker/publisher_id.hh" +#include "broker/snapshot.hh" #include "broker/time.hh" namespace broker { +// -- meta information --------------------------------------------------------- + +enum class command_tag { + /// Identifies commands that represent an *action* on the data store. For + /// example, adding or removing elements. + action, + /// Identifies control flow commands that a producer sends to its consumers. + producer_control, + /// Identifies control flow commands that a consumer sends to its producer. + consumer_control, +}; + +std::string to_string(command_tag); + +// -- utility for BROKER_ACTION_COMMAND ---------------------------------------- + +#define BROKER_PP_EXPAND(...) 
__VA_ARGS__ + +namespace detail { + +template +bool inspect_impl(Inspector& f, T& obj, caf::string_view pretty_name, + caf::string_view (&names)[N], Tuple refs, + std::index_sequence) { + static_assert(N == sizeof...(Is)); + return f.object(obj) + .pretty_name(pretty_name) + .fields(f.field(names[Is], std::get(refs))...); +} + +} // namespace detail + +// -- broadcast: operations on the key-value store such as put and erase ------- + +/// Adds a `publisher` field, tags the class as `action` command, and implements +/// an inspect overload. +#define BROKER_ACTION_COMMAND(name, field_names_pack, ...) \ + entity_id publisher; \ + static constexpr auto tag = command_tag::action; \ + template \ + friend typename Inspector::result_type inspect(Inspector& f, \ + name##_command& x) { \ + auto& [__VA_ARGS__, publisher] = x; \ + caf::string_view field_names[] = { \ + BROKER_PP_EXPAND field_names_pack, \ + "publisher", \ + }; \ + auto refs = std::forward_as_tuple(__VA_ARGS__, publisher); \ + std::make_index_sequence::value> iseq; \ + return detail::inspect_impl(f, x, #name, field_names, refs, iseq); \ + } + /// Sets a value in the key-value store. -struct put_command { +struct put_command { data key; data value; caf::optional expiry; - publisher_id publisher; + BROKER_ACTION_COMMAND(put, ("key", "value", "expiry"), key, value, expiry) }; -template -bool inspect(Inspector& f, put_command& x) { - return f.object(x).fields(f.field("key", x.key), f.field("value", x.value), - f.field("expiry", x.expiry), - f.field("publisher", x.publisher)); -} - /// Sets a value in the key-value store if its key does not already exist. 
struct put_unique_command { data key; data value; caf::optional expiry; - caf::actor who; + entity_id who; request_id req_id; - publisher_id publisher; + BROKER_ACTION_COMMAND(put_unique, ("key", "value", "expiry", "who", "req_id"), + key, value, expiry, who, req_id) }; -template -bool inspect(Inspector& f, put_unique_command& x) { - return f.object(x).fields(f.field("key", x.key), f.field("value", x.value), - f.field("expiry", x.expiry), f.field("who", x.who), - f.field("req_id", x.req_id), - f.field("publisher", x.publisher)); -} +/// Reports whether a preceding put_unique_command inserted its value. +struct put_unique_result_command { + bool inserted; + entity_id who; + request_id req_id; + BROKER_ACTION_COMMAND(put_unique_result, ("inserted", "who", "req_id"), + inserted, who, req_id) +}; /// Removes a value in the key-value store. struct erase_command { data key; - publisher_id publisher; + BROKER_ACTION_COMMAND(erase, ("key"), key) }; -template -bool inspect(Inspector& f, erase_command& x) { - return f.object(x).fields(f.field("key", x.key), - f.field("publisher", x.publisher)); -} - /// Removes a value in the key-value store as a result of an expiration. The /// master sends this message type to the clones in order to allow them to /// differentiate between a user actively removing an entry versus the master /// removing it after expiration. struct expire_command { data key; - publisher_id publisher; + BROKER_ACTION_COMMAND(expire, ("key"), key) }; -template -bool inspect(Inspector& f, expire_command& x) { - return f.object(x).fields(f.field("key", x.key), - f.field("publisher", x.publisher)); -} - /// Adds a value to the existing value. 
struct add_command { data key; data value; data::type init_type; caf::optional expiry; - publisher_id publisher; + BROKER_ACTION_COMMAND(add, ("key", "value", "init_type", "expiry"), key, + value, init_type, expiry) }; -template -bool inspect(Inspector& f, add_command& x) { - return f.object(x).fields(f.field("key", x.key), f.field("value", x.value), - f.field("init_type", x.init_type), - f.field("expiry", x.expiry), - f.field("publisher", x.publisher)); -} - /// Subtracts a value to the existing value. struct subtract_command { data key; data value; caf::optional expiry; - publisher_id publisher; + BROKER_ACTION_COMMAND(subtract, ("key", "value", "expiry"), key, value, + expiry) }; -template -bool inspect(Inspector& f, subtract_command& x) { - return f.object(x).fields(f.field("key", x.key), f.field("value", x.value), - f.field("expiry", x.expiry), - f.field("publisher", x.publisher)); -} +/// Drops all values. +struct clear_command { + entity_id publisher; + static constexpr auto tag = command_tag::action; + template + friend bool inspect(Inspector& f, clear_command& x) { + return f.object(x) + .pretty_name("clear") // + .fields(f.field("publisher", x.publisher)); + } +}; -/// Causes the master to reply with a snapshot of its state. -struct snapshot_command { - caf::actor remote_core; - caf::actor remote_clone; +#undef BROKER_ACTION_COMMAND + +// -- unicast communication between clone and master --------------------------- + +/// Tags the class as `control` command, and implements an inspect overload. +#define BROKER_CONTROL_COMMAND(origin, name, field_names_pack, ...) 
\ + static constexpr auto tag = command_tag::origin##_control; \ + template \ + friend typename Inspector::result_type inspect(Inspector& f, \ + name##_command& x) { \ + auto& [__VA_ARGS__] = x; \ + caf::string_view field_names[] = {BROKER_PP_EXPAND field_names_pack}; \ + auto refs = std::forward_as_tuple(__VA_ARGS__); \ + std::make_index_sequence::value> iseq; \ + return detail::inspect_impl(f, x, #name, field_names, refs, iseq); \ + } + +/// Causes the master to add `remote_clone` to its list of clones. +struct attach_clone_command { + static constexpr auto tag = command_tag::consumer_control; + + template + friend bool inspect(Inspector& f, attach_clone_command& x) { + return f.object(x).pretty_name("attach_clone").fields(); + } }; -template -bool inspect(Inspector& f, snapshot_command& x) { - return f.object(x).fields(f.field("remote_core", x.remote_core), - f.field("remote_clone", x.remote_clone)); -} +/// Causes the master to add a store writer to its list of inputs. Also acts as +/// handshake for the channel. +struct attach_writer_command { + detail::sequence_number_type offset; + detail::tick_interval_type heartbeat_interval; + BROKER_CONTROL_COMMAND(producer, attach_writer, + ("offset", "heartbeat_interval"), offset, + heartbeat_interval) +}; -/// Since snapshots are sent to clones on a different channel, this allows -/// clones to coordinate the reception of snapshots with the stream of -/// updates that the master may have independently made to it. -struct snapshot_sync_command { - caf::actor remote_clone; +/// Confirms a clone and transfers the initial snapshot to a clone. 
+struct ack_clone_command { + detail::sequence_number_type offset; + detail::tick_interval_type heartbeat_interval; + snapshot state; + BROKER_CONTROL_COMMAND(producer, ack_clone, + ("offset", "heartbeat_interval", "state"), offset, + heartbeat_interval, state) }; -template -bool inspect(Inspector& f, snapshot_sync_command& x) { - return f.object(x).fields(f.field("remote_clone", x.remote_clone)); -} +/// Informs the receiver that the sender successfully handled all messages up to +/// a certain sequence number. +struct cumulative_ack_command { + detail::sequence_number_type seq; + BROKER_CONTROL_COMMAND(consumer, cumulative_ack, ("seq"), seq) +}; -/// Sets the full state of all receiving replicates to the included snapshot. -struct set_command { - std::unordered_map state; +/// Informs the receiver that one or more commands failed to reach the sender. +struct nack_command { + std::vector seqs; + BROKER_CONTROL_COMMAND(consumer, nack, ("seqs"), seqs) }; -template -bool inspect(Inspector& f, set_command& x) { - return f.object(x).fields(f.field("state", x.state)); -} +/// Informs all receivers that the sender is still alive. +struct keepalive_command { + detail::sequence_number_type seq; + BROKER_CONTROL_COMMAND(producer, keepalive, ("seq"), seq) +}; -/// Drops all values. -struct clear_command { - publisher_id publisher; +/// Notifies the receiver that the sender can no longer retransmit a command. 
+struct retransmit_failed_command { + detail::sequence_number_type seq; + BROKER_CONTROL_COMMAND(producer, retransmit_failed, ("seq"), seq) }; -template -bool inspect(Inspector& f, clear_command& x) { - return f.object(x).fields(); -} +#undef BROKER_CONTROL_COMMAND + +// -- variant setup ------------------------------------------------------------ class internal_command { public: enum class type : uint8_t { - none, put_command, put_unique_command, + put_unique_result_command, erase_command, expire_command, add_command, subtract_command, - snapshot_command, - snapshot_sync_command, - set_command, clear_command, + attach_clone_command, + attach_writer_command, + keepalive_command, + cumulative_ack_command, + nack_command, + ack_clone_command, + retransmit_failed_command, }; using variant_type - = caf::variant; + = caf::variant; - variant_type content; + detail::sequence_number_type seq; - internal_command(variant_type value); + entity_id sender; - internal_command() = default; - internal_command(internal_command&&) = default; - internal_command(const internal_command&) = default; - internal_command& operator=(internal_command&&) = default; - internal_command& operator=(const internal_command&) = default; + variant_type content; }; -template -internal_command make_internal_command(Ts&&... 
xs) { - return internal_command{T{std::forward(xs)...}}; -} - template bool inspect(Inspector& f, internal_command& x) { - return f.object(x).fields(f.field("content", x.content)); + return f.object(x).fields(f.field("seq", x.seq), f.field("sender", x.sender), + f.field("content", x.content)); } -namespace detail { - -template -using internal_command_tag_token - = std::integral_constant; - -template -struct internal_command_tag_oracle; - -#define INTERNAL_COMMAND_TAG_ORACLE(type_name) \ - template <> \ - struct internal_command_tag_oracle \ - : internal_command_tag_token {} - -INTERNAL_COMMAND_TAG_ORACLE(none); -INTERNAL_COMMAND_TAG_ORACLE(put_command); -INTERNAL_COMMAND_TAG_ORACLE(put_unique_command); -INTERNAL_COMMAND_TAG_ORACLE(erase_command); -INTERNAL_COMMAND_TAG_ORACLE(expire_command); -INTERNAL_COMMAND_TAG_ORACLE(add_command); -INTERNAL_COMMAND_TAG_ORACLE(subtract_command); -INTERNAL_COMMAND_TAG_ORACLE(snapshot_command); -INTERNAL_COMMAND_TAG_ORACLE(snapshot_sync_command); -INTERNAL_COMMAND_TAG_ORACLE(set_command); -INTERNAL_COMMAND_TAG_ORACLE(clear_command); - -#undef INTERNAL_COMMAND_TAG_ORACLE +} // namespace broker -} // namespace detail +namespace broker::detail { + +constexpr command_tag command_tag_by_type[] = { + put_command::tag, + put_unique_command::tag, + put_unique_result_command::tag, + erase_command::tag, + expire_command::tag, + add_command::tag, + subtract_command::tag, + clear_command::tag, + attach_clone_command::tag, + attach_writer_command::tag, + keepalive_command::tag, + cumulative_ack_command::tag, + nack_command::tag, + ack_clone_command::tag, + retransmit_failed_command::tag, +}; -/// Returns the `internal_command::type` tag for `T`. 
-/// @relates internal_internal_command -template -constexpr internal_command::type internal_command_tag() { - return detail::internal_command_tag_oracle::value; +inline command_tag tag_of(const internal_command& cmd) { + return command_tag_by_type[cmd.content.index()]; } -/// Returns the `internal_command::type` tag for `T` as `uint8_t`. -/// @relates internal_internal_command -template -constexpr uint8_t internal_command_uint_tag() { - return static_cast(detail::internal_command_tag_oracle::value); +inline internal_command::type type_of(const internal_command& cmd) { + return static_cast(cmd.content.index()); } -} // namespace broker +} // namespace broker::detail diff --git a/include/broker/message.hh b/include/broker/message.hh index fcc6b201..3944efbd 100644 --- a/include/broker/message.hh +++ b/include/broker/message.hh @@ -5,6 +5,7 @@ #include #include +#include "broker/alm/multipath.hh" #include "broker/data.hh" #include "broker/internal_command.hh" #include "broker/topic.hh" @@ -17,20 +18,17 @@ using data_message = caf::cow_tuple; /// A broker-internal message with topic and command. using command_message = caf::cow_tuple; -/// Value type of `node_message`. +/// A broker-internal message between two endpoints. using node_message_content = caf::variant; +/// Ordered, reliable communication channel between data stores. +using command_channel = detail::channel; + /// A message for node-to-node communication with either a user-defined data /// message or a broker-internal command messages. -struct node_message { - using value_type = node_message_content; - - /// Content of the message. - node_message_content content; - - /// Time-to-life counter. - uint16_t ttl; -}; +using node_message = caf::cow_tuple< // Fields: + node_message_content, // 0: content + alm::multipath>; // 1: path /// Returns whether `x` contains a ::node_message. 
inline bool is_data_message(const node_message_content& x) { @@ -39,7 +37,7 @@ inline bool is_data_message(const node_message_content& x) { /// Returns whether `x` contains a ::node_message. inline bool is_data_message(const node_message& x) { - return is_data_message(x.content); + return is_data_message(get<0>(x)); } /// Returns whether `x` contains a ::command_message. @@ -49,14 +47,7 @@ inline bool is_command_message(const node_message_content& x) { /// Returns whether `x` contains a ::command_message. inline bool is_command_message(const node_message& x) { - return is_command_message(x.content); -} - -/// @relates node_message -template -bool inspect(Inspector& f, node_message& x) { - return f.object(x).fields(f.field("content", x.content), - f.field("ttl", x.ttl)); + return is_command_message(get<0>(x)); } /// Generates a ::data_message. @@ -73,8 +64,8 @@ command_message make_command_message(Topic&& t, Command&& d) { /// Generates a ::node_message. template -node_message make_node_message(Value&& value, uint16_t ttl) { - return {std::forward(value), ttl}; +node_message make_node_message(Value&& value, alm::multipath path) { + return node_message{std::forward(value), std::move(path)}; } /// Retrieves the topic from a ::data_message. @@ -91,13 +82,12 @@ inline const topic& get_topic(const command_message& x) { inline const topic& get_topic(const node_message_content& x) { if (is_data_message(x)) return get_topic(caf::get(x)); - else - return get_topic(caf::get(x)); + return get_topic(caf::get(x)); } /// Retrieves the topic from a ::generic_message. inline const topic& get_topic(const node_message& x) { - return get_topic(x.content); + return get_topic(get<0>(x)); } /// Moves the topic out of a ::data_message. Causes `x` to make a lazy copy of @@ -124,7 +114,7 @@ inline topic&& move_topic(node_message_content& x) { /// Moves the topic out of a ::node_message. Causes `x` to make a lazy copy of /// its content if other ::node_message objects hold references to it. 
inline topic&& move_topic(node_message& x) { - return move_topic(x.content); + return move_topic(get<0>(x.unshared())); } /// Retrieves the data from a @ref data_message. @@ -138,36 +128,21 @@ inline data&& move_data(data_message& x) { return std::move(get<1>(x.unshared())); } -/// Unboxes the content of `x` and calls `get_data` on the nested -/// @ref data_message. -/// @pre `is_data_message(x)` -inline const data& get_data(const node_message& x) { - return get_data(get(x.content)); -} - -/// Unboxes the content of `x` and calls `move_data` on the nested -/// @ref data_message. -/// @pre `is_data_message(x)` -inline data&& move_data(node_message& x) { - return move_data(get(x.content)); -} - /// Retrieves the command content from a ::command_message. -inline const internal_command::variant_type& -get_command(const command_message& x) { - return get<1>(x).content; +inline const internal_command& get_command(const command_message& x) { + return get<1>(x); } /// Moves the command content out of a ::command_message. Causes `x` to make a /// lazy copy of its content if other ::command_message objects hold references /// to it. -inline internal_command::variant_type&& move_command(command_message& x) { - return std::move(get<1>(x.unshared()).content); +inline internal_command&& move_command(command_message& x) { + return std::move(get<1>(x.unshared())); } /// Retrieves the content from a ::data_message. inline const node_message_content& get_content(const node_message& x) { - return x.content; + return get<0>(x); } /// Force `x` to become uniquely referenced. Performs a deep-copy of the content @@ -192,7 +167,67 @@ inline void force_unshared(node_message_content& x) { /// @copydoc force_unshared inline void force_unshared(node_message& x) { - force_unshared(x.content); + x.unshared(); +} + +/// Moves the content out of a ::node_message. Causes `x` to make a lazy copy of +/// its content if other ::node_message objects hold references to it. 
+inline node_message_content&& move_content(node_message& x) { + return std::move(get<0>(x.unshared())); +} + +/// Retrieves the path from a ::data_message. +inline const auto& get_path(const node_message& x) { + return get<1>(x); +} + +/// Get unshared access the path field of a ::node_message. Causes `x` to make a +/// lazy copy of its content if other ::node_message objects hold references to +/// it. +inline auto& get_unshared_path(node_message& x) { + return get<1>(x.unshared()); +} + +/// Shortcut for `get(get_content(x))`. +/// @pre `is_data_message(x)` +inline const data_message& get_data_message(const node_message& x) { + return get(get_content(x)); +} + +/// Shortcut for `get(get_content(x))`. +/// @pre `is_data_message(x)` +inline const command_message& get_command_message(const node_message& x) { + return get(get_content(x)); +} + +/// Shortcut for `get(x)`. +/// @pre `is_data_message(x)` +inline const data_message& get_data_message(const node_message_content& x) { + return get(x); +} + +/// Shortcut for `get(x)`. +/// @pre `is_data_message(x)` +inline data_message& get_data_message(node_message_content& x) { + return get(x); +} + +/// Shortcut for `get(x)`. +/// @pre `is_data_message(x)` +inline const command_message& get_command_message(const node_message_content& x) { + return caf::get(x); +} + +/// Shortcut for `get(x)`. +/// @pre `is_command_message(x)` +inline command_message& get_command_message(node_message_content& x) { + return caf::get(x); +} + +/// Shortcut for `get_data(get(get_content(x)))`. +/// @pre `is_data_message(x)` +inline const data& get_data(const node_message& x) { + return get_data(get_data_message(x)); } /// Converts `msg` to a human-readable string representation. @@ -201,4 +236,160 @@ std::string to_string(const data_message& msg); /// Converts `msg` to a human-readable string representation. std::string to_string(const command_message& msg); +/// Converts `msg` to a human-readable string representation. 
+std::string to_string(const node_message& msg); + } // namespace broker + +// CAF ships node messages in batches. However, simply packing node messages +// into a list can result in a lot of redundant data on the wire. Chances are +// the node messages share some topics or source routing information. +// +// In order to pack data more efficiently on the wire, we specialize +// caf::inspector_access for the batch type and then pull out topics and +// multipaths. The actual payload (either broker::data or internal_command) then +// references topic and path by index and we re-assemble everything back to node +// messages during deserialization. +// +// All intermediary buffers are thread-local variables in order to reduce the +// number of heap allocations. These buffers grow to the size of the largest +// batch during runtime and then reach a state where they no longer need to +// allocate any new memory. + +namespace broker::detail { + +template +class indexed_cache { +public: + using value_type = T; + + uint32_t operator[](const T& val) { + for (size_t index = 0; index < buf_.size(); ++index) + if (buf_[index] == val) + return static_cast(index); + auto res = static_cast(buf_.size()); + buf_.emplace_back(val); + return res; + } + + const T* find(uint32_t index) { + if (index < buf_.size()) + return std::addressof(buf_[index]); + else + return nullptr; + } + + void clear() { + return buf_.clear(); + } + + template + friend bool inspect(Inspector& f, indexed_cache& x) { + return f.apply(x.buf_); + } + +private: + std::vector buf_; +}; + +using topic_cache_type = indexed_cache; + +using path_cache_type = indexed_cache; + +using content_buf_type = std::vector< + std::tuple>>; + +topic_cache_type& thread_local_topic_cache(); + +path_cache_type& thread_local_path_cache(); + +content_buf_type& thread_local_content_buf(); + +} // namespace broker::detail + +namespace caf { + +template <> +struct inspector_access> +: inspector_access_base> { + using value_type = 
std::vector; + + template + static bool load(Inspector& f, value_type& x) { + auto& paths = broker::detail::thread_local_path_cache(); + auto& topics = broker::detail::thread_local_topic_cache(); + auto& contents = broker::detail::thread_local_content_buf(); + auto ok = f.begin_tuple(3) // + && f.apply(paths) // + && f.apply(topics) // + && f.apply(contents) // + && f.end_tuple(); + if (ok) { + x.clear(); + for (auto& [path_index, topic_index, val] : contents) { + auto* path_ptr = paths.find(path_index); + auto* topic_ptr = topics.find(topic_index); + if (path_ptr && topic_ptr) { + if (holds_alternative(val)) { + auto& dval = get(val); + auto dmsg = make_data_message(*topic_ptr, std::move(dval)); + x.emplace_back(make_node_message(std::move(dmsg), *path_ptr)); + } else { + auto& cval = get(val); + auto cmsg = make_command_message(*topic_ptr, std::move(cval)); + x.emplace_back(make_node_message(std::move(cmsg), *path_ptr)); + } + } else { + f.emplace_error(caf::sec::load_callback_failed, + "batch re-assembly failed"); + return false; + } + } + return true; + } else { + return false; + } + } + + template + static bool save(Inspector& f, value_type& x) { + auto& paths = broker::detail::thread_local_path_cache(); + auto& topics = broker::detail::thread_local_topic_cache(); + auto& contents = broker::detail::thread_local_content_buf(); + paths.clear(); + topics.clear(); + contents.clear(); + for (auto& entry : x) { + auto& [content, path] = entry.data(); + auto path_index = paths[get_path(entry)]; + if (is_data_message(content)) { + auto& [topic, value] = get_data_message(content).data(); + auto topic_index = topics[topic]; + contents.emplace_back(topic_index, path_index, value); + } else { + auto& [topic, cmd] = get_command_message(content).data(); + auto topic_index = topics[topic]; + contents.emplace_back(topic_index, path_index, cmd); + } + } + return f.begin_tuple(3) // + && f.apply(paths) // + && f.apply(topics) // + && f.apply(contents) // + && f.end_tuple(); + } 
+ + template + static bool apply(Inspector& f, value_type& x) { + if (!f.has_human_readable_format()) { + if constexpr (Inspector::is_loading) + return load(f, x); + else + return save(f, x); + } else { + return f.list(x); + } + } +}; + +} // namespace caf diff --git a/include/broker/mixin/connector.hh b/include/broker/mixin/connector.hh index f0649e67..42e8449a 100644 --- a/include/broker/mixin/connector.hh +++ b/include/broker/mixin/connector.hh @@ -22,44 +22,66 @@ namespace broker::mixin { /// (atom::publish, network_info addr, data_message msg) -> void /// => try_publish(addr, msg, self->make_response_promise()) /// ~~~ -template +template class connector : public Base { public: + // -- member types ----------------------------------------------------------- + using extended_base = connector; using super = Base; - using peer_id_type = typename super::peer_id_type; - - using communication_handle_type = typename Base::communication_handle_type; + // -- constructors, destructors, and assignment operators -------------------- template - explicit connector(Ts&&... xs) - : super(std::forward(xs)...), cache_(super::self()) { + explicit connector(caf::event_based_actor* self, Ts&&... xs) + : super(self, std::forward(xs)...), cache_(self) { // nop } + connector() = delete; + + connector(const connector&) = delete; + + connector& operator=(const connector&) = delete; + + // -- properties ------------------------------------------------------------- + + detail::network_cache& cache() { + return cache_; + } + + // -- lazy connection management --------------------------------------------- + void try_peering(const network_info& addr, caf::response_promise rp, uint32_t count) { - BROKER_TRACE(BROKER_ARG(count)); + BROKER_TRACE(BROKER_ARG(addr) << BROKER_ARG(count)); auto self = super::self(); // Fetch the comm. handle from the cache and with that fetch the ID from the // remote peer via direct request messages. 
cache_.fetch( addr, - [=](communication_handle_type hdl) mutable { - BROKER_DEBUG("lookup successful:" << BROKER_ARG(addr) - << BROKER_ARG(hdl)); - dref().start_peering(hdl.node(), hdl, std::move(rp)); + [=](caf::actor hdl) mutable { + // TODO: replace hardcoded timeout with some configuration parameter + self + ->request(hdl, std::chrono::minutes(10), atom::ping_v, this->id(), + self) + .then( + [=](atom::pong, const endpoint_id& remote_id, + [[maybe_unused]] caf::actor hdl2) { + BROKER_ASSERT(hdl == hdl2); + this->start_peering(remote_id, hdl, rp); + }, + [=](error& err) mutable { rp.deliver(std::move(err)); }); }, [=](error err) mutable { - BROKER_DEBUG("lookup failed:" << BROKER_ARG(addr) << BROKER_ARG(err)); - dref().peer_unavailable(addr); - if (addr.retry.count() == 0 && ++count < 10) { + this->peer_unavailable(addr); + ++count; // Tracked, but currently unused; could implement max. count. + if (addr.retry.count() == 0) { rp.deliver(std::move(err)); } else { self->delayed_send(self, addr.retry, - detail::retry_state{addr, std::move(rp), count}); + detail::retry_state{addr, rp, count}); } }); } @@ -67,68 +89,127 @@ public: void try_publish(const network_info& addr, data_message& msg, caf::response_promise rp) { auto self = super::self(); + auto deliver_err = [=](error err) mutable { rp.deliver(std::move(err)); }; cache_.fetch( addr, - [=, msg{std::move(msg)}](communication_handle_type hdl) mutable { - dref().ship(msg, hdl); - rp.deliver(caf::unit); + [=, msg{std::move(msg)}](caf::actor hdl) mutable { + if (auto i = ids_.find(hdl); i != ids_.end()) { + if (this->dispatch_to(msg, i->second)) { + rp.deliver(); + } else { + auto err = make_error(ec::no_path_to_peer, to_string(addr)); + rp.deliver(std::move(err)); + } + } else { + // TODO: replace infinite with some useful default / config parameter + self->request(hdl, caf::infinite, atom::get_v, atom::id_v) + .then( + [=, msg{std::move(msg)}](const endpoint_id& remote_id) mutable { + ids_.emplace(hdl, 
remote_id); + if (this->dispatch_to(msg, i->second)) { + rp.deliver(); + } else { + auto err = make_error(ec::no_path_to_peer, to_string(addr)); + rp.deliver(std::move(err)); + } + }, + deliver_err); + } }, - [=](error err) mutable { rp.deliver(std::move(err)); }); + deliver_err); } - void peer_disconnected(const peer_id_type& peer_id, - const communication_handle_type& hdl, - const error& reason) { - if (!dref().shutting_down()) { - auto x = cache_.find(hdl); - if (x && x->retry != timeout::seconds(0)) { - cache_.remove(hdl); - BROKER_INFO("will try reconnecting to" << *x << "in" - << to_string(x->retry)); - auto self = super::self(); - self->delayed_send(self, x->retry, atom::peer_v, atom::retry_v, *x); - } - } - super::peer_disconnected(peer_id, hdl, reason); - } - - template - caf::behavior make_behavior(Fs... fs) { - using detail::lift; - auto& d = dref(); - return super::make_behavior( - std::move(fs)..., + caf::behavior make_behavior() override { + return caf::message_handler{ [=](atom::peer, const network_info& addr) { - dref().try_peering(addr, super::self()->make_response_promise(), 0); - }, - [=](atom::peer, atom::retry, network_info& addr) { - dref().try_peering(addr, caf::response_promise{}, 0); - }, - [=](detail::retry_state& st) { - dref().try_peering(st.addr, std::move(st.rp), st.count); + this->try_peering(addr, super::self()->make_response_promise(), 0); }, [=](atom::publish, const network_info& addr, data_message& msg) { - dref().try_publish(addr, msg, super::self()->make_response_promise()); + this->try_publish(addr, msg, super::self()->make_response_promise()); }, [=](atom::unpeer, const network_info& addr) { if (auto hdl = cache_.find(addr)) - dref().unpeer(*hdl); + this->unpeer(*hdl); else - dref().cannot_remove_peer(addr); - }); + this->cannot_remove_peer(addr); + }, + [=](detail::retry_state& x) { + this->try_peering(x.addr, std::move(x.rp), x.count); + }, + [=](atom::ping, const endpoint_id& peer_id, const caf::actor& hdl) { + // This step 
only exists to populate the network caches on both sides + // before starting the actual handshake. + auto rp = this->self()->make_response_promise(); + cache_.fetch( + hdl, + [this, rp](const network_info&) mutable { + rp.deliver(atom::pong_v, this->id(), this->self()); + }, + [rp](error err) mutable { rp.deliver(std::move(err)); }); + return rp; + }, + } + .or_else(super::make_behavior()); } - auto& cache() { - return cache_; + // -- overrides -------------------------------------------------------------- + + void peer_disconnected(const endpoint_id& peer_id, const caf::actor& hdl, + const error& reason) override { + // Lost network connection: try reconnecting. + BROKER_TRACE(BROKER_ARG(peer_id) << BROKER_ARG(hdl) << BROKER_ARG(reason)); + if (auto addr = cache_.find(hdl)) { + // Drop any previous state and trigger a new connection cycle. + ids_.erase(hdl); + cache_.remove(hdl); + // The naive thing to do here would be calling + // `this->try_peering(*addr, {}, 0)` to trigger the reconnect loop. + // However, `peer_disconnected` is not necessarily triggered by a + // disconnect. Broker endpoints tear down the peering relations as part of + // a regular shutdown. Hence, we must somehow delay the reconnect attempts + // until the connection actually ceased to exist. + // TODO: when fully switching to CAF 0.18, we should use the new node + // monitoring and `node_down_msg` signaling instead for a cleaner + // and ultimately more robust implementation. Using attach on the + // actor handle assumes that the down message triggers after losing + // connection to the remote endpoint actor. That's not necessarily + // the case, though. We could still see the down message before CAF + // actually shuts down the connection. Shutting down the endpoint + // actor generally comes last before tearing down a Broker process, + // so adding the 250ms delay at least makes it very unlikely that + // the connection still exists. 
Still, this entire block is a hack + // and we should move on to node monitoring once we no longer care + // for CAF 0.17 compatibility. + if (addr->retry.count() > 0) { + auto weak_self = this->self()->address(); + hdl->attach_functor( + [weak_self, addr{std::move(*addr)}](const caf::error& rsn) mutable { + // Trigger reconnect after 250ms when still alive and kicking. + if (auto strong_self = caf::actor_cast(weak_self)) + caf::delayed_anon_send(caf::actor(strong_self), + std::chrono::milliseconds{250}, + atom::peer_v, std::move(addr)); + }); + } + } + super::peer_disconnected(peer_id, hdl, reason); } -private: - Subtype& dref() { - return static_cast(*this); + void peer_removed(const endpoint_id& peer_id, + const caf::actor& hdl) override { + // Graceful removal by the user: remove all state associated to the peer. + BROKER_TRACE(BROKER_ARG(peer_id) << BROKER_ARG(hdl)); + ids_.erase(hdl); + cache_.remove(hdl); + super::peer_removed(peer_id, hdl); } +private: /// Associates network addresses to remote actor handles and vice versa. detail::network_cache cache_; + + /// Maps remote actor handles to peer IDs. + std::unordered_map ids_; }; } // namespace broker::mixin diff --git a/include/broker/mixin/data_store_manager.hh b/include/broker/mixin/data_store_manager.hh index b9a5664c..7b5bf553 100644 --- a/include/broker/mixin/data_store_manager.hh +++ b/include/broker/mixin/data_store_manager.hh @@ -16,11 +16,12 @@ #include "broker/endpoint.hh" #include "broker/filter_type.hh" #include "broker/logger.hh" +#include "broker/shutdown_options.hh" #include "broker/topic.hh" namespace broker::mixin { -template +template class data_store_manager : public Base { public: // --- member types ---------------------------------------------------------- @@ -36,11 +37,18 @@ public: // --- construction and destruction ------------------------------------------ template - explicit data_store_manager(endpoint::clock* clock, Ts&&... 
xs) - : super(std::forward(xs)...), clock_(clock) { + data_store_manager(caf::event_based_actor* self, endpoint::clock* clock, + Ts&&... xs) + : super(self, std::forward(xs)...), clock_(clock) { // nop } + data_store_manager() = delete; + + data_store_manager(const data_store_manager&) = delete; + + data_store_manager& operator=(const data_store_manager&) = delete; + // -- properties ------------------------------------------------------------- /// Returns whether a master for `name` probably exists already on one of our @@ -48,7 +56,7 @@ public: bool has_remote_master(const std::string& name) { // If we don't have a master recorded locally, we could still have a // propagated filter to a remote core hosting a master. - return dref().has_remote_subscriber(name / topics::master_suffix); + return this->has_remote_subscriber(name / topics::master_suffix); } const auto& masters() const noexcept { @@ -77,10 +85,10 @@ public: BROKER_ASSERT(ptr != nullptr); BROKER_INFO("spawning new master:" << name); auto self = super::self(); - auto ms = self->template spawn(detail::master_actor, self, - name, std::move(ptr), clock_); + auto ms = self->template spawn( + detail::master_actor, this->id(), self, name, std::move(ptr), clock_); filter_type filter{name / topics::master_suffix}; - if (auto err = dref().add_store(ms, filter)) + if (auto err = this->add_store(ms, filter)) return err; masters_.emplace(name, ms); return ms; @@ -101,12 +109,11 @@ public: return i->second; BROKER_INFO("spawning new clone:" << name); auto self = super::self(); - auto cl = self->template spawn(detail::clone_actor, self, name, - resync_interval, stale_interval, - mutation_buffer_interval, - clock_); + auto cl = self->template spawn( + detail::clone_actor, this->id(), self, name, resync_interval, + stale_interval, mutation_buffer_interval, clock_); filter_type filter{name / topics::clone_suffix}; - if (auto err = dref().add_store(cl, filter)) + if (auto err = this->add_store(cl, filter)) return err; 
clones_.emplace(name, cl); return cl; @@ -120,39 +127,44 @@ public: return ec::no_such_master; } - /// Instructs the master of the given store to generate a snapshot. - void snapshot(const std::string& name, caf::actor& clone) { - auto msg = make_internal_command(super::self(), - std::move(clone)); - dref().publish(make_command_message(name / topics::master_suffix, msg)); - } - /// Detaches all masters and clones by sending exit messages to the /// corresponding actors. void detach_stores() { + BROKER_TRACE(BROKER_ARG2("masters_.size()", masters_.size()) + << BROKER_ARG2("clones_.size()", clones_.size())); auto self = super::self(); auto f = [&](auto& container) { - for (auto& kvp : container) - self->send_exit(kvp.second, caf::exit_reason::user_shutdown); + for (auto& kvp : container) { + self->send_exit(kvp.second, caf::exit_reason::kill); + // TODO: re-implement graceful shutdown + // self->send_exit(kvp.second, caf::exit_reason::user_shutdown); + } container.clear(); }; f(masters_); f(clones_); } + // -- overrides -------------------------------------------------------------- + + void shutdown(shutdown_options options) override { + detach_stores(); + super::shutdown(options); + } + // -- factories -------------------------------------------------------------- - template - caf::behavior make_behavior(Fs... 
fs) { + caf::behavior make_behavior() override { using detail::lift; - auto& d = dref(); - return super::make_behavior( - std::move(fs)..., - lift(d, &Subtype::attach_clone), - lift(d, &Subtype::attach_master), - lift(d, &Subtype::get_master), - lift(d, &Subtype::snapshot), - lift(d, &Subtype::detach_stores), + return caf::message_handler{ + lift( + *this, &data_store_manager::attach_clone), + lift( + *this, &data_store_manager::attach_master), + lift( + *this, &data_store_manager::get_master), + lift(*this, + &data_store_manager::detach_stores), [this](atom::store, atom::master, atom::resolve, std::string& name, caf::actor& who_asked) { // TODO: get rid of the who_asked parameter and use proper @@ -163,7 +175,7 @@ public: self->send(who_asked, atom::master_v, i->second); return; } - auto peers = dref().peer_handles(); + auto peers = this->peer_handles(); if (peers.empty()) { BROKER_INFO("no peers to ask for the master"); self->send(who_asked, atom::master_v, @@ -171,19 +183,15 @@ public: return; } auto resolver - = self->template spawn(detail::master_resolver); + = self->template spawn(detail::master_resolver); self->send(resolver, std::move(peers), std::move(name), std::move(who_asked)); - }); - } - -protected: - // -- CRTP scaffold ---------------------------------------------------------- - - Subtype& dref() { - return static_cast(*this); + }, + } + .or_else(super::make_behavior()); } +private: // -- member variables ------------------------------------------------------- /// Enables manual time management by the user. 
diff --git a/include/broker/mixin/notifier.hh b/include/broker/mixin/notifier.hh index 59dec392..25be09f7 100644 --- a/include/broker/mixin/notifier.hh +++ b/include/broker/mixin/notifier.hh @@ -17,120 +17,136 @@ namespace broker::mixin { -template +template class notifier : public Base { public: + // -- member types ----------------------------------------------------------- + using super = Base; using extended_base = notifier; - using peer_id_type = typename super::peer_id_type; - - using communication_handle_type = typename Base::communication_handle_type; - - // The notifier embeds `endpoint_info` objects into status and error updates. - // While we keep the implementation as generic as possible, the current - // implementation `endpoint_info` prohibits any other peer ID type at the - // moment. - static_assert(std::is_same::value); + // -- constructors, destructors, and assignment operators -------------------- template - explicit notifier(Ts&&... xs) : super(std::forward(xs)...) { + explicit notifier(caf::event_based_actor* self, Ts&&... xs) + : super(self, std::forward(xs)...) 
{ // nop } - void peer_connected(const peer_id_type& peer_id, - const communication_handle_type& hdl) { + notifier() = delete; + + notifier(const notifier&) = delete; + + notifier& operator=(const notifier&) = delete; + + // -- overrides -------------------------------------------------------------- + + void peer_discovered(const endpoint_id& peer_id) override { + BROKER_TRACE(BROKER_ARG(peer_id)); + emit(peer_id, sc_constant(), + "found a new peer in the network"); + super::peer_discovered(peer_id); + } + + void peer_connected(const endpoint_id& peer_id, + const caf::actor& hdl) override { BROKER_TRACE(BROKER_ARG(peer_id) << BROKER_ARG(hdl)); - emit(hdl, sc_constant(), "handshake successful"); + emit(peer_id, sc_constant(), "handshake successful"); super::peer_connected(peer_id, hdl); } - void peer_disconnected(const peer_id_type& peer_id, - const communication_handle_type& hdl, - const error& reason) { + void peer_disconnected(const endpoint_id& peer_id, const caf::actor& hdl, + const error& reason) override { BROKER_TRACE(BROKER_ARG(peer_id) << BROKER_ARG(hdl) << BROKER_ARG(reason)); // Calling emit() with the peer_id only trigges a network info lookup that // can stall this actor if we're already in shutdown mode. Hence, we perform // a manual cache lookup and simply omit the network information if we // cannot find a cached entry. 
network_info peer_addr; - if (auto addr = dref().cache().find(hdl)) + if (auto addr = this->cache().find(hdl)) peer_addr = *addr; emit(peer_id, peer_addr, sc_constant(), "lost connection to remote peer"); super::peer_disconnected(peer_id, hdl, reason); } - void peer_removed(const peer_id_type& peer_id, - const communication_handle_type& hdl) { + void peer_removed(const endpoint_id& peer_id, + const caf::actor& hdl) override { BROKER_TRACE(BROKER_ARG(peer_id) << BROKER_ARG(hdl)); - emit(hdl, sc_constant(), + emit(peer_id, sc_constant(), "removed connection to remote peer"); super::peer_removed(peer_id, hdl); } - void peer_unavailable(const peer_id_type& peer_id, - const communication_handle_type& hdl, - const error& reason) { - auto self = super::self(); - emit(hdl, ec_constant(), - "failed to complete handhsake"); - super::peer_unavailable(peer_id, hdl, reason); + void peer_unreachable(const endpoint_id& peer_id) override { + BROKER_TRACE(BROKER_ARG(peer_id)); + emit(peer_id, sc_constant(), + "lost the last path"); + super::peer_unreachable(peer_id); } - void peer_unavailable(const network_info& addr) { + void peer_unavailable(const network_info& addr) override { + BROKER_TRACE(BROKER_ARG(addr)); auto self = super::self(); - emit(addr, ec_constant(), + emit({}, addr, ec_constant(), "unable to connect to remote peer"); } - void cannot_remove_peer(const network_info& addr) { - BROKER_TRACE(BROKER_ARG(addr)); - emit(addr, ec_constant(), - "cannot unpeer from unknown peer"); - super::cannot_remove_peer(addr); + void cannot_remove_peer(const endpoint_id& x) override { + cannot_remove_peer_impl(x); } - void cannot_remove_peer(const peer_id_type& peer_id, - const communication_handle_type& hdl) { - BROKER_TRACE(BROKER_ARG(hdl)); - if (hdl) - emit(hdl, ec_constant(), - "cannot unpeer from unknown peer"); - super::cannot_remove_peer(peer_id, hdl); + void cannot_remove_peer(const caf::actor& x) override { + cannot_remove_peer_impl(x); } - void disable_notifications() { - 
BROKER_TRACE(""); - disable_notifications_ = true; + void cannot_remove_peer(const network_info& x) override { + cannot_remove_peer_impl(x); } - template - caf::behavior make_behavior(Fs... fs) { - using detail::lift; - auto& d = dref(); - return super::make_behavior( - fs..., lift(d, &Subtype::disable_notifications)); + // -- initialization --------------------------------------------------------- + + caf::behavior make_behavior() override { + return caf::message_handler{ + [this](atom::no_events) { + BROKER_DEBUG("disable notifications"); + disable_notifications_ = true; + }, + [this](atom::publish, endpoint_info& receiver, data_message& msg) { + // TODO: implement me + // this->ship(msg, receiver.node); + }, + [](atom::add, atom::status, const caf::actor&) { + // TODO: this handler exists only for backwards-compatibility. It used + // to register status subscribers for synchronization. Eventually, + // we should either re-implement the synchronization if necessary + // or remove this handler. + }, + } + .or_else(super::make_behavior()); } private: - auto& dref() { - return *static_cast(this); + template + void cannot_remove_peer_impl(const T& x) { + BROKER_TRACE(BROKER_ARG(x)); + emit(x, ec_constant(), "cannot unpeer from unknown peer"); + super::cannot_remove_peer(x); } void emit(const status& stat) { auto dmsg = make_data_message(topics::statuses, get_as(stat)); - dref().ship_locally(std::move(dmsg)); + this->publish_locally(std::move(dmsg)); } void emit(const error& err) { auto dmsg = make_data_message(topics::errors, get_as(err)); - dref().ship_locally(std::move(dmsg)); + this->publish_locally(std::move(dmsg)); } template - void emit(const peer_id_type& peer_id, const network_info& x, + void emit(const endpoint_id& peer_id, const network_info& x, std::integral_constant, const char* msg) { BROKER_INFO("emit:" << Code << x); if (disable_notifications_) @@ -154,25 +170,40 @@ private: } } - /// Reports a status or error to all status subscribers. 
+ /// Reports an error to all status subscribers. template - void emit(const communication_handle_type& hdl, EnumConstant code, - const char* msg) { + void emit(const caf::actor& hdl, EnumConstant code, const char* msg) { + static_assert(detail::has_network_info_v); + if (disable_notifications_) + return; + auto unbox_or_default = [](auto maybe_value) { + using value_type = std::decay_t; + if (maybe_value) + return std::move(*maybe_value); + return value_type{}; + }; + emit(unbox_or_default(get_peer_id(this->tbl(), hdl)), + unbox_or_default(this->cache().find(hdl)), code, msg); + } + + template + void emit(const endpoint_id& peer_id, EnumConstant code, const char* msg) { + BROKER_TRACE(BROKER_ARG(peer_id) << BROKER_ARG2("code", EnumConstant::value) + << BROKER_ARG(msg)); if (disable_notifications_) return; using value_type = typename EnumConstant::value_type; if constexpr (detail::has_network_info_v) { - auto on_cache_hit = [=](network_info x) { emit(hdl.node(), x, code, msg); }; - auto on_cache_miss = [=](caf::error) { emit(hdl.node(), {}, code, msg); }; - if (super::self()->node() != hdl.node()) { - dref().cache().fetch(hdl, on_cache_hit, on_cache_miss); - } else { - on_cache_miss({}); - } + network_info net; + auto& tbl = this->tbl(); + if (auto i = tbl.find(peer_id); i != tbl.end() && i->second.hdl) + if (auto maybe_net = this->cache().find(i->second.hdl)) + net = std::move(*maybe_net); + emit(peer_id, net, code, msg); } else if constexpr (std::is_same::value) { - emit(status::make(hdl, msg)); + emit(status::make(peer_id, msg)); } else { - emit(make_error(EnumConstant::value, endpoint_info{hdl, nil}, msg)); + emit(make_error(EnumConstant::value, endpoint_info{peer_id, nil}, msg)); } } diff --git a/include/broker/mixin/recorder.hh b/include/broker/mixin/recorder.hh index ea38992a..448bd598 100644 --- a/include/broker/mixin/recorder.hh +++ b/include/broker/mixin/recorder.hh @@ -5,43 +5,45 @@ namespace broker::mixin { -template +template class recorder : public 
Base { public: + // -- member types ----------------------------------------------------------- + using super = Base; using extended_base = recorder; - using peer_id_type = typename super::peer_id_type; - - using communication_handle_type = typename super::communication_handle_type; + // -- constructors, destructors, and assignment operators -------------------- template - explicit recorder(Ts&&... xs) - : super(std::forward(xs)...), rec_(super::self()) { + explicit recorder(caf::event_based_actor* self, Ts&&... xs) + : super(self, std::forward(xs)...), rec_(super::self()) { // nop } - template - void ship(T& msg) { + recorder() = delete; + + recorder(const recorder&) = delete; + + recorder& operator=(const recorder&) = delete; + + // -- overrides -------------------------------------------------------------- + + void dispatch(node_message&& msg) override { if (rec_) rec_.try_record(msg); - super::ship(msg); - } - - void ship(data_message& msg, const communication_handle_type& receiver) { - // TODO: extend recording interface to cover direct messages - super::ship(msg, receiver); + super::dispatch(std::move(msg)); } - void subscribe(const filter_type& what) { + void subscribe(const filter_type& what) override { if (rec_) rec_.record_subscription(what); super::subscribe(what); } - void peer_connected(const peer_id_type& remote_id, - const communication_handle_type& hdl) { + void peer_connected(const endpoint_id& remote_id, + const caf::actor& hdl) override { if (rec_) rec_.record_peer(remote_id); super::peer_connected(remote_id, hdl); diff --git a/include/broker/publisher.hh b/include/broker/publisher.hh index 7d583bb2..20ab2c01 100644 --- a/include/broker/publisher.hh +++ b/include/broker/publisher.hh @@ -7,6 +7,7 @@ #include #include "broker/atoms.hh" +#include "broker/entity_id.hh" #include "broker/fwd.hh" #include "broker/message.hh" @@ -109,4 +110,6 @@ private: topic topic_; }; +using publisher_id [[deprecated("use entity_id instead")]] = entity_id; + } // 
namespace broker diff --git a/include/broker/shutdown_options.hh b/include/broker/shutdown_options.hh new file mode 100644 index 00000000..31bb3d97 --- /dev/null +++ b/include/broker/shutdown_options.hh @@ -0,0 +1,37 @@ +#pragma once + +#include +#include + +namespace broker { + +class shutdown_options { +public: + enum flag { + await_stores_on_shutdown = 0x01, + }; + + constexpr bool contains(flag f) const noexcept { + return (flags_ & static_cast(f)) != 0; + } + + constexpr void set(flag f) noexcept { + flags_ |= static_cast(f); + } + + constexpr void unset(flag f) noexcept { + flags_ &= ~static_cast(f); + } + + template + friend auto inspect(Inspector& f, shutdown_options& x) { + return f.object(x).fields(f.field("flags", x.flags_)); + } + +private: + uint8_t flags_ = 0; +}; + +std::string to_string(shutdown_options options); + +} // namespace broker diff --git a/include/broker/status.hh b/include/broker/status.hh index f47d105f..17436b89 100644 --- a/include/broker/status.hh +++ b/include/broker/status.hh @@ -97,7 +97,7 @@ public: } template - static status make(node_id node, std::string msg) { + static status make(endpoint_id node, std::string msg) { static_assert(sc_has_endpoint_info_v); return {S, endpoint_info{std::move(node), nil}, std::move(msg)}; } @@ -134,8 +134,6 @@ public: friend bool operator==(sc x, const status& y); - friend std::string to_string(const status& s); - template friend bool inspect(Inspector& f, status& x) { auto verify = [&x] { return x.verify(); }; @@ -170,6 +168,9 @@ private: std::string message_; }; +/// @relates status +std::string to_string(const status& x); + /// @relates status template status make_status(Ts&&... 
xs) { @@ -231,7 +232,7 @@ private: }; /// @relates status_view -std::string to_string(status_view s); +std::string to_string(status_view sv); /// @relates status_view inline status_view make_status_view(const data& src) { diff --git a/include/broker/store.hh b/include/broker/store.hh index e8c0cfcc..99e8153b 100644 --- a/include/broker/store.hh +++ b/include/broker/store.hh @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -7,18 +8,18 @@ #include #include #include -#include #include #include "broker/api_flags.hh" #include "broker/atoms.hh" #include "broker/data.hh" +#include "broker/defaults.hh" +#include "broker/error.hh" +#include "broker/expected.hh" #include "broker/fwd.hh" #include "broker/mailbox.hh" #include "broker/message.hh" #include "broker/optional.hh" -#include "broker/error.hh" -#include "broker/expected.hh" #include "broker/status.hh" #include "broker/timeout.hh" @@ -30,8 +31,12 @@ class endpoint; /// and querying contents. class store { public: + // -- friends ---------------------------------------------------------------- + friend class endpoint; + // -- member types ----------------------------------------------------------- + using stream_type = caf::stream; /// A response to a lookup request issued by a ::proxy. @@ -49,7 +54,7 @@ public: /// @param s The store to create a proxy for. explicit proxy(store& s); - /// Performs a request to check existance of a value. + /// Performs a request to check existence of a value. /// @returns A unique identifier for this request to correlate it with a /// response. request_id exists(data key); @@ -94,7 +99,7 @@ public: std::vector receive(size_t n); /// Returns a globally unique identifier for the frontend actor. - publisher_id frontend_id() const noexcept { + entity_id frontend_id() const noexcept { return {frontend_.node(), frontend_.id()}; } @@ -104,14 +109,26 @@ public: caf::actor proxy_; }; - /// Default-constructs an uninitialized store. 
- store() = default; + // -- constructors, destructors, and assignment operators -------------------- + + store(); + + store(store&&); + + store(const store&); + + store& operator=(store&&); + + store& operator=(const store&); + + ~store(); // --- inspectors ----------------------------------------------------------- /// Retrieves the name of the store. - /// @returns The store name. - const std::string& name() const; + /// @returns A copy of the store name or an empty string when calling this + /// function on an invalid object. + std::string name() const; /// Checks whether a key exists in the store. /// @returns A boolean that's if the key exists. @@ -128,7 +145,7 @@ public: /// @param expiry An optional expiration time for *key*. /// @returns A true data value if inserted or false if key already existed. expected put_unique(data key, data value, - optional expiry = {}) const; + optional expiry = {}); /// For containers values, retrieves a specific index from the value. This /// is supported for sets, tables, and vectors. @@ -142,15 +159,26 @@ public: /// Retrieves a copy of the store's current keys, returned as a set. expected keys() const; - /// Retrieves the frontend. - inline const caf::actor& frontend() const { - return frontend_; + /// Returns whether the store was fully initialized + bool initialized() const noexcept; + + /// Returns whether the store was fully initialized + explicit operator bool() const noexcept { + return initialized(); } + /// Retrieves the frontend. + /// @pre `initialized()` + caf::actor frontend() const; + /// Returns a globally unique identifier for the frontend actor. - publisher_id frontend_id() const noexcept { - return {frontend_.node(), frontend_.id()}; - } + entity_id frontend_id() const; + + /// Returns the topic for sending messages to the master. + caf::actor self_hdl() const; + + /// Returns the topic for sending messages to the master. 
+ entity_id self_id() const; // --- modifiers ----------------------------------------------------------- @@ -158,21 +186,21 @@ public: /// @param key The key of the key-value pair. /// @param value The value of the key-value pair. /// @param expiry An optional expiration time for *key*. - void put(data key, data value, optional expiry = {}) const; + void put(data key, data value, optional expiry = {}); /// Removes the value associated with a given key. /// @param key The key to remove from the store. - void erase(data key) const; + void erase(data key); /// Empties out the store. - void clear() const; + void clear(); /// Increments a value by a given amount. This is supported for all /// numerical types as well as for timestamps. /// @param key The key of the value to increment. /// @param value The amount to increment the value. /// @param expiry An optional new expiration time for *key*. - void increment(data key, data amount, optional expiry = {}) const { + void increment(data key, data amount, optional expiry = {}) { auto init_type = data::type::none; switch ( amount.get_type() ) { @@ -200,7 +228,7 @@ public: /// @param key The key of the value to increment. /// @param value The amount to decrement the value. /// @param expiry An optional new expiration time for *key*. - void decrement(data key, data amount, optional expiry = {}) const { + void decrement(data key, data amount, optional expiry = {}) { subtract(std::move(key), std::move(amount), std::move(expiry)); } @@ -208,7 +236,7 @@ public: /// @param key The key of the string to which to append. /// @param str The string to append. /// @param expiry An optional new expiration time for *key*. - void append(data key, data str, optional expiry = {}) const { + void append(data key, data str, optional expiry = {}) { add(std::move(key), std::move(str), data::type::string, std::move(expiry)); } @@ -216,8 +244,8 @@ public: /// @param key The key of the set into which to insert the value. 
/// @param index The index to insert. /// @param expiry An optional new expiration time for *key*. - void insert_into(data key, data index, optional expiry = {}) const { - add(std::move(key), std::move(index), data::type::set, std::move(expiry)); + void insert_into(data key, data index, optional expiry = {}) { + add(std::move(key), std::move(index), data::type::set, std::move(expiry)); } /// Inserts an index into a table. @@ -225,15 +253,17 @@ public: /// @param index The index to insert. /// @param value The value to associated with the inserted index. For sets, this is ignored. /// @param expiry An optional new expiration time for *key*. - void insert_into(data key, data index, data value, optional expiry = {}) const { - add(std::move(key), vector({std::move(index), std::move(value)}), data::type::table, std::move(expiry)); + void insert_into(data key, data index, data value, + optional expiry = {}) { + add(std::move(key), vector({std::move(index), std::move(value)}), + data::type::table, std::move(expiry)); } /// Removes am index from a set or table. /// @param key The key of the set/table from which to remove the value. /// @param index The index to remove. /// @param expiry An optional new expiration time for *key*. - void remove_from(data key, data index, optional expiry = {}) const { + void remove_from(data key, data index, optional expiry = {}) { subtract(std::move(key), std::move(index), std::move(expiry)); } @@ -241,17 +271,43 @@ public: /// @param key The key of the vector to which to append the value. /// @param value The value to append. /// @param expiry An optional new expiration time for *key*. - void push(data key, data value, optional expiry = {}) const { - add(std::move(key), std::move(value), data::type::vector, std::move(expiry)); + void push(data key, data value, optional expiry = {}) { + add(std::move(key), std::move(value), data::type::vector, + std::move(expiry)); } /// Removes the last value of a vector. 
/// @param key The key of the vector from which to remove the last value. /// @param expiry An optional new expiration time for *key*. - void pop(data key, optional expiry = {}) const { + void pop(data key, optional expiry = {}) { subtract(key, key, std::move(expiry)); } + // --await-idle-start + /// Blocks execution of the current thread until the frontend actor reached an + /// IDLE state. On a master, this means that all clones have caught up with + /// the master and have ACKed the most recent command. On a clone, this means + /// that the master has ACKed any pending put commands from this store and + /// that the clone is not waiting on any out-of-order messages from the + /// master. + /// @param timeout The maximum amount of time this function may block. + /// @returns `true` if the frontend actor responded before the timeout, + /// `false` otherwise. + [[nodiscard]] bool await_idle(timespan timeout + = defaults::store::await_idle_timeout); + + /// Asynchronously runs `callback(true)` when the frontend actor reached an + /// IDLE state or `callback(false)` if the optional timeout triggered first + /// (or in case of an error). + /// @param timeout The maximum amount of time this function may block. + /// @param callback A function object wrapping code for asynchronous + /// execution. The argument for the callback is `true` if the + /// frontend actor responded before the timeout, `false` + /// otherwise. + void await_idle(std::function callback, + timespan timeout = defaults::store::await_idle_timeout); + // --await-idle-end + /// Release any state held by the object, rendering it invalid. /// @warning Performing *any* action on this object afterwards invokes /// undefined behavior, except: @@ -273,35 +329,29 @@ private: /// @param value The value of the key-value pair. /// @param init_type The type of data to initialize when the key does not exist. /// @param expiry An optional new expiration time for *key*. 
- void add(data key, data value, data::type init_type, optional expiry = {}) const; + void add(data key, data value, data::type init_type, + optional expiry = {}); /// Subtracts a value from another one, with a type-specific meaning of /// "substract". This is the backend for a number of the modifiers methods. /// @param key The key of the key-value pair. /// @param value The value of the key-value pair. /// @param expiry An optional new expiration time for *key*. - void subtract(data key, data value, optional expiry = {}) const; - - template - expected request(Ts&&... xs) const { - if (!frontend_) - return make_error(ec::unspecified, "store not initialized"); - expected res{ec::unspecified}; - caf::scoped_actor self{frontend_->home_system()}; - auto msg = caf::make_message(std::forward(xs)...); - self->request(frontend_, timeout::frontend, std::move(msg)).receive( - [&](T& x) { - res = std::move(x); - }, - [&](caf::error& e) { - res = std::move(e); - } - ); - return res; - } - - caf::actor frontend_; - std::string name_; + void subtract(data key, data value, optional expiry = {}); + + // -- member variables ------------------------------------------------------- + + // If we would only consider the native C++ API, we could store a regular + // shared pointer here and rely on scoping to make sure that store objects get + // destroyed before the broker::endpoint shuts down (and takes the frontend + // actor down with it). However, Zeek scripts in particular commonly declare + // store objects as global variables. Hence, we need a way to invalidate store + // objects once the frontend actor shuts down. We achieve this by storing only + // a weak pointer to the state here and have the frontend actor keeping this + // state alive by holding on to a strong reference. Once the frontend actor + // terminates, the state becomes invalid since no other objects holds a strong + // reference to the state. 
+ detail::weak_store_state_ptr state_; }; } // namespace broker diff --git a/include/broker/store_event.hh b/include/broker/store_event.hh index dc6e8810..a1de3a49 100644 --- a/include/broker/store_event.hh +++ b/include/broker/store_event.hh @@ -5,7 +5,7 @@ #include "broker/convert.hh" #include "broker/data.hh" -#include "broker/publisher_id.hh" +#include "broker/entity_id.hh" namespace broker { @@ -35,7 +35,7 @@ public: /// ``` /// /// Whereas the `publisher_endpoint` and the `publisher_object` encode a - /// @ref publisher_id. + /// @ref entity_id. class insert { public: insert(const insert&) noexcept = default; @@ -72,7 +72,7 @@ public: return nil; } - publisher_id publisher() const noexcept { + entity_id publisher() const noexcept { if (auto value = to((*xs_)[5])) { return {std::move(*value), get((*xs_)[6])}; } @@ -140,7 +140,7 @@ public: return nil; } - publisher_id publisher() const noexcept { + entity_id publisher() const noexcept { if (auto value = to((*xs_)[6])) { return {*value, get((*xs_)[7])}; } @@ -192,7 +192,7 @@ public: return (*xs_)[2]; } - publisher_id publisher() const noexcept { + entity_id publisher() const noexcept { if (auto value = to((*xs_)[3])) { return {*value, get((*xs_)[4])}; } @@ -244,7 +244,7 @@ public: return (*xs_)[2]; } - publisher_id publisher() const noexcept { + entity_id publisher() const noexcept { if (auto value = to((*xs_)[3])) { return {*value, get((*xs_)[4])}; } diff --git a/include/broker/subscriber_base.hh b/include/broker/subscriber_base.hh index 2a39cb7d..34dadfc6 100644 --- a/include/broker/subscriber_base.hh +++ b/include/broker/subscriber_base.hh @@ -150,14 +150,14 @@ public: /// Returns the amound of values than can be extracted immediately without /// blocking. - size_t available() const { + size_t available() const noexcept { return queue_->buffer_size(); } /// Returns a file handle for integrating this publisher into a `select` or /// `poll` loop. 
- int fd() const { - return queue_->fd(); + int fd() const noexcept { + return static_cast(queue_->fd()); } protected: diff --git a/src/alm/multipath.cc b/src/alm/multipath.cc new file mode 100644 index 00000000..1aca7b57 --- /dev/null +++ b/src/alm/multipath.cc @@ -0,0 +1,176 @@ +#include "broker/alm/multipath.hh" + +#include "broker/alm/routing_table.hh" + +namespace broker::alm { + +multipath_tree::multipath_tree(endpoint_id id) { + root = detail::new_instance(mem, id); +} + +multipath_tree::~multipath_tree() { + // We can simply "wink out" the tree structure, but we still need to release + // all references to node IDs. + root->shallow_delete(); +} + +multipath_group::~multipath_group() { + delete first_; +} + +bool multipath_group::equals(const multipath_group& other) const noexcept { + auto eq = [](const auto& lhs, const auto& rhs) { return lhs.equals(rhs); }; + return std::equal(begin(), end(), other.begin(), other.end(), eq); +} + +bool multipath_group::contains(const endpoint_id& id) const noexcept { + auto pred = [&id](const multipath_node& node) { return node.contains(id); }; + return std::any_of(begin(), end(), pred); +} + +template +std::pair +multipath_group::emplace_impl(const endpoint_id& id, + MakeNewNode make_new_node) { + if (size_ == 0) { + first_ = make_new_node(); + size_ = 1; + return {first_, true}; + } else { + // Insertion sorts by ID. 
+ BROKER_ASSERT(first_ != nullptr); + if (first_->id_ == id) { + return {first_, false}; + } else if (first_->id_ > id) { + ++size_; + auto new_node = make_new_node(); + new_node->right_ = first_; + first_ = new_node; + return {new_node, true}; + } + auto pos = first_; + auto next = pos->right_; + while (next != nullptr) { + if (next->id_ == id) { + return {next, false}; + } else if (next->id_ > id) { + ++size_; + auto new_node = make_new_node(); + pos->right_ = new_node; + new_node->right_ = next; + return {new_node, true}; + } else { + pos = next; + next = next->right_; + } + } + ++size_; + auto new_node = make_new_node(); + BROKER_ASSERT(pos->right_ == nullptr); + pos->right_ = new_node; + return {new_node, true}; + } +} + +std::pair +multipath_group::emplace(detail::monotonic_buffer_resource& mem, + const endpoint_id& id) { + auto make_new_node = [&mem, &id] { + return detail::new_instance(mem, id); + }; + return emplace_impl(id, make_new_node); +} + +bool multipath_group::emplace(multipath_node* new_node) { + auto make_new_node = [new_node] { return new_node; }; + return emplace_impl(new_node->id_, make_new_node).second; +} + +void multipath_group::shallow_delete() noexcept { + for (auto& child : *this) + child.shallow_delete(); +} + +multipath_node::~multipath_node() { + delete right_; +} + +bool multipath_node::equals(const multipath_node& other) const noexcept { + return id_ == other.id_ + && is_receiver_ == other.is_receiver_ + && down_.equals(other.down_); +} + +bool multipath_node::contains(const endpoint_id& id) const noexcept { + return id_ == id || down_.contains(id); +} + +void multipath_node::stringify(std::string& buf) const { + buf += '('; + buf += to_string(id_); + if (!down_.empty()) { + buf += ", ["; + auto i = down_.begin(); + i->stringify(buf); + while (++i != down_.end()) { + buf += ", "; + i->stringify(buf); + } + buf += ']'; + } + buf += ')'; +} + +void multipath_node::shallow_delete() noexcept { + id_ = endpoint_id{}; + 
down_.shallow_delete(); +} + +multipath::multipath() { + tree_ = std::make_shared(endpoint_id{}); + head_ = tree_->root; +} + +multipath::multipath(const endpoint_id& id) { + tree_ = std::make_shared(id); + head_ = tree_->root; +} + +multipath::multipath(const tree_ptr& t, multipath_node* h) + : tree_(t), head_(h) { + // nop +} + +void multipath::generate(const std::vector& receivers, + const routing_table& tbl, + std::vector& routes, + std::vector& unreachables) { + auto route = [&](const endpoint_id& id) -> auto& { + for (auto& mpath : routes) + if (mpath.head().id() == id) + return mpath; + routes.emplace_back(id); + return routes.back(); + }; + for (auto& receiver : receivers) { + if (auto ptr = shortest_path(tbl, receiver)) { + auto& sp = *ptr; + BROKER_ASSERT(!sp.empty()); + route(sp[0]).splice(sp); + } else { + unreachables.emplace_back(receiver); + } + } +} + +void multipath::splice(const std::vector& path) { + BROKER_ASSERT(path.empty() || path[0] == head().id()); + if (!path.empty()) { + auto child = head_; + for (auto i = path.begin() + 1; i != path.end(); ++i) + child = child->down_.emplace(tree_->mem, *i).first; + child->is_receiver_ = true; + } +} + +} // namespace broker::alm diff --git a/src/alm/peer.cc b/src/alm/peer.cc new file mode 100644 index 00000000..d3a0e5c8 --- /dev/null +++ b/src/alm/peer.cc @@ -0,0 +1,418 @@ +#include "broker/alm/peer.hh" + +namespace broker::alm { + +// -- constructors, destructors, and assignment operators ---------------------- + +peer::peer(caf::event_based_actor* selfptr) : self_(selfptr) { + revocations_.aging_interval = defaults::path_revocations::aging_interval; + revocations_.max_age = defaults::path_revocations::max_age; + revocations_.next_aging_cycle = caf::actor_clock::time_point{}; + using caf::get_or; + auto& cfg = selfptr->system().config(); + disable_forwarding_ = get_or(cfg, "broker.disable-forwarding", false); + namespace pb = broker::defaults::path_revocations; + revocations_.aging_interval + = 
get_or(cfg, "broker.path-revocations.aging-interval", pb::aging_interval); + revocations_.max_age + = get_or(cfg, "broker.path-revocations.max-age", pb::max_age); + revocations_.next_aging_cycle + = selfptr->clock().now() + revocations_.aging_interval; +} + +peer::~peer() { + // nop +} + +// -- central_dispatcher overrides --------------------------------------------- + +caf::event_based_actor* peer::this_actor() noexcept { + return self(); +} + +endpoint_id peer::this_endpoint() const { + return id(); +} + +filter_type peer::local_filter() const { + return filter_; +} + +alm::lamport_timestamp peer::local_timestamp() const noexcept { + return timestamp_; +} + +// -- additional dispatch overloads -------------------------------------------- + +template +bool peer::dispatch_to_impl(T&& msg, endpoint_id&& receiver) { + if (auto ptr = shortest_path(tbl_, receiver); ptr && !ptr->empty()) { + multipath path{ptr->begin(), ptr->end()}; + dispatch(make_node_message(std::forward(msg), std::move(path))); + return true; + } else { + BROKER_DEBUG("drop message: no path to" << receiver); + return false; + } +} + +bool peer::dispatch_to(data_message msg, endpoint_id receiver) { + BROKER_TRACE(BROKER_ARG(msg) << BROKER_ARG(receiver)); + return dispatch_to_impl(std::move(msg), std::move(receiver)); +} + +bool peer::dispatch_to(command_message msg, endpoint_id receiver) { + BROKER_TRACE(BROKER_ARG(msg) << BROKER_ARG(receiver)); + return dispatch_to_impl(std::move(msg), std::move(receiver)); +} + +// -- convenience functions for subscription information ----------------------- + +bool peer::has_remote_subscriber(const topic& x) const noexcept { + detail::prefix_matcher matches; + for (const auto& [peer, filter] : peer_filters_) + if (matches(filter, x)) + return true; + return false; +} + +bool peer::contains(const endpoint_id_list& ids, const endpoint_id& id) { + auto predicate = [&](const endpoint_id& pid) { return pid == id; }; + return std::any_of(ids.begin(), ids.end(), 
predicate); +} + +// -- flooding ----------------------------------------------------------------- + +void peer::flood_subscriptions() { + endpoint_id_list path{id_}; + vector_timestamp ts{timestamp_}; + for_each_direct(tbl_, [&](auto&, auto& hdl) { + publish(hdl, atom::subscribe_v, path, ts, filter_); + }); +} + +void peer::flood_path_revocation(const endpoint_id& lost_peer) { + // We bundle path revocation and subscription flooding, because other peers + // in the network could drop in-flight subscription updates after seeing a + // newer timestamp with the path revocation. + endpoint_id_list path{id_}; + vector_timestamp ts{timestamp_}; + for_each_direct(tbl_, [&, this](const auto& id, const auto& hdl) { + publish(hdl, atom::revoke_v, path, ts, lost_peer, filter_); + }); +} + +// -- publish and subscribe functions ------------------------------------------ + +void peer::subscribe(const filter_type& what) { + BROKER_TRACE(BROKER_ARG(what)); + auto not_internal = [](const topic& x) { return !is_internal(x); }; + if (filter_extend(filter_, what, not_internal)) { + ++timestamp_; + flood_subscriptions(); + } else { + BROKER_DEBUG("already subscribed to topic (or topic is internal):" << what); + } +} + +bool peer::valid(endpoint_id_list& path, vector_timestamp path_ts) { + // Drop if empty or if path and path_ts have different sizes. + if (path.empty()) { + BROKER_WARNING("drop message: path empty"); + return false; + } + if (path.size() != path_ts.size()) { + BROKER_WARNING("drop message: path and timestamp have different sizes"); + return false; + } + // Sanity check: we can only receive messages from direct connections. + auto forwarder = find_row(tbl_, path.back()); + if (forwarder == nullptr) { + BROKER_WARNING("received message from an unrecognized peer"); + return false; + } + if (!forwarder->hdl) { + BROKER_WARNING( + "received message from a peer we don't have a direct connection to"); + return false; + } + // Drop all paths that contain loops. 
+ if (contains(path, id_)) { + BROKER_DEBUG("drop message: path contains a loop"); + return false; + } + // Drop all messages that arrive after revoking a path. + if (revoked(path, path_ts, revocations_.entries)) { + BROKER_DEBUG("drop message from a revoked path"); + return false; + } + return true; +} + +void peer::age_revocations() { + if (revocations_.entries.empty()) + return; + auto now = self()->clock().now(); + if (now < revocations_.next_aging_cycle) + return; + auto predicate = [this, now](const auto& entry) { + return entry.first_seen + revocations_.max_age <= now; + }; + auto& entries = revocations_.entries; + entries.erase(std::remove_if(entries.begin(), entries.end(), predicate), + entries.end()); + revocations_.next_aging_cycle = now + revocations_.aging_interval; +} + +std::pair +peer::handle_update(endpoint_id_list& path, vector_timestamp path_ts, + const filter_type& filter) { + BROKER_TRACE(BROKER_ARG(path) << BROKER_ARG(path_ts) << BROKER_ARG(filter)); + std::vector new_peers; + // Extract new peers from the path. + auto is_new = [this](const auto& id) { return !reachable(tbl_, id); }; + for (const auto& id : path) + if (is_new(id)) + new_peers.emplace_back(id); + // Update the routing table. + auto added_tbl_entry = add_or_update_path( + tbl_, path[0], endpoint_id_list{path.rbegin(), path.rend()}, + vector_timestamp{path_ts.rbegin(), path_ts.rend()}); + // Increase local time, but only if we have changed the routing table. + // Otherwise, we would cause infinite flooding, because the peers would + // never agree on a vector time. + if (added_tbl_entry) { + BROKER_DEBUG("increase local time"); + ++timestamp_; + } + // Store the subscription if it's new. + const auto& subscriber = path[0]; + if (path_ts[0] > peer_timestamps_[subscriber]) { + peer_timestamps_[subscriber] = path_ts[0]; + peer_filters_[subscriber] = filter; + } + // Trigger await callbacks if necessary.
+ if (auto [first, last] = awaited_peers_.equal_range(subscriber); + first != last) { + std::for_each(first, last, + [&subscriber](auto& kvp) { kvp.second.deliver(subscriber); }); + awaited_peers_.erase(first, last); + } + return {std::move(new_peers), added_tbl_entry}; +} + +void peer::handle_filter_update(endpoint_id_list& path, + vector_timestamp& path_ts, + const filter_type& filter) { + BROKER_TRACE(BROKER_ARG(path) << BROKER_ARG(path_ts) << BROKER_ARG(filter)); + // Handle message content (drop nonsense messages and revoked paths). + if (!valid(path, path_ts)) + return; + auto new_peers = std::move(handle_update(path, path_ts, filter).first); + // Forward message to all other neighbors. + if (!disable_forwarding_) { + path.emplace_back(id_); + path_ts.emplace_back(timestamp_); + for_each_direct(tbl_, [&](auto& pid, auto& hdl) { + if (!contains(path, pid)) + publish(hdl, atom::subscribe_v, path, path_ts, filter); + }); + } + // If we have learned new peers, we flood our own subscriptions as well. + if (!new_peers.empty()) { + BROKER_DEBUG("learned new peers: " << new_peers); + for (auto& id : new_peers) + peer_discovered(id); + // TODO: This primarily makes sure that eventually all peers know each + // other. There may be more efficient ways to ensure connectivity, + // though. + flood_subscriptions(); + } + // Clean up some state if possible. + age_revocations(); +} + +void peer::handle_path_revocation(endpoint_id_list& path, + vector_timestamp& path_ts, + const endpoint_id& revoked_hop, + const filter_type& filter) { + BROKER_TRACE(BROKER_ARG(path) + << BROKER_ARG(path_ts) << BROKER_ARG(revoked_hop) + << BROKER_ARG(filter)); + // Drop nonsense messages. + if (!valid(path, path_ts)) + return; + // Handle the subscription part of the message. + auto&& [new_peers, increased_time] = handle_update(path, path_ts, filter); + // Handle the revocation part of the message.
+ auto [i, added] + = emplace(revocations_.entries, self_, path[0], path_ts[0], revoked_hop); + if (added) { + if (!increased_time) + ++timestamp_; + auto on_drop = [this](const endpoint_id& whom) { + BROKER_INFO("lost peer " << whom << " as a result of path revocation"); + peer_unreachable(whom); + }; + revoke(tbl_, *i, on_drop); + } + // Forward message to all other neighbors. + if (!disable_forwarding_) { + path.emplace_back(id_); + path_ts.emplace_back(timestamp_); + for_each_direct(tbl_, [&](auto& pid, auto& hdl) { + if (!contains(path, pid)) + publish(hdl, atom::revoke_v, path, path_ts, revoked_hop, filter); + }); + } + // If we have learned new peers, we flood our own subscriptions as well. + if (!new_peers.empty()) { + BROKER_DEBUG("learned new peers: " << new_peers); + for (auto& id : new_peers) + peer_discovered(id); + flood_subscriptions(); + } + // Clean up some state if possible. + age_revocations(); +} + +// -- interface to the transport ----------------------------------------------- + +void peer::publish_locally(const node_message_content& msg) { + BROKER_TRACE(BROKER_ARG(msg)); + if (is_data_message(msg)) { + publish_locally(get_data_message(msg)); + } else { + BROKER_ASSERT(is_command_message(msg)); + publish_locally(get_command_message(msg)); + } +} + +// -- callbacks ---------------------------------------------------------------- + +void peer::peer_discovered(const endpoint_id&) { + // nop +} + +void peer::peer_connected(const endpoint_id&, const caf::actor&) { + // nop +} + +void peer::peer_disconnected(const endpoint_id& peer_id, const caf::actor& hdl, + [[maybe_unused]] const error& reason) { + BROKER_TRACE(BROKER_ARG(peer_id) << BROKER_ARG(hdl) << BROKER_ARG(reason)); + cleanup(peer_id, hdl); +} + +void peer::peer_removed([[maybe_unused]] const endpoint_id& peer_id, + [[maybe_unused]] const caf::actor& hdl) { + BROKER_TRACE(BROKER_ARG(peer_id) << BROKER_ARG(hdl)); + cleanup(peer_id, hdl); +} + +void peer::peer_unreachable(const endpoint_id& 
peer_id) { + peer_filters_.erase(peer_id); +} + +void peer::cannot_remove_peer([[maybe_unused]] const endpoint_id& x) { + BROKER_DEBUG("cannot unpeer from unknown peer" << x); +} + +void peer::cannot_remove_peer([[maybe_unused]] const caf::actor& x) { + BROKER_DEBUG("cannot unpeer from unknown peer" << x); +} + +void peer::cannot_remove_peer([[maybe_unused]] const network_info& x) { + BROKER_DEBUG("cannot unpeer from unknown peer" << x); +} + +void peer::peer_unavailable(const network_info&) { + // nop +} + +void peer::shutdown([[maybe_unused]] shutdown_options options) { + BROKER_TRACE(BROKER_ARG(options)); + BROKER_DEBUG("cancel any pending await_peer requests"); + auto cancel = make_error(ec::shutting_down); + if (!awaited_peers_.empty()) { + for (auto& kvp : awaited_peers_) + kvp.second.deliver(cancel); + awaited_peers_.clear(); + } + if (!disable_forwarding_) { + BROKER_DEBUG("revoke all paths through this peer"); + ++timestamp_; + auto ids = peer_ids(); + for (auto& x : ids) + flood_path_revocation(x); + } + self_->quit(); +} + +// -- initialization ----------------------------------------------------------- + +caf::behavior peer::make_behavior() { + BROKER_DEBUG("make behavior for peer" << id_); + using detail::lift; + return { + [this](atom::publish, data_message& msg) { + dispatch(msg); + }, + [this](atom::publish, command_message& msg) { + dispatch(msg); + }, + [this](atom::publish, command_message& msg, endpoint_id& receiver) { + if (receiver == id_) + publish_locally(msg); + else + dispatch_to(std::move(msg), std::move(receiver)); + }, + [this](atom::publish, node_message& msg) { + dispatch(std::move(msg)); + }, + lift(*this, &peer::subscribe), + lift(*this, &peer::handle_filter_update), + lift(*this, &peer::handle_path_revocation), + [this](atom::get, atom::id) { return id_; }, + [this](atom::get, atom::peer, atom::subscriptions) { + // For backwards-compatibility, we only report the filter of our + // direct peers.
Returning all filters would make more sense in an + // ALM setting, but that would change the semantics of + // endpoint::peer_filter. + auto is_direct_peer + = [this](const auto& peer_id) { return tbl_.count(peer_id) != 0; }; + filter_type result; + for (const auto& [peer, filter] : peer_filters_) + if (is_direct_peer(peer)) + filter_extend(result, filter); + return result; + }, + [this](atom::shutdown, shutdown_options opts) { shutdown(opts); }, + [this](atom::publish, atom::local, command_message& msg) { + dispatch(msg); + }, + [this](atom::publish, atom::local, data_message& msg) { + dispatch(msg); + }, + [this](atom::await, endpoint_id who) { + auto rp = self_->make_response_promise(); + if (auto i = peer_filters_.find(who); i != peer_filters_.end()) + rp.deliver(who); + else + awaited_peers_.emplace(who, std::move(rp)); + }, + }; +} + +// -- implementation details --------------------------------------------------- + +void peer::cleanup(const endpoint_id& peer_id, const caf::actor& hdl) { + BROKER_TRACE(BROKER_ARG(peer_id) << BROKER_ARG(hdl)); + auto on_drop = [this](const endpoint_id& whom) { peer_unreachable(whom); }; + if (erase_direct(tbl_, peer_id, on_drop)) { + ++timestamp_; + flood_path_revocation(peer_id); + } +} + +} // namespace broker::alm diff --git a/src/alm/routing_table.cc b/src/alm/routing_table.cc new file mode 100644 index 00000000..8f2e2da1 --- /dev/null +++ b/src/alm/routing_table.cc @@ -0,0 +1,59 @@ +#include "broker/alm/routing_table.hh" + +namespace broker::alm { + +optional get_peer_id(const routing_table& tbl, + const caf::actor& hdl) { + auto predicate = [&](const auto& kvp) { return kvp.second.hdl == hdl; }; + auto e = tbl.end(); + auto i = std::find_if(tbl.begin(), e, predicate); + if (i != e) + return i->first; + return nil; +} + +const std::vector* shortest_path(const routing_table& tbl, + const endpoint_id& peer) { + if (auto i = tbl.find(peer); + i != tbl.end() && !i->second.versioned_paths.empty()) + return
std::addressof(i->second.versioned_paths.front().first); + else + return nullptr; +} + +const routing_table_row* find_row(const routing_table& tbl, + const endpoint_id& peer) { + if (auto i = tbl.find(peer); i != tbl.end()) + return std::addressof(i->second); + else + return nullptr; +} + +routing_table_row* find_row(routing_table& tbl, const endpoint_id& peer) { + if (auto i = tbl.find(peer); i != tbl.end()) + return std::addressof(i->second); + else + return nullptr; +} + +bool add_or_update_path(routing_table& tbl, + const endpoint_id& peer, + std::vector path, + vector_timestamp ts) { + auto& row = tbl[peer]; + auto& paths = row.versioned_paths; + auto i = std::lower_bound(paths.begin(), paths.end(), path, path_less); + if (i == paths.end()) { + paths.emplace_back(std::move(path), std::move(ts)); + return true; + } else if (i->first != path) { + paths.insert(i, std::make_pair(std::move(path), std::move(ts))); + return true; + } else { + if (i->second < ts) + i->second = std::move(ts); + return false; + } +} + +} // namespace broker::alm diff --git a/src/alm/stream_transport.cc b/src/alm/stream_transport.cc new file mode 100644 index 00000000..3f3fe713 --- /dev/null +++ b/src/alm/stream_transport.cc @@ -0,0 +1,685 @@ +#include "broker/alm/stream_transport.hh" + +#include "broker/detail/overload.hh" + +namespace broker::alm { + +// -- constructors, destructors, and assignment operators ---------------------- + +stream_transport::stream_transport(caf::event_based_actor* self) : super(self) { + // nop +} + +// -- properties --------------------------------------------------------------- + +bool stream_transport::connected_to(const caf::actor& hdl) const noexcept { + return hdl_to_mgr_.count(hdl) != 0; +} + +// -- adding local subscribers ------------------------------------------------- + +caf::outbound_stream_slot +stream_transport::add_sending_worker(filter_type filter) { + BROKER_TRACE(BROKER_ARG(filter)); + subscribe(filter); + auto mgr = 
make_unipath_data_sink(this, std::move(filter)); + auto res = mgr->add_unchecked_outbound_path(); + BROKER_ASSERT(res != caf::invalid_stream_slot); + data_sinks_.emplace_back(std::move(mgr)); + return res; +} + +error stream_transport::add_worker(const caf::actor& hdl, filter_type filter) { + BROKER_TRACE(BROKER_ARG(hdl) << BROKER_ARG(filter)); + if (hdl == nullptr || filter.empty()) { + return caf::sec::cannot_add_downstream; + } else { + subscribe(filter); + auto mgr = make_unipath_data_sink(this, std::move(filter)); + auto res = mgr->add_unchecked_outbound_path(hdl); + BROKER_ASSERT(res != caf::invalid_stream_slot); + data_sinks_.emplace_back(std::move(mgr)); + return caf::none; + } +} + +caf::outbound_stream_slot +stream_transport::add_sending_store(filter_type filter) { + BROKER_TRACE(BROKER_ARG(filter)); + subscribe(filter); + auto mgr = make_unipath_command_sink(this, std::move(filter)); + auto res = mgr->add_unchecked_outbound_path(); + BROKER_ASSERT(res != caf::invalid_stream_slot); + command_sinks_.emplace_back(std::move(mgr)); + return res; +} + +error stream_transport::add_store(const caf::actor& hdl, filter_type filter) { + BROKER_TRACE(BROKER_ARG(hdl) << BROKER_ARG(filter)); + if (hdl == nullptr || filter.empty()) { + return caf::sec::cannot_add_downstream; + } else { + subscribe(filter); + auto mgr = make_unipath_command_sink(this, std::move(filter)); + auto res = mgr->add_unchecked_outbound_path(hdl); + BROKER_ASSERT(res != caf::invalid_stream_slot); + command_sinks_.emplace_back(std::move(mgr)); + return caf::none; + } +} + +// -- overrides for peer::publish ---------------------------------------------- + +void stream_transport::publish(const caf::actor& dst, atom::subscribe, + const endpoint_id_list& path, + const vector_timestamp& ts, + const filter_type& new_filter) { + BROKER_TRACE(BROKER_ARG(dst) + << BROKER_ARG(path) << BROKER_ARG(ts) << BROKER_ARG(new_filter)); + self()->send(dst, atom::subscribe_v, path, ts, new_filter); +} + +void 
stream_transport::publish(const caf::actor& dst, atom::revoke, + const endpoint_id_list& path, + const vector_timestamp& ts, + const endpoint_id& lost_peer, + const filter_type& new_filter) { + BROKER_TRACE(BROKER_ARG(dst) + << BROKER_ARG(path) << BROKER_ARG(ts) << BROKER_ARG(lost_peer) + << BROKER_ARG(new_filter)); + self()->send(dst, atom::revoke_v, path, ts, lost_peer, new_filter); +} + +void stream_transport::publish_locally(const data_message& msg) { + BROKER_TRACE(BROKER_ARG(msg)); + for (auto& sink : data_sinks_) + sink->enqueue(msg); +} + +void stream_transport::publish_locally(const command_message& msg) { + BROKER_TRACE(BROKER_ARG(msg)); + for (auto& sink : command_sinks_) + sink->enqueue(msg); +} + +// -- peering ------------------------------------------------------------------ + +detail::peer_manager_ptr +stream_transport::get_or_insert_pending(const endpoint_id& remote_peer) { + if (auto i = pending_.find(remote_peer); i != pending_.end()) { + return i->second; + } else { + auto mgr = detail::make_peer_manager(this); + pending_.emplace(remote_peer, mgr); + return mgr; + } +} + +detail::peer_manager_ptr stream_transport::get_pending(const caf::actor& hdl) { + auto pred = [&hdl](const auto& kvp) { + return kvp.second->handshake().remote_hdl == hdl; + }; + if (auto i = std::find_if(pending_.begin(), pending_.end(), pred); + i != pending_.end()) { + return i->second; + } else { + return nullptr; + } +} + +detail::peer_manager_ptr +stream_transport::get_pending(const endpoint_id& remote_peer) { + if (auto i = pending_.find(remote_peer); i != pending_.end()) { + return i->second; + } else { + return nullptr; + } +} + +// Initiates peering between A (this node) and B (remote peer). 
+void stream_transport::start_peering(const endpoint_id& remote_peer, + const caf::actor& hdl, + caf::response_promise rp) { + BROKER_TRACE(BROKER_ARG(remote_peer) << BROKER_ARG(hdl)); + if (is_direct_connection(tbl(), remote_peer)) { + BROKER_DEBUG("start_peering ignored: already peering with" << remote_peer); + rp.deliver(atom::peer_v, atom::ok_v, hdl); + } else if (remote_peer < id()) { + // We avoid conflicts in the handshake process by always having the node + // with the smaller ID initiate the peering. Otherwise, we could end up in + // a deadlock during handshake if both sides send step 1 at the same time. + if (auto i = pending_.find(remote_peer); i != pending_.end()) { + auto& mgr = i->second; + auto& hs = mgr->handshake(); + if (mgr->hdl() != hdl) { + BROKER_ERROR("multiple peers share a single actor handle!"); + rp.deliver(make_error(ec::invalid_peering_request, + "handle already in use by another responder")); + } else if (!hs.is_responder()) { + BROKER_ERROR("peer tries to obtained wrong role in handshake!"); + rp.deliver(make_error(ec::invalid_handshake_state)); + } else { + hs.promises.emplace_back(rp); + } + } else { + auto mgr = make_peer_manager(this); + pending_.emplace(remote_peer, mgr); + auto& hs = mgr->handshake(); + hs.to_responder(); + hs.promises.emplace_back(rp); + auto s = self(); + s->request(hdl, std::chrono::minutes(10), atom::peer_v, id(), s) + .then( + [](atom::peer, atom::ok, const endpoint_id&) { + // nop + }, + [mgr](caf::error& err) mutable { + // Abort the handshake if it hasn't started yet. Otherwise, we + // have other mechanisms in place that capture the same error. 
+ if (auto& href = mgr->handshake(); !href.started()) { + BROKER_DEBUG("peering failed:" << err); + href.fail(std::move(err)); + } + }); + } + } else if (auto i = pending_.find(remote_peer); i != pending_.end()) { + if (i->second->handshake().remote_hdl == hdl) { + BROKER_DEBUG("start_peering ignored: already started peering with" + << remote_peer); + rp.deliver(atom::peer_v, atom::ok_v, hdl); + } else { + BROKER_ERROR("multiple peers share a single actor handle!"); + rp.deliver(make_error(ec::invalid_peering_request, + "handle already in use by another responder")); + } + } else { + auto mgr = make_peer_manager(this); + if (mgr->handshake().originator_start_peering(remote_peer, hdl, + std::move(rp))) { + BROKER_DEBUG("start peering with" << remote_peer); + pending_.emplace(remote_peer, std::move(mgr)); + } else { + BROKER_ERROR("failed to start peering with" << remote_peer << ":" + << mgr->handshake().err); + } + } +} + +caf::outbound_stream_slot +stream_transport::handle_peering_request(const endpoint_id& remote_peer, + const caf::actor& hdl) { + BROKER_TRACE(BROKER_ARG(hdl) << BROKER_ARG(remote_peer)); + if (is_direct_connection(tbl(), remote_peer)) { + BROKER_ERROR("drop peering request: already have a direct connection to" + << remote_peer); + return {}; + } else if (auto mgr = get_or_insert_pending(remote_peer); + mgr->handshake().started()) { + if (mgr->handshake().remote_hdl == hdl) { + BROKER_ERROR("multiple peering requests: already started peering with" + << remote_peer); + } else { + BROKER_ERROR("multiple peers share a single actor handle!"); + } + return {}; + } else { + auto& hs = mgr->handshake(); + if (hs.responder_start_peering(remote_peer, hdl)) { + BROKER_DEBUG("start peering with" << remote_peer); + BROKER_ASSERT(hs.out != caf::invalid_stream_slot); + return {hs.out}; + } else { + BROKER_ERROR("failed to start peering with" << remote_peer << ":" + << hs.err); + return {}; + } + } +} + +caf::outbound_stream_slot 
+stream_transport::handle_peering_handshake_1(caf::stream, + const caf::actor& hdl, + const endpoint_id& remote_peer, + const filter_type& filter, + lamport_timestamp timestamp) { + BROKER_TRACE(BROKER_ARG(hdl) << BROKER_ARG(remote_peer) << BROKER_ARG(filter) + << BROKER_ARG(timestamp)); + if (is_direct_connection(tbl(), remote_peer)) { + BROKER_ERROR("drop peering handshake: already have a direct connection to" + << remote_peer); + return {}; + } else if (auto mgr = get_pending(remote_peer); mgr == nullptr) { + BROKER_ERROR("received open_stream_msg from an unknown responder"); + return {}; + } else if (mgr->handshake().remote_hdl != hdl) { + BROKER_ERROR("multiple peers share a single actor handle!"); + return {}; + } else { + if (mgr->handshake().originator_handle_open_stream_msg(filter, timestamp)) { + return {mgr->handshake().out}; + } else { + BROKER_ERROR("handshake failed:" << mgr->handshake().err); + return {}; + } + } +} + +void stream_transport::handle_peering_handshake_2( + caf::stream in, atom::ok, const caf::actor& hdl, + const endpoint_id& remote_peer, const filter_type& filter, + lamport_timestamp timestamp) { + BROKER_TRACE(BROKER_ARG(hdl) << BROKER_ARG(remote_peer) << BROKER_ARG(filter) + << BROKER_ARG(timestamp)); + if (auto mgr = get_pending(remote_peer); mgr == nullptr) { + BROKER_ERROR("received open_stream_msg from an unknown originator"); + } else if (mgr->handshake().remote_hdl != hdl) { + BROKER_ERROR("multiple peers share a single actor handle!"); + } else if (!mgr->handshake().responder_handle_open_stream_msg(filter, + timestamp)) { + BROKER_ERROR("handshake failed:" << mgr->handshake().err); + } +} + +// -- callbacks ---------------------------------------------------------------- + +void stream_transport::shutdown(shutdown_options options) { + BROKER_TRACE(BROKER_ARG(options)); + // TODO: honor wait-for-stores flag. 
+ auto drop_all = [](auto& container) { + if (!container.empty()) { + for (auto& sink : container) { + sink->unobserve(); + sink->push(); + } + container.clear(); + } + }; + tearing_down_ = true; + if (auto peers = peer_ids(); !peers.empty()) + for (auto& x : peers) + unpeer(x); + super::shutdown(options); + drop_all(data_sinks_); + drop_all(command_sinks_); +} + +// -- "overridden" member functions of alm::peer ------------------------------- + +void stream_transport::handle_filter_update(endpoint_id_list& path, + vector_timestamp& path_ts, + const filter_type& filter) { + BROKER_TRACE(BROKER_ARG(path) << BROKER_ARG(path_ts) << BROKER_ARG(filter)); + if (path.empty()) { + BROKER_WARNING("drop message: path empty"); + } else if (auto mgr = get_pending(path.back()); mgr && mgr->blocks_inputs()) { + auto msg = make_message(atom::subscribe_v, path, path_ts, filter); + mgr->add_blocked_input(std::move(msg)); + } else { + super::handle_filter_update(path, path_ts, filter); + } +} + +void stream_transport::handle_path_revocation(endpoint_id_list& path, + vector_timestamp& path_ts, + const endpoint_id& revoked_hop, + const filter_type& filter) { + BROKER_TRACE(BROKER_ARG(path) + << BROKER_ARG(path_ts) << BROKER_ARG(revoked_hop) + << BROKER_ARG(filter)); + if (path.empty()) { + BROKER_WARNING("drop message: path empty"); + } else if (auto mgr = get_pending(path.back()); mgr && mgr->blocks_inputs()) { + auto msg = make_message(atom::revoke_v, path, path_ts, revoked_hop, filter); + mgr->add_blocked_input(std::move(msg)); + } else { + super::handle_path_revocation(path, path_ts, revoked_hop, filter); + } +} + +// -- overrides for detail::central_dispatcher --------------------------------- + +void stream_transport::flush() { + for (auto& kvp: hdl_to_mgr_) + kvp.second->push(); + for (auto& sink : data_sinks_) + sink->push(); + for (auto& sink : command_sinks_) + sink->push(); +} + +template +void stream_transport::dispatch_impl(const T& msg) { + const auto& topic = 
get_topic(msg); + detail::prefix_matcher matches; + endpoint_id_list receivers; + for (const auto& [peer, filter] : peer_filters_) + if (matches(filter, topic)) + receivers.emplace_back(peer); + BROKER_DEBUG("got" << receivers.size() << "receiver for" << msg); + if (!receivers.empty()) { + std::vector paths; + std::vector unreachables; + alm::multipath::generate(receivers, tbl_, paths, unreachables); + for (auto&& path : paths) { + if (auto ptr = peer_lookup(path.head().id())) + ptr->enqueue(make_node_message(msg, std::move(path))); + else + BROKER_WARNING("cannot ship message: no direct path to" + << path.head().id()); + } + if (!unreachables.empty()) + BROKER_WARNING("cannot ship message: no path to any of" << unreachables); + } +} + +void stream_transport::dispatch(const data_message& msg) { + BROKER_TRACE(BROKER_ARG(msg)); + dispatch_impl(msg); +} + +void stream_transport::dispatch(const command_message& msg) { + BROKER_TRACE(BROKER_ARG(msg)); + dispatch_impl(msg); +} + +void stream_transport::dispatch(node_message&& msg) { + BROKER_TRACE(BROKER_ARG(id_)<(tup); + auto& path = get<1>(tup); + // Push to local subscribers if the message is addressed at this node and this + // node is a receiver. + if (path.head().id() != id_) { + if (auto ptr = peer_lookup(path.head().id())) { + ptr->enqueue(node_message{std::move(content), std::move(path)}); + } else { + BROKER_WARNING("cannot ship message: no direct connection to" + << path.head().id()); + } + } else { + if (path.head().is_receiver()) + publish_locally(content); + // Forward to all next hops. 
+ path.for_each_node([&](multipath&& nested) { + if (auto ptr = peer_lookup(nested.head().id())) { + ptr->enqueue(node_message{content, std::move(nested)}); + } else { + BROKER_WARNING("cannot ship message: no direct connection to" + << nested.head().id()); + } + }); + } +} + +// -- overrides for detail::unipath_manager::observer -------------------------- + +void stream_transport::closing(detail::unipath_manager* ptr, bool graceful, + const error& reason) { + BROKER_ASSERT(ptr != nullptr); + auto drop_from = [](auto& container, auto* dptr) { + auto pred = [dptr](const auto& entry) { return entry == dptr; }; + auto i = std::find_if(container.begin(), container.end(), pred); + if (i == container.end()) { + return false; + } else { + container.erase(i, container.end()); + return true; + } + }; + auto f = detail::make_overload( + [this, &reason](detail::peer_manager* dptr) { + if (auto i = mgr_to_hdl_.find(dptr); i != mgr_to_hdl_.end()) { + auto hdl = i->second; + drop_peer(hdl, reason); + } + }, + [this, drop_from](detail::unipath_data_sink* dptr) { + drop_from(data_sinks_, dptr); + }, + [this, drop_from](detail::unipath_command_sink* dptr) { + drop_from(command_sinks_, dptr); + }, + [](detail::unipath_source*) { + // nop + }); + std::visit(f, ptr->derived_ptr()); +} + +void stream_transport::downstream_connected(detail::unipath_manager* ptr, + const caf::actor&) { + auto f = detail::make_overload( + [](detail::peer_manager*) { + // Nothing to do. We add state in finalize_handshake. 
+ }, + [this](detail::unipath_data_sink* derived_ptr) { + data_sinks_.emplace_back(derived_ptr); + }, + [this](detail::unipath_command_sink* derived_ptr) { + command_sinks_.emplace_back(derived_ptr); + }, + [](detail::unipath_source*) { + BROKER_ERROR("downstream_connected called on a unipath_source"); + }); + std::visit(f, ptr->derived_ptr()); +} + +bool stream_transport::finalize_handshake(detail::peer_manager* mgr) { + BROKER_TRACE(""); + auto add_mapping = [this, mgr] { + auto hdl = mgr->handshake().remote_hdl; + auto [i, added] = hdl_to_mgr_.emplace(hdl, mgr); + if (!added) + return false; + if (mgr_to_hdl_.emplace(mgr, hdl).second) { + return true; + } else { + hdl_to_mgr_.erase(i); + return false; + } + }; + auto& hs = mgr->handshake(); + BROKER_ASSERT(hs.done()); + BROKER_ASSERT(hs.in != caf::invalid_stream_slot); + BROKER_ASSERT(hs.out != caf::invalid_stream_slot); + BROKER_ASSERT(hs.remote_hdl != nullptr); + BROKER_ASSERT(mgr->hdl() == hs.remote_hdl); + if (auto i = pending_.find(hs.remote_id); i != pending_.end()) { + BROKER_ASSERT(i->second == mgr); + pending_.erase(i); + if (!add_mapping()) { + BROKER_ERROR("failed to add mapping for the peer manager"); + return false; + } else if (is_direct_connection(tbl_, hs.remote_id)) { + BROKER_ERROR("tried to complete handshake for already connected peer"); + mgr_to_hdl_.erase(mgr); + hdl_to_mgr_.erase(hs.remote_hdl); + return false; + } else { + auto trigger_peer_discovered = !reachable(tbl_, hs.remote_id); + tbl_[hs.remote_id].hdl = hs.remote_hdl; + if (trigger_peer_discovered) + peer_discovered(hs.remote_id); + peer_connected(hs.remote_id, hs.remote_hdl); + auto path = std::vector{hs.remote_id}; + auto path_ts = vector_timestamp{hs.remote_timestamp}; + handle_filter_update(path, path_ts, hs.remote_filter); + return true; + } + } else { + BROKER_ERROR("finalize_handshake called but manager not found in pending_"); + return false; + } +} + +void stream_transport::abort_handshake(detail::peer_manager* mgr) { +
auto& hs = mgr->handshake(); + if (auto i = pending_.find(hs.remote_id); i != pending_.end()) { + BROKER_ASSERT(!mgr->unique()); + pending_.erase(i); + } +} + +// -- initialization ----------------------------------------------------------- + +caf::behavior stream_transport::make_behavior() { + using detail::lift; + return caf::message_handler{ + // Expose member functions to the messaging API. + lift(*this, + &stream_transport::handle_peering_request), + lift<>(*this, &stream_transport::handle_peering_handshake_1), + lift<>(*this, &stream_transport::handle_peering_handshake_2), + lift(*this, &stream_transport::add_worker), + lift(*this, &stream_transport::add_sending_worker), + lift(*this, &stream_transport::add_sending_store), + // Trigger peering to remotes. + [this](atom::peer, const endpoint_id& remote_peer, const caf::actor& hdl) { + start_peering(remote_peer, hdl, self()->make_response_promise()); + }, + // Per-stream subscription updates. + [this](atom::join, atom::update, caf::stream_slot slot, + filter_type& filter) { + update_filter(slot, std::move(filter)); + }, + [this](atom::join, atom::update, caf::stream_slot slot, filter_type& filter, + const caf::actor& listener) { + auto res = update_filter(slot, std::move(filter)); + self()->send(listener, res); + }, + // Allow local publishers to hook directly into the stream. + [this](caf::stream in) { + make_unipath_source(this, in); + }, + [this](caf::stream in) { + make_unipath_source(this, in); + }, + // Special handlers for bypassing streams and/or forwarding.
+ [this](atom::publish, atom::local, data_message& msg) { + publish_locally(msg); + }, + [this](atom::unpeer, const caf::actor& hdl) { unpeer(hdl); }, + [this](atom::unpeer, const endpoint_id& peer_id) { unpeer(peer_id); }, + } + .or_else(super::make_behavior()); +} + +// -- utility ------------------------------------------------------------------ + +bool stream_transport::update_filter(caf::stream_slot slot, + filter_type&& filter) { + auto predicate = [slot](const auto& mgr) { + return mgr->outbound_path_slot() == slot; + }; + auto fetch_from = [predicate](const auto& container) { + auto i = std::find_if(container.begin(), container.end(), predicate); + return std::make_pair(i, i != container.end()); + }; + if (auto [i, i_valid] = fetch_from(data_sinks_); i_valid) { + auto& mgr = *i; + subscribe(filter); + mgr->filter(std::move(filter)); + return true; + } else if (auto [j, j_valid] = fetch_from(command_sinks_); j_valid) { + auto& mgr = *j; + subscribe(filter); + mgr->filter(std::move(filter)); + return true; + } else { + return false; + } +} + +bool stream_transport::peer_cleanup(const endpoint_id& peer_id, + const error* reason) { + bool result = false; + // Check whether we disconnect from the peer during the handshake. 
+ if (auto mgr = get_pending(peer_id)) { + result = true; + mgr->handshake().fail(ec::peer_disconnect_during_handshake); + BROKER_ASSERT(get_pending(peer_id) == nullptr); + } else if (auto i = tbl_.find(peer_id); i != tbl_.end() && i->second.hdl) { + result = true; + auto hdl = i->second.hdl; + if (auto j = hdl_to_mgr_.find(hdl); j != hdl_to_mgr_.end()) { + j->second->unobserve(); + j->second->shutdown(); + mgr_to_hdl_.erase(j->second); + hdl_to_mgr_.erase(j); + } else { + BROKER_DEBUG("found peer in routing table but not in hdl_to_mgr_"); + } + if (reason) + peer_disconnected(peer_id, hdl, *reason); + else + peer_removed(peer_id, hdl); + erase_direct(tbl_, peer_id, + [this](const endpoint_id& whom) { peer_unreachable(whom); }); + } + return result; +} + +void stream_transport::drop_peer(const caf::actor& hdl, const error& reason) { + endpoint_id remote_id; + bool disconnected = false; + if (auto peer_id = get_peer_id(tbl_, hdl)) { + remote_id = *peer_id; + peer_cleanup(*peer_id, &reason); + } else { + auto has_hdl = [&hdl](const auto& kvp) { + return kvp.second->handshake().remote_hdl == hdl; + }; + auto i = std::find_if(pending_.begin(), pending_.end(), has_hdl); + if (i != pending_.end()) { + auto remote_id = i->second->handshake().remote_id; + peer_cleanup(remote_id, &reason); + } + } +} + +void stream_transport::unpeer(const endpoint_id& peer_id, + const caf::actor& hdl) { + BROKER_TRACE(BROKER_ARG(peer_id) << BROKER_ARG(hdl)); + if (!peer_cleanup(peer_id)) + cannot_remove_peer(peer_id); +} + +void stream_transport::unpeer(const endpoint_id& peer_id) { + BROKER_TRACE(BROKER_ARG(peer_id)); + if (auto i = tbl().find(peer_id); i != tbl().end()) { + auto hdl = i->second.hdl; + unpeer(peer_id, hdl); + } else if (auto ptr = get_pending(peer_id)) { + auto hdl = ptr->handshake().remote_hdl; + unpeer(peer_id, hdl); + } else { + cannot_remove_peer(peer_id); + } +} + +void stream_transport::unpeer(const caf::actor& hdl) { + BROKER_TRACE(BROKER_ARG(hdl)); + if (auto 
peer_id = get_peer_id(tbl(), hdl)) { + unpeer(*peer_id, hdl); + } else if (auto ptr = get_pending(hdl)) { + auto peer_id = ptr->handshake().remote_id; + unpeer(peer_id, hdl); + } else { + cannot_remove_peer(hdl); + } +} + +detail::peer_manager* +stream_transport::peer_lookup(const endpoint_id& peer_id) { + if (auto i = pending_.find(peer_id); i != pending_.end()) + return i->second.get(); + if (auto row = find_row(tbl_, peer_id)) + if (auto i = hdl_to_mgr_.find(row->hdl); i != hdl_to_mgr_.end()) + return i->second.get(); + return nullptr; +} + +} // namespace broker::alm diff --git a/src/broker-gateway.cc b/src/broker-gateway.cc new file mode 100644 index 00000000..b60a947f --- /dev/null +++ b/src/broker-gateway.cc @@ -0,0 +1,186 @@ +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "broker/configuration.hh" +#include "broker/domain_options.hh" +#include "broker/gateway.hh" + +using namespace broker; + +// -- local type aliases ------------------------------------------------------- + +using uri_list = std::vector; + +// -- I/O utility (TODO: copy-pasted from broker-node.cc -> consolidate) ------- + +namespace detail { + +namespace { + +std::mutex ostream_mtx; + +} // namespace + +int print_impl(std::ostream& ostr, const char* x) { + ostr << x; + return 0; +} + +int print_impl(std::ostream& ostr, const std::string& x) { + ostr << x; + return 0; +} + +int print_impl(std::ostream& ostr, const caf::term& x) { + ostr << x; + return 0; +} + +template +int print_impl(std::ostream& ostr, const T& x) { + return print_impl(ostr, caf::deep_to_string(x)); +} + +template +void println(std::ostream& ostr, Ts&&... xs) { + std::unique_lock guard{ostream_mtx}; + std::initializer_list{print_impl(ostr, std::forward(xs))...}; + ostr << caf::term::reset_endl; +} + +} // namespace detail + +namespace out { + +template +void println(Ts&&... 
xs) { + ::detail::println(std::cout, std::forward(xs)...); +} + +} // namespace out + +namespace err { + +template +void println(Ts&&... xs) { + ::detail::println(std::cerr, caf::term::red, std::forward(xs)...); +} + +} // namespace err + +namespace verbose { + +namespace { + +std::atomic enabled; + +} // namespace + +template +void println(Ts&&... xs) { + if (enabled) + ::detail::println(std::clog, caf::term::blue, + std::chrono::system_clock::now(), ": ", + std::forward(xs)...); +} + +} // namespace verbose + +// -- configuration ------------------------------------------------------------ + +class config : public configuration { +public: + using super = configuration; + + config() : super(skip_init) { + opt_group{custom_options_, "global"} // + .add("verbose,v", "print status and debug output") + .add("retry-interval", + "time between peering connection attempts in seconds"); + opt_group{custom_options_, "internal"} + .add("peers", + "list of peers to connect to on startup in " + "tcp://$host:$port notation") + .add("port", "local port to listen for incoming peerings ") + .add(internal.disable_forwarding, "disable-forwarding", + "disable peer-to-peer message forwarding in the internal domain"); + opt_group{custom_options_, "external"} + .add("peers", + "list of peers to connect to on startup in " + "tcp://$host:$port notation") + .add("port", "local port to listen for incoming peerings ") + .add(external.disable_forwarding, "disable-forwarding", + "disable peer-to-peer message forwarding in the external domain"); + } + + using super::init; + + domain_options internal; + + domain_options external; +}; + +// -- setup and main loop ------------------------------------------------------ + +int run(gateway& gw) { + using caf::get_as; + using caf::get_or; + auto& cfg = gw.config(); + auto try_listen = [&](caf::string_view key) { + if (auto local_port = get_as(cfg, key)) { + auto p = caf::starts_with(key, "internal.") + ? 
gw.listen_internal({}, *local_port) + : gw.listen_external({}, *local_port); + if (p == 0) { + err::println("unable to open port ", *local_port); + return false; + } + verbose::println("listen for peers on port ", p); + } + return true; + }; + if (!try_listen("internal.local-port") || !try_listen("external.local-port")) + return EXIT_FAILURE; + if (auto peering_failures + = gw.peer(get_or(cfg, "internal.peers", uri_list{}), + get_or(cfg, "external.peers", uri_list{}), + timeout::seconds{get_or(cfg, "retry-interval", size_t{10})}); + !peering_failures.empty()) { + for (const auto& [locator, reason] : peering_failures) + err::println("*** unable to peer with ", locator, ": ", reason); + } + out::println("*** gateway up and running, press to quit"); + getchar(); + return EXIT_SUCCESS; +} + +int main(int argc, char** argv) { + configuration::init_global_state(); + // Parse CLI parameters using our config. + config cfg; + try { + cfg.init(argc, argv); + } catch (std::exception& ex) { + err::println("*** unable to initialize config: ", ex.what()); + return EXIT_FAILURE; + } + if (cfg.cli_helptext_printed) + return EXIT_SUCCESS; + if (get_or(cfg, "verbose", false)) + verbose::enabled = true; + // Create gateway and run.
+ auto [internal, external] = std::tie(cfg.internal, cfg.external); + if (auto gw = gateway::make(std::move(cfg), internal, external)) { + return run(*gw); + } else { + err::println("*** unable to create gateway: ", gw.error()); + return EXIT_FAILURE; + } +} diff --git a/src/broker-node.cc b/src/broker-node.cc index 5acec22d..2d688fa4 100644 --- a/src/broker-node.cc +++ b/src/broker-node.cc @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -51,8 +52,6 @@ CAF_BEGIN_TYPE_ID_BLOCK(broker_node, id_block::broker::end) BROKER_NODE_ADD_ATOM(blocking, "blocking") BROKER_NODE_ADD_ATOM(generate, "generate") - BROKER_NODE_ADD_ATOM(ping, "ping") - BROKER_NODE_ADD_ATOM(pong, "pong") BROKER_NODE_ADD_ATOM(relay, "relay") BROKER_NODE_ADD_ATOM(stream, "stream") diff --git a/src/broker-pipe.cc b/src/broker-pipe.cc index 9bba8cdb..ce2fb32c 100644 --- a/src/broker-pipe.cc +++ b/src/broker-pipe.cc @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #pragma GCC diagnostic pop diff --git a/src/configuration.cc b/src/configuration.cc index 7d5afc56..24b697fc 100644 --- a/src/configuration.cc +++ b/src/configuration.cc @@ -15,9 +15,12 @@ #include #include "broker/address.hh" +#include "broker/alm/lamport_timestamp.hh" +#include "broker/alm/multipath.hh" #include "broker/config.hh" #include "broker/core_actor.hh" #include "broker/data.hh" +#include "broker/detail/retry_state.hh" #include "broker/endpoint.hh" #include "broker/internal_command.hh" #include "broker/port.hh" @@ -82,15 +85,26 @@ configuration::configuration(skip_init_t) { add_message_types(*this); // Add custom options to the CAF parser. 
opt_group{custom_options_, "?broker"} - .add(options_.disable_ssl, "disable_ssl", + .add(options_.disable_ssl, "disable-ssl", "forces Broker to use unencrypted communication") - .add(options_.ttl, "ttl", "drop messages after traversing TTL hops") + .add(options_.disable_forwarding, "disable-forwarding", + "if true, turns the endpoint into a leaf node") .add("recording-directory", "path for storing recorded meta information") .add("output-generator-file-cap", "maximum number of entries when recording published messages") .add("max-pending-inputs-per-source", "maximum number of items we buffer per peer or publisher"); + sync_options(); + opt_group{custom_options_, "broker.store"} + .add("tick-interval", + "time interval for advancing the local Lamport time") + .add("heartbeat-interval", + "number of ticks between heartbeat messages") + .add("nack-timeout", + "number of ticks before sending NACK messages") + .add("connection-timeout", + "number of heartbeats a remote store is allowed to miss"); // Ensure that we're only talking to compatible Broker instances. std::vector ids{"broker.v" + std::to_string(version::protocol)}; // Override CAF defaults. 
@@ -111,8 +125,7 @@ configuration::configuration(skip_init_t) { configuration::configuration(broker_options opts) : configuration(skip_init) { options_ = opts; - set("broker.ttl", opts.ttl); - put(content, "broker.forward", opts.forward); + sync_options(); init(0, nullptr); } @@ -187,13 +200,16 @@ void configuration::init(int argc, char** argv) { caf::settings configuration::dump_content() const { auto result = super::dump_content(); auto& grp = result["broker"].as_dictionary(); - put_missing(grp, "disable_ssl", options_.disable_ssl); - put_missing(grp, "ttl", options_.ttl); - put_missing(grp, "forward", options_.forward); + put_missing(grp, "disable-ssl", options_.disable_ssl); + put_missing(grp, "disable-forwarding", options_.disable_forwarding); if (auto path = get_as(content, "broker.recording-directory")) - put_missing(grp, "recording-directory", std::move(*path)); + put_missing(grp, "recording-directory", *path); if (auto cap = get_as(content, "broker.output-generator-file-cap")) put_missing(grp, "output-generator-file-cap", *cap); + namespace pb = broker::defaults::path_revocations; + auto& sub_grp = grp["path-revocations"].as_dictionary(); + put_missing(sub_grp, "aging-interval", pb::aging_interval); + put_missing(sub_grp, "max-age", pb::max_age); return result; } @@ -216,4 +232,9 @@ void configuration::init_global_state() { }); } +void configuration::sync_options() { + set("broker.disable-ssl", options_.disable_ssl); + set("broker.disable-forwarding", options_.disable_forwarding); +} + } // namespace broker diff --git a/src/core_actor.cc b/src/core_actor.cc index deed8aad..efedf1e1 100644 --- a/src/core_actor.cc +++ b/src/core_actor.cc @@ -20,288 +20,54 @@ #include #include -#include "broker/atoms.hh" -#include "broker/backend.hh" -#include "broker/backend_options.hh" -#include "broker/convert.hh" -#include "broker/defaults.hh" -#include "broker/detail/assert.hh" -#include "broker/detail/filesystem.hh" -#include "broker/detail/make_backend.hh" -#include 
"broker/endpoint.hh" -#include "broker/error.hh" -#include "broker/logger.hh" -#include "broker/peer_status.hh" -#include "broker/status.hh" -#include "broker/topic.hh" - -using namespace caf; +#include "broker/domain_options.hh" namespace broker { -core_state::core_state(caf::event_based_actor* ptr, - const filter_type& initial_filter, broker_options opts, - endpoint::clock* ep_clock) - : super(ep_clock, ptr, initial_filter), - options_(opts), - filter_(initial_filter) { - cache().set_use_ssl(!options_.disable_ssl); - // We monitor remote inbound peerings and local outbound peerings. - self_->set_down_handler([this](const caf::down_msg& down) { - if (!down.source) - ; // Ignore bogus message. - else if (auto hdl = caf::actor_cast(down.source)) - drop_peer(hdl, false, down.reason); - }); +core_state::core_state(caf::event_based_actor* self, filter_type initial_filter, + endpoint::clock* clock, const domain_options* adaptation) + : super(self, clock) { + id(self->node()); + if (adaptation && adaptation->disable_forwarding) + disable_forwarding(true); + if (!initial_filter.empty()) + subscribe(initial_filter); } -void core_state::update_filter_on_peers() { - BROKER_TRACE(""); - for_each_peer([this](const actor& hdl) { - self()->send(hdl, atom::update_v, filter_); - }); -} - -void core_state::subscribe(filter_type xs) { - BROKER_TRACE(BROKER_ARG(xs)); - // Status and error topics are internal topics. 
- auto internal_only = [](const topic& x) { - return x == topics::errors || x == topics::statuses - || topics::store_events.prefix_of(x); - }; - xs.erase(std::remove_if(xs.begin(), xs.end(), internal_only), xs.end()); - if (xs.empty()) - return; - if (filter_extend(filter_, xs)) { - BROKER_DEBUG("Changed filter to " << filter_); - update_filter_on_peers(); - super::subscribe(xs); - } -} - -bool core_state::has_remote_subscriber(const topic& x) noexcept { - return any_peer_manager([&x](const auto& mgr) { return mgr->accepts(x); }); -} - -void core_state::peer_connected(const peer_id_type& peer_id, - const communication_handle_type& hdl) { - super::peer_connected(peer_id, hdl); +core_state::~core_state() { + BROKER_DEBUG("core_state destroyed"); } caf::behavior core_state::make_behavior() { - return super::make_behavior( - // --- filter manipulation ------------------------------------------------- - [=](atom::subscribe, filter_type& f) { - BROKER_TRACE(BROKER_ARG(f)); - subscribe(std::move(f)); - }, - // --- peering requests from local actors, i.e., "step 0" ------------------ - [=](atom::peer, actor remote_core) { - auto remote_id = remote_core.node(); - start_peering(remote_id, std::move(remote_core), - self()->make_response_promise()); - }, - // --- 3-way handshake for establishing peering streams between A and B ---- - // --- A (this node) performs steps #1 and #3; B performs #2 and #4 -------- - // Step #1: - A demands B shall establish a stream back to A - // - A has subscribers to the topics `ts` - [=](atom::peer, filter_type& peer_ts, caf::actor& peer_hdl) { - BROKER_TRACE(BROKER_ARG(peer_ts) << BROKER_ARG(peer_hdl)); - using result_type = decltype(start_handshake(peer_hdl, peer_ts)); - // Reject anonymous peering requests. - if (peer_hdl == nullptr) { - BROKER_DEBUG("Drop anonymous peering request."); - return result_type{}; - } - // Drop repeated handshake requests. 
- if (connected_to(peer_hdl)) { - BROKER_WARNING("Drop peering request from already connected peer."); - return result_type{}; - } - BROKER_DEBUG("received handshake step #1" << BROKER_ARG(peer_hdl) - << BROKER_ARG(actor{self()})); - // Start CAF stream. - return start_handshake(peer_hdl, std::move(peer_ts)); - }, - // Step #2: B establishes a stream to A and sends its own filter - [=](const stream& in, filter_type& filter, - caf::actor& peer_hdl) { - BROKER_TRACE(BROKER_ARG(in) << BROKER_ARG(filter) << peer_hdl); - BROKER_DEBUG("received handshake step #2 from" - << peer_hdl << BROKER_ARG(actor{self()})); - // At this stage, we expect to have no path to the peer yet. - if (connected_to(peer_hdl)) { - BROKER_WARNING("Received unexpected or repeated step #2 handshake."); - return; - } - start_handshake(peer_hdl, std::move(filter)); - ack_peering(in, peer_hdl); - }, - // Step #3: - A establishes a stream to B - // - B has a stream to A and vice versa now - [=](const stream& in, ok_atom, caf::actor& peer_hdl) { - BROKER_TRACE(BROKER_ARG(in) << BROKER_ARG(peer_hdl)); - if (!pending_connections().count(peer_hdl)) { - BROKER_ERROR("Received a step #3 handshake, but no #1 previously."); - } else if (ack_peering(in, peer_hdl)) - try_finalize_handshake(peer_hdl); - else - BROKER_DEBUG("Drop (repeated?) 
step #3 handshake."); - }, - // --- asynchronous communication to peers --------------------------------- - [=](atom::update, filter_type f) { - BROKER_TRACE(BROKER_ARG(f)); - auto p = caf::actor_cast(self()->current_sender()); - if (p == nullptr) { - BROKER_DEBUG("Received anonymous filter update."); - return; - } - if (!update_peer(p, std::move(f))) - BROKER_DEBUG("Cannot update filter of unknown peer:" << to_string(p)); - }, - // --- communication to local actors: incoming streams and subscriptions --- - [=](atom::join, filter_type& filter) { - BROKER_TRACE(BROKER_ARG(filter)); - auto result = add_worker(filter); - if (result != invalid_stream_slot) - subscribe(std::move(filter)); - return result; - }, - [=](atom::join, atom::update, stream_slot slot, filter_type& filter) { - subscribe(filter); - set_filter(slot, std::move(filter)); - }, - [=](atom::join, atom::update, stream_slot slot, filter_type& filter, - caf::actor& who_asked) { - subscribe(filter); - set_filter(slot, std::move(filter)); - self()->send(who_asked, true); - }, - [=](atom::join, atom::store, const filter_type& filter) { - return add_sending_store(filter); - }, - [=](endpoint::stream_type in) { - BROKER_TRACE("add data_message input stream"); - detail::make_source(&dispatcher_, in); - }, - [=](stream in) { - BROKER_TRACE("add node_message::value_type input stream"); - detail::make_source(&dispatcher_, in); - }, - [=](atom::publish, data_message& x) { - BROKER_TRACE(BROKER_ARG(x)); - publish(std::move(x)); - }, - [=](atom::publish, command_message& x) { - BROKER_TRACE(BROKER_ARG(x)); - publish(std::move(x)); - }, - // --- communication to local actors only, i.e., never forward to peers ---- - [=](atom::publish, atom::local, data_message& x) { - BROKER_TRACE(BROKER_ARG(x)); - local_push(std::move(x)); - }, - [=](atom::publish, atom::local, command_message& x) { - BROKER_TRACE(BROKER_ARG(x)); - local_push(std::move(x)); - }, - // --- "one-to-one" communication that bypasses streaming entirely 
--------- - [=](atom::publish, endpoint_info& e, data_message& x) { - BROKER_TRACE(BROKER_ARG(e) << BROKER_ARG(x)); - actor hdl; - if (e.network) { - auto tmp = cache().find(*e.network); - if (tmp) - hdl = std::move(*tmp); - } - if (!hdl) { - auto predicate = [&](const actor& x) { return x.node() == e.node; }; - hdl = find_output_peer_hdl(std::move(predicate)); - if (!hdl) { - BROKER_ERROR("no node found for endpoint info" << e); - return; - } - } - self()->send(hdl, atom::publish_v, atom::local_v, std::move(x)); - }, - // --- accessors ----------------------------------------------------------- + self_->set_exit_handler([self{self_}](caf::exit_msg& msg) { + if (msg.reason) { + BROKER_DEBUG("shutting down after receiving an exit message with reason:" + << msg.reason); + self->quit(std::move(msg.reason)); + } + }); + auto& cfg = self_->system().config(); + cache().set_use_ssl(!caf::get_or(cfg, "broker.disable-ssl", false)); + return caf::message_handler{ [=](atom::get, atom::peer) { std::vector result; - auto add = [&](actor hdl, peer_status status) { - peer_info tmp; - tmp.status = status; - tmp.flags = peer_flags::remote + peer_flags::inbound - + peer_flags::outbound; - tmp.peer.node = hdl.node(); - auto addrs = cache().find(hdl); - // the peer_info only holds a single address, so ... pick first? - if (addrs) - tmp.peer.network = *addrs; - result.emplace_back(std::move(tmp)); - }; - // collect connected peers - for_each_peer([&](const actor& hdl) { - add(hdl, peer_status::peered); - }); - // collect pending peers - for (const auto& kvp : pending_connections()) - add(kvp.first, peer_status::connecting); - return result; - }, - [=](atom::get, atom::peer, atom::subscriptions) { - std::vector result; - // Collect filters for all peers. - for_each_filter([&](auto x) { - result.insert(result.end(), std::make_move_iterator(x.begin()), - std::make_move_iterator(x.end())); + // Add all direct connections from the routing table. 
+ alm::for_each_direct(tbl(), [&, this](const auto& id, const auto& hdl) { + endpoint_info ep{id, cache().find(hdl)}; + result.push_back( + {std::move(ep), peer_flags::remote, peer_status::peered}); }); - // Sort and drop duplicates. - std::sort(result.begin(), result.end()); - auto e = std::unique(result.begin(), result.end()); - if (e != result.end()) - result.erase(e, result.end()); + // TODO: implement me + // // Add all pending peerings from the stream transport. + // for (const auto& [peer_id, pending_conn] : pending_connections()) { + // endpoint_info ep{peer_id, cache().find(pending_conn->remote_hdl)}; + // result.push_back( + // {std::move(ep), peer_flags::remote, peer_status::connected}); + // } return result; }, - // --- destructive state manipulations ------------------------------------- - [=](atom::unpeer, actor x) { unpeer(x); }, - [=](atom::shutdown) { - self()->quit(exit_reason::user_shutdown); - /* -- To consider: - -- Terminating the actor after receiving shutdown unconditionally can - -- cause already published data to not getting forwarded. The - -- following code implements a more complicated shutdown procedure - -- that would make sure data is transmitted before shutting down. - -- However, this is often undesirable because it can take an arbitrary - -- long time. Also, the current implementation does not terminate in - -- all cases, i.e., seems not bug-free. - auto& st = self->state; - st.shutting_down = true; - // Shutdown immediately if no local sink or source is connected. - if (st.policy().at_end()) { - BROKER_DEBUG("Terminate core actor after receiving 'shutdown'"); - self->quit(exit_reason::user_shutdown); - return; - } - // Wait until local sinks and sources are done, but no longer respond to - // any future message. 
- BROKER_DEBUG("Delay termination of core actor after receiving " - "'shutdown' until local sinks and sources are done; " - "workers.size:" << st.policy().workers().num_paths() - << ", stores.size:" << st.policy().stores().num_paths()); - self->set_default_handler(caf::drop); - self->become( - [] { - // Dummy behavior to keep the actor alive but unresponsive. - } - ); - */ - }, - [=](atom::shutdown, atom::store) { - for (auto& kvp : masters_) - self()->send_exit(kvp.second, caf::exit_reason::user_shutdown); - for (auto& kvp : clones_) - self()->send_exit(kvp.second, caf::exit_reason::user_shutdown); - }); + } + .or_else(super::make_behavior()); } } // namespace broker diff --git a/src/data.cc b/src/data.cc index 4a10e564..55d8f7e9 100644 --- a/src/data.cc +++ b/src/data.cc @@ -5,179 +5,89 @@ #include "broker/convert.hh" -namespace broker { - -struct type_name_getter { - using result_type = const char*; - - result_type operator()(broker::address) { - return "address"; - } - - result_type operator()(broker::boolean) { - return "boolean"; - } - - result_type operator()(broker::count) { - return "count"; - } - - result_type operator()(broker::enum_value) { - return "enum value"; - } - - result_type operator()(broker::integer) { - return "integer"; - } - - result_type operator()(broker::none) { - return "none"; - } - - result_type operator()(broker::port) { - return "port"; - } - - result_type operator()(broker::real) { - return "real"; - } - - result_type operator()(broker::set) { - return "set"; - } - - result_type operator()(std::string) { - return "string"; - } - - result_type operator()(broker::subnet) { - return "subnet"; - } - - result_type operator()(broker::table) { - return "table"; - } - - result_type operator()(broker::timespan) { - return "timespan"; - } +namespace { - result_type operator()(broker::timestamp) { - return "timestamp"; - } +using namespace broker; - result_type operator()(broker::vector) { - return "vector"; - } +constexpr const char* 
data_type_names[] = { + "none", "boolean", "count", "integer", "real", + "string", "address", "subnet", "port", "timestamp", + "timespan", "enum_value", "set", "table", "vector", }; -struct type_getter { - using result_type = data::type; - - result_type operator()(broker::address) { - return data::type::address; - } - - result_type operator()(broker::boolean) { - return data::type::boolean; - } - - result_type operator()(broker::count) { - return data::type::count; - } - - result_type operator()(broker::enum_value) { - return data::type::enum_value; - } - - result_type operator()(broker::integer) { - return data::type::integer; - } - - result_type operator()(broker::none) { - return data::type::none; - } - - result_type operator()(broker::port) { - return data::type::port; - } - - result_type operator()(broker::real) { - return data::type::real; - } - - result_type operator()(broker::set) { - return data::type::set; - } - - result_type operator()(std::string) { - return data::type::string; - } - - result_type operator()(broker::subnet) { - return data::type::subnet; - } +constexpr int ival_of(broker::data::type x) { + return static_cast(x); +} - result_type operator()(broker::table) { - return data::type::table; - } +template +constexpr int pos_of() { + return caf::detail::tl_index_of::value; +} - result_type operator()(broker::timespan) { - return data::type::timespan; - } +// Make sure the static_cast in data::get_type is safe. +static_assert(ival_of(data::type::none) == pos_of()); +static_assert(ival_of(data::type::boolean) == pos_of()); +static_assert(ival_of(data::type::count) == pos_of()); +static_assert(ival_of(data::type::integer) == pos_of()); +static_assert(ival_of(data::type::real) == pos_of()); +static_assert(ival_of(data::type::string) == pos_of()); +static_assert(ival_of(data::type::address) == pos_of
()); +static_assert(ival_of(data::type::subnet) == pos_of()); +static_assert(ival_of(data::type::port) == pos_of()); +static_assert(ival_of(data::type::timestamp) == pos_of()); +static_assert(ival_of(data::type::timespan) == pos_of()); +static_assert(ival_of(data::type::enum_value) == pos_of()); +static_assert(ival_of(data::type::set) == pos_of()); +static_assert(ival_of(data::type::table) == pos_of
()); +static_assert(ival_of(data::type::vector) == pos_of()); - result_type operator()(broker::timestamp) { - return data::type::timestamp; - } +} // namespace - result_type operator()(broker::vector) { - return data::type::vector; - } -}; +namespace broker { data::type data::get_type() const { - return caf::visit(type_getter(), get_data()); + return static_cast(data_.index()); } data data::from_type(data::type t) { - switch ( t ) { - case data::type::address: - return broker::address{}; - case data::type::boolean: - return broker::boolean{}; - case data::type::count: - return broker::count{}; - case data::type::enum_value: - return broker::enum_value{}; - case data::type::integer: - return broker::integer{}; - case data::type::none: - return broker::data{}; - case data::type::port: - return broker::port{}; - case data::type::real: - return broker::real{}; - case data::type::set: - return broker::set{}; - case data::type::string: - return std::string{}; - case data::type::subnet: - return broker::subnet{}; - case data::type::table: - return broker::table{}; - case data::type::timespan: - return broker::timespan{}; - case data::type::timestamp: - return broker::timestamp{}; - case data::type::vector: - return broker::vector{}; - default: - return data{}; + switch (t) { + case data::type::address: + return broker::address{}; + case data::type::boolean: + return broker::boolean{}; + case data::type::count: + return broker::count{}; + case data::type::enum_value: + return broker::enum_value{}; + case data::type::integer: + return broker::integer{}; + case data::type::none: + return broker::data{}; + case data::type::port: + return broker::port{}; + case data::type::real: + return broker::real{}; + case data::type::set: + return broker::set{}; + case data::type::string: + return std::string{}; + case data::type::subnet: + return broker::subnet{}; + case data::type::table: + return broker::table{}; + case data::type::timespan: + return broker::timespan{}; + case 
data::type::timestamp: + return broker::timestamp{}; + case data::type::vector: + return broker::vector{}; + default: + return data{}; } } const char* data::get_type_name() const { - return caf::visit(type_name_getter(), *this); + return data_type_names[data_.index()]; } namespace { @@ -205,6 +115,19 @@ struct data_converter { return convert(x, str); } + result_type operator()(timespan ts) { + if (convert(ts.count(), str)) { + str += "ns"; + return true; + } else { + return false; + } + } + + result_type operator()(timestamp ts) { + return (*this)(ts.time_since_epoch()); + } + result_type operator()(bool b) { str = b ? 'T' : 'F'; return true; diff --git a/src/defaults.cc b/src/defaults.cc index 9bbef9fe..ab99e930 100644 --- a/src/defaults.cc +++ b/src/defaults.cc @@ -11,10 +11,32 @@ const caf::string_view recording_directory = ""; const size_t output_generator_file_cap = std::numeric_limits::max(); +const caf::timespan await_peer_timeout = 10s; + } // namespace broker::defaults namespace broker::defaults::store { +// Run with 20 ticks per second. const caf::timespan tick_interval = 50ms; +// Send 5 heartbeats per second. +const uint16_t heartbeat_interval = 4; + +// Wait up to 100ms before sending NACK messages. +const uint16_t nack_timeout = 2; + +// Disconnect channels when not hearing anything from the remote side for 1s. 
+const uint16_t connection_timeout = 5; + +const caf::timespan await_idle_timeout = 5s; + } // namespace broker::defaults::store + +namespace broker::defaults::path_revocations { + +const caf::timespan aging_interval = 1s; + +const caf::timespan max_age = 5min; + +} // namespace broker::defaults::path_revocations diff --git a/src/detail/central_dispatcher.cc b/src/detail/central_dispatcher.cc index f3691fa8..5da3a089 100644 --- a/src/detail/central_dispatcher.cc +++ b/src/detail/central_dispatcher.cc @@ -1,26 +1,20 @@ #include "broker/detail/central_dispatcher.hh" +#include "broker/detail/overload.hh" #include "broker/logger.hh" #include "broker/message.hh" namespace broker::detail { -central_dispatcher::central_dispatcher(caf::scheduled_actor* self) - : self_(self) { +central_dispatcher::~central_dispatcher() { // nop } -void central_dispatcher::enqueue(const unipath_manager* source, - item_scope scope, - caf::span xs) { - BROKER_DEBUG("central enqueue" << BROKER_ARG(scope) - << BROKER_ARG2("xs.size", xs.size())); - auto f = [&](auto& sink) { return !sink->enqueue(source, scope, xs); }; - sinks_.erase(std::remove_if(sinks_.begin(), sinks_.end(), f), sinks_.end()); -} - -void central_dispatcher::add(unipath_manager_ptr sink) { - sinks_.emplace_back(std::move(sink)); +void central_dispatcher::dispatch(const node_message_content& msg) { + if (is_data_message(msg)) + dispatch(get_data_message(msg)); + else + dispatch(get_command_message(msg)); } } // namespace broker::detail diff --git a/src/detail/clone_actor.cc b/src/detail/clone_actor.cc index 2894bf0e..33066cd2 100644 --- a/src/detail/clone_actor.cc +++ b/src/detail/clone_actor.cc @@ -15,6 +15,7 @@ #include "broker/atoms.hh" #include "broker/convert.hh" #include "broker/data.hh" +#include "broker/defaults.hh" #include "broker/error.hh" #include "broker/store.hh" #include "broker/topic.hh" @@ -24,18 +25,26 @@ #include -namespace broker { -namespace detail { +namespace broker::detail { static double 
now(endpoint::clock* clock) { auto d = clock->now().time_since_epoch(); return std::chrono::duration_cast>(d).count(); } -void clone_state::init(caf::event_based_actor* ptr, std::string&& nm, - caf::actor&& parent, endpoint::clock* ep_clock) { - super::init(ptr, ep_clock, std::move(nm), std::move(parent)); - master_topic = id / topics::master_suffix; +// -- initialization ----------------------------------------------------------- + +clone_state::clone_state() : input(this) { + // nop +} + +void clone_state::init(caf::event_based_actor* ptr, endpoint_id this_endpoint, + std::string&& nm, caf::actor&& parent, + endpoint::clock* ep_clock) { + super::init(ptr, std::move(this_endpoint), ep_clock, std::move(nm), + std::move(parent)); + master_topic = store_name / topics::master_suffix; + super::init(input); } void clone_state::forward(internal_command&& x) { @@ -43,19 +52,123 @@ void clone_state::forward(internal_command&& x) { make_command_message(master_topic, std::move(x))); } -void clone_state::command(internal_command::variant_type& cmd) { - caf::visit(*this, cmd); +void clone_state::dispatch(command_message& msg) { + BROKER_TRACE(BROKER_ARG(msg)); + // Here, we receive all command messages from the stream. The first step is + // figuring out whether the received message stems from a writer or master. + // + // Clones can only send control messages (they are always consumers). Writers + // can send us either actions or control messages (they are producers). + auto& cmd = get_command(msg); + auto seq = cmd.seq; + auto tag = detail::tag_of(cmd); + auto type = detail::type_of(cmd); + if (input.initialized() && cmd.sender != input.producer()) { + BROKER_WARNING( + "received command message from unrecognized sender: " << cmd.sender); + return; + } + switch (tag) { + case command_tag::action: { + // Action messages from the master. + input.handle_event(seq, std::move(msg)); + break; + } + case command_tag::producer_control: { + // Control messages from the master. 
+ switch (type) { + case internal_command::type::ack_clone_command: { + auto& inner = get(cmd.content); + if (input.handle_handshake(cmd.sender, inner.offset, + inner.heartbeat_interval)) { + BROKER_DEBUG("received ack_clone from" << cmd.sender); + set_store(std::move(inner.state)); + if (output_ptr) { + output_ptr->stalled = false; + output_ptr->trigger_handshakes(); + for (auto& msg : output_ptr->buf()) + broadcast(output_ptr.get(), msg); + } + } else { + BROKER_DEBUG("ignored repeated ack_clone from" << cmd.sender); + } + break; + } + case internal_command::type::keepalive_command: { + if (!input.initialized()) { + BROKER_DEBUG("ignored keepalive: input not initialized yet"); + break; + } + auto& inner = get(cmd.content); + BROKER_DEBUG("keepalive from master:" + << BROKER_ARG2("input.next_seq", input.next_seq()) + << BROKER_ARG2("input.last_seq", input.last_seq()) + << BROKER_ARG2("cmd.seq", inner.seq)); + input.handle_heartbeat(inner.seq); + break; + } + case internal_command::type::retransmit_failed_command: { + if (!input.initialized()) + break; + auto& inner = get(cmd.content); + input.handle_retransmit_failed(inner.seq); + break; + } + default: { + BROKER_ERROR("received unexpected producer control message:" << cmd); + } + } + break; + } + default: { + BROKER_ASSERT(tag == command_tag::consumer_control); + if (!output_ptr) { + BROKER_DEBUG("received control message for a non-existing channel"); + break; + } + // Control messages from the master for the writer. + switch (type) { + case internal_command::type::cumulative_ack_command: { + auto& inner = get(cmd.content); + // We create the path with entity_id::nil(), but the channel still + // cares about the handle. Hence, we need to set the actual handle + // once once the master responds with an ACK. 
+ if (auto i = output_ptr->find_path(entity_id::nil()); + i != output_ptr->paths().end()) { + i->hdl = cmd.sender; + BROKER_DEBUG("received ACK from the master for the writer"); + } + output_ptr->handle_ack(cmd.sender, inner.seq); + break; + } + case internal_command::type::nack_command: { + auto& inner = get(cmd.content); + output_ptr->handle_nack(cmd.sender, inner.seqs); + break; + } + default: { + BROKER_ERROR("received bogus consumer control message:" << cmd); + } + } + } + } } -void clone_state::command(internal_command& cmd) { - command(cmd.content); +void clone_state::tick() { + BROKER_TRACE(""); + input.tick(); + if (output_ptr && !output_ptr->stalled) + output_ptr->tick(); } -void clone_state::operator()(none) { - BROKER_WARNING("received empty command"); +// -- callbacks for the consumer ----------------------------------------------- + +void clone_state::consume(consumer_type*, command_message& msg) { + auto f = [this](auto& cmd) { consume(cmd); }; + caf::visit(f, get<1>(msg.unshared()).content); } -void clone_state::operator()(put_command& x) { +void clone_state::consume(put_command& x) { BROKER_INFO("PUT" << x.key << "->" << x.value << "with expiry" << x.expiry); if (auto i = store.find(x.key); i != store.end()) { auto& value = i->second; @@ -68,54 +181,179 @@ void clone_state::operator()(put_command& x) { } } -void clone_state::operator()(put_unique_command& x) { - BROKER_ERROR("clone received put_unique_command"); +void clone_state::consume(put_unique_result_command& cmd) { + local_request_key key{cmd.who, cmd.req_id}; + if (auto i = local_requests.find(key); i != local_requests.end()) { + i->second.deliver(data{cmd.inserted}, cmd.req_id); + local_requests.erase(i); + } } -void clone_state::operator()(erase_command& x) { +void clone_state::consume(erase_command& x) { BROKER_INFO("ERASE" << x.key); if (store.erase(x.key) != 0) emit_erase_event(x.key, x.publisher); } -void clone_state::operator()(expire_command& x) { +void 
clone_state::consume(expire_command& x) { BROKER_INFO("EXPIRE" << x.key); if (store.erase(x.key) != 0) emit_expire_event(x.key, x.publisher); } -void clone_state::operator()(add_command&) { - BROKER_ERROR("clone received add_command"); +void clone_state::consume(clear_command& x) { + BROKER_INFO("CLEAR"); + for (auto& kvp : store) + emit_erase_event(kvp.first, x.publisher); + store.clear(); } -void clone_state::operator()(subtract_command&) { - BROKER_ERROR("clone received subtract_command"); +error clone_state::consume_nil(consumer_type* src) { + BROKER_ERROR("clone out of sync: lost message from the master!"); + // By returning an error, we cause the channel to abort and call `close`. + return ec::broken_clone; } -void clone_state::operator()(snapshot_command&) { - BROKER_ERROR("received SNAPSHOT in a clone"); +void clone_state::close(consumer_type* src, [[maybe_unused]] error reason) { + BROKER_ERROR(BROKER_ARG(reason)); + // TODO: send some 'bye, bye' message to enable the master to remove this + // clone early, rather than waiting for timeout, see: + // https://github.com/zeek/broker/issues/142 } -void clone_state::operator()(snapshot_sync_command& x) { - if (x.remote_clone == self) - awaiting_snapshot_sync = false; +void clone_state::send(consumer_type* ptr, channel_type::cumulative_ack ack) { + BROKER_DEBUG(BROKER_ARG(ack)); + auto msg = make_command_message( + master_topic, internal_command{0, id, cumulative_ack_command{ack.seq}}); + self->send(core, atom::publish_v, std::move(msg), ptr->producer().endpoint); +} + +void clone_state::send(consumer_type* ptr, channel_type::nack nack) { + BROKER_DEBUG(BROKER_ARG(nack)); + auto msg = make_command_message( + master_topic, internal_command{0, id, nack_command{std::move(nack.seqs)}}); + if (ptr->initialized()) + self->send(core, atom::publish_v, std::move(msg), ptr->producer().endpoint); + else + self->send(core, atom::publish_v, std::move(msg)); +} + +// -- callbacks for the producer 
----------------------------------------------- + +void clone_state::send(producer_type* ptr, const entity_id&, + const channel_type::event& what) { + BROKER_TRACE(BROKER_ARG(what) << BROKER_ARG2("stalled", ptr->stalled)); + if (ptr->stalled) + return; + BROKER_ASSERT(what.seq == get_command(what.content).seq); + self->send(core, atom::publish_v, what.content); +} + +void clone_state::send(producer_type* ptr, const entity_id&, + channel_type::handshake what) { + BROKER_TRACE(BROKER_ARG(what) << BROKER_ARG2("stalled", ptr->stalled)); + if (ptr->stalled) + return; + auto msg = make_command_message( + master_topic, + internal_command{ + 0, id, attach_writer_command{what.offset, what.heartbeat_interval}}); + self->send(core, atom::publish_v, std::move(msg)); +} + +void clone_state::send(producer_type* ptr, const entity_id&, + channel_type::retransmit_failed what) { + BROKER_TRACE(BROKER_ARG(what) << BROKER_ARG2("stalled", ptr->stalled)); + if (ptr->stalled) + return; + auto msg = make_command_message( + master_topic, internal_command{0, id, retransmit_failed_command{what.seq}}); + self->send(core, atom::publish_v, std::move(msg)); +} + +void clone_state::broadcast(producer_type* ptr, channel_type::heartbeat what) { + BROKER_TRACE(BROKER_ARG(what) << BROKER_ARG2("stalled", ptr->stalled)); + if (ptr->stalled) + return; + // Re-send handshakes as well. Usually, the keepalive message also acts as + // handshake. However, the master did not open the channel in this case. We + // first need to create it by sending `attach_writer_command`. Everything + // received before this attach message is going to be ignored by the master. 
+ for (auto& path : ptr->paths()) { + if (path.acked == 0) { + BROKER_DEBUG("re-send attach_writer_command"); + send(ptr, path.hdl, + channel_type::handshake{path.offset, ptr->heartbeat_interval()}); + } + } + auto msg = make_command_message( + master_topic, + internal_command{0, entity_id::from(self), keepalive_command{what.seq}}); + self->send(core, atom::publish_v, std::move(msg)); +} + +void clone_state::broadcast(producer_type* ptr, + const channel_type::event& what) { + BROKER_TRACE(BROKER_ARG(what) << BROKER_ARG2("stalled", ptr->stalled)); + if (ptr->stalled) + return; + BROKER_ASSERT(what.seq == get_command(what.content).seq); + self->send(core, atom::publish_v, what.content); +} + +void clone_state::drop(producer_type*, const entity_id&, + [[maybe_unused]] ec reason) { + BROKER_TRACE(BROKER_ARG(reason)); + // TODO: see comment in close() +} + +void clone_state::handshake_completed(producer_type*, const entity_id&) { + BROKER_DEBUG("completed handshake between writer and master for store" + << store_name); +} + +// -- properties --------------------------------------------------------------- + +data clone_state::keys() const { + set result; + for (auto& kvp : store) + result.emplace(kvp.first); + return result; } -void clone_state::operator()(set_command& x) { - BROKER_INFO("SET" << x.state); +clone_state::producer_type& clone_state::output() { + // TODO: we only use a pointer here, because caf::optional lacks the `emplace` + // member function. Either add that member function upstream or use + // std::optional instead if all supported platforms support it. + if (!output_ptr) { + BROKER_DEBUG("add output channel to clone " << store_name); + output_ptr.reset(new producer_type(this)); + super::init(*output_ptr); + // Remove `stalled` flag immediately if the input is ready. + if (input.initialized()) + output_ptr->stalled = false; + // We reach the master by publishing to its topic. Hence, we actually don't + // need a handle to it at all for the writer. 
+ output_ptr->add(entity_id::nil()); + } + return *output_ptr; +} + +void clone_state::set_store(std::unordered_map x) { + BROKER_INFO("SET" << x); // We consider the master the source of all updates. - publisher_id publisher{master.node(), master.id()}; + entity_id publisher = input.producer(); // Short-circuit messages with an empty state. - if (x.state.empty()) { + if (x.empty()) { if (!store.empty()) { - clear_command cmd{publisher}; - (*this)(cmd); + clear_command cmd{std::move(publisher)}; + consume(cmd); } return; } if (store.empty()) { // Emit insert events. - for (auto& [key, value] : x.state) + for (auto& [key, value] : x) emit_insert_event(key, value, nil, publisher); } else { // Emit erase and put events. @@ -123,12 +361,12 @@ void clone_state::operator()(set_command& x) { keys.reserve(store.size()); for (auto& kvp : store) keys.emplace_back(&kvp.first); - auto is_erased = [&x](const data* key) { return x.state.count(*key) == 0; }; + auto is_erased = [&x](const data* key) { return x.count(*key) == 0; }; auto p = std::partition(keys.begin(), keys.end(), is_erased); for (auto i = keys.begin(); i != p; ++i) - emit_erase_event(**i, publisher_id{}); + emit_erase_event(**i, entity_id{}); for (auto i = p; i != keys.end(); ++i) { - const auto& value = x.state[**i]; + const auto& value = x[**i]; emit_update_event(**i, store[**i], value, nil, publisher); } // Emit insert events. @@ -138,210 +376,104 @@ void clone_state::operator()(set_command& x) { return false; return true; }; - for (const auto& [key, value] : x.state) + for (const auto& [key, value] : x) if (is_new(key)) emit_insert_event(key, value, nil, publisher); } // Override local state. 
- store = std::move(x.state); + store = std::move(x); } -void clone_state::operator()(clear_command& x) { - BROKER_INFO("CLEAR"); - for (auto& kvp : store) - emit_erase_event(kvp.first, x.publisher); - store.clear(); +bool clone_state::has_master() const noexcept { + return input.initialized(); } -data clone_state::keys() const { - set result; - for (auto& kvp : store) - result.emplace(kvp.first); - return result; +bool clone_state::idle() const noexcept { + return input.idle() && (!output_ptr || output_ptr->idle()); } +// -- master actor ------------------------------------------------------------- + caf::behavior clone_actor(caf::stateful_actor* self, - caf::actor core, std::string id, - double resync_interval, double stale_interval, + endpoint_id this_endpoint, caf::actor core, + std::string store_name, double resync_interval, + double stale_interval, double mutation_buffer_interval, endpoint::clock* clock) { + // Setup. self->monitor(core); - self->state.init(self, std::move(id), std::move(core), clock); - self->set_down_handler( - [=](const caf::down_msg& msg) { - if (msg.source == core) { - BROKER_INFO("core is down, kill clone as well"); - self->quit(msg.reason); - } else { - BROKER_INFO("lost master"); - self->state.master = nullptr; - self->state.awaiting_snapshot = true; - self->state.awaiting_snapshot_sync = true; - self->state.pending_remote_updates.clear(); - self->state.pending_remote_updates.shrink_to_fit(); - self->send(self, atom::master_v, atom::resolve_v); - - if ( stale_interval >= 0 ) - { - self->state.stale_time = now(clock) + stale_interval; - auto si = std::chrono::duration(stale_interval); - auto ts = std::chrono::duration_cast(si); - auto msg = caf::make_message(atom::tick_v, - atom::stale_check_v); - clock->send_later(self, ts, std::move(msg)); - } - - if ( mutation_buffer_interval > 0 ) - { - self->state.unmutable_time = now(clock) + mutation_buffer_interval; - auto si = std::chrono::duration(mutation_buffer_interval); - auto ts = 
std::chrono::duration_cast(si); - auto msg = caf::make_message(atom::tick_v, - atom::mutable_check_v); - clock->send_later(self, ts, std::move(msg)); - } - } - } - ); - - if ( mutation_buffer_interval > 0 ) - { - self->state.unmutable_time = now(clock) + mutation_buffer_interval; - auto si = std::chrono::duration(mutation_buffer_interval); - auto ts = std::chrono::duration_cast(si); - auto msg = caf::make_message(atom::tick_v, - atom::mutable_check_v); - clock->send_later(self, ts, std::move(msg)); - } - - self->send(self, atom::master_v, atom::resolve_v); - - return { + self->state.init(self, std::move(this_endpoint), std::move(store_name), + std::move(core), clock); + self->set_down_handler([=](const caf::down_msg& msg) { + self->state.on_down_msg(msg.source, msg.reason); + }); + // Ask the master to add this clone. + self->state.send(std::addressof(self->state.input), + clone_state::channel_type::nack{{0}}); + // Schedule first tick. + clock->send_later(self, defaults::store::tick_interval, + caf::make_message(atom::tick_v)); + return self->state.make_behavior( // --- local communication ------------------------------------------------- - [=](atom::local, internal_command& x) { - if ( self->state.master ) - { - // forward all commands to the master - self->state.forward(std::move(x)); - return; - } - - if ( mutation_buffer_interval <= 0 ) - return; - - if ( now(clock) >= self->state.unmutable_time ) - return; - - self->state.mutation_buffer.emplace_back(std::move(x)); - }, - [=](set_command& x) { - self->state(x); - self->state.awaiting_snapshot = false; - - if ( ! 
self->state.awaiting_snapshot_sync ) { - for ( auto& update : self->state.pending_remote_updates ) - self->state.command(update); - - self->state.pending_remote_updates.clear(); - self->state.pending_remote_updates.shrink_to_fit(); + [=](atom::local, internal_command& cmd) { + auto& st = self->state; + if (auto inner = get_if(&cmd.content); + inner && inner->who) { + local_request_key key{inner->who, inner->req_id}; + st.local_requests.emplace(key, self->make_response_promise()); } + auto& out = st.output(); + cmd.seq = out.next_seq(); + cmd.sender = entity_id::from(self); + auto msg = make_command_message(st.master_topic, std::move(cmd)); + out.produce(std::move(msg)); }, [=](atom::sync_point, caf::actor& who) { self->send(who, atom::sync_point_v); }, - [=](atom::master, atom::resolve) { - if ( self->state.master ) - return; - - BROKER_INFO("request master resolve"); - self->send(self->state.core, atom::store_v, atom::master_v, - atom::resolve_v, self->state.id, self); - auto ri = std::chrono::duration(resync_interval); - auto ts = std::chrono::duration_cast(ri); - auto msg = caf::make_message(atom::master_v, atom::resolve_v); - clock->send_later(self, ts, std::move(msg)); - }, - [=](atom::master, caf::actor& master) { - if ( self->state.master ) - return; - - BROKER_INFO("resolved master"); - self->state.master = std::move(master); - self->state.is_stale = false; - self->state.stale_time = -1.0; - self->state.unmutable_time = -1.0; - self->monitor(self->state.master); - - for ( auto& cmd : self->state.mutation_buffer ) - self->state.forward(std::move(cmd)); - - self->state.mutation_buffer.clear(); - self->state.mutation_buffer.shrink_to_fit(); - - self->send(self->state.core, atom::store_v, atom::master_v, - atom::snapshot_v, self->state.id, self); - }, - [=](atom::master, caf::error err) { - if ( self->state.master ) - return; - - BROKER_INFO("error resolving master " << caf::to_string(err)); - }, - [=](atom::tick, atom::stale_check) { - if ( 
self->state.stale_time < 0 ) - return; - - // Checking the timestamp is needed in the case there are multiple - // connects/disconnects within a short period of time (we don't want - // to go stale too early). - if ( now(clock) < self->state.stale_time ) - return; - - self->state.is_stale = true; - }, - [=](atom::tick, atom::mutable_check) { - if ( self->state.unmutable_time < 0 ) - return; - - if ( now(clock) < self->state.unmutable_time ) - return; - - self->state.mutation_buffer.clear(); - self->state.mutation_buffer.shrink_to_fit(); + [=](atom::tick) { + auto& st = self->state; + st.tick(); + clock->send_later(self, defaults::store::tick_interval, + caf::make_message(atom::tick_v)); + if (!st.idle_callbacks.empty() && st.idle()) { + for (auto& rp : st.idle_callbacks) + rp.deliver(atom::ok_v); + st.idle_callbacks.clear(); + } }, [=](atom::get, atom::keys) -> caf::result { - if ( self->state.is_stale ) + if (!self->state.has_master()) return {ec::stale_data}; auto x = self->state.keys(); BROKER_INFO("KEYS ->" << x); return {x}; }, [=](atom::get, atom::keys, request_id id) { - if ( self->state.is_stale ) + if (!self->state.has_master()) return caf::make_message(make_error(ec::stale_data), id); - auto x = self->state.keys(); - BROKER_INFO("KEYS" << "with id" << id << "->" << x); + BROKER_INFO("KEYS" + << "with id" << id << "->" << x); return caf::make_message(std::move(x), id); }, [=](atom::exists, const data& key) -> caf::result { - if (self->state.is_stale) + if (!self->state.has_master()) return {ec::stale_data}; auto result = (self->state.store.find(key) != self->state.store.end()); BROKER_INFO("EXISTS" << key << "->" << result); return data{result}; }, [=](atom::exists, const data& key, request_id id) { - if (self->state.is_stale) + if (!self->state.has_master()) return caf::make_message(make_error(ec::stale_data), id); - auto r = (self->state.store.find(key) != self->state.store.end()); auto result = caf::make_message(data{r}, id); BROKER_INFO("EXISTS" << key 
<< "with id" << id << "->" << r); return result; }, [=](atom::get, const data& key) -> caf::result { - if (self->state.is_stale) + if (!self->state.has_master()) return {ec::stale_data}; expected result = ec::no_such_key; auto i = self->state.store.find(key); @@ -351,7 +483,7 @@ caf::behavior clone_actor(caf::stateful_actor* self, return result; }, [=](atom::get, const data& key, const data& aspect) -> caf::result { - if (self->state.is_stale) + if (!self->state.has_master()) return {ec::stale_data}; expected result = ec::no_such_key; auto i = self->state.store.find(key); @@ -361,7 +493,7 @@ caf::behavior clone_actor(caf::stateful_actor* self, return result; }, [=](atom::get, const data& key, request_id id) { - if (self->state.is_stale) + if (!self->state.has_master()) return caf::make_message(make_error(ec::stale_data), id); caf::message result; auto i = self->state.store.find(key); @@ -375,9 +507,8 @@ caf::behavior clone_actor(caf::stateful_actor* self, return result; }, [=](atom::get, const data& key, const data& aspect, request_id id) { - if ( self->state.is_stale ) + if (!self->state.has_master()) return caf::make_message(make_error(ec::stale_data), id); - caf::message result; auto i = self->state.store.find(key); if (i != self->state.store.end()) { @@ -394,7 +525,15 @@ caf::behavior clone_actor(caf::stateful_actor* self, } return result; }, - [=](atom::get, atom::name) { return self->state.id; }, + [=](atom::get, atom::name) { return self->state.store_name; }, + [=](atom::await, atom::idle) -> caf::result { + auto& st = self->state; + if (st.idle()) + return atom::ok_v; + auto rp = self->make_response_promise(); + st.idle_callbacks.emplace_back(std::move(rp)); + return caf::delegated(); + }, // --- stream handshake with core ------------------------------------------ [=](store::stream_type in) { attach_stream_sink( @@ -406,27 +545,10 @@ caf::behavior clone_actor(caf::stateful_actor* self, // nop }, // processing step - [=](caf::unit_t&, 
store::stream_type::value_type y) { - // TODO: our operator() overloads require mutable references, but - // only a fraction actually benefit from it. - auto cmd = move_command(y); - if (caf::holds_alternative(cmd)) { - self->state.command(cmd); - return; - } - - if ( self->state.awaiting_snapshot_sync ) - return; - - if ( self->state.awaiting_snapshot ) { - self->state.pending_remote_updates.emplace_back(std::move(cmd)); - return; - } - - self->state.command(cmd); + [=](caf::unit_t&, store::stream_type::value_type msg) { + self->state.dispatch(msg); }); - }}; + }); } -} // namespace detail -} // namespace broker +} // namespace broker::detail diff --git a/src/detail/data_generator.cc b/src/detail/data_generator.cc index 1985600b..f202a3c4 100644 --- a/src/detail/data_generator.cc +++ b/src/detail/data_generator.cc @@ -109,8 +109,6 @@ caf::error data_generator::generate(internal_command::type tag, internal_command& x) { using tag_type = internal_command::type; switch (tag) { - case tag_type::none: - break; case tag_type::put_command: { data key; data val; @@ -124,8 +122,8 @@ caf::error data_generator::generate(internal_command::type tag, data val; GENERATE(key); GENERATE(val); - x.content - = put_unique_command{std::move(key), std::move(val), nil, nullptr, 0}; + x.content = put_unique_command{std::move(key), std::move(val), nil, + entity_id::nil(), 0}; break; } case tag_type::erase_command: { @@ -134,6 +132,12 @@ caf::error data_generator::generate(internal_command::type tag, x.content = erase_command{std::move(key)}; break; } + case tag_type::expire_command: { + data key; + GENERATE(key); + x.content = expire_command{std::move(key)}; + break; + } case tag_type::add_command: { data key; data val; @@ -152,22 +156,54 @@ caf::error data_generator::generate(internal_command::type tag, x.content = subtract_command{std::move(key), std::move(val)}; break; } - case tag_type::snapshot_command: { - x.content = snapshot_command{nullptr, nullptr}; + case 
tag_type::clear_command: { + x.content = clear_command{}; break; } - case tag_type::snapshot_sync_command: { - x.content = snapshot_sync_command{}; + case tag_type::attach_clone_command: { + x.content = attach_clone_command{}; break; } - case tag_type::set_command: { - std::unordered_map xs; - GENERATE(xs); - x.content = set_command{std::move(xs)}; + case tag_type::attach_writer_command: { + sequence_number_type offset; + tick_interval_type heartbeat_interval; + GENERATE(offset); + GENERATE(heartbeat_interval); + x.content = attach_writer_command{offset, heartbeat_interval}; break; } - case tag_type::clear_command: { - x.content = clear_command{}; + case tag_type::keepalive_command: { + sequence_number_type seq; + GENERATE(seq); + x.content = keepalive_command{seq}; + break; + } + case tag_type::cumulative_ack_command: { + sequence_number_type seq; + GENERATE(seq); + x.content = cumulative_ack_command{seq}; + break; + } + case tag_type::nack_command: { + std::vector seqs; + GENERATE(seqs); + x.content = nack_command{std::move(seqs)}; + break; + } + case tag_type::ack_clone_command: { + sequence_number_type seq; + tick_interval_type heartbeat_interval; + snapshot state; + GENERATE(seq); + GENERATE(heartbeat_interval); + GENERATE(state); + x.content = ack_clone_command{seq, heartbeat_interval, std::move(state)}; + break; + } + case tag_type::retransmit_failed_command: { + sequence_number_type offset; + GENERATE(offset); + x.content = retransmit_failed_command{offset}; break; } default: @@ -289,11 +325,6 @@ void data_generator::shuffle(data& x) { caf::visit(f, x); } -void data_generator::shuffle(vector& xs) { - for (auto& x : xs) - shuffle(x); -} - void data_generator::shuffle(set& xs) { // We can't reasonably shuffle a set, so just create a new one. 
recreate(*this, xs); diff --git a/src/detail/flare_actor.cc b/src/detail/flare_actor.cc index eae129df..1eb0b85e 100644 --- a/src/detail/flare_actor.cc +++ b/src/detail/flare_actor.cc @@ -5,6 +5,7 @@ #include #include +#include "broker/detail/assert.hh" #include "broker/logger.hh" namespace broker { @@ -73,8 +74,11 @@ caf::mailbox_element_ptr flare_actor::dequeue() { std::unique_lock lock{flare_mtx_}; auto rval = blocking_actor::dequeue(); - if (rval) - this->extinguish_one(); + if (rval) { + [[maybe_unused]] auto extinguished = flare_.extinguish_one(); + BROKER_ASSERT(extinguished); + --flare_count_; + } return rval; } @@ -85,8 +89,8 @@ const char* flare_actor::name() const { void flare_actor::extinguish_one() { std::unique_lock lock{flare_mtx_}; - auto extinguished = flare_.extinguish_one(); - CAF_ASSERT(extinguished); + [[maybe_unused]] auto extinguished = flare_.extinguish_one(); + BROKER_ASSERT(extinguished); --flare_count_; } diff --git a/src/detail/generator_file_writer.cc b/src/detail/generator_file_writer.cc index b4f50f96..55a8a70a 100644 --- a/src/detail/generator_file_writer.cc +++ b/src/detail/generator_file_writer.cc @@ -3,7 +3,6 @@ #include #include -#include "broker/detail/meta_command_writer.hh" #include "broker/detail/meta_data_writer.hh" #include "broker/detail/write_value.hh" #include "broker/error.hh" @@ -78,7 +77,7 @@ caf::error generator_file_writer::write(const data_message& x) { } caf::error generator_file_writer::write(const command_message& x) { - meta_command_writer writer{sink_}; + meta_data_writer writer{sink_}; uint16_t tid; auto entry = format::entry_type::command_message; BROKER_TRY(topic_id(get_topic(x), tid), write_value(sink_, entry), diff --git a/src/detail/master_actor.cc b/src/detail/master_actor.cc index 42af4515..1d3086f0 100644 --- a/src/detail/master_actor.cc +++ b/src/detail/master_actor.cc @@ -14,6 +14,7 @@ #include "broker/atoms.hh" #include "broker/convert.hh" #include "broker/data.hh" +#include "broker/defaults.hh" 
#include "broker/store.hh" #include "broker/time.hh" #include "broker/topic.hh" @@ -30,11 +31,19 @@ static optional to_opt_timestamp(timestamp ts, return span ? ts + *span : optional(); } -void master_state::init(caf::event_based_actor* ptr, std::string&& nm, - backend_pointer&& bp, caf::actor&& parent, - endpoint::clock* ep_clock) { - super::init(ptr, ep_clock, std::move(nm), std::move(parent)); - clones_topic = id / topics::clone_suffix; +// -- initialization ----------------------------------------------------------- + +master_state::master_state() : output(this) { + // nop +} + +void master_state::init(caf::event_based_actor* ptr, endpoint_id this_endpoint, + std::string&& nm, backend_pointer&& bp, + caf::actor&& parent, endpoint::clock* ep_clock) { + super::init(ptr, std::move(this_endpoint), ep_clock, std::move(nm), + std::move(parent)); + super::init(output); + clones_topic = store_name / topics::clone_suffix; backend = std::move(bp); if (auto es = backend->expiries()) { for (auto& e : *es) { @@ -50,9 +59,90 @@ void master_state::init(caf::event_based_actor* ptr, std::string&& nm, } } -void master_state::broadcast(internal_command&& x) { - self->send(core, atom::publish_v, - make_command_message(clones_topic, std::move(x))); +void master_state::dispatch(command_message& msg) { + BROKER_TRACE(BROKER_ARG(msg)); + // Here, we receive all command messages from the stream. The first step is + // figuring out whether the received message stems from a writer or clone. + // Clones can only send control messages (they are always consumers). Writers + // can send us either actions or control messages (they are producers). + auto& cmd = get_command(msg); + auto seq = cmd.seq; + auto tag = detail::tag_of(cmd); + auto type = detail::type_of(cmd); + switch (tag) { + case command_tag::action: { + // Action messages from writers. 
+ if (auto i = inputs.find(cmd.sender); i != inputs.end()) + i->second.handle_event(seq, std::move(msg)); + else + BROKER_DEBUG("received action from unknown sender:" << cmd.sender); + break; + } + case command_tag::producer_control: { + // Control messages from writers. + if (auto i = inputs.find(cmd.sender); i != inputs.end()) { + switch (type) { + case internal_command::type::attach_writer_command: { + BROKER_DEBUG("ignore repeated handshake from" << cmd.sender); + break; + } + case internal_command::type::keepalive_command: { + auto& inner = get(cmd.content); + i->second.handle_heartbeat(inner.seq); + break; + } + case internal_command::type::retransmit_failed_command: { + auto& inner = get(cmd.content); + i->second.handle_retransmit_failed(inner.seq); + break; + } + default: { + BROKER_ERROR("received bogus producer control message:" << cmd); + } + } + } else if (type == internal_command::type::attach_writer_command) { + BROKER_DEBUG("attach new writer:" << cmd.sender); + auto& inner = get(cmd.content); + i = inputs.emplace(cmd.sender, this).first; + super::init(i->second); + i->second.producer(cmd.sender); + if (!i->second.handle_handshake(inner.offset, + inner.heartbeat_interval)) { + BROKER_ERROR("abort connection: handle_handshake returned false"); + inputs.erase(i); + } + } else { + BROKER_DEBUG("received command from unknown sender:" << cmd); + } + break; + } + default: { + BROKER_ASSERT(tag == command_tag::consumer_control); + // Control messages from clones. 
+ switch (type) { + case internal_command::type::cumulative_ack_command: { + auto& inner = get(cmd.content); + output.handle_ack(cmd.sender, inner.seq); + break; + } + case internal_command::type::nack_command: { + auto& inner = get(cmd.content); + output.handle_nack(cmd.sender, inner.seqs); + break; + } + default: { + BROKER_ERROR("received bogus consumer control message:" << cmd); + } + } + } + } +} + +void master_state::tick() { + BROKER_TRACE(""); + output.tick(); + for (auto& kvp : inputs) + kvp.second.tick(); } void master_state::remind(timespan expiry, const data& key) { @@ -67,26 +157,23 @@ void master_state::expire(data& key) { } else if (!*result) { BROKER_INFO("EXPIRE" << key << "(IGNORE/STALE)"); } else { - expire_command cmd{std::move(key), publisher_id{self->node(), self->id()}}; + expire_command cmd{std::move(key), id}; emit_expire_event(cmd); - broadcast_cmd_to_clones(std::move(cmd)); + broadcast(std::move(cmd)); } } -void master_state::command(internal_command& cmd) { - command(cmd.content); -} - -void master_state::command(internal_command::variant_type& cmd) { - caf::visit(*this, cmd); -} +// -- callbacks for the consumer ----------------------------------------------- -void master_state::operator()(none) { - BROKER_INFO("received empty command"); +void master_state::consume(consumer_type*, command_message& msg) { + auto f = [this](auto& cmd) { consume(cmd); }; + caf::visit(f, get<1>(msg.unshared()).content); } -void master_state::operator()(put_command& x) { - BROKER_INFO("PUT" << x.key << "->" << x.value << "with expiry" << (x.expiry ? to_string(*x.expiry) : "none")); +void master_state::consume(put_command& x) { + BROKER_TRACE(BROKER_ARG(x)); + BROKER_INFO("PUT" << x.key << "->" << x.value << "with expiry" + << (x.expiry ? 
to_string(*x.expiry) : "none")); auto et = to_opt_timestamp(clock->now(), x.expiry); auto old_value = backend->get(x.key); auto result = backend->put(x.key, x.value, et); @@ -100,49 +187,56 @@ void master_state::operator()(put_command& x) { emit_update_event(x, *old_value); else emit_insert_event(x); - broadcast_cmd_to_clones(std::move(x)); + broadcast(std::move(x)); } -void master_state::operator()(put_unique_command& x) { - BROKER_INFO("PUT_UNIQUE" << x.key << "->" << x.value << "with expiry" << (x.expiry ? to_string(*x.expiry) : "none")); +void master_state::consume(put_unique_command& x) { + BROKER_TRACE(BROKER_ARG(x)); + BROKER_INFO("PUT_UNIQUE" << x.key << "->" << x.value << "with expiry" + << (x.expiry ? to_string(*x.expiry) : "none")); + auto broadcast_result = [this, &x](bool inserted) { + broadcast(put_unique_result_command{inserted, x.who, x.req_id, id}); + if (x.who) { + local_request_key key{x.who, x.req_id}; + if (auto i = local_requests.find(key); i != local_requests.end()) { + i->second.deliver(data{inserted}, x.req_id); + local_requests.erase(i); + } + } + }; if (exists(x.key)) { - // Note that we don't bother broadcasting this operation to clones since - // no change took place. - self->send(x.who, caf::make_message(data{false}, x.req_id)); + broadcast_result(false); return; } auto et = to_opt_timestamp(clock->now(), x.expiry); if (auto res = backend->put(x.key, x.value, et); !res) { BROKER_WARNING("failed to put_unique" << x.key << "->" << x.value); - self->send(x.who, caf::make_message(data{false}, x.req_id)); + broadcast_result(false); return; } - self->send(x.who, caf::make_message(data{true}, x.req_id)); if (x.expiry) remind(*x.expiry, x.key); emit_insert_event(x); - // Broadcast a regular "put" command. Clones don't have to do their own - // existence check. 
- put_command cmd{std::move(x.key), std::move(x.value), x.expiry, - std::move(x.publisher)}; - broadcast_cmd_to_clones(std::move(cmd)); + // Broadcast a regular "put" command (clones don't have to do their own + // existence check) followed by the (positive) result message. + broadcast(put_command{std::move(x.key), std::move(x.value), x.expiry, + std::move(x.publisher)}); + broadcast_result(true); } -void master_state::operator()(erase_command& x) { +void master_state::consume(erase_command& x) { + BROKER_TRACE(BROKER_ARG(x)); BROKER_INFO("ERASE" << x.key); if (auto res = backend->erase(x.key); !res) { BROKER_WARNING("failed to erase" << x.key << "->" << res.error()); return; // TODO: propagate failure? to all clones? as status msg? } emit_erase_event(x.key, x.publisher); - broadcast_cmd_to_clones(std::move(x)); -} - -void master_state::operator()(expire_command&) { - BROKER_ERROR("received an expire_command in master actor"); + broadcast(std::move(x)); } -void master_state::operator()(add_command& x) { +void master_state::consume(add_command& x) { + BROKER_TRACE(BROKER_ARG(x)); BROKER_INFO("ADD" << x); auto old_value = backend->get(x.key); auto et = to_opt_timestamp(clock->now(), x.expiry); @@ -166,11 +260,12 @@ void master_state::operator()(add_command& x) { emit_update_event(cmd, *old_value); else emit_insert_event(cmd); - broadcast_cmd_to_clones(std::move(cmd)); + broadcast(std::move(cmd)); } } -void master_state::operator()(subtract_command& x) { +void master_state::consume(subtract_command& x) { + BROKER_TRACE(BROKER_ARG(x)); BROKER_INFO("SUBTRACT" << x); auto et = to_opt_timestamp(clock->now(), x.expiry); auto old_value = backend->get(x.key); @@ -196,52 +291,12 @@ void master_state::operator()(subtract_command& x) { put_command cmd{std::move(x.key), std::move(*val), nil, std::move(x.publisher)}; emit_update_event(cmd, *old_value); - broadcast_cmd_to_clones(std::move(cmd)); + broadcast(std::move(cmd)); } } -void master_state::operator()(snapshot_command& x) { 
- BROKER_INFO("SNAPSHOT from" << to_string(x.remote_core)); - if (x.remote_core == nullptr || x.remote_clone == nullptr) { - BROKER_INFO("snapshot command with invalid address received"); - return; - } - auto ss = backend->snapshot(); - if (!ss) - die("failed to snapshot master"); - self->monitor(x.remote_core); - clones.emplace(x.remote_core->address(), x.remote_clone); - - // The snapshot gets sent over a different channel than updates, - // so we send a "sync" point over the update channel that target clone - // can use in order to apply any updates that arrived before it - // received the now-outdated snapshot. - broadcast_cmd_to_clones(snapshot_sync_command{x.remote_clone}); - - // TODO: possible improvements to do here - // (1) Use a separate *streaming* channel to send the snapshot. - // A benefit of that would potentially be less latent queries - // that go directly against the master store. - // (2) Always keep an updated snapshot in memory on the master to - // avoid numerous expensive retrievals from persistent backends - // in quick succession (e.g. at startup). - // (3) As an alternative to (2), give backends an API to stream - // key-value pairs without ever needing the full snapshot in - // memory. Note that this would require halting the application - // of updates on the master while there are any snapshot streams - // still underway. 
- self->send(x.remote_clone, set_command{std::move(*ss)}); -} - -void master_state::operator()(snapshot_sync_command&) { - BROKER_ERROR("received a snapshot_sync_command in master actor"); -} - -void master_state::operator()(set_command& x) { - BROKER_ERROR("received a set_command in master actor"); -} - -void master_state::operator()(clear_command& x) { +void master_state::consume(clear_command& x) { + BROKER_TRACE(BROKER_ARG(x)); BROKER_INFO("CLEAR" << x); if (auto keys_res = backend->keys(); !keys_res) { BROKER_ERROR("unable to obtain keys:" << keys_res.error()); @@ -259,45 +314,177 @@ void master_state::operator()(clear_command& x) { } if (auto res = backend->clear(); !res) die("failed to clear master"); - broadcast_cmd_to_clones(std::move(x)); + broadcast(std::move(x)); +} + +error master_state::consume_nil(consumer_type* src) { + BROKER_TRACE(""); + // We lost a message from a writer. This is obviously bad, since we lost some + // information before it made it into the backend. However, it is not a fatal + // error in the sense that we must abort processing. Hence, we return `none` + // here to keep processing messages from the writer. 
+ BROKER_ERROR("lost a message from" << src->producer()); + return nil; } +void master_state::close(consumer_type* src, [[maybe_unused]] error reason) { + BROKER_TRACE(BROKER_ARG(reason)); + if (auto i = inputs.find(src->producer()); i != inputs.end()) { + if (reason) + BROKER_INFO("removed" << src->producer() << "due to an error:" << reason); + else + BROKER_DEBUG("received graceful shutdown for" << src->producer()); + inputs.erase(i); + } else { + BROKER_ERROR("close called from an unknown consumer"); + } +} + +void master_state::send(consumer_type* ptr, channel_type::cumulative_ack ack) { + BROKER_TRACE(BROKER_ARG(ack)); + auto msg = make_command_message( + clones_topic, internal_command{0, id, cumulative_ack_command{ack.seq}}); + self->send(core, atom::publish_v, std::move(msg), ptr->producer().endpoint); +} + +void master_state::send(consumer_type* ptr, channel_type::nack nack) { + BROKER_TRACE(BROKER_ARG(nack)); + auto msg = make_command_message( + clones_topic, internal_command{0, id, nack_command{std::move(nack.seqs)}}); + self->send(core, atom::publish_v, std::move(msg), ptr->producer().endpoint); +} + +// -- callbacks for the producer ----------------------------------------------- + +void master_state::send(producer_type*, const entity_id& whom, + const channel_type::event& what) { + BROKER_TRACE(BROKER_ARG(whom) << BROKER_ARG(what)); + BROKER_ASSERT(what.seq == get_command(what.content).seq); + self->send(core, atom::publish_v, what.content, whom.endpoint); +} + +void master_state::send(producer_type*, const entity_id& whom, + channel_type::handshake msg) { + BROKER_TRACE(BROKER_ARG(whom) << BROKER_ARG(msg)); + auto i = open_handshakes.find(whom); + if (i == open_handshakes.end()) { + auto ss = backend->snapshot(); + if (!ss) + die("failed to snapshot master"); + auto cmd = make_command_message( + clones_topic, + internal_command{ + msg.offset, id, + ack_clone_command{msg.offset, msg.heartbeat_interval, std::move(*ss)}}); + i = 
open_handshakes.emplace(whom, std::move(cmd)).first; + } + self->send(core, atom::publish_v, i->second, whom.endpoint); +} + +void master_state::send(producer_type*, const entity_id& whom, + channel_type::retransmit_failed msg) { + BROKER_TRACE(BROKER_ARG(whom) << BROKER_ARG(msg)); + auto cmd = make_command_message( + clones_topic, internal_command{0, id, retransmit_failed_command{msg.seq}}); + self->send(core, atom::publish_v, std::move(cmd), whom.endpoint); +} + +void master_state::broadcast(producer_type*, channel_type::heartbeat msg) { + BROKER_TRACE(BROKER_ARG(msg)); + auto cmd = make_command_message( + clones_topic, internal_command{0, id, keepalive_command{msg.seq}}); + self->send(core, atom::publish_v, std::move(cmd)); +} + +void master_state::broadcast(producer_type*, const channel_type::event& what) { + BROKER_TRACE(BROKER_ARG(what)); + BROKER_ASSERT(what.seq == get_command(what.content).seq); + self->send(core, atom::publish_v, what.content); +} + +void master_state::drop(producer_type*, const entity_id& clone, + [[maybe_unused]] ec reason) { + BROKER_TRACE(BROKER_ARG(clone) << BROKER_ARG(reason)); + open_handshakes.erase(clone); + inputs.erase(clone); +} + +void master_state::handshake_completed(producer_type*, const entity_id& clone) { + BROKER_TRACE(BROKER_ARG(clone)); + open_handshakes.erase(clone); +} + +// -- properties --------------------------------------------------------------- + bool master_state::exists(const data& key) { if (auto res = backend->exists(key)) return *res; return false; } +bool master_state::idle() const noexcept { + auto is_idle = [](auto& kvp) { return kvp.second.idle(); }; + return output.idle() && std::all_of(inputs.begin(), inputs.end(), is_idle) + && open_handshakes.empty(); +} + +// -- master actor ------------------------------------------------------------- + caf::behavior master_actor(caf::stateful_actor* self, - caf::actor core, std::string id, + endpoint_id this_endpoint, caf::actor core, + std::string store_name, 
master_state::backend_pointer backend, endpoint::clock* clock) { + BROKER_TRACE(BROKER_ARG(this_endpoint) + << BROKER_ARG(core) << BROKER_ARG(store_name)); + // Setup. self->monitor(core); - self->state.init(self, std::move(id), std::move(backend), - std::move(core), clock); - self->set_down_handler( - [=](const caf::down_msg& msg) { - if (msg.source == core) { - BROKER_INFO("core is down, kill master as well"); - self->quit(msg.reason); + self->state.init(self, std::move(this_endpoint), std::move(store_name), + std::move(backend), std::move(core), clock); + self->set_down_handler([self](const caf::down_msg& msg) { + self->state.on_down_msg(msg.source, msg.reason); + }); + // Schedule first tick. + clock->send_later(self, self->state.tick_interval, + caf::make_message(atom::tick_v)); + return self->state.make_behavior( + // --- local communication ------------------------------------------------- + [=](atom::local, internal_command& cmd) { + // Locally received message are already ordered and reliable. Hence, we + // can process them immediately. 
+ auto tag = detail::tag_of(cmd); + if (tag == command_tag::action) { + if (auto ptr = get_if(&cmd.content); + ptr && ptr->who) { + if (auto rp = self->make_response_promise(); rp.pending()) { + store_actor_state::local_request_key key{ptr->who, ptr->req_id}; + if (!self->state.local_requests.emplace(key, rp).second) { + rp.deliver(make_error(ec::repeated_request_id), ptr->req_id); + return; + } + } + } + auto f = [self](auto& cmd) { self->state.consume(cmd); }; + caf::visit(f, cmd.content); } else { - BROKER_INFO("lost a clone"); - self->state.clones.erase(msg.source); + BROKER_ERROR("received unexpected command locally: " << cmd); + } + }, + [=](atom::tick) { + auto& st = self->state; + st.tick(); + clock->send_later(self, self->state.tick_interval, + caf::make_message(atom::tick_v)); + if (!st.idle_callbacks.empty() && st.idle()) { + for (auto& rp : st.idle_callbacks) + rp.deliver(atom::ok_v); + st.idle_callbacks.clear(); } - } - ); - return { - // --- local communication ------------------------------------------------- - [=](atom::local, internal_command& x) { - // treat locally and remotely received commands in the same way - self->state.command(x); }, [=](atom::sync_point, caf::actor& who) { self->send(who, atom::sync_point_v); }, - [=](atom::expire, data& key) { - self->state.expire(key); - }, + [=](atom::expire, data& key) { self->state.expire(key); }, [=](atom::get, atom::keys) -> caf::result { auto x = self->state.backend->keys(); BROKER_INFO("KEYS ->" << x); @@ -305,7 +492,8 @@ caf::behavior master_actor(caf::stateful_actor* self, }, [=](atom::get, atom::keys, request_id id) { auto x = self->state.backend->keys(); - BROKER_INFO("KEYS" << "with id:" << id << "->" << x); + BROKER_INFO("KEYS" + << "with id:" << id << "->" << x); if (x) return caf::make_message(std::move(*x), id); return caf::make_message(std::move(x.error()), id); @@ -339,16 +527,23 @@ caf::behavior master_actor(caf::stateful_actor* self, }, [=](atom::get, const data& key, const data& 
value, request_id id) { auto x = self->state.backend->get(key, value); - BROKER_INFO("GET" << key << "->" << value << "with id:" << id << "->" << x); + BROKER_INFO("GET" << key << "->" << value << "with id:" << id << "->" + << x); if (x) return caf::make_message(std::move(*x), id); return caf::make_message(std::move(x.error()), id); }, - [=](atom::get, atom::name) { - return self->state.id; + [=](atom::get, atom::name) { return self->state.store_name; }, + [=](atom::await, atom::idle) -> caf::result { + auto& st = self->state; + if (st.idle()) + return atom::ok_v; + auto rp = self->make_response_promise(); + st.idle_callbacks.emplace_back(std::move(rp)); + return caf::delegated(); }, // --- stream handshake with core ------------------------------------------ - [=](const store::stream_type& in) { + [=](store::stream_type in) { BROKER_DEBUG("received stream handshake from core"); attach_stream_sink( self, @@ -359,18 +554,15 @@ caf::behavior master_actor(caf::stateful_actor* self, // nop }, // processing step - [=](caf::unit_t&, store::stream_type::value_type y) { - // TODO: our operator() overloads require mutable references, but - // only a fraction actually benefit from it. - auto cmd = move_command(y); - self->state.command(cmd); + [=](caf::unit_t&, command_message msg) { + // forward to state + self->state.dispatch(msg); }, // cleanup [](caf::unit_t&, const caf::error&) { // nop }); - } - }; + }); } } // namespace detail diff --git a/src/detail/meta_data_writer.cc b/src/detail/meta_data_writer.cc index 55333716..5310e96e 100644 --- a/src/detail/meta_data_writer.cc +++ b/src/detail/meta_data_writer.cc @@ -2,26 +2,164 @@ #include +#include "broker/internal_command.hh" + namespace broker::detail { +namespace { + +using caf::visit; + +class helper { +public: + explicit helper(caf::binary_serializer& f) : f_(f) { + // nop + } + + template + bool operator()(const T&) { + // We only store dynamic type information and container sizes. 
+ return true; + } + + bool operator()(const std::string& x) { + return apply(static_cast(x.size())); + } + + bool operator()(const enum_value& x) { + return (*this)(x.name); + } + + bool operator()(const set& xs) { + if (!apply(static_cast(xs.size()))) + return false; + for (auto& x : xs) + if (!(*this)(x)) + return false; + return true; + } + + bool operator()(const table& xs) { + if (!apply(static_cast(xs.size()))) + return false; + for (auto& kvp : xs) + if (!(*this)(kvp.first) || !(*this)(kvp.second)) + return false; + return true; + } + + bool operator()(const vector& xs) { + if (!apply(static_cast(xs.size()))) + return false; + for (auto& x : xs) + if (!(*this)(x)) + return false; + return true; + } + + bool operator()(const data& x) { + return apply(x.get_type()) + && visit([this](const auto& value) { return (*this)(value); }, x); + } + + bool operator()(const std::vector& x) { + return apply(static_cast(x.size())); + } + + bool operator()(const put_command& x) { + return (*this)(x.key) && (*this)(x.value); + } + + bool operator()(const put_unique_command& x) { + return (*this)(x.key) && (*this)(x.value); + } + + bool operator()(const erase_command& x) { + return (*this)(x.key); + } + + bool operator()(const expire_command& x) { + return (*this)(x.key); + } + + bool operator()(const add_command& x) { + return (*this)(x.key) && (*this)(x.value) && (*this)(x.init_type); + } + + bool operator()(const subtract_command& x) { + return (*this)(x.key) && (*this)(x.value); + } + + bool operator()(const attach_writer_command& x) { + return (*this)(x.offset) && (*this)(x.heartbeat_interval); + } + + bool operator()(const keepalive_command& x) { + return (*this)(x.seq); + } + + bool operator()(const cumulative_ack_command& x) { + return (*this)(x.seq); + } + + bool operator()(const nack_command& x) { + return (*this)(x.seqs); + } + + bool operator()(const ack_clone_command& x) { + return (*this)(x.offset) // + && (*this)(x.heartbeat_interval) // + && (*this)(x.state); 
+ } + + bool operator()(const retransmit_failed_command& x) { + return (*this)(x.seq); + } + + bool operator()(const internal_command& x) { + return apply(type_of(x)) + && visit([this](auto& value) { return (*this)(value); }, x.content); + } + + error&& move_error() { + return std::move(err_); + } + +private: + template + bool apply(T x) { + if constexpr (std::is_integral::value) { + if (!f_.value(x)) { + err_ = f_.get_error(); + return false; + } + return true; + } else { + static_assert(std::is_enum::value); + return apply(static_cast>(x)); + } + } + + caf::binary_serializer& f_; + error err_; +}; + +} // namespace + meta_data_writer::meta_data_writer(caf::binary_serializer& sink) : sink_(sink) { // nop } -caf::error meta_data_writer::apply(data::type tag) { - auto val = static_cast>(tag); - if (sink_.value(val)) - return {}; - else - return sink_.get_error(); +error meta_data_writer::operator()(const data& x) { + helper h{sink_}; + h(x); + return h.move_error(); } -caf::error meta_data_writer::apply(size_t container_size) { - auto val = static_cast(container_size); - if (sink_.value(val)) - return {}; - else - return sink_.get_error(); +error meta_data_writer::operator()(const internal_command& x) { + helper h{sink_}; + h(x); + return h.move_error(); } } // namespace broker::detail diff --git a/src/detail/monotonic_buffer_resource.cc b/src/detail/monotonic_buffer_resource.cc new file mode 100644 index 00000000..10d5570b --- /dev/null +++ b/src/detail/monotonic_buffer_resource.cc @@ -0,0 +1,49 @@ +#include "broker/detail/monotonic_buffer_resource.hh" + +#include +#include + +namespace broker::detail { + +namespace { + +// Unlike the standard version, our implementation does *not* follow a geometric +// progression. Simply because our use cases (alm::multipath) allow for a +// simpler implementation. 
+constexpr size_t block_size = 1024; + +} // namespace + +void* monotonic_buffer_resource::allocate(size_t num_bytes, size_t alignment) { + if (auto res = std::align(alignment, num_bytes, + current_->bytes, remaining_)) { + current_->bytes = static_cast(res) + num_bytes; + remaining_ -= num_bytes; + return res; + } else { + allocate_block(current_); + return allocate(num_bytes, alignment); + } +} + +void monotonic_buffer_resource::allocate_block(block* prev_block) { + if (auto vptr = malloc(block_size)) { + current_ = static_cast(vptr); + current_->next = prev_block; + current_->bytes = static_cast(vptr) + sizeof(block); + remaining_ = block_size - sizeof(block); + } else { + throw std::bad_alloc(); + } +} + +void monotonic_buffer_resource::destroy() noexcept { + auto blk = current_; + while (blk != nullptr) { + auto prev = blk; + blk = blk->next; + free(prev); + } +} + +} // namespace broker::detail diff --git a/src/detail/network_cache.cc b/src/detail/network_cache.cc index 7dae349c..aa98bbcb 100644 --- a/src/detail/network_cache.cc +++ b/src/detail/network_cache.cc @@ -5,8 +5,26 @@ namespace broker { namespace detail { +namespace { + +template +auto type_erase(const Handle& x) { + return caf::actor_cast(x); +} + +} // namespace + network_cache::network_cache(caf::event_based_actor* selfptr) : self(selfptr) { - // nop + auto& sys = self->home_system(); + if (sys.has_middleman()) + mm_ = type_erase(sys.middleman().actor_handle()); +} + +void network_cache::set_use_ssl(bool use_ssl) { + BROKER_INFO("initiating connections using" << (use_ssl ? "SSL" : "no SSL")); + auto& sys = self->home_system(); + mm_ = type_erase(use_ssl ? 
sys.openssl_manager().actor_handle() + : sys.middleman().actor_handle()); } caf::result network_cache::fetch(const network_info& x) { diff --git a/src/detail/peer_handshake.cc b/src/detail/peer_handshake.cc new file mode 100644 index 00000000..77fa9bce --- /dev/null +++ b/src/detail/peer_handshake.cc @@ -0,0 +1,360 @@ +#include "broker/detail/peer_handshake.hh" + +#include +#include + +#include "broker/detail/unipath_manager.hh" +#include "broker/message.hh" + +namespace broker::detail { + +// -- nested type: fsm --------------------------------------------------------- + +std::string peer_handshake::fsm::pretty_state() const { + std::string result; + switch (state) { + case init_state: + result = "init"; + break; + case fail_state: + result = "fail"; + break; + case done_state: + result = "done"; + break; + case started: + result = "started"; + break; + default: + BROKER_ASSERT(has_flag(started)); + if (has_flag(has_open_stream_msg)) { + result = "has_open_stream_msg"; + } else { + BROKER_ASSERT(has_flag(has_ack_open_msg)); + result = "has_ack_open_msg"; + } + } + return result; +} + +// -- nested type: originator -------------------------------------------------- + +bool peer_handshake::originator::start() { + BROKER_TRACE(BROKER_ARG2("state", pretty_state())); + switch (state) { + case fsm::init_state: { + state = fsm::started; + auto owner = parent->owner; + BROKER_ASSERT(owner != nullptr); + auto strong_owner = peer_manager_ptr{owner}; + auto self = owner->this_actor(); + auto id = owner->this_endpoint(); + BROKER_ASSERT(self != nullptr); + self + ->request(parent->remote_hdl, std::chrono::minutes(10), atom::peer_v, + atom::init_v, id, self) + .then( + [](atom::peer, atom::ok, const endpoint_id&) { + // Note: we do *not* fulfill the promise here. We do so after + // receiving the ack_open. 
+ }, + [this, ptr{std::move(strong_owner)}](caf::error& err) { + // If this request fails, this means most likely that the remote + // core died or disconnected before the handshake could finish. This + // message handler may "outlive" the transport, so we hold on to a + // strong reference. Calling `fail` on the parent is safe even after + // the transport terminated since it only accesses local and actor + // state. We discard the error in case the FSM made state + // transitions in the meantime, because then we have other + // mechanisms that detect errors plus the error we receive here may + // be a stale request timeout. + if (auto hs = std::addressof(ptr->handshake())) { + BROKER_ASSERT(hs == parent); + if (state == fsm::started) + hs->fail(ec::peer_disconnect_during_handshake); + } + }); + return true; + } + case fsm::fail_state: + return false; + default: + parent->fail(ec::repeated_peering_handshake_request); + return false; + } +} + +bool peer_handshake::originator::handle_open_stream_msg() { + BROKER_TRACE(BROKER_ARG2("state", pretty_state())); + switch (state) { + case fsm::started: { + auto mgr = parent->owner; + caf::stream token; + parent->in = mgr->add_unchecked_inbound_path(token); + if (parent->in == caf::invalid_stream_slot) { + auto err = make_error(caf::sec::runtime_error, + "originator failed to open inbound path"); + parent->fail(std::move(err)); + return false; + } + auto tup = std::make_tuple(atom::ok_v, caf::actor{mgr->this_actor()}, + mgr->this_endpoint(), mgr->local_filter(), + mgr->local_timestamp()); + auto hdl = parent->remote_hdl; + parent->out + = mgr->add_unchecked_outbound_path(hdl, std::move(tup)) + .value(); + if (parent->out == caf::invalid_stream_slot) { + auto err = make_error(caf::sec::runtime_error, + "originator failed to open outbound path"); + parent->fail(std::move(err)); + return false; + } + state |= fsm::has_open_stream_msg; + return true; + } + case fsm::fail_state: + return false; + default: + 
parent->fail(ec::unexpected_handshake_message); + return false; + } + return false; +} + +bool peer_handshake::originator::handle_ack_open_msg() { + BROKER_TRACE(BROKER_ARG2("state", pretty_state())); + switch (state) { + case fsm::started | fsm::has_open_stream_msg: { + state |= fsm::has_ack_open_msg; + return parent->done_transition(); + } + case fsm::fail_state: + return false; + default: + parent->fail(ec::unexpected_handshake_message); + return false; + } +} + +// -- nested type: responder --------------------------------------------------- + +bool peer_handshake::responder::start() { + BROKER_TRACE(BROKER_ARG2("state", pretty_state())); + switch (state) { + case fsm::init_state: { + auto mgr = parent->owner; + auto tup = std::make_tuple(caf::actor{mgr->this_actor()}, + mgr->this_endpoint(), + mgr->local_filter(), mgr->local_timestamp()); + auto hdl = parent->remote_hdl; + parent->out + = mgr->add_unchecked_outbound_path(hdl, std::move(tup)) + .value(); + if (parent->out != caf::invalid_stream_slot) { + state = fsm::started; + return true; + } else { + auto err = make_error(caf::sec::runtime_error, + "responder failed to open outbound path"); + parent->fail(std::move(err)); + return false; + } + } + case fsm::fail_state: + return false; + default: + parent->fail(ec::repeated_peering_handshake_request); + return false; + } +} + +bool peer_handshake::responder::handle_open_stream_msg() { + BROKER_TRACE(BROKER_ARG2("state", pretty_state())); + switch (state) { + case fsm::started: + case fsm::started | fsm::has_ack_open_msg: { + auto mgr = parent->owner; + caf::stream token; + parent->in = mgr->add_unchecked_inbound_path(token); + if (parent->in != caf::invalid_stream_slot) { + state |= fsm::has_open_stream_msg; + return post_msg_action(); + } else { + auto err = make_error(caf::sec::runtime_error, + "responder failed to open inbound path"); + parent->fail(std::move(err)); + return false; + } + } + case fsm::fail_state: + return false; + default: + 
parent->fail(ec::repeated_peering_handshake_request); + return false; + } +} + +bool peer_handshake::responder::handle_ack_open_msg() { + BROKER_TRACE(BROKER_ARG2("state", pretty_state())); + switch (state) { + case fsm::started: + case fsm::started | fsm::has_open_stream_msg: { + state |= fsm::has_ack_open_msg; + return post_msg_action(); + } + case fsm::fail_state: + return false; + default: + parent->fail(ec::repeated_peering_handshake_request); + return false; + } +} + +bool peer_handshake::responder::post_msg_action() { + BROKER_TRACE(BROKER_ARG2("state", pretty_state())); + if (done()) { + return parent->done_transition(); + } else { + return true; + } +} + +// -- state transitions -------------------------------------------------------- + +bool peer_handshake::originator_start_peering(endpoint_id peer_id, + caf::actor peer_hdl, + caf::response_promise rp) { + BROKER_TRACE(BROKER_ARG2("impl", pretty_impl()) + << BROKER_ARG(peer_id) + << BROKER_ARG2("rp.pending", rp.pending())); + remote_id = std::move(peer_id); + remote_hdl = std::move(peer_hdl); + if (rp.pending()) + promises.emplace_back(std::move(rp)); + if (caf::holds_alternative(impl)) { + impl = originator{this}; + return caf::get(impl).start(); + } else if (is_originator() && !started()) { + return caf::get(impl).start(); + } else { + fail(ec::invalid_handshake_state); + return false; + } +} + +bool peer_handshake::originator_handle_open_stream_msg( + filter_type filter, alm::lamport_timestamp timestamp) { + BROKER_TRACE(BROKER_ARG2("impl", pretty_impl())); + if (is_originator()) { + remote_filter = std::move(filter); + remote_timestamp = timestamp; + return caf::get(impl).handle_open_stream_msg(); + } else { + fail(ec::invalid_handshake_state); + return false; + } +} + +bool peer_handshake::responder_start_peering(endpoint_id peer_id, + caf::actor peer_hdl) { + BROKER_TRACE(BROKER_ARG2("impl", pretty_impl()) << BROKER_ARG(peer_id)); + remote_id = std::move(peer_id); + remote_hdl = std::move(peer_hdl); 
+ if (caf::holds_alternative(impl)) { + impl = responder{this}; + return caf::get(impl).start(); + } else if (is_responder() && !started()) { + return caf::get(impl).start(); + } else { + fail(ec::invalid_handshake_state); + return false; + } +} + +bool peer_handshake::responder_handle_open_stream_msg( + filter_type filter, alm::lamport_timestamp timestamp) { + BROKER_TRACE(BROKER_ARG2("impl", pretty_impl())); + if (is_responder()) { + remote_filter = std::move(filter); + remote_timestamp = timestamp; + return caf::get(impl).handle_open_stream_msg(); + } else { + fail(ec::invalid_handshake_state); + return false; + } +} + +bool peer_handshake::handle_ack_open_msg() { + BROKER_TRACE(BROKER_ARG2("impl", pretty_impl())); + return visit_impl( + [this](caf::unit_t&) { + fail(ec::invalid_handshake_state); + return false; + }, + [this](auto& obj) { return obj.handle_ack_open_msg(); }); +} + +// -- callbacks for the FSM implementations ------------------------------------ + +bool peer_handshake::done_transition() { + BROKER_TRACE(BROKER_ARG2("impl", pretty_impl())); + if (owner->finalize_handshake()) { + if (!promises.empty()) { + for (auto& promise : promises) + promise.deliver(atom::peer_v, atom::ok_v, remote_id); + promises.clear(); + } + return true; + } else { + fail(ec::invalid_handshake_state); + return false; + } +} + +// -- error handling ----------------------------------------------------------- + +void peer_handshake::fail(error reason) { + BROKER_TRACE(BROKER_ARG2("impl", pretty_impl()) << BROKER_ARG(reason)); + if (err) { + BROKER_ERROR("cannot fail a handshake twice"); + } else { + visit_impl([](caf::unit_t&) {}, + [](auto& obj) { obj.state = fsm::fail_state; }); + err = std::move(reason); + if (!promises.empty()) { + for (auto& promise : promises) + promise.deliver(err); + promises.clear(); + } + owner->handshake_failed(err); + } +} + +int peer_handshake::state() const noexcept { + return visit_impl([](const caf::unit_t&) { return fsm::init_state; }, + 
[](const auto& obj) { return obj.state; }); +} + +caf::actor peer_handshake::self_hdl() { + return caf::actor_cast(owner->this_actor()); +} + +std::string peer_handshake::pretty_impl() const { + return visit_impl([](const caf::unit_t&) { return "indeterminate"; }, + [](const originator&) { return "originator"; }, + [](const responder&) { return "responder"; }); +} + +// -- FSM management ----------------------------------------------------------- + +bool peer_handshake::to_responder() { + if (!started()) { + impl = responder{this}; + return true; + } else { + return false; + } +} + +} // namespace broker::detail diff --git a/src/detail/store_actor.cc b/src/detail/store_actor.cc index 8be5f8cc..b48a4e21 100644 --- a/src/detail/store_actor.cc +++ b/src/detail/store_actor.cc @@ -8,7 +8,7 @@ namespace { template constexpr size_t vec_slots() { - if constexpr (std::is_same::value) + if constexpr (std::is_same::value) return 2; else return 1; @@ -27,7 +27,7 @@ void append(vector& xs, const optional& x) { xs.emplace_back(nil); } -void append(vector& xs, const publisher_id& x) { +void append(vector& xs, const entity_id& x) { if (x) { if (auto ep = to(x.endpoint)) { xs.emplace_back(std::move(*ep)); @@ -48,22 +48,30 @@ void fill_vector(vector& vec, const Ts&... 
xs) { } // namespace void store_actor_state::init(caf::event_based_actor* self, - endpoint::clock* clock, std::string&& id, - caf::actor&& core) { + endpoint_id this_endpoint, endpoint::clock* clock, + std::string&& store_name, caf::actor&& core) { BROKER_ASSERT(self != nullptr); BROKER_ASSERT(clock != nullptr); this->self = self; this->clock = clock; - this->id = std::move(id); + this->store_name = std::move(store_name); + this->id.endpoint = this_endpoint; + this->id.object = self->id(); this->core = std::move(core); - this->dst = topics::store_events / this->id; + this->dst = topics::store_events / this->store_name; + auto& cfg = self->system().config(); + tick_interval = caf::get_or(cfg, "broker.store.tick-interval", + defaults::store::tick_interval); } +// -- event signaling ---------------------------------------------------------- + void store_actor_state::emit_insert_event(const data& key, const data& value, const optional& expiry, - const publisher_id& publisher) { + const entity_id& publisher) { vector xs; - fill_vector(xs, "insert"s, id, key, value, expiry, publisher); + fill_vector(xs, "insert"s, store_name, key, value, expiry, publisher); self->send(core, atom::publish_v, atom::local_v, make_data_message(dst, data{std::move(xs)})); } @@ -72,27 +80,46 @@ void store_actor_state::emit_update_event(const data& key, const data& old_value, const data& new_value, const optional& expiry, - const publisher_id& publisher) { + const entity_id& publisher) { vector xs; - fill_vector(xs, "update"s, id, key, old_value, new_value, expiry, publisher); + fill_vector(xs, "update"s, store_name, key, old_value, new_value, expiry, + publisher); self->send(core, atom::publish_v, atom::local_v, make_data_message(dst, data{std::move(xs)})); } void store_actor_state::emit_erase_event(const data& key, - const publisher_id& publisher) { + const entity_id& publisher) { vector xs; - fill_vector(xs, "erase"s, id, key, publisher); + 
fill_vector(xs, "erase"s, store_name, key, publisher); self->send(core, atom::publish_v, atom::local_v, make_data_message(dst, data{std::move(xs)})); } void store_actor_state::emit_expire_event(const data& key, - const publisher_id& publisher) { + const entity_id& publisher) { vector xs; - fill_vector(xs, "expire"s, id, key, publisher); + fill_vector(xs, "expire"s, store_name, key, publisher); self->send(core, atom::publish_v, atom::local_v, make_data_message(dst, data{std::move(xs)})); } +// -- callbacks for the behavior ----------------------------------------------- + +void store_actor_state::on_down_msg(const caf::actor_addr& source, + const error& reason) { + if (source == core) { + BROKER_INFO("core is down, quit"); + self->quit(reason); + return; + } + auto i = local_requests.begin(); + while (i != local_requests.end()) { + if (source == i->second.next()) + i = local_requests.erase(i); + else + ++i; + } +} + } // namespace broker::detail diff --git a/src/detail/unipath_manager.cc b/src/detail/unipath_manager.cc index f52131a4..3e6a4eb8 100644 --- a/src/detail/unipath_manager.cc +++ b/src/detail/unipath_manager.cc @@ -4,12 +4,13 @@ #include #include -#include +#include #include #include #include #include +#include "broker/alm/stream_transport.hh" #include "broker/defaults.hh" #include "broker/detail/assert.hh" #include "broker/detail/central_dispatcher.hh" @@ -27,42 +28,11 @@ bool ends_with(caf::string_view str, caf::string_view suffix) { && str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; } -// Checks whether a downstream of type T is eligible for inputs of given scope. -template -[[nodiscard]] constexpr bool is_eligible(item_scope x) noexcept { - if constexpr (std::is_same::value - || std::is_same::value) { - // Paths to data_message and command_message receivers always belong to - // local subscribers. - return x != item_scope::remote; - } else { - // Paths to node_message receivers always forward data to peers. 
- static_assert(std::is_same::value); - return x != item_scope::local; - } -} - -// Checks whether a downstream of type T is eligible a given message. -template -[[nodiscard]] bool is_eligible(const node_message& msg) noexcept { - if constexpr (std::is_same::value) { - // Paths to data_message receivers are always local subscribers. - return is_data_message(msg.content); - } else if constexpr (std::is_same::value) { - // Paths to command_message receivers are also always local subscribers. - return is_command_message(msg.content); - } else { - // Paths to node_message receivers are always peers. - static_assert(std::is_same::value); - return msg.ttl > 0; - } -} - // A downstream manager with at most one outbound path. template -class unipath_downstream : public caf::downstream_manager_base { +class unipath_downstream : public caf::downstream_manager { public: - using super = caf::downstream_manager_base; + using super = downstream_manager; using output_type = T; @@ -70,46 +40,20 @@ class unipath_downstream : public caf::downstream_manager_base { using unique_path_ptr = std::unique_ptr; - unipath_downstream(caf::stream_manager* parent) - : super(parent, caf::type_id_v) { - // nop - } - - ~unipath_downstream() { - if (!cache_.empty()) - super::dropped_messages(cache_.size()); - } + using super::super; - bool enqueue(item_scope scope, caf::span messages, - long pending_handshakes) { - BROKER_TRACE(BROKER_ARG(scope) - << BROKER_ARG(pending_handshakes) - << BROKER_ARG2("num-messages", messages.size())); - if (is_eligible(scope)) { - prefix_matcher matches_filter; - auto old_size = cache_.size(); - for (const auto& msg : messages) { - if (is_eligible(msg) && matches_filter(filter_, msg)) { - if constexpr (std::is_same::value) { - cache_.emplace_back(caf::get(msg.content)); - } else if constexpr (std::is_same::value) { - cache_.emplace_back(caf::get(msg.content)); - } else { - cache_.emplace_back(msg); - } - } - } - if (auto added = cache_.size() - old_size; added > 0) { 
- super::generated_messages(added); - if (path_) { - emit_batches_impl(false); - return true; - } else { - return pending_handshakes != 0; - } + void enqueue(const T& msg) { + if (path_) { + if constexpr (std::is_same::value) { + cache_.emplace_back(msg); + } else { + prefix_matcher accept; + if (accept(filter_, msg)) + cache_.emplace_back(msg); } + } else { + BROKER_DEBUG("no path available"); } - return path_ || pending_handshakes != 0; } bool has_path() const noexcept { @@ -126,7 +70,6 @@ class unipath_downstream : public caf::downstream_manager_base { bool remove_path(caf::stream_slot x, caf::error reason, bool silent) noexcept override { - BROKER_TRACE(BROKER_ARG(x) << BROKER_ARG(reason) << BROKER_ARG(silent)); if (is_this_slot(x)) { super::about_to_erase(path_.get(), silent, &reason); path_.reset(); @@ -142,23 +85,20 @@ class unipath_downstream : public caf::downstream_manager_base { } void emit_batches_impl(bool forced) { - auto old_size = cache_.size(); - path_->emit_batches(super::self(), cache_, forced || path_->closing); - auto new_size = cache_.size(); - if (auto shipped = old_size - new_size; shipped > 0) { - super::shipped_messages(shipped); - super::last_send_ = super::self()->now(); + if (!cache_.empty()) { + BROKER_DEBUG(BROKER_ARG2("cache.size", cache_.size())); + BROKER_ASSERT(path_ != nullptr); + auto old_size = cache_.size(); + path_->emit_batches(super::self(), cache_, forced || path_->closing); } } void emit_batches() override { - if (path_ && !cache_.empty()) - emit_batches_impl(false); + emit_batches_impl(false); } void force_emit_batches() override { - if (path_ && !cache_.empty()) - emit_batches_impl(true); + emit_batches_impl(true); } void clear_paths() override { @@ -189,7 +129,6 @@ class unipath_downstream : public caf::downstream_manager_base { } bool insert_path(unique_path_ptr ptr) override { - BROKER_TRACE(BROKER_ARG(ptr)); using std::swap; if (!path_) { swap(path_, ptr); @@ -228,40 +167,38 @@ class unipath_downstream : public 
caf::downstream_manager_base { std::vector cache_; }; -template -class unipath_manager_out : public unipath_manager { +template +class unipath_sink_impl : public Base { public: - using super = unipath_manager; + static_assert(std::is_base_of::value); + + using super = Base; - unipath_manager_out(central_dispatcher* dispatcher, + using message_type = typename super::message_type; + + unipath_sink_impl(central_dispatcher* dispatcher, unipath_manager::observer* observer) : super(dispatcher, observer), out_(this) { // nop } template - unipath_manager_out(central_dispatcher* dispatcher, + unipath_sink_impl(central_dispatcher* dispatcher, unipath_manager::observer* observer, Filter&& filter) - : unipath_manager_out(dispatcher, observer) { - BROKER_TRACE(BROKER_ARG(filter)); + : unipath_sink_impl(dispatcher, observer) { out_.filter_ = std::forward(filter); } - bool enqueue(const unipath_manager* source, item_scope scope, - caf::span xs) override { - if (source != this) { - return out_.enqueue(scope, xs, pending_handshakes_); - } else { - return true; - } + void enqueue(const message_type& msg) override { + BROKER_TRACE(BROKER_ARG(msg)); + out_.enqueue(msg); } - filter_type filter() override { + filter_type filter() const override { return out_.filter_; } void filter(filter_type new_filter) override { - BROKER_TRACE(BROKER_ARG(new_filter)); out_.filter_ = std::move(new_filter); } @@ -270,8 +207,8 @@ class unipath_manager_out : public unipath_manager { return accept(out_.filter_, t); } - caf::type_id_t message_type() const noexcept override { - return caf::type_id_v; + caf::type_id_t message_type_id() const noexcept override { + return caf::type_id_v; } caf::downstream_manager& out() override { @@ -282,7 +219,10 @@ class unipath_manager_out : public unipath_manager { auto open_paths = out_.num_paths() + this->pending_handshakes_ + this->inbound_paths_.size(); - return open_paths == 0 || !super::self_->has_behavior(); + return open_paths == 0 + || ((!this->running() || 
super::dispatcher_->tearing_down()) + && this->inbound_paths_idle() + && out_.clean()); } bool idle() const noexcept override { @@ -307,8 +247,15 @@ class unipath_manager_out : public unipath_manager { this->closing(false, caf::sec::runtime_error); return false; } else if (caf::stream_manager::handle(slots, x)) { - if (auto ptr = this->observer_) - ptr->downstream_connected(this, caf::actor_cast(rebind_to)); + if constexpr (std::is_same::value) { + if (!this->handshake_.handle_ack_open_msg()) { + auto reason = this->handshake_.err; + this->closing(false, reason); + return false; + } + if (this->handshake_.done()) + this->downstream_connected(caf::actor_cast(rebind_to)); + } return true; } else { BROKER_ERROR("unipath manager failed to process ack_open!"); @@ -318,179 +265,156 @@ class unipath_manager_out : public unipath_manager { } protected: - unipath_downstream out_; + unipath_downstream out_; }; -class unipath_manager_in_only : public unipath_manager { -public: - using super = unipath_manager; +} // namespace - unipath_manager_in_only(central_dispatcher* dispatcher, - unipath_manager::observer* observer) - : super(dispatcher, observer), out_(this) { - // nop - } +namespace { - bool enqueue(const unipath_manager*, item_scope, - caf::span) override { - return false; - } +template +struct pending_buf { + std::vector xs; - filter_type filter() override { - return {}; + void add(T&& x) { + if (!x.unique()) + xs.emplace_back(std::move(x)); } - void filter(filter_type) override { - // nop + void drop_shipped() { + auto is_shipped = [](const T& msg) { return msg.unique(); }; + xs.erase(std::remove_if(xs.begin(), xs.end(), is_shipped), xs.end()); } - bool accepts(const topic&) const noexcept override { - return false; + [[nodiscard]] size_t size() const noexcept { + return xs.size(); } +}; - caf::downstream_manager& out() override { - return out_; +template <> +struct pending_buf { + pending_buf data_msgs; + pending_buf command_msgs; + + void add(node_message_content&& 
x) { + if (is_data_message(x)) + data_msgs.add(std::move(get_data_message(x))); + else + command_msgs.add(std::move(get_command_message(x))); } - bool done() const override { - return inbound_paths_.empty() || !super::self_->has_behavior(); + void drop_shipped() { + data_msgs.drop_shipped(); + command_msgs.drop_shipped(); } - bool idle() const noexcept override { - return inbound_paths_idle(); + [[nodiscard]] size_t size() const noexcept { + return data_msgs.size() + command_msgs.size(); } +}; -protected: - caf::downstream_manager out_; +template <> +struct pending_buf : pending_buf { + // nop }; -template -class unipath_manager_in : public Base { +template +class unipath_manager_source_impl : public Base { public: using super = Base; template - explicit unipath_manager_in(central_dispatcher* dispatcher, + explicit unipath_manager_source_impl(central_dispatcher* dispatcher, unipath_manager::observer* observer, Ts&&... xs) : super(dispatcher, observer, std::forward(xs)...) { auto sptr = super::self(); auto& cfg = sptr->system().config(); - if (!std::is_same::value - || caf::get_or(cfg, "broker.forward", true)) { - ttl_ = caf::get_or(cfg, "broker.ttl", defaults::ttl); - } else { - // Set TTL to 0 when forwarding was disabled and this manager receives - // node messages from other peers. This causes all peer managers to drop - // node messages instead of forwarding them. 
- ttl_ = 0; - } - } - - void block_inputs() override { - block_inputs_ = true; } void unblock_inputs() override { - if (block_inputs_) { - block_inputs_ = false; + if (!blocked_messages_.empty()) { + auto sptr = this->self(); + auto& bs = sptr->bhvr_stack(); + if (bs.empty()) { + BROKER_ERROR("failed to unblock inputs: actor has no behavior"); + } else { + auto bhvr = bs.back(); + for (auto& msg : blocked_messages_) { + if (auto res = bhvr(msg); !res) { + BROKER_ERROR("failed to unblock inputs: no match for" << msg); + } else if (!res->empty()) { + BROKER_ERROR("peer tried to respond to an unblocked input with" + << *res); + } + } + } + blocked_messages_.clear(); + } + if (!blocked_batches_.empty()) { for (auto& batch : blocked_batches_) handle(nullptr, batch); blocked_batches_.clear(); } } - bool blocks_inputs() override { - return block_inputs_; + void add_blocked_input(caf::message msg) override { + BROKER_ASSERT(this->blocks_inputs()); + blocked_messages_.emplace_back(std::move(msg)); } - caf::type_id_t message_type() const noexcept override { + caf::type_id_t message_type_id() const noexcept override { return caf::type_id_v; } bool idle() const noexcept override { - // The difference from the default implementation is that we do *not* check - // for out_.stalled(). This is because we limit credit by in-flights from - // this manager rather than by available credit downstream. - return this->out_.clean() && this->inbound_paths_idle(); - } - - using super::handle; - - void handle_batch(std::vector& xs) { - auto old_size = pending_.size(); - for (auto& x : xs) { - if (x.ttl == 0) { - BROKER_WARNING("received node message with TTL 0: dropped"); - continue; - } - // Somewhat hacky, but don't forward data store clone messages. - auto ttl = ends_with(get_topic(x).string(), topics::clone_suffix.string()) - ? 
uint16_t{0} - : std::min(ttl_, static_cast(x.ttl - 1)); - x.ttl = ttl; - // We are using the reference count as a means to detect whether all - // receivers have processed the message. Hence, we must make sure that the - // reference count to the message's content is 1 at this point. - force_unshared(x); - pending_.emplace_back(std::move(x)); - } - if (auto added = pending_.size() - old_size; added > 0) { - auto ys = caf::make_span(std::addressof(pending_[old_size]), added); - super::dispatcher_->enqueue(this, item_scope::global, ys); + if constexpr (std::is_same::value) { + return super::idle(); + } else { + // The difference from the default implementation is that we do *not* + // check for out_.stalled(). This is because we limit credit by in-flights + // from this manager rather than by available credit downstream. + return this->out_.clean() && this->inbound_paths_idle(); } } - template - void handle_batch(std::vector& xs) { - auto old_size = pending_.size(); - for (auto& x : xs) { - force_unshared(x); - pending_.emplace_back(make_node_message(std::move(x), ttl_)); - } - if (auto added = pending_.size() - old_size; added > 0) { - auto ys = caf::make_span(std::addressof(pending_[old_size]), added); - super::dispatcher_->enqueue(this, item_scope::remote, ys); - } - } + using super::handle; void handle(caf::inbound_path*, caf::downstream_msg::batch& b) override { BROKER_TRACE(BROKER_ARG(b)); - BROKER_DEBUG(BROKER_ARG2("batch.size", b.xs_size) - << BROKER_ARG(block_inputs_)); - if (block_inputs_) { - blocked_batches_.push_back(std::move(b)); + BROKER_DEBUG(BROKER_ARG2("batch.size", b.xs_size)); + if (this->blocks_inputs()) { + blocked_batches_.emplace_back(std::move(b)); } else if (auto view = caf::make_typed_message_view>(b.xs)) { - handle_batch(get<0>(view)); + for (auto& x : get<0>(view)) { + if constexpr (std::is_same::value){ + force_unshared(get<0>(x.unshared())); + auto content = get_content(x); + super::dispatcher_->dispatch(std::move(x)); + 
pending_.add(std::move(content)); + } else { + force_unshared(x); + super::dispatcher_->dispatch(x); + pending_.add(std::move(x)); + } + } } else { BROKER_ERROR("received unexpected batch type (dropped)"); } } int32_t acquire_credit(caf::inbound_path* in, int32_t desired) override { - // Drop pending inputs that are no longer referenced by output paths. - auto is_shipped = [](const node_message& msg) { - if (is_data_message(msg)) { - return caf::get(msg.content).unique(); - } else { - return caf::get(msg.content).unique(); - } - }; - pending_.erase(std::remove_if(pending_.begin(), pending_.end(), is_shipped), - pending_.end()); // Limit credit by pending (in-flight) messages from this path. + pending_.drop_shipped(); auto total = in->assigned_credit + desired; auto used = static_cast(pending_.size()); - if (auto delta = total - used; delta > 0) - return delta; - else - return 0; + return std::max(total - used, int32_t{0}); } private: - uint16_t ttl_; - bool block_inputs_ = false; + std::vector buf_; + std::vector blocked_messages_; std::vector blocked_batches_; - std::vector pending_; + pending_buf pending_; }; } // namespace @@ -502,7 +426,7 @@ unipath_manager::observer::~observer() { } unipath_manager::unipath_manager(central_dispatcher* dispatcher, observer* obs) - : super(dispatcher->self()), dispatcher_(dispatcher), observer_(obs) { + : super(dispatcher->this_actor()), dispatcher_(dispatcher), observer_(obs) { // nop } @@ -510,24 +434,22 @@ unipath_manager::~unipath_manager() { // nop } -void unipath_manager::block_inputs() { - // nop -} - -void unipath_manager::unblock_inputs() { - // nop -} - -bool unipath_manager::blocks_inputs() { +bool unipath_manager::blocks_inputs() const noexcept { return false; } -bool unipath_manager::has_inbound_path() const noexcept { - return inbound_paths().size() == 1; -} - -bool unipath_manager::has_outbound_path() const noexcept { - return out().num_paths() == 1; +caf::actor unipath_manager::hdl() const noexcept { + if (auto& 
vec = inbound_paths(); vec.size() == 1) { + return caf::actor_cast(vec[0]->hdl); + } else { + // We only ever have 0 or 1 path. Hence, for_each_path gets called either + // once or not at all. + caf::actor result; + out().for_each_path([&](const caf::outbound_path& x) { + result = caf::actor_cast(x.hdl); + }); + return result; + } } caf::stream_slot unipath_manager::inbound_path_slot() const noexcept { @@ -546,18 +468,20 @@ caf::stream_slot unipath_manager::outbound_path_slot() const noexcept { return caf::invalid_stream_slot; } -caf::actor unipath_manager::hdl() const noexcept { - if (auto& vec = inbound_paths(); vec.size() == 1) { - return caf::actor_cast(vec[0]->hdl); - } else { - // We only ever have 0 or 1 path. Hence, for_each_path gets called either - // once or not at all. - caf::actor result; - out().for_each_path([&](const caf::outbound_path& x) { - result = caf::actor_cast(x.hdl); - }); - return result; - } +caf::event_based_actor* unipath_manager::this_actor() noexcept { + return dispatcher_->this_actor(); +} + +endpoint_id unipath_manager::this_endpoint() const { + return dispatcher_->this_endpoint(); +} + +filter_type unipath_manager::local_filter() const { + return dispatcher_->local_filter(); +} + +alm::lamport_timestamp unipath_manager::local_timestamp() const noexcept { + return dispatcher_->local_timestamp(); } bool unipath_manager::congested(const caf::inbound_path&) const noexcept { @@ -571,8 +495,9 @@ bool unipath_manager::congested(const caf::inbound_path&) const noexcept { void unipath_manager::handle(caf::inbound_path* path, caf::downstream_msg::close& x) { - closing(true, {}); super::handle(path, x); + if (unconnected()) + closing(true, {}); } void unipath_manager::handle(caf::inbound_path* path, @@ -583,8 +508,9 @@ void unipath_manager::handle(caf::inbound_path* path, void unipath_manager::handle(caf::stream_slots slots, caf::upstream_msg::drop& x) { - closing(true, {}); super::handle(slots, x); + if (unconnected()) + closing(true, {}); } 
void unipath_manager::handle(caf::stream_slots slots, @@ -600,63 +526,160 @@ void unipath_manager::closing(bool graceful, const caf::error& reason) { } } -// -- free functions ----------------------------------------------------------- +void unipath_manager::downstream_connected(caf::actor hdl) { + if (observer_) + observer_->downstream_connected(this, std::move(hdl)); +} + +// -- data_sink ---------------------------------------------------------------- -unipath_manager_ptr make_data_source(central_dispatcher* dispatcher) { - using impl_t = unipath_manager_in; - return caf::make_counted(dispatcher, nullptr); +unipath_data_sink::~unipath_data_sink() { + // nop } -unipath_manager_ptr make_command_source(central_dispatcher* dispatcher) { - using impl_t = unipath_manager_in; - return caf::make_counted(dispatcher, nullptr); +unipath_manager::derived_pointer unipath_data_sink::derived_ptr() noexcept { + return this; } -unipath_manager_ptr make_source(central_dispatcher* dispatcher, - caf::stream in) { - auto mgr = make_data_source(dispatcher); - mgr->add_unchecked_inbound_path(in); - return mgr; +unipath_data_sink_ptr make_unipath_data_sink(central_dispatcher* dispatcher, + filter_type filter) { + using impl_t = unipath_sink_impl; + return caf::make_counted(dispatcher, nullptr, std::move(filter)); } -unipath_manager_ptr make_source(central_dispatcher* dispatcher, - caf::stream in) { - auto mgr = make_command_source(dispatcher); - mgr->add_unchecked_inbound_path(in); - return mgr; +// -- command_sink ---------------------------------------------------------------- + +unipath_command_sink::~unipath_command_sink() { + // nop +} + +unipath_manager::derived_pointer unipath_command_sink::derived_ptr() noexcept { + return this; +} + +unipath_command_sink_ptr +make_unipath_command_sink(central_dispatcher* dispatcher, filter_type filter) { + using impl_t = unipath_sink_impl; + return caf::make_counted(dispatcher, nullptr, std::move(filter)); +} + +// -- peer_manager 
------------------------------------------------------------- + +peer_manager::peer_manager(central_dispatcher* cp, observer* obs) + : super(cp, obs), handshake_(this) { + // nop +} + +peer_manager::~peer_manager() { + // nop +} + +bool peer_manager::handshake_completed() const noexcept { + return handshake_.done(); +} + +bool peer_manager::blocks_inputs() const noexcept { + return !handshake_completed(); } -unipath_manager_ptr make_source(central_dispatcher* dispatcher, - caf::stream in) { - using impl_t = unipath_manager_in; +void peer_manager::handshake_failed(error reason) { + if (observer_) + observer_->abort_handshake(this); +} + +bool peer_manager::finalize_handshake() { + BROKER_TRACE(""); + if (observer_) { + if (observer_->finalize_handshake(this)) { + unblock_inputs(); + return true; + } else { + return false; + } + } else { + BROKER_ERROR("cannot finalize a peer_manager without an observer"); + return false; + } +} + +void peer_manager::closing(bool graceful, const caf::error& reason) { + if (!handshake_completed()) + handshake_.fail(ec::peer_disconnect_during_handshake); + super::closing(graceful, reason); +} + +unipath_manager::derived_pointer peer_manager::derived_ptr() noexcept { + return this; +} + +peer_manager_ptr make_peer_manager(central_dispatcher* dispatcher, + unipath_manager::observer* observer) { + using base_t = unipath_sink_impl; + using impl_t = unipath_manager_source_impl; + return caf::make_counted(dispatcher, observer); +} + +peer_manager_ptr make_peer_manager(alm::stream_transport* transport) { + return make_peer_manager(transport, transport); +} + +// -- unipath_source ----------------------------------------------------------- + +unipath_source::~unipath_source() { + // nop +} + +unipath_manager::derived_pointer unipath_source::derived_ptr() noexcept { + return this; +} + +filter_type unipath_source::filter() const { + return {}; +} + +void unipath_source::filter(filter_type) { + // nop +} + +bool unipath_source::accepts(const 
topic&) const noexcept { + return false; +} + +caf::downstream_manager& unipath_source::out() { + return out_; +} + +bool unipath_source::done() const { + return inbound_paths_.empty() + || (super::dispatcher_->tearing_down() && inbound_paths_idle()); +} + +bool unipath_source::idle() const noexcept { + return inbound_paths_idle(); +} + +unipath_source_ptr make_unipath_source(central_dispatcher* dispatcher, + caf::stream in) { + using impl_t = unipath_manager_source_impl; auto mgr = caf::make_counted(dispatcher, nullptr); mgr->add_unchecked_inbound_path(in); return mgr; } -unipath_manager_ptr make_data_sink(central_dispatcher* dispatcher, - filter_type filter) { - using impl_t = unipath_manager_out; - auto ptr = caf::make_counted(dispatcher, nullptr, std::move(filter)); - dispatcher->add(ptr); - return ptr; -} - -unipath_manager_ptr make_command_sink(central_dispatcher* dispatcher, - filter_type filter) { - using impl_t = unipath_manager_out; - auto ptr = caf::make_counted(dispatcher, nullptr, std::move(filter)); - dispatcher->add(ptr); - return ptr; +unipath_source_ptr make_unipath_source(central_dispatcher* dispatcher, + caf::stream in) { + using impl_t = unipath_manager_source_impl; + auto mgr = caf::make_counted(dispatcher, nullptr); + mgr->add_unchecked_inbound_path(in); + return mgr; } -unipath_manager_ptr make_peer_manager(central_dispatcher* dispatcher, - unipath_manager::observer* observer) { - using base_t = unipath_manager_out; - using impl_t = unipath_manager_in; - auto ptr = caf::make_counted(dispatcher, observer); - ptr->block_inputs(); - return ptr; +unipath_source_ptr make_unipath_source(central_dispatcher* dispatcher, + caf::stream in) { + using impl_t = unipath_manager_source_impl; + auto mgr = caf::make_counted(dispatcher, nullptr); + mgr->add_unchecked_inbound_path(in); + return mgr; } } // namespace broker::detail diff --git a/src/domain_options.cc b/src/domain_options.cc new file mode 100644 index 00000000..6469816c --- /dev/null +++ 
b/src/domain_options.cc @@ -0,0 +1,16 @@ +#include "broker/domain_options.hh" + +#include + +namespace broker { + +void domain_options::save(caf::settings& sink) { + caf::put(sink, "broker.disable-forwarding", disable_forwarding); +} + +void domain_options::load(const caf::settings& source) { + using caf::get_or; + disable_forwarding = get_or(source, "broker.disable-forwarding", false); +} + +} // namespace broker diff --git a/src/endpoint.cc b/src/endpoint.cc index 619ab364..3ac5dc99 100644 --- a/src/endpoint.cc +++ b/src/endpoint.cc @@ -166,7 +166,6 @@ void pretty_print(std::ostream& out, const caf::settings& xs, endpoint::endpoint(configuration config) : config_(std::move(config)), - await_stores_on_shutdown_(false), destroyed_(false) { // Stop immediately if any helptext was printed. if (config_.cli_helptext_printed) @@ -195,41 +194,46 @@ endpoint::endpoint(configuration config) clock_ = new clock(&system_, opts.use_real_time); if (system_.has_openssl_manager() || opts.disable_ssl) { BROKER_INFO("creating endpoint"); - core_ = system_.spawn(filter_type{}, opts, clock_); + core_ = system_.spawn(filter_type{}, clock_); } else { - detail::die("SSL is enabled but CAF OpenSSL manager is not available"); + detail::die("CAF OpenSSL manager is not available"); } } endpoint::~endpoint() { - BROKER_INFO("destroying endpoint"); shutdown(); } void endpoint::shutdown() { - BROKER_INFO("shutting down endpoint"); if (destroyed_) return; - destroyed_ = true; - if (!await_stores_on_shutdown_) { - BROKER_DEBUG("tell core actor to terminate stores"); - anon_send(core_, atom::shutdown_v, atom::store_v); - } - if (!children_.empty()) { + // Lifetime scope of the BROKER_TRACE object: must go out of scope before + // calling the destructor of caf::actor_system. 
+ { + BROKER_TRACE(""); + BROKER_INFO("shutting down endpoint"); caf::scoped_actor self{system_}; - BROKER_DEBUG("send exit messages to all children"); - for (auto& child : children_) - // exit_reason::kill seems more reliable than - // exit_reason::user_shutdown in terms of avoiding deadlocks/hangs, - // possibly due to the former having more explicit logic that will - // shut down streams. - self->send_exit(child, caf::exit_reason::kill); - BROKER_DEBUG("wait until all children have terminated"); - self->wait_for(children_); - children_.clear(); + BROKER_DEBUG("send shutdown message to core actor"); + self->monitor(core_); + self->send(core_, atom::shutdown_v, shutdown_options_); + self->receive( // Give the core 5s time to shut down gracefully. + [](const caf::down_msg&) {}, + caf::after(std::chrono::seconds(5)) >> + [&] { + BROKER_WARNING("endpoint failed to shut down gracefully, kill"); + self->send_exit(core_, caf::exit_reason::kill); + self->wait_for(core_); + }); + if (!children_.empty()) { + BROKER_DEBUG("kill remaining children if the core failed to stop them"); + for (auto& child : children_) + self->send_exit(child, caf::exit_reason::kill); + BROKER_DEBUG("wait until all children have terminated"); + self->wait_for(children_); + children_.clear(); + } } - BROKER_DEBUG("send shutdown message to core actor"); - anon_send(core_, atom::shutdown_v); + destroyed_ = true; core_ = nullptr; system_.~actor_system(); delete clock_; @@ -237,6 +241,7 @@ void endpoint::shutdown() { } uint16_t endpoint::listen(const std::string& address, uint16_t port) { + BROKER_TRACE(BROKER_ARG(address) << BROKER_ARG(port)); BROKER_INFO("listening on" << (address + ":" + std::to_string(port)) << (config_.options().disable_ssl ? 
"(no SSL)" : "(SSL)")); @@ -260,7 +265,7 @@ bool endpoint::peer(const std::string& address, uint16_t port, self->request(core_, caf::infinite, atom::peer_v, network_info{address, port, retry}) .receive( - [&](const caf::actor&) { + [&](atom::peer, atom::ok, const endpoint_id&) { result = true; }, [&](caf::error& err) { @@ -271,6 +276,16 @@ bool endpoint::peer(const std::string& address, uint16_t port, return result; } +bool endpoint::peer(const caf::uri& locator, timeout::seconds retry) { + BROKER_TRACE(BROKER_ARG(locator) << BROKER_ARG(retry)); + if (auto info = to(locator)) { + return peer(info->address, info->port, retry); + } else { + BROKER_INFO("invalid URI:" << locator); + return false; + } +} + void endpoint::peer_nosync(const std::string& address, uint16_t port, timeout::seconds retry) { BROKER_TRACE(BROKER_ARG(address) << BROKER_ARG(port)); @@ -406,6 +421,7 @@ caf::actor endpoint::make_actor(actor_init_fun f) { expected endpoint::attach_master(std::string name, backend type, backend_options opts) { + BROKER_TRACE(BROKER_ARG(name) << BROKER_ARG(type) << BROKER_ARG(opts)); BROKER_INFO("attaching master store" << name << "of type" << type); expected res{ec::unspecified}; caf::scoped_actor self{system_}; @@ -426,6 +442,9 @@ expected endpoint::attach_clone(std::string name, double resync_interval, double stale_interval, double mutation_buffer_interval) { + BROKER_TRACE(BROKER_ARG(name) + << BROKER_ARG(resync_interval) << BROKER_ARG(stale_interval) + << BROKER_ARG(mutation_buffer_interval)); BROKER_INFO("attaching clone store" << name); expected res{ec::unspecified}; caf::scoped_actor self{core()->home_system()}; @@ -442,4 +461,40 @@ expected endpoint::attach_clone(std::string name, return res; } +bool endpoint::await_peer(endpoint_id whom, timespan timeout) { + BROKER_TRACE(BROKER_ARG(whom) << BROKER_ARG(timeout)); + bool result = false; + caf::scoped_actor self{core()->home_system()}; + self->request(core(), timeout, atom::await_v, whom) + .receive( + 
[&]([[maybe_unused]] endpoint_id& discovered) { + BROKER_ASSERT(whom == discovered); + result = true; + }, + [&](caf::error& e) { + // nop + }); + return result; +} + +void endpoint::await_peer(endpoint_id whom, std::function callback, + timespan timeout) { + BROKER_TRACE(BROKER_ARG(whom) << BROKER_ARG(timeout)); + if (!callback) { + BROKER_ERROR("invalid callback received for await_peer"); + return; + } + auto f = [whom, cb{std::move(callback)}](caf::event_based_actor* self, + caf::actor core, timespan t) { + self->request(core, t, atom::await_v, whom) + .then( + [=]([[maybe_unused]] endpoint_id& discovered) { + BROKER_ASSERT(whom == discovered); + cb(true); + }, + [=](caf::error& e) { cb(false); }); + }; + core()->home_system().spawn(f, core(), timeout); +} + } // namespace broker diff --git a/src/endpoint_info.cc b/src/endpoint_info.cc index 5fbe0454..01b1f28e 100644 --- a/src/endpoint_info.cc +++ b/src/endpoint_info.cc @@ -78,7 +78,12 @@ std::string to_string(const endpoint_info& x) { std::string result = "endpoint_info("; result += to_string(x.node); result += ", "; - result += caf::deep_to_string(x.network); + if (auto& net = x.network) { + result += '*'; + result += to_string(*net); + } else { + result += "none"; + } result += ')'; return result; } diff --git a/src/publisher_id.cc b/src/entity_id.cc similarity index 67% rename from src/publisher_id.cc rename to src/entity_id.cc index ea29d63c..4ca9f6f1 100644 --- a/src/publisher_id.cc +++ b/src/entity_id.cc @@ -1,17 +1,17 @@ -#include "broker/publisher_id.hh" +#include "broker/entity_id.hh" #include namespace broker { -size_t publisher_id ::hash() const { +size_t entity_id ::hash() const noexcept { return caf::hash::fnv::compute(*this); } -std::string to_string(const publisher_id& x) { +std::string to_string(const entity_id& x) { using std::to_string; std::string result; - if (x) { + if (x.object != 0 || x.endpoint) { result = to_string(x.object); result += "@"; result += to_string(x.endpoint); diff --git 
a/src/error.cc b/src/error.cc index 389acb16..11e57de5 100644 --- a/src/error.cc +++ b/src/error.cc @@ -31,14 +31,32 @@ const char* ec_names[] = { "invalid_topic_key", "end_of_file", "invalid_tag", + "invalid_message", "invalid_status", + "conversion_failed", + "consumer_exists", + "connection_timeout", + "bad_member_function_call", + "repeated_request_id", + "broken_clone", + "shutting_down", + "invalid_peering_request", + "repeated_peering_handshake_request", + "unexpected_handshake_message", + "invalid_handshake_state", + "no_path_to_peer", }; +template +constexpr size_t array_size(const T (&)[N]) { + return N; +} + } // namespace std::string to_string(ec code) noexcept { auto index = static_cast(code); - BROKER_ASSERT(index < sizeof(ec_names)); + BROKER_ASSERT(index < array_size(ec_names)); return ec_names[index]; } diff --git a/src/gateway.cc b/src/gateway.cc new file mode 100644 index 00000000..6ecdf8d5 --- /dev/null +++ b/src/gateway.cc @@ -0,0 +1,133 @@ +#include "broker/gateway.hh" + +#include +#include +#include + +#include "broker/configuration.hh" +#include "broker/core_actor.hh" + +namespace broker { + +// -- member types ------------------------------------------------------------- + +struct gateway::impl { + // -- constructors, destructors, and assignment operators -------------------- + + impl(configuration&& source_config, + const domain_options* adapt_internal = nullptr, + const domain_options* adapt_external = nullptr) + : cfg(std::move(source_config)), sys(cfg) { + // Spin up two cores. 
+ internal = sys.spawn(filter_type{}, nullptr, adapt_internal); + external = sys.spawn(filter_type{}, nullptr, adapt_external); + gateway::setup(internal, external); + } + + // -- member variables ------------------------------------------------------- + + configuration cfg; + caf::actor_system sys; + caf::actor internal; + caf::actor external; +}; + +// -- constructors, destructors, and assignment operators ---------------------- + +gateway::~gateway() { + // Must appear out-of-line because of ptr_. +} + +gateway::gateway(std::unique_ptr&& ptr) : ptr_(std::move(ptr)) { + // nop +} + +expected gateway::make(configuration cfg, + domain_options internal_adaptation, + domain_options external_adaptation) { + return gateway{std::make_unique(std::move(cfg), &internal_adaptation, + &external_adaptation)}; +} + +expected gateway::make() { + return gateway{std::make_unique(configuration{})}; +} + +// -- setup -------------------------------------------------------------------- + +void gateway::setup(const caf::actor& internal, const caf::actor& external) { + caf::anon_send(internal, atom::join_v, external, filter_type{""}); + caf::anon_send(external, atom::join_v, internal, filter_type{""}); +} + +void gateway::shutdown(){ + anon_send(internal_core(), atom::shutdown_v); + anon_send(external_core(), atom::shutdown_v); +} + +// -- properties --------------------------------------------------------------- + +const caf::actor& gateway::internal_core() const noexcept { + return ptr_->internal; +} + +const caf::actor& gateway::external_core() const noexcept { + return ptr_->external; +} + +const configuration& gateway::config() const noexcept { + return ptr_->cfg; +} + +// --- peer management --------------------------------------------------------- + +uint16_t gateway::listen_external(const std::string& address, uint16_t port) { + return listen_impl(ptr_->external, address, port); +} + +uint16_t gateway::listen_internal(const std::string& address, uint16_t port) { + return 
listen_impl(ptr_->internal, address, port); +} + +uint16_t gateway::listen_impl(const caf::actor& core, + const std::string& address, uint16_t port) { + char const* addr = address.empty() ? nullptr : address.c_str(); + using pub_fun = caf::expected (*)(const caf::actor& hdl, uint16_t, + const char*, bool); + pub_fun publish; + if (ptr_->cfg.options().disable_ssl) + publish = caf::io::publish; + else + publish = caf::openssl::publish; + if (auto res = publish(core, port, addr, true)) + return *res; + return 0; +} + +std::map +gateway::peer(const std::vector& internal_peers, + const std::vector& external_peers, + timeout::seconds retry) { + std::map failures; + caf::scoped_actor self{ptr_->sys}; + auto f = [&](const auto& peers, const auto& core) { + for (const auto& peer : peers) { + if (auto info = to(peer)) { + info->retry = retry; + self->request(core, caf::infinite, atom::peer_v, std::move(*info)) + .receive( + [&](const caf::actor&) { + // success + }, + [&](caf::error& err) { failures.emplace(peer, err); }); + } else { + failures.emplace(peer, ec::conversion_failed); + } + } + }; + f(internal_peers, ptr_->internal); + f(external_peers, ptr_->external); + return failures; +} + +} // namespace broker diff --git a/src/internal_command.cc b/src/internal_command.cc index bc279912..31653afd 100644 --- a/src/internal_command.cc +++ b/src/internal_command.cc @@ -2,8 +2,17 @@ namespace broker { -internal_command::internal_command(variant_type x) : content(std::move(x)) { - // nop -} +#define TO_STRING_CASE(name) \ + case command_tag::name: \ + return #name; +std::string to_string(command_tag x) { + switch (x) { + TO_STRING_CASE(action) + TO_STRING_CASE(producer_control) + TO_STRING_CASE(consumer_control) + default: + return "???"; + } +} } // namespace broker diff --git a/src/message.cc b/src/message.cc index a1df36fc..112c8703 100644 --- a/src/message.cc +++ b/src/message.cc @@ -8,9 +8,38 @@ std::string to_string(const data_message& msg) { return caf::deep_to_string(msg.data()); } - 
std::string to_string(const command_message& msg) { return caf::deep_to_string(msg.data()); } +std::string to_string(const node_message& msg) { + return caf::deep_to_string(msg.data()); +} + } // namespace broker + +namespace broker::detail{ + +namespace { + +thread_local topic_cache_type topic_cache; + +thread_local path_cache_type path_cache; + +thread_local content_buf_type content_buf; + +} // namespace + +topic_cache_type& thread_local_topic_cache() { + return topic_cache; +} + +path_cache_type& thread_local_path_cache() { + return path_cache; +} + +content_buf_type& thread_local_content_buf() { + return content_buf; +} + +} // namespace broker::detail diff --git a/src/network_info.cc b/src/network_info.cc index 39cfc4c0..88be4d18 100644 --- a/src/network_info.cc +++ b/src/network_info.cc @@ -5,7 +5,9 @@ #include #include -#include "caf/uri.hpp" +#include + +#include "broker/network_info.hh" namespace broker { diff --git a/src/publisher.cc b/src/publisher.cc index 95587382..4bff7c33 100644 --- a/src/publisher.cc +++ b/src/publisher.cc @@ -30,7 +30,7 @@ struct publisher_worker_state { size_t counter = 0; bool shutting_down = false; - static const char* name; + static inline const char* name = "broker.publisher"; void tick() { if (buf.size() < sample_size) { @@ -49,8 +49,6 @@ struct publisher_worker_state { } }; -const char* publisher_worker_state::name = "publisher_worker"; - behavior publisher_worker(stateful_actor* self, endpoint* ep, detail::shared_publisher_queue_ptr<> qptr) { diff --git a/src/shutdown_options.cc b/src/shutdown_options.cc new file mode 100644 index 00000000..a870d2ef --- /dev/null +++ b/src/shutdown_options.cc @@ -0,0 +1,29 @@ +#include "broker//shutdown_options.hh" + +namespace { + +static constexpr const char* shutdown_options_strings[] = { + "nullopt", + "await_stores_on_shutdown", +}; + +void append(std::string& result, broker::shutdown_options::flag flag) { + if (result.back() != '(') + result += ", "; + result += 
shutdown_options_strings[static_cast(flag)]; +} + +} // namespace + +namespace broker { + +std::string to_string(shutdown_options options) { + std::string result = "shutdown_options("; + for (auto flag : {shutdown_options::await_stores_on_shutdown}) + if (options.contains(flag)) + append(result, flag); + result += ')'; + return result; +} + +} // namespace broker diff --git a/src/status.cc b/src/status.cc index 2aeb094b..f26c8153 100644 --- a/src/status.cc +++ b/src/status.cc @@ -92,23 +92,36 @@ bool operator==(sc x, const status& y) { return y == x; } -std::string to_string(const status& s) { - std::string result = to_string(s.code()); +namespace { + +template +std::string status_to_string_impl(const StatusOrView& x) { + std::string result = to_string(x.code()); result += '('; - if (s.context_.node) { - result += to_string(s.context_.node); - if (s.context_.network) { + if (auto ctx = x.context()) { + result += to_string(ctx->node); + if (ctx->network) { result += ", "; - result += to_string(*s.context_.network); + result += to_string(*ctx->network); } result += ", "; } result += '"'; - result += to_string(s.message_); + result += *x.message(); result += "\")"; return result; } +} // namespace + +std::string to_string(const status& x) { + return status_to_string_impl(x); +} + +std::string to_string(status_view x) { + return status_to_string_impl(x); +} + bool convertible_to_status(const vector& xs) noexcept { if (xs.size() != 4 || !is(xs[0])) return false; @@ -180,24 +193,6 @@ optional status_view::context() const { return {std::move(ei)}; } -std::string to_string(status_view s) { - std::string result = to_string(s.code()); - result += '('; - if (auto ctx = s.context()) { - result += to_string(ctx->node); - if (ctx->network) { - result += ", "; - result += to_string(*ctx->network); - } - result += ", "; - } - result += '"'; - if (auto msg = s.message()) - result += *msg; - result += "\")"; - return result; -} - status_view status_view::make(const data& src) { 
return status_view{convertible_to_status(src) ? &get(src) : nullptr}; } diff --git a/src/store.cc b/src/store.cc index 984f65ab..0322f2e1 100644 --- a/src/store.cc +++ b/src/store.cc @@ -1,25 +1,122 @@ -#include -#include +#include "broker/store.hh" -#include "broker/logger.hh" +#include +#include #include #include #include +#include #include +#include #include #include -#include "broker/store.hh" +#include "broker/detail/flare_actor.hh" +#include "broker/detail/store_state.hh" #include "broker/expected.hh" #include "broker/internal_command.hh" -#include "broker/detail/flare_actor.hh" +#include "broker/logger.hh" + +/// Checks whether the store has been initialized and logs an error message +/// otherwise before "returning" void. +#define CHECK_INITIALIZED_VOID() \ + do { \ + if (!initialized()) { \ + BROKER_ERROR(__func__ << "called on an uninitialized store"); \ + return; \ + } \ + } while (false) + +namespace { + +template +auto make_internal_command(Ts&&... xs) { + using namespace broker; + return internal_command{0, entity_id::nil(), T{std::forward(xs)...}}; +} + +template +broker::expected +fetch(const broker::detail::weak_store_state_ptr& state, Ts&&... xs) { + using namespace broker; + if (auto ptr = state.lock()) + return ptr->request(std::forward(xs)...); + return make_error(ec::bad_member_function_call, + "store state not initialized"); +} + +template +broker::expected +with_state(const broker::detail::weak_store_state_ptr& state, F f) { + using namespace broker; + if (auto ptr = state.lock()) + return f(*ptr); + return make_error(ec::bad_member_function_call, + "store state not initialized"); +} + +} // namespace using namespace broker::detail; namespace broker { -store::proxy::proxy(store& s) : frontend_{s.frontend_} { +// -- constructors, destructors, and assignment operators ---------------------- + +store::store() { + // Required out-of-line for weak_store_state_ptr. 
+} + +store::store(store&& other) : state_(std::move(other.state_)) { + // Required out-of-line for weak_store_state_ptr. +} + +store::store(const store& other) : state_(other.state_) { + if (auto ptr = state_.lock()) + caf::anon_send(ptr->frontend, atom::increment_v, ptr); +} + +store::store(caf::actor frontend, std::string name) { + BROKER_TRACE(BROKER_ARG(frontend) << BROKER_ARG(name)); + if (!frontend) { + BROKER_ERROR("store::store called with frontend == nullptr"); + return; + } + if (name.empty()) { + BROKER_ERROR("store::store called with empty name"); + return; + } + auto ptr = std::make_shared(std::move(name), frontend); + state_ = ptr; + caf::anon_send(frontend, atom::increment_v, std::move(ptr)); +} + +store& store::operator=(store&& other) { + if (auto ptr = state_.lock()) + caf::anon_send(ptr->frontend, atom::decrement_v, ptr); + state_ = std::move(other.state_); + return *this; +} + +store& store::operator=(const store& other) { + if (auto ptr = state_.lock()) + caf::anon_send(ptr->frontend, atom::decrement_v, ptr); + if (auto new_ptr = other.state_.lock()) { + state_ = new_ptr; + caf::anon_send(new_ptr->frontend, atom::increment_v, new_ptr); // new reference, mirrors the copy constructor + } else { + state_.reset(); + } + return *this; +} + +store::~store() { + if (auto ptr = state_.lock()) + caf::anon_send(ptr->frontend, atom::decrement_v, ptr); +} + +store::proxy::proxy(store& st) : frontend_{st.frontend()} { proxy_ = frontend_.home_system().spawn(); } @@ -40,10 +137,10 @@ request_id store::proxy::get(data key) { request_id store::proxy::put_unique(data key, data val, optional expiry) { if (!frontend_) return 0; - send_as( - proxy_, frontend_, atom::local_v, - make_internal_command( - std::move(key), std::move(val), expiry, proxy_, ++id_, frontend_id())); + send_as(proxy_, frontend_, atom::local_v, + make_internal_command( + std::move(key), std::move(val), expiry, entity_id::from(proxy_), + ++id_, frontend_id())); return id_; } @@ -61,132 +158,185 @@ request_id store::proxy::keys() { return
id_; } +caf::actor store::frontend() const { + if (auto ptr = state_.lock()) + return ptr->frontend; + return {}; +} + +entity_id store::frontend_id() const { + if (auto ptr = state_.lock()) + return entity_id::from(ptr->frontend); + return entity_id::nil(); +} + +caf::actor store::self_hdl() const { + if (auto ptr = state_.lock()) + return caf::actor{ptr->self.ptr()}; + return caf::actor{}; +} + +entity_id store::self_id() const { + if (auto ptr = state_.lock()) + return entity_id::from(ptr->self); + return entity_id::nil(); +} + mailbox store::proxy::mailbox() { return make_mailbox(caf::actor_cast(proxy_)); } store::response store::proxy::receive() { + BROKER_TRACE(""); auto resp = response{error{}, 0}; auto fa = caf::actor_cast(proxy_); fa->receive( - [&](data& x, request_id id) { + [&resp, fa](data& x, request_id id) { resp = {std::move(x), id}; fa->extinguish_one(); }, - [&](caf::error& e, request_id id) { - BROKER_ERROR("proxy failed to receive response from store" << id); - resp = {std::move(e), id}; + [&resp, fa](caf::error& err, request_id id) { + resp = {std::move(err), id}; + fa->extinguish_one(); + }, +#if CAF_VERSION >= 1800 + caf::others >> [&](caf::message& x) -> caf::skippable_result { + BROKER_ERROR("proxy received an unexpected message:" << x); +#else + caf::others >> [&](caf::message_view& x) -> caf::result { + BROKER_ERROR("proxy received an unexpected message:" << x.content()); +#endif + // We *must* make sure to consume any and all messages, because the flare + // actor messes with the mailbox signaling. The flare fires on each + // enqueued message and the flare actor reports data available as long as + // the flare count is > 0. However, the blocking actor is unaware of this + // flare and hence does not extinguish automatically when dequeueing a + // message from the mailbox. 
Without this default handler to actively + // discard unexpected messages, CAF would spin on the mailbox forever when + // attempting to skip unhandled inputs because flare_actor::await_data + // would always return `true`. fa->extinguish_one(); - } - ); + resp.answer = caf::make_error(caf::sec::unexpected_message); + return resp.answer.error(); + }); + BROKER_DEBUG("received response from frontend:" << resp); return resp; } std::vector store::proxy::receive(size_t n) { + BROKER_TRACE(BROKER_ARG(n)); std::vector rval; rval.reserve(n); - size_t i = 0; - auto fa = caf::actor_cast(proxy_); - - fa->receive_for(i, n) ( - [&](data& x, request_id id) { - rval.emplace_back(store::response{std::move(x), id}); - fa->extinguish_one(); - }, - [&](caf::error& e, request_id id) { - BROKER_ERROR("proxy failed to receive response from store" << id); - rval.emplace_back(store::response{std::move(e), id}); - fa->extinguish_one(); - } - ); - + for (size_t i = 0; i < n; ++i) + rval.emplace_back(receive()); return rval; } -const std::string& store::name() const { - return name_; +std::string store::name() const { + if (auto ptr = state_.lock()) + return ptr->name; + return {}; } expected store::exists(data key) const { - return request(atom::exists_v, std::move(key)); + return fetch(state_, atom::exists_v, std::move(key)); } expected store::get(data key) const { - return request(atom::get_v, std::move(key)); + return fetch(state_, atom::get_v, std::move(key)); } -expected store::put_unique(data key, data val, optional expiry) const { - if (!frontend_) - return make_error(ec::unspecified, "store not initialized"); - - expected res{ec::unspecified}; - caf::scoped_actor self{frontend_->home_system()}; - auto cmd = make_internal_command( - std::move(key), std::move(val), expiry, self, request_id(-1), - frontend_id()); - auto msg = caf::make_message(atom::local_v, std::move(cmd)); - - self->send(frontend_, std::move(msg)); - self->delayed_send(self, timeout::frontend, atom::tick_v); - 
self->receive( - [&](data& x, request_id) { - res = std::move(x); - }, - [&](atom::tick) { - }, - [&](caf::error& e) { - res = std::move(e); - } - ); - - return res; +expected store::put_unique(data key, data val, + optional expiry) { + return with_state(state_, [&](detail::store_state& state) { + auto tag = state.req_id++; + return state.request_tagged(tag, atom::local_v, + make_internal_command( + std::move(key), std::move(val), expiry, + entity_id::from(state.self), tag, + frontend_id())); + }); } expected store::get_index_from_value(data key, data index) const { - return request(atom::get_v, std::move(key), std::move(index)); + return fetch(state_, atom::get_v, std::move(key), std::move(index)); } expected store::keys() const { - return request(atom::get_v, atom::keys_v); + return fetch(state_, atom::get_v, atom::keys_v); } -void store::put(data key, data value, optional expiry) const { - anon_send(frontend_, atom::local_v, - make_internal_command(std::move(key), std::move(value), - expiry, frontend_id())); +bool store::initialized() const noexcept { + return !state_.expired(); } -void store::erase(data key) const { - anon_send( - frontend_, atom::local_v, - make_internal_command(std::move(key), frontend_id())); +void store::put(data key, data value, optional expiry) { + if (auto ptr = state_.lock()) + ptr->anon_send(atom::local_v, + make_internal_command( + std::move(key), std::move(value), expiry, frontend_id())); +} + +void store::erase(data key) { + if (auto ptr = state_.lock()) + ptr->anon_send(atom::local_v, make_internal_command( + std::move(key), frontend_id())); } void store::add(data key, data value, data::type init_type, - optional expiry) const { - anon_send(frontend_, atom::local_v, - make_internal_command(std::move(key), std::move(value), - init_type, expiry, - frontend_id())); + optional expiry) { + if (auto ptr = state_.lock()) + ptr->anon_send(atom::local_v, make_internal_command( + std::move(key), std::move(value), init_type, + expiry, 
frontend_id())); +} + +void store::subtract(data key, data value, optional expiry) { + if (auto ptr = state_.lock()) + ptr->anon_send(atom::local_v, + make_internal_command( + std::move(key), std::move(value), expiry, frontend_id())); } -void store::subtract(data key, data value, optional expiry) const { - anon_send(frontend_, atom::local_v, - make_internal_command( - std::move(key), std::move(value), expiry, frontend_id())); +void store::clear() { + if (auto ptr = state_.lock()) + ptr->anon_send(atom::local_v, + make_internal_command(frontend_id())); } -void store::clear() const { - anon_send(frontend_, atom::local_v, - make_internal_command(frontend_id())); +bool store::await_idle(timespan timeout) { + BROKER_TRACE(BROKER_ARG(timeout)); + bool result = false; + if (auto ptr = state_.lock()) + ptr->self->request(ptr->frontend, timeout, atom::await_v, atom::idle_v) + .receive([&result](atom::ok) { result = true; }, + []([[maybe_unused]] const error& err) { + BROKER_ERROR("await_idle failed: " << err); + }); + return result; } -store::store(caf::actor actor, std::string name) - : frontend_{std::move(actor)}, name_{std::move(name)} { - // nop +void store::await_idle(std::function callback, timespan timeout) { + BROKER_TRACE(BROKER_ARG(timeout)); + if (!callback) { + BROKER_ERROR("invalid callback received for await_idle"); + return; + } + if (auto ptr = state_.lock()) { + auto await_actor = [cb{std::move(callback)}](caf::event_based_actor* self, + caf::actor frontend, + timespan t) { + self->request(frontend, t, atom::await_v, atom::idle_v) + .then([cb](atom::ok) { cb(true); }, [cb](const error&) { cb(false); }); + }; + ptr->self->spawn(std::move(await_actor), ptr->frontend, timeout); + } else { + callback(false); + } } void store::reset() { + state_.reset(); } } // namespace broker diff --git a/src/store_event.cc b/src/store_event.cc index d5a56488..cbfde9d3 100644 --- a/src/store_event.cc +++ b/src/store_event.cc @@ -13,8 +13,8 @@ constexpr const char* 
type_strings[] = { "expire", }; -bool is_publisher_id(const vector& xs, size_t endpoint_index, - size_t object_index) { +bool is_entity_id(const vector& xs, size_t endpoint_index, + size_t object_index) { return (is(xs[endpoint_index]) && is(xs[object_index])) || (can_convert_to(xs[endpoint_index]) && is(xs[object_index])); @@ -27,7 +27,7 @@ store_event::insert store_event::insert::make(const vector& xs) noexcept { && to(xs[0]) == store_event::type::insert && is(xs[1]) && (is(xs[4]) || is(xs[4])) - && is_publisher_id(xs, 5, 6) + && is_entity_id(xs, 5, 6) ? &xs : nullptr}; } @@ -37,7 +37,7 @@ store_event::update store_event::update::make(const vector& xs) noexcept { && to(xs[0]) == store_event::type::update && is(xs[1]) && (is(xs[5]) || is(xs[5])) - && is_publisher_id(xs, 6, 7) + && is_entity_id(xs, 6, 7) ? &xs : nullptr}; } @@ -46,7 +46,7 @@ store_event::erase store_event::erase::make(const vector& xs) noexcept { return erase{xs.size() == 5 && to(xs[0]) == store_event::type::erase && is(xs[1]) - && is_publisher_id(xs, 3, 4) + && is_entity_id(xs, 3, 4) ? &xs : nullptr}; } @@ -55,7 +55,7 @@ store_event::expire store_event::expire::make(const vector& xs) noexcept { return expire{xs.size() == 5 && to(xs[0]) == store_event::type::expire && is(xs[1]) - && is_publisher_id(xs, 3, 4) + && is_entity_id(xs, 3, 4) ? 
&xs : nullptr}; } diff --git a/src/subscriber.cc b/src/subscriber.cc index 9f36c727..575f793c 100644 --- a/src/subscriber.cc +++ b/src/subscriber.cc @@ -7,6 +7,7 @@ #include #include +#include #include #include "broker/atoms.hh" @@ -31,7 +32,7 @@ struct subscriber_worker_state { bool calculate_rate = true; - static const char* name; + static inline const char* name = "broker.subscriber"; void tick() { if (buf.size() < sample_size) { @@ -50,8 +51,6 @@ struct subscriber_worker_state { } }; -const char* subscriber_worker_state::name = "subscriber_worker"; - class subscriber_sink : public stream_sink { public: using super = stream_sink; @@ -162,7 +161,7 @@ subscriber::subscriber(endpoint& e, std::vector ts, size_t max_qsize) } subscriber::~subscriber() { - if ( worker_ ) + if (worker_) anon_send_exit(worker_, exit_reason::user_shutdown); } diff --git a/src/time.cc b/src/time.cc index 27c7f8d9..0580f122 100644 --- a/src/time.cc +++ b/src/time.cc @@ -1,5 +1,7 @@ #include "broker/time.hh" +#include "caf/timestamp.hpp" + namespace broker { bool convert(timespan s, std::string& str) { @@ -20,7 +22,8 @@ bool convert(timespan s, double& secs) { } bool convert(timestamp t, std::string& str) { - return convert(t.time_since_epoch(), str); + caf::append_timestamp_to_string(str, t); + return true; } bool convert(timestamp t, double& secs) { diff --git a/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/expected-tpl.conf b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/expected-tpl.conf new file mode 100644 index 00000000..fca19e16 --- /dev/null +++ b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/expected-tpl.conf @@ -0,0 +1,126 @@ +nodes { + logger { + id = + num-inputs = 194 + inputs-by-node { + manager = 33 + proxy = 16 + worker = 145 + } + forward = false + topics = [ + "bro/cluster/logger", + "bro/cluster/node/logger-1/", + "bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294/", + "bro/cluster/pool/logger", + 
"bro/control/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294", + "bro/logs/", + "bro/supervisor", + "zeek/cluster/logger", + "zeek/cluster/node/logger-1/", + "zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294/", + "zeek/cluster/pool/logger", + "zeek/control/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294", + "zeek/known/certs/<$>/data/clone", + "zeek/known/hosts/<$>/data/clone", + "zeek/known/services/<$>/data/clone", + "zeek/logs/", + "zeek/supervisor", + ] + peers = [ + "manager", + "proxy", + "worker", + ] + generator-file = "%(path)s/logger/messages.dat" + } + manager { + id = + num-inputs = 12 + inputs-by-node { + logger = 4 + proxy = 4 + worker = 4 + } + forward = false + topics = [ + "bro/cluster/manager", + "bro/cluster/node/manager/", + "bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341/", + "bro/control/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341", + "bro/supervisor", + "zeek/cluster/manager", + "zeek/cluster/node/manager/", + "zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341/", + "zeek/control/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341", + "zeek/known/certs/<$>/data/master", + "zeek/known/hosts/<$>/data/master", + "zeek/known/services/<$>/data/master", + "zeek/supervisor", + ] + peers = [ + "proxy", + "worker", + ] + generator-file = "%(path)s/manager/messages.dat" + } + proxy { + id = + num-inputs = 12 + inputs-by-node { + logger = 1 + manager = 10 + worker = 1 + } + forward = false + topics = [ + "bro/cluster/node/proxy-1/", + "bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387/", + "bro/cluster/pool/proxy", + "bro/cluster/proxy", + "bro/control/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387", + "bro/supervisor", + "zeek/cluster/node/proxy-1/", + "zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387/", + "zeek/cluster/pool/proxy", + "zeek/cluster/proxy", + "zeek/control/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387", + "zeek/known/certs/<$>/data/clone", + 
"zeek/known/hosts/<$>/data/clone", + "zeek/known/services/<$>/data/clone", + "zeek/supervisor", + ] + peers = [ + "worker", + ] + generator-file = "%(path)s/proxy/messages.dat" + } + worker { + id = + num-inputs = 19 + inputs-by-node { + logger = 1 + manager = 17 + proxy = 1 + } + forward = false + topics = [ + "bro/cluster/node/worker-1/", + "bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432/", + "bro/cluster/pool/worker", + "bro/cluster/worker", + "bro/control/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432", + "bro/supervisor", + "zeek/cluster/node/worker-1/", + "zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432/", + "zeek/cluster/pool/worker", + "zeek/cluster/worker", + "zeek/control/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432", + "zeek/known/certs/<$>/data/clone", + "zeek/known/hosts/<$>/data/clone", + "zeek/known/services/<$>/data/clone", + "zeek/supervisor", + ] + generator-file = "%(path)s/worker/messages.dat" + } +} diff --git a/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/logger/id.txt b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/logger/id.txt new file mode 100644 index 00000000..0728245c --- /dev/null +++ b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/logger/id.txt @@ -0,0 +1 @@ +D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294 diff --git a/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/logger/messages.dat b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/logger/messages.dat new file mode 100644 index 00000000..c3f73ed5 Binary files /dev/null and b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/logger/messages.dat differ diff --git a/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/logger/peers.txt b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/logger/peers.txt new file mode 100644 index 00000000..069acf9d --- /dev/null +++ 
b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/logger/peers.txt @@ -0,0 +1,3 @@ +D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341 +D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387 +D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432 diff --git a/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/logger/topics.txt b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/logger/topics.txt new file mode 100644 index 00000000..3cb79feb --- /dev/null +++ b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/logger/topics.txt @@ -0,0 +1,108 @@ +zeek/supervisor +zeek/supervisor +bro/supervisor +zeek/known/services/<$>/data/clone +zeek/known/hosts/<$>/data/clone +zeek/known/certs/<$>/data/clone +zeek/supervisor +bro/supervisor +zeek/cluster/pool/logger +zeek/supervisor +bro/supervisor +zeek/cluster/pool/logger +bro/cluster/pool/logger +zeek/supervisor +bro/supervisor +zeek/cluster/pool/logger +bro/cluster/pool/logger +zeek/cluster/logger +zeek/supervisor +bro/supervisor +zeek/cluster/pool/logger +bro/cluster/pool/logger +zeek/cluster/logger +bro/cluster/logger +zeek/supervisor +bro/supervisor +zeek/cluster/pool/logger +bro/cluster/pool/logger +zeek/cluster/logger +bro/cluster/logger +zeek/logs/ +zeek/supervisor +bro/supervisor +zeek/cluster/pool/logger +bro/cluster/pool/logger +zeek/cluster/logger +bro/cluster/logger +zeek/logs/ +bro/logs/ +zeek/supervisor +bro/supervisor +zeek/cluster/pool/logger +bro/cluster/pool/logger +zeek/cluster/logger +bro/cluster/logger +zeek/logs/ +bro/logs/ +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294/ +zeek/supervisor +bro/supervisor +zeek/cluster/pool/logger +bro/cluster/pool/logger +zeek/cluster/logger +bro/cluster/logger +zeek/logs/ +bro/logs/ +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294/ +bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294/ +zeek/supervisor +bro/supervisor +zeek/cluster/pool/logger +bro/cluster/pool/logger 
+zeek/cluster/logger +bro/cluster/logger +zeek/logs/ +bro/logs/ +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294/ +bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294/ +zeek/cluster/node/logger-1/ +zeek/supervisor +bro/supervisor +zeek/cluster/pool/logger +bro/cluster/pool/logger +zeek/cluster/logger +bro/cluster/logger +zeek/logs/ +bro/logs/ +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294/ +bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294/ +zeek/cluster/node/logger-1/ +bro/cluster/node/logger-1/ +zeek/supervisor +bro/supervisor +zeek/cluster/pool/logger +bro/cluster/pool/logger +zeek/cluster/logger +bro/cluster/logger +zeek/logs/ +bro/logs/ +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294/ +bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294/ +zeek/cluster/node/logger-1/ +bro/cluster/node/logger-1/ +zeek/control/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294 +zeek/supervisor +bro/supervisor +zeek/cluster/pool/logger +bro/cluster/pool/logger +zeek/cluster/logger +bro/cluster/logger +zeek/logs/ +bro/logs/ +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294/ +bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294/ +zeek/cluster/node/logger-1/ +bro/cluster/node/logger-1/ +zeek/control/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294 +bro/control/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294 diff --git a/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/manager/id.txt b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/manager/id.txt new file mode 100644 index 00000000..87734ad6 --- /dev/null +++ b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/manager/id.txt @@ -0,0 +1 @@ +D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341 diff --git a/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/manager/messages.dat b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/manager/messages.dat 
new file mode 100644 index 00000000..7f793815 Binary files /dev/null and b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/manager/messages.dat differ diff --git a/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/manager/peers.txt b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/manager/peers.txt new file mode 100644 index 00000000..1e03e55c --- /dev/null +++ b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/manager/peers.txt @@ -0,0 +1,3 @@ +D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294 +D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387 +D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432 diff --git a/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/manager/topics.txt b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/manager/topics.txt new file mode 100644 index 00000000..69bd79e1 --- /dev/null +++ b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/manager/topics.txt @@ -0,0 +1,58 @@ +zeek/supervisor +zeek/supervisor +bro/supervisor +zeek/known/services/<$>/data/master +zeek/known/hosts/<$>/data/master +zeek/known/certs/<$>/data/master +zeek/supervisor +bro/supervisor +zeek/cluster/manager +zeek/supervisor +bro/supervisor +zeek/cluster/manager +bro/cluster/manager +zeek/supervisor +bro/supervisor +zeek/cluster/manager +bro/cluster/manager +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341/ +zeek/supervisor +bro/supervisor +zeek/cluster/manager +bro/cluster/manager +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341/ +bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341/ +zeek/supervisor +bro/supervisor +zeek/cluster/manager +bro/cluster/manager +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341/ +bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341/ +zeek/cluster/node/manager/ +zeek/supervisor +bro/supervisor +zeek/cluster/manager +bro/cluster/manager 
+zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341/ +bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341/ +zeek/cluster/node/manager/ +bro/cluster/node/manager/ +zeek/supervisor +bro/supervisor +zeek/cluster/manager +bro/cluster/manager +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341/ +bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341/ +zeek/cluster/node/manager/ +bro/cluster/node/manager/ +zeek/control/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341 +zeek/supervisor +bro/supervisor +zeek/cluster/manager +bro/cluster/manager +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341/ +bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341/ +zeek/cluster/node/manager/ +bro/cluster/node/manager/ +zeek/control/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341 +bro/control/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341 diff --git a/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/proxy/id.txt b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/proxy/id.txt new file mode 100644 index 00000000..ee94e9dd --- /dev/null +++ b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/proxy/id.txt @@ -0,0 +1 @@ +D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387 diff --git a/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/proxy/messages.dat b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/proxy/messages.dat new file mode 100644 index 00000000..c7ba52b7 Binary files /dev/null and b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/proxy/messages.dat differ diff --git a/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/proxy/peers.txt b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/proxy/peers.txt new file mode 100644 index 00000000..8ea52930 --- /dev/null +++ b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/proxy/peers.txt @@ -0,0 +1,3 @@ 
+D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294 +D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341 +D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432 diff --git a/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/proxy/topics.txt b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/proxy/topics.txt new file mode 100644 index 00000000..ca4499eb --- /dev/null +++ b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/proxy/topics.txt @@ -0,0 +1,81 @@ +zeek/supervisor +zeek/supervisor +bro/supervisor +zeek/known/services/<$>/data/clone +zeek/known/hosts/<$>/data/clone +zeek/known/certs/<$>/data/clone +zeek/supervisor +bro/supervisor +zeek/cluster/pool/proxy +zeek/supervisor +bro/supervisor +zeek/cluster/pool/proxy +bro/cluster/pool/proxy +zeek/supervisor +bro/supervisor +zeek/cluster/pool/proxy +bro/cluster/pool/proxy +zeek/cluster/proxy +zeek/supervisor +bro/supervisor +zeek/cluster/pool/proxy +bro/cluster/pool/proxy +zeek/cluster/proxy +bro/cluster/proxy +zeek/supervisor +bro/supervisor +zeek/cluster/pool/proxy +bro/cluster/pool/proxy +zeek/cluster/proxy +bro/cluster/proxy +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387/ +zeek/supervisor +bro/supervisor +zeek/cluster/pool/proxy +bro/cluster/pool/proxy +zeek/cluster/proxy +bro/cluster/proxy +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387/ +bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387/ +zeek/supervisor +bro/supervisor +zeek/cluster/pool/proxy +bro/cluster/pool/proxy +zeek/cluster/proxy +bro/cluster/proxy +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387/ +bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387/ +zeek/cluster/node/proxy-1/ +zeek/supervisor +bro/supervisor +zeek/cluster/pool/proxy +bro/cluster/pool/proxy +zeek/cluster/proxy +bro/cluster/proxy +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387/ +bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387/ 
+zeek/cluster/node/proxy-1/ +bro/cluster/node/proxy-1/ +zeek/supervisor +bro/supervisor +zeek/cluster/pool/proxy +bro/cluster/pool/proxy +zeek/cluster/proxy +bro/cluster/proxy +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387/ +bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387/ +zeek/cluster/node/proxy-1/ +bro/cluster/node/proxy-1/ +zeek/control/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387 +zeek/supervisor +bro/supervisor +zeek/cluster/pool/proxy +bro/cluster/pool/proxy +zeek/cluster/proxy +bro/cluster/proxy +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387/ +bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387/ +zeek/cluster/node/proxy-1/ +bro/cluster/node/proxy-1/ +zeek/control/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387 +bro/control/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387 diff --git a/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/worker/id.txt b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/worker/id.txt new file mode 100644 index 00000000..18bb14ee --- /dev/null +++ b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/worker/id.txt @@ -0,0 +1 @@ +D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432 diff --git a/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/worker/messages.dat b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/worker/messages.dat new file mode 100644 index 00000000..76862833 Binary files /dev/null and b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/worker/messages.dat differ diff --git a/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/worker/peers.txt b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/worker/peers.txt new file mode 100644 index 00000000..09e0bba8 --- /dev/null +++ b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/worker/peers.txt @@ -0,0 +1,3 @@ +D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27294 
+D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27387 +D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27341 diff --git a/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/worker/topics.txt b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/worker/topics.txt new file mode 100644 index 00000000..c15b7a1b --- /dev/null +++ b/tests/.tmp/broker-cluster-benchmark/zeek-dns-traffic-recording/worker/topics.txt @@ -0,0 +1,81 @@ +zeek/supervisor +zeek/supervisor +bro/supervisor +zeek/known/services/<$>/data/clone +zeek/known/hosts/<$>/data/clone +zeek/known/certs/<$>/data/clone +zeek/supervisor +bro/supervisor +zeek/cluster/pool/worker +zeek/supervisor +bro/supervisor +zeek/cluster/pool/worker +bro/cluster/pool/worker +zeek/supervisor +bro/supervisor +zeek/cluster/pool/worker +bro/cluster/pool/worker +zeek/cluster/worker +zeek/supervisor +bro/supervisor +zeek/cluster/pool/worker +bro/cluster/pool/worker +zeek/cluster/worker +bro/cluster/worker +zeek/supervisor +bro/supervisor +zeek/cluster/pool/worker +bro/cluster/pool/worker +zeek/cluster/worker +bro/cluster/worker +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432/ +zeek/supervisor +bro/supervisor +zeek/cluster/pool/worker +bro/cluster/pool/worker +zeek/cluster/worker +bro/cluster/worker +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432/ +bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432/ +zeek/supervisor +bro/supervisor +zeek/cluster/pool/worker +bro/cluster/pool/worker +zeek/cluster/worker +bro/cluster/worker +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432/ +bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432/ +zeek/cluster/node/worker-1/ +zeek/supervisor +bro/supervisor +zeek/cluster/pool/worker +bro/cluster/pool/worker +zeek/cluster/worker +bro/cluster/worker +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432/ +bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432/ +zeek/cluster/node/worker-1/ 
+bro/cluster/node/worker-1/ +zeek/supervisor +bro/supervisor +zeek/cluster/pool/worker +bro/cluster/pool/worker +zeek/cluster/worker +bro/cluster/worker +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432/ +bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432/ +zeek/cluster/node/worker-1/ +bro/cluster/node/worker-1/ +zeek/control/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432 +zeek/supervisor +bro/supervisor +zeek/cluster/pool/worker +bro/cluster/pool/worker +zeek/cluster/worker +bro/cluster/worker +zeek/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432/ +bro/cluster/nodeid/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432/ +zeek/cluster/node/worker-1/ +bro/cluster/node/worker-1/ +zeek/control/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432 +bro/control/D2FF66DFFB3612DE4C9A6AF8381D8040DF106C00#27432 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index eed3a181..5fa0ceb5 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,20 +7,30 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/cpp) # -- C++ ---------------------------------------------------------------------- set(tests + cpp/alm/multipath.cc + cpp/alm/peer/async_transport.cc + cpp/alm/peer/stream_transport.cc + cpp/alm/routing_table.cc cpp/backend.cc - cpp/core.cc + cpp/core_actor.cc cpp/data.cc cpp/detail/central_dispatcher.cc + cpp/detail/channel.cc cpp/detail/data_generator.cc cpp/detail/generator_file_writer.cc - cpp/detail/meta_command_writer.cc + cpp/detail/iterator_range.cc cpp/detail/meta_data_writer.cc + cpp/detail/unipath_manager.cc + cpp/domain_options.cc cpp/error.cc cpp/filter_type.cc + cpp/gateway.cc cpp/integration.cc cpp/master.cc + cpp/mixin/connector.cc + cpp/mixin/data_store_manager.cc + cpp/mixin/notifier.cc cpp/publisher.cc - cpp/publisher_id.cc cpp/radix_tree.cc cpp/ssl.cc cpp/status.cc @@ -28,11 +38,19 @@ set(tests cpp/store.cc cpp/store_event.cc cpp/subscriber.cc + cpp/system/shutdown.cc cpp/test.cc cpp/topic.cc cpp/zeek.cc ) 
+# Our system testing suites require `socketpair`, but Windows lacks this API. +if (NOT MSVC) + list(APPEND tests + cpp/system/shutdown.cc + ) +endif() + # Setup correct broker library (static/shared). if (ENABLE_SHARED) set(libbroker broker) @@ -50,7 +68,7 @@ macro(make_cpp_test suite) if (${test_name} STREQUAL radix_tree) set(test_verbosity 3) # otherwise it just produces way too much output endif () - add_test(NAME ${test_name} COMMAND broker-test -v ${test_verbosity} -s "${suite}" ${ARGN}) + add_test(NAME ${test_name} COMMAND broker-test -v ${test_verbosity} -s "^${suite}$" ${ARGN}) set_tests_properties(${test_name} PROPERTIES TIMEOUT ${BROKER_TEST_TIMEOUT}) set_tests_properties(${test_name} PROPERTIES ENVIRONMENT "BROKER_TEST_DIR=${BROKER_TEST_DIR}") @@ -109,9 +127,16 @@ if (BROKER_PYTHON_BINDINGS) make_python_test(ssl-tests) make_python_test(store) make_python_test(topic) - make_python_test(broker-cluster-benchmark - $) + # TODO: re-enable after updating generator files or adding backwards compatibility + # make_python_test(broker-cluster-benchmark + # $) + # + # # allow some extra time for the benchmark integration test + # set_tests_properties(python-broker-cluster-benchmark PROPERTIES TIMEOUT 120) +endif () + +# -- Python ------------------------------------------------------------------- - # allow some extra time for the benchmark integration test - set_tests_properties(python-broker-cluster-benchmark PROPERTIES TIMEOUT 120) +if (BROKER_ENABLE_MICRO_BENCHMARKS) + add_subdirectory(micro-benchmark) endif () diff --git a/tests/benchmark/broker-benchmark.cc b/tests/benchmark/broker-benchmark.cc index 02c35d75..f4e0fd52 100644 --- a/tests/benchmark/broker-benchmark.cc +++ b/tests/benchmark/broker-benchmark.cc @@ -152,28 +152,12 @@ void send_batch(endpoint& ep, publisher& p) { p.publish(std::move(batch)); } -const vector* inner_vector(const vector& vec) { - for (auto& x : vec) - if (auto ptr = caf::get_if(&x)) - return inner_vector(*ptr); - return &vec; -} - 
-void receivedStats(endpoint& ep, const data& x) { +void receivedStats(endpoint& ep, data x) { // Example for an x: '[1, 1, [stats_update, [1ns, 1ns, 0]]]'. - // We are only interested in the '[1ns, 1ns, 0]' part (the inner vector). - if (!caf::holds_alternative(x)) { - std::cerr << "received invalid stats (not a vector): " << to_string(x) - << '\n'; - return; - } - auto inner = inner_vector(caf::get(x)); - if (inner->size() != 3) { - std::cerr << "received invalid stats (most inner vector has size " - << inner->size() << ", expected 3): " << to_string(x) << '\n'; - return; - } - auto& rec = *inner; + // We are only interested in the '[1ns, 1ns, 0]' part. + auto xvec = caf::get(x); + auto yvec = caf::get(xvec[2]); + auto rec = caf::get(yvec[1]); double t; convert(caf::get(rec[0]), t); @@ -228,6 +212,10 @@ void receivedStats(endpoint& ep, const data& x) { max_exceeded_counter = 0; } +struct source_state { + static inline const char* name = "broker.benchmark.source"; +}; + void client_mode(endpoint& ep, const std::string& host, int port) { // Make sure to receive status updates. auto ss = ep.make_status_subscriber(true); @@ -239,7 +227,7 @@ void client_mode(endpoint& ep, const std::string& host, int port) { }, [&](caf::unit_t&, data_message x) { // Print everything we receive. 
- receivedStats(ep, get_data(x)); + receivedStats(ep, move_data(x)); }, [](caf::unit_t&, const caf::error&) { // nop @@ -258,17 +246,23 @@ void client_mode(endpoint& ep, const std::string& host, int port) { if (verbose) std::cout << "*** endpoint is now peering to remote" << std::endl; if (batch_rate == 0) { - ep.publish_all( - [](caf::unit_t&) {}, - [](caf::unit_t&, caf::downstream& out, size_t hint) { - for (size_t i = 0; i < hint; ++i) { - auto name = "event_" + std::to_string(event_type); - out.push(data_message{"/benchmark/events", - zeek::Event(std::move(name), createEventArgs())}); - } + ep.system().spawn( + [](caf::stateful_actor* self, caf::actor core) { + caf::attach_stream_source( + self, core, [](caf::unit_t&) {}, + [self](caf::unit_t&, caf::downstream& out, + size_t hint) { + // get_downstream_queue().total_task_size(); + for (size_t i = 0; i < hint; ++i) { + auto name = "event_" + std::to_string(event_type); + out.push( + data_message{"/benchmark/events", + zeek::Event(std::move(name), createEventArgs())}); + } + }, + [](const caf::unit_t&) { return false; }); }, - [](const caf::unit_t&) { return false; } - ); + ep.core()); for (;;) { // Print status events. auto ev = ss.get(); @@ -312,32 +306,46 @@ void client_mode(endpoint& ep, const std::string& host, int port) { } } +struct sink_state { + static inline const char* name = "broker.benchmark.sink"; +}; + // This mode mimics what benchmark.bro does. void server_mode(endpoint& ep, const std::string& iface, int port) { // Make sure to receive status updates. auto ss = ep.make_status_subscriber(true); // Subscribe to /benchmark/events. - ep.subscribe_nosync( - {"/benchmark/events"}, - [](caf::unit_t&) { - // nop - }, - [&](caf::unit_t&, data_message x) { - auto msg = move_data(x); - // Count number of events (counts each element in a batch as one event). 
- if (zeek::Message::type(msg) == zeek::Message::Type::Event) { - ++num_events; - } else if (zeek::Message::type(msg) == zeek::Message::Type::Batch) { - zeek::Batch batch(std::move(msg)); - num_events += batch.batch().size(); - } else { - std::cerr << "unexpected message type" << std::endl; - exit(1); - } + ep.system().spawn( + [](caf::stateful_actor* self, caf::actor core) { + std::vector topics{"/benchmark/events"}; + self->send(self * core, atom::join_v, std::move(topics)); + self->become([=](broker::endpoint::stream_type in) { + caf::attach_stream_sink( + self, in, + [](caf::unit_t&) { + // nop + }, + [self](caf::unit_t&, data_message x) { + auto msg = move_data(x); + // Count number of events (counts each element in a batch as one + // event). + if (zeek::Message::type(msg) == zeek::Message::Type::Event) { + ++num_events; + } else if (zeek::Message::type(msg) == zeek::Message::Type::Batch) { + zeek::Batch batch(std::move(msg)); + num_events += batch.batch().size(); + } else { + std::cerr << "unexpected message type" << std::endl; + exit(1); + } + }, + [](caf::unit_t&, const caf::error&) { + // nop + }); + self->unbecome(); + }); }, - [](caf::unit_t&, const caf::error&) { - // nop - }); + ep.core()); // Listen on /benchmark/terminate for stop message. std::atomic terminate{false}; ep.subscribe_nosync( @@ -367,7 +375,7 @@ void server_mode(endpoint& ep, const std::string& iface, int port) { auto stats = vector{now, now - last_time, count{reset_num_events()}}; if (verbose) std::cout << "stats: " << caf::deep_to_string(stats) << std::endl; - zeek::Event ev("stats_update", std::move(stats)); + zeek::Event ev("stats_update", vector{std::move(stats)}); ep.publish("/benchmark/stats", std::move(ev)); // Advance time and print status events. 
last_time = now; diff --git a/tests/benchmark/broker-cluster-benchmark.cc b/tests/benchmark/broker-cluster-benchmark.cc index 64bbef81..cf87c9ed 100644 --- a/tests/benchmark/broker-cluster-benchmark.cc +++ b/tests/benchmark/broker-cluster-benchmark.cc @@ -8,9 +8,11 @@ #include "caf/actor_system.hpp" #include "caf/actor_system_config.hpp" +#include "caf/after.hpp" #include "caf/attach_stream_sink.hpp" #include "caf/attach_stream_source.hpp" #include "caf/event_based_actor.hpp" +#include "caf/scoped_actor.hpp" #include "caf/settings.hpp" #include "caf/stateful_actor.hpp" #include "caf/string_algorithms.hpp" @@ -222,8 +224,8 @@ struct node { /// Stores how many messages we expect on this node during measurement. size_t num_inputs = 0; - /// Stores whether this node regularly forwards Broker events. - bool forward = true; + /// Stores whether this node disables forwarding of subscriptions. + bool disable_forwarding = true; /// Stores how many messages we produce using the gernerator file. If `none`, /// we produce the number of messages in the generator file. 
@@ -265,40 +267,6 @@ std::vector topics(const node& x) { return result; } -#define HAS_ROUTING_LOOP_FUN(direction) \ - bool has_##direction##_loop(const node& x, std::vector path) { \ - if (!x.forward) \ - return false; \ - auto in_path = [&path](const node& n) { \ - return std::find(path.begin(), path.end(), n.name) != path.end(); \ - }; \ - size_t res = path.size(); \ - for (const auto y : x.direction) { \ - if (in_path(*y)) \ - return true; \ - auto cpy = path; \ - cpy.emplace_back(y->name); \ - if (has_##direction##_loop(*y, std::move(cpy))) \ - return true; \ - } \ - return false; \ - } \ - \ - bool has_##direction##_loop(const node& x) { \ - for (const auto y : x.direction) \ - if (has_##direction##_loop(*y, {x.name, y->name})) \ - return true; \ - return false; \ - } - -HAS_ROUTING_LOOP_FUN(left) - -HAS_ROUTING_LOOP_FUN(right) - -bool has_routing_loop(const node& x) { - return has_left_loop(x) || has_right_loop(x); -} - template struct strip_optional { using type = T; @@ -332,7 +300,7 @@ expected make_node(const string& name, const caf::settings& parameters) { SET_FIELD(topics, mandatory); SET_FIELD(generator_file, optional); SET_FIELD(num_inputs, optional); - SET_FIELD(forward, optional); + SET_FIELD(disable_forwarding, optional); SET_FIELD(num_outputs, optional); SET_FIELD(inputs_by_node, optional); SET_FIELD(log_verbosity, optional); @@ -373,7 +341,7 @@ struct node_manager_state { BROKER_ASSERT(this_node_ptr != nullptr); this_node = this_node_ptr; broker::broker_options opts; - opts.forward = this_node_ptr->forward; + opts.disable_forwarding = this_node_ptr->disable_forwarding; opts.disable_ssl = true; opts.ignore_broker_conf = true; // Make sure no one messes with our setup. 
broker::configuration cfg{opts}; @@ -387,15 +355,13 @@ struct node_manager_state { using node_manager_actor = caf::stateful_actor; struct generator_state { - static const char* name; + static inline const char* name = "broker.benchmark.generator"; }; -const char* generator_state::name = "generator"; - void generator(caf::stateful_actor* self, node* this_node, caf::actor core, broker::detail::generator_file_reader_ptr ptr) { using generator_ptr = broker::detail::generator_file_reader_ptr; - using value_type = broker::node_message::value_type; + using value_type = broker::node_message_content; if (this_node->num_outputs != caf::none) { struct state { generator_ptr gptr; @@ -534,12 +500,11 @@ struct consumer_state { node* this_node; caf::event_based_actor* self; size_t connected_streams = 0; - static const char* name; std::chrono::steady_clock::time_point start; caf::actor observer; -}; -const char* consumer_state::name = "consumer"; + static inline const char* name = "broker.benchmark.consumer"; +}; caf::behavior consumer(caf::stateful_actor* self, node* this_node, caf::actor core, caf::actor observer) { @@ -632,7 +597,7 @@ caf::behavior node_manager(node_manager_actor* self, node* this_node) { // Otherwise, we get a race on the topics and can "loose" initial messages. // Despite its name, endpoint::forward does not force any forwarding. It only // makes sure that the topic is in our local filter. - if (is_receiver(*this_node) || this_node->forward) + if (is_receiver(*this_node)) self->state.ep.forward(topics(*this_node)); return { [=](broker::atom::init) -> caf::result { @@ -743,13 +708,6 @@ bool build_node_tree(std::vector& nodes) { return false; } } - // Sanity check: there must be no loop. 
- for (auto& x : nodes) { - if (has_routing_loop(x) && is_sender(x)) { - err::println("starting at node '", x.name, "' results in a routing loop"); - return false; - } - } // Reduce the number of connections on startup to a minimum: if A peers to B // and B peers to A, then we can safely drop the "B peers to A" part from the // config. @@ -874,7 +832,12 @@ int generate_config(string_list directories) { verbose::println("fetch config parameters for this node from broker.conf"); auto conf_file = directory + "/broker.conf"; if (auto conf = actor_system_config::parse_config_file(conf_file.c_str())) { - node.forward = caf::get_or(*conf, "broker.forward", true); + // Older versions of Broker use 'broker.forward' as config parameter. + if (auto val = caf::get_if(std::addressof(*conf), "broker.forward")) + node.disable_forwarding = !*val; + else + node.disable_forwarding + = caf::get_or(*conf, "broker.disable-forwarding", false); } else { err::println("unable to parse ", quoted{conf_file}, ": ", to_string(conf.error())); @@ -955,7 +918,7 @@ int generate_config(string_list directories) { traverse = [&](node& src, node& dst, const output_map& out, const filter& f, walk_fun walk) { step(src, dst, out, f); - if (!dst.forward) + if (dst.disable_forwarding) return; // TODO: take TTL counter into consideration for (auto peer : walk(dst)) { @@ -1012,7 +975,7 @@ int generate_config(string_list directories) { out::println(" ", kvp.first, " = ", kvp.second); out::println(" }"); } - out::println(" forward = ", node.forward); + out::println(" disable_forwarding = ", node.disable_forwarding); print_field("topics", node.topics); print_field("peers", node.peers); if (!node.generator_file.empty() && !outputs[node.name].empty()) @@ -1235,7 +1198,7 @@ int main(int argc, char** argv) { size_t data_entries = 0; size_t command_entries = 0; std::map entries_by_topic; - broker::node_message::value_type x; + broker::node_message_content x; while (!gptr->at_end()) { if (auto err = gptr->read(x)) { 
err::println("error while parsing ", file_name, ": ", to_string(err)); diff --git a/tests/cpp/alm/multipath.cc b/tests/cpp/alm/multipath.cc new file mode 100644 index 00000000..7e498a1b --- /dev/null +++ b/tests/cpp/alm/multipath.cc @@ -0,0 +1,226 @@ +#define SUITE alm.multipath + +#include "broker/alm/multipath.hh" + +#include "test.hh" + +#include + +#include +#include + +#include "broker/alm/routing_table.hh" + +using broker::alm::multipath; + +using namespace broker; + +namespace { + +struct fixture : base_fixture { + void stringify(const multipath& path, std::string& result) { + if (!path.head().id()) { + result += "()"; + } else { + result += '('; + result += id_by_value(path.head().id()); + if (path.num_nodes() > 0) { + result += ", ["; + path.for_each_node([this, &result, first{true}](const auto& n) mutable { + if (first) + first = false; + else + result += ", "; + stringify(n, result); + }); + result += ']'; + } + result += ')'; + } + } + + // Creates a list of endpoint IDs from their char key. + template + auto ls(Ts... 
xs) { + return std::vector{ids[xs]...}; + }; + + std::string stringify(const multipath& path) { + std::string result; + stringify(path, result); + return result; + } + + std::shared_ptr tptr; + + void make_tree(char id) { + tptr = std::make_shared(ids[id]); + } + + using emplace_result = std::pair; + + emplace_result emplace(alm::multipath_node* pos, char id) { + return pos->nodes().emplace(tptr->mem, ids[id]); + } + + emplace_result emplace(char id) { + return emplace(tptr->root, id); + } +}; + +} // namespace + +FIXTURE_SCOPE(multipath_tests, fixture) + +TEST(multipaths are default constructible) { + multipath p; + CHECK(!p.head().id()); + CHECK_EQUAL(p.num_nodes(), 0u); + CHECK_EQUAL(stringify(p), "()"); +} + +TEST(multipath trees grow with emplace) { + make_tree('A'); + auto ac = emplace('C').first; + CHECK_EQUAL(ac->head().id(), ids['C']); + emplace(ac, 'D'); + emplace(ac, 'E'); + auto ab = emplace('B').first; + CHECK_EQUAL(ab->head().id(), ids['B']); + emplace(ab, 'F'); + emplace(ab, 'G'); + std::string buf; + auto path = multipath{tptr}; + CHECK_EQUAL(stringify(path), "(A, [(B, [(F), (G)]), (C, [(D), (E)])])"); +} + +TEST(multipath nodes sort their children) { + std::vector children{'B', 'C', 'D', 'E'}; + do { + make_tree('A'); + for (auto child : children) + emplace(child); + CHECK_EQUAL(stringify(multipath{tptr}), "(A, [(B), (C), (D), (E)])"); + } while (std::next_permutation(children.begin(), children.end())); +} + +TEST(multipaths are constructible from linear paths) { + auto abc = ls('A', 'B', 'C'); + multipath path{abc.begin(), abc.end()}; + CHECK_EQUAL(stringify(path), "(A, [(B, [(C)])])"); +} + +TEST(multipaths are copy constructible and comparable) { + auto abc = ls('A', 'B', 'C'); + multipath path1{abc.begin(), abc.end()}; + auto path2 = path1; + CHECK_EQUAL(stringify(path1), stringify(path2)); + CHECK_EQUAL(path1, path2); +} + +TEST(multipaths are serializable) { + make_tree('A'); + MESSAGE("fill the tree with nodes"); + { + auto ac = 
emplace('C').first; + emplace(ac, 'D'); + emplace(ac, 'E'); + auto ab = emplace('B').first; + emplace(ab, 'F'); + emplace(ab, 'G'); + } + auto path = multipath{tptr}; + caf::binary_serializer::container_type buf; + MESSAGE("serializer the path into a buffer"); + { + caf::binary_serializer sink{sys, buf}; + CHECK(sink.apply(path)); + } + multipath copy; + MESSAGE("deserializers a copy from the path from the buffer"); + { + caf::binary_deserializer source{sys,buf}; + CHECK(source.apply(copy)); + } + MESSAGE("after a serialization roundtrip, the path is equal to its copy"); + CHECK_EQUAL(stringify(path), stringify(copy)); + CHECK_EQUAL(to_string(path), to_string(copy)); + CHECK_EQUAL(path, copy); +} + +TEST(source routing extracts multipaths from routing tables) { + // We use the subset of the topology that we use in the peer unit test: + // + // +---+ + // +-----+ D +-----+ + // | +---+ | + // | | + // +---+ +---+ + // +-----+ B | | I +-+ + // | +---+ +---+ | + // | | | | + // | | +---+ | | + // | +-----+ E +-----+ | + // | +---+ | + // +---+ +---+ + // | A +-----------------------+ J | + // +---+ +---+ + // + alm::routing_table tbl; + auto add = [&](char id, std::vector> paths) { + auto& entry = tbl.emplace(ids[id], caf::actor{}).first->second; + for (auto& path : paths) { + std::vector xs; + for (auto c : path) + xs.emplace_back(ids[c]); + alm::add_or_update_path(tbl, ids[id], std::move(xs), + alm::vector_timestamp(path.size())); + } + }; + add('B', {{'B'}, {'J', 'I', 'D', 'B'}, {'J', 'I', 'E', 'B'}}); + add('D', {{'B', 'D'}, {'J', 'I', 'D'}}); + add('E', {{'B', 'E'}, {'J', 'I', 'E'}}); + add('I', {{'B', 'E', 'I'}, {'B', 'D', 'I'}, {'J', 'I'}}); + add('J', {{'J'}, {'B', 'D', 'I', 'J'}, {'B', 'E', 'I', 'J'}}); + MESSAGE("Sending to B and D creates a single multipath"); + { + std::vector paths; + std::vector unreachables; + alm::multipath::generate(ls('B', 'D'), tbl, paths, unreachables); + REQUIRE_EQUAL(paths.size(), 1u); + REQUIRE_EQUAL(unreachables.size(), 0u); + 
CHECK_EQUAL(stringify(paths[0]),"(B, [(D)])"); + } + MESSAGE("Sending to E and I creates two multipaths"); + { + std::vector paths; + std::vector unreachables; + alm::multipath::generate(ls('E', 'I'), tbl, paths, unreachables); + REQUIRE_EQUAL(paths.size(), 2u); + REQUIRE_EQUAL(unreachables.size(), 0u); + CHECK_EQUAL(stringify(paths[0]),"(B, [(E)])"); + CHECK_EQUAL(stringify(paths[1]),"(J, [(I)])"); + } + MESSAGE("Sending to D, E and I creates two multipaths"); + { + std::vector paths; + std::vector unreachables; + alm::multipath::generate(ls('D', 'E', 'I'), tbl, paths, unreachables); + REQUIRE_EQUAL(paths.size(), 2u); + REQUIRE_EQUAL(unreachables.size(), 0u); + CHECK_EQUAL(stringify(paths[0]),"(B, [(D), (E)])"); + CHECK_EQUAL(stringify(paths[1]),"(J, [(I)])"); + } + MESSAGE("Sending to B and G creates one path and one unreachable"); + { + std::vector paths; + std::vector unreachables; + alm::multipath::generate(ls('B', 'G'), tbl, paths, unreachables); + REQUIRE_EQUAL(paths.size(), 1u); + REQUIRE_EQUAL(unreachables.size(), 1u); + CHECK_EQUAL(stringify(paths[0]),"(B)"); + CHECK_EQUAL(unreachables, ls('G')); + } +} + +FIXTURE_SCOPE_END() diff --git a/tests/cpp/alm/peer/README.md b/tests/cpp/alm/peer/README.md new file mode 100644 index 00000000..d44cdd84 --- /dev/null +++ b/tests/cpp/alm/peer/README.md @@ -0,0 +1,23 @@ +The `alm::peer` is the main component for implementing the core actor (which +ultimately drives the `broker::endpoint`). The class itself manages routing +information and subscriptions. + +For communicating to peers in the network, the `peer` relies on a transport +layer. This layer is implemented by the derived class and the `peer` then uses +CRTP for calling the required functions. Please refer to the Developer Guide in +the manual for further details. + +This design implies that we cannot test the `peer` class on its own. Neither can +we test the transport classes on their own, at least not without excessive +mocking. 
+ +For covering most basic functionality of `alm::peer` without the full CAF +streaming setup, we've split the unit tests for the `peer` into two test suites: + +1. The first setup uses the `alm::async_transport`, which is basically a mockup + transport that omits much functionality Broker requires in an actual + deployment. This setup only covers the most basic operations of `alm::peer` + for setting up a Broker cluster. +2. The second setup uses the `alm::stream_transport`, which drives the actual + core actor in Broker. This test suite covers data transmission, path + revocation, etc. diff --git a/tests/cpp/alm/peer/async_transport.cc b/tests/cpp/alm/peer/async_transport.cc new file mode 100644 index 00000000..39b239da --- /dev/null +++ b/tests/cpp/alm/peer/async_transport.cc @@ -0,0 +1,308 @@ +#define SUITE alm.peer.async_transport + +#include "broker/alm/peer.hh" + +#include "alm/peer/fixture.hh" + +#include "broker/alm/peer.hh" +#include "broker/configuration.hh" +#include "broker/defaults.hh" + +using broker::defaults::store::tick_interval; + +using namespace broker; +using namespace broker::alm; + +namespace { + +/// A transport based on asynchronous messages. For testing only. +class async_transport : public peer { +public: + using super = peer; + + async_transport(caf::event_based_actor* self) : peer(self) { + // nop + } + + void start_peering(const endpoint_id& remote_peer, caf::actor hdl) { + BROKER_TRACE(BROKER_ARG(remote_peer) << BROKER_ARG(hdl)); + if (!tbl().emplace(std::move(remote_peer), std::move(hdl)).second) { + BROKER_INFO("start_peering ignored: already peering with " + << remote_peer); + return; + } + self()->send(hdl, atom::peer_v, id(), filter(), timestamp()); + } + + auto handle_peering(const endpoint_id& remote_id, + const filter_type& remote_filter, + lamport_timestamp remote_timestamp) { + BROKER_TRACE(BROKER_ARG(remote_id)); + // Check whether we already send outbound traffic to the peer. 
Could use + // `BROKER_ASSERT` instead, because this mustn't get called for known peers. + auto src = caf::actor_cast(self()->current_sender()); + if (!tbl().emplace(remote_id, src).second) + BROKER_INFO("received repeated peering request"); + // Propagate filter to peers. + std::vector path{remote_id}; + vector_timestamp path_ts{remote_timestamp}; + handle_filter_update(path, path_ts, remote_filter); + // Reply with our own filter. + return caf::make_message(atom::peer_v, atom::ok_v, id(), filter(), + timestamp()); + } + + auto handle_peering_response(const endpoint_id& remote_id, + const filter_type& filter, + lamport_timestamp timestamp) { + auto src = caf::actor_cast(self()->current_sender()); + if (!tbl().emplace(remote_id, src).second) + BROKER_INFO("received repeated peering response"); + // Propagate filter to peers. + std::vector path{remote_id}; + vector_timestamp path_ts{timestamp}; + handle_filter_update(path, path_ts, filter); + } + + void flush() override { + // nop + } + + template + void dispatch_impl(const T& msg) { + const auto& topic = get_topic(msg); + detail::prefix_matcher matches; + endpoint_id_list receivers; + for (const auto& [peer, filter] : peer_filters_) + if (matches(filter, topic)) + receivers.emplace_back(peer); + if (!receivers.empty()) { + std::vector paths; + std::vector unreachables; + multipath::generate(receivers, tbl_, paths, unreachables); + for (auto&& path : paths) { + auto wrapped = node_message{msg, std::move(path)}; + if (auto row = find_row(tbl_, get_path(wrapped).head().id())) + self()->send(row->hdl, atom::publish_v, std::move(wrapped)); + else + BROKER_WARNING("cannot ship message: no path"); + } + if (!unreachables.empty()) + BROKER_WARNING("cannot ship message: no path to any of" + << unreachables); + } + } + + void dispatch(const data_message& msg) override { + dispatch_impl(msg); + } + + void dispatch(const command_message& msg) override { + dispatch_impl(msg); + } + + void dispatch(node_message&& msg) override { 
+ auto& [content, path] = msg.unshared(); + if (path.head().id() != id()) { + BROKER_WARNING("received a message for another node"); + } else { + if (path.head().is_receiver()) + publish_locally(content); + path.for_each_node([this, cptr{&content}](multipath&& nested) { + if (auto row = find_row(tbl_, nested.head().id()); row && row->hdl) + self()->send(row->hdl, atom::publish_v, + make_node_message(*cptr, std::move(nested))); + else + BROKER_WARNING("cannot ship message: no direct connection to" + << nested.head().id()); + }); + } + } + + void publish(const caf::actor& receiver, atom::subscribe, + const endpoint_id_list& path, const vector_timestamp& ts, + const filter_type& filter) override { + self()->send(receiver, atom::subscribe_v, path, ts, filter); + } + + void publish(const caf::actor& receiver, atom::revoke, + const endpoint_id_list& path, const vector_timestamp& ts, + const endpoint_id& lost_peer, + const filter_type& filter) override { + self()->send(receiver, atom::revoke_v, path, ts, lost_peer, filter); + } + + using super::publish_locally; + + void publish_locally(const data_message&) override { + // nop + } + + void publish_locally(const command_message&) override { + // nop + } + + caf::behavior make_behavior() override { + using detail::lift; + return caf::message_handler{ + [this](atom::publish, node_message& msg) { + this->dispatch(std::move(msg)); + }, + [this](atom::publish, const data_message& msg) { + this->dispatch(msg); + }, + lift(*this, &async_transport::start_peering), + lift(*this, &async_transport::handle_peering), + lift(*this, + &async_transport::handle_peering_response), + lift(*this, &async_transport::subscribe), + lift(*this, &async_transport::handle_filter_update), + } + .or_else(super::make_behavior()); + } +}; + +// -- transport layer ---------------------------------------------------------- + +class async_peer_actor_state : public async_transport { +public: + using self_pointer = caf::event_based_actor*; + + 
async_peer_actor_state(self_pointer self) : async_transport(self) { + // nop + } + + async_peer_actor_state() = delete; + + async_peer_actor_state(const async_peer_actor_state&) = delete; + + async_peer_actor_state& operator=(const async_peer_actor_state&) = delete; + + auto& mgr() { + return *this; + } + + bool connected_to(const caf::actor& hdl) const noexcept { + auto predicate = [&](const auto& kvp) { return kvp.second.hdl == hdl; }; + return std::any_of(tbl().begin(), tbl().end(), predicate); + } + + std::vector shortest_path(const endpoint_id& to) { + if (auto ptr = alm::shortest_path(tbl(), to)) + return *ptr; + return {}; + } +}; + +class async_peer_actor : public caf::stateful_actor { +public: + using super = caf::stateful_actor; + + async_peer_actor(caf::actor_config& cfg, endpoint_id id) : super(cfg) { + state.id(std::move(id)); + } + + caf::behavior make_behavior() override { + return state.make_behavior(); + } +}; + +struct message_pattern { + topic t; + data d; + std::vector ps; +}; + +bool matches(const multipath& path, const std::vector& receivers) { + auto& head = path.head(); + auto i = std::find(receivers.begin(), receivers.end(), head.id()); + if (head.is_receiver() == (i != receivers.end())) { + auto result = true; + path.for_each_node([&result, &receivers](const multipath& nested) { + result &= matches(nested, receivers); + }); + return result; + } else { + return false; + } +} + +bool operator==(const message_pattern& x, const node_message& y) { + const auto& [content, path] = y.data(); + if (!is_data_message(content)) { + return false; + } else { + const auto& dm = get_data_message(content); + return x.t == get_topic(dm) && x.d == get_data(dm) && matches(path, x.ps); + } +} + +bool operator==(const node_message& x, const message_pattern& y) { + return y == x; +} + +// Our topology: +// +// +---+ +// +-----+ D +-----+ +// | +---+ | +// | | +// +---+ +---+ +// +-----+ B | | I +-+ +// | +---+ +---+ | +// | | | | +// | | +---+ | | +// | +-----+ 
E +-----+ | +// | +---+ | +// +---+ +---+ +// | A +-----------------------+ J | +// +---+ +---+ +// | +---+ | | +// | +-----+ F | | | +// | | +-+-+ | | +// | | | | | +// | +---+ +-+-+ | | +// +-----+ C +---+ G +--------+ | +// +---+ +-+-+ | +// | | | +// | +-+-+ | +// +-----+ H +----------+ +// +---+ +// + +} // namespace + +#define CHECK_DISTANCE(src, dst, val) \ + CHECK_EQUAL(alm::distance_to(get(src).tbl(), dst), size_t{val}) + +FIXTURE_SCOPE(async_peer_tests, fixture) + +TEST(topologies with loops resolve to simple forwarding tables) { + connect_peers(); + MESSAGE("after all links are connected, G subscribes to topic 'foo'"); + anon_send(peers["G"], atom::subscribe_v, filter_type{topic{"foo"}}); + run(tick_interval); + MESSAGE("after the subscription, all routing tables store a distance to G"); + CHECK_DISTANCE(A, G, 2); + CHECK_DISTANCE(B, G, 3); + CHECK_DISTANCE(C, G, 1); + CHECK_DISTANCE(D, G, 3); + CHECK_DISTANCE(E, G, 3); + CHECK_DISTANCE(F, G, 1); + CHECK_DISTANCE(H, G, 1); + CHECK_DISTANCE(I, G, 2); + CHECK_DISTANCE(J, G, 1); + MESSAGE("publishing to foo on A will send through C"); + anon_send(peers["A"], atom::publish_v, make_data_message("foo", 42)); + expect((atom::publish, data_message), from(_).to(peers["A"])); + expect((atom::publish, node_message), + from(peers["A"]) // + .to(peers["C"]) + .with(_, message_pattern{"foo", 42, endpoint_id_list{G}})); + expect((atom::publish, node_message), + from(peers["C"]) // + .to(peers["G"]) + .with(_, message_pattern{"foo", 42, endpoint_id_list{G}})); +} + +FIXTURE_SCOPE_END() diff --git a/tests/cpp/alm/peer/fixture.hh b/tests/cpp/alm/peer/fixture.hh new file mode 100644 index 00000000..412f64a7 --- /dev/null +++ b/tests/cpp/alm/peer/fixture.hh @@ -0,0 +1,112 @@ +#include "test.hh" + +// -- fixture ------------------------------------------------------------------ + +// In this fixture, we're setting up this messy topology full of loops: +// +// +---+ +// +-----+ D +-----+ +// | +---+ | +// | | +// +---+ +---+ 
+// +-----+ B | | I +-+ +// | +---+ +---+ | +// | | | | +// | | +---+ | | +// | +-----+ E +-----+ | +// | +---+ | +// +---+ +---+ +// | A +-----------------------+ J | +// +---+ +---+ +// | +---+ | | +// | +-----+ F | | | +// | | +-+-+ | | +// | | | | | +// | +---+ +-+-+ | | +// +-----+ C +---+ G +--------+ | +// +---+ +-+-+ | +// | | | +// | +-+-+ | +// +-----+ H +----------+ +// +---+ +// + +#define PEER_ID(var, num) broker::endpoint_id var = make_peer_id(num) +#define PEER_EXPAND(var) std::make_pair(std::string{#var}, var) + +template +struct fixture : time_aware_fixture, test_coordinator_fixture<>> { + static broker::endpoint_id make_peer_id(uint8_t num) { + std::array host_id; + host_id.fill(num); + return caf::make_node_id(num, host_id); + } + + PEER_ID(A, 1); + PEER_ID(B, 2); + PEER_ID(C, 3); + PEER_ID(D, 4); + PEER_ID(E, 5); + PEER_ID(F, 6); + PEER_ID(G, 7); + PEER_ID(H, 8); + PEER_ID(I, 9); + PEER_ID(J, 10); + + fixture() { + std::vector> cfg{ + PEER_EXPAND(A), PEER_EXPAND(B), PEER_EXPAND(C), PEER_EXPAND(D), + PEER_EXPAND(E), PEER_EXPAND(F), PEER_EXPAND(G), PEER_EXPAND(H), + PEER_EXPAND(I), PEER_EXPAND(J), + }; + for (const auto& [name, id] : cfg) { + names[id] = name; + peers[name] = this->sys.template spawn(id); + } + } + + auto& get(const broker::endpoint_id& id) { + return this->template deref(peers[names[id]]).state; + } + + auto& get(const caf::actor& hdl) { + return this->template deref(hdl).state; + } + + auto shortest_path(const broker::endpoint_id& from, + const broker::endpoint_id& to) { + return get(from).shortest_path(to); + } + + void connect_peers() { + std::map> connections{ + {"A", {"B", "C", "J"}}, {"B", {"A", "D", "E"}}, + {"C", {"A", "F", "G", "H"}}, {"D", {"B", "I"}}, + {"E", {"B", "I"}}, {"F", {"C", "G"}}, + {"I", {"D", "E", "J"}}, {"G", {"C", "F", "H", "J"}}, + {"H", {"C", "G", "J"}}, {"J", {"A", "I", "G", "H"}}, + }; + for (auto& [id, links] : connections) + for (auto& link : links) + caf::anon_send(peers[id], broker::atom::peer_v, 
get(peers[link]).id(), + peers[link]); + this->run(broker::defaults::store::tick_interval); + BROKER_ASSERT(get(A).connected_to(peers["B"])); + BROKER_ASSERT(get(A).connected_to(peers["C"])); + BROKER_ASSERT(get(A).connected_to(peers["J"])); + BROKER_ASSERT(not get(A).connected_to(peers["D"])); + BROKER_ASSERT(not get(A).connected_to(peers["E"])); + BROKER_ASSERT(not get(A).connected_to(peers["F"])); + BROKER_ASSERT(not get(A).connected_to(peers["G"])); + BROKER_ASSERT(not get(A).connected_to(peers["H"])); + BROKER_ASSERT(not get(A).connected_to(peers["I"])); + } + + ~fixture() { + for (auto& kvp : peers) + caf::anon_send_exit(kvp.second, caf::exit_reason::kill); + } + + std::map names; + std::map peers; +}; diff --git a/tests/cpp/alm/peer/stream_transport.cc b/tests/cpp/alm/peer/stream_transport.cc new file mode 100644 index 00000000..8bc4f17a --- /dev/null +++ b/tests/cpp/alm/peer/stream_transport.cc @@ -0,0 +1,176 @@ +#define SUITE alm.peer.stream_transport + +#include "broker/core_actor.hh" + +#include "alm/peer/fixture.hh" + +#include "broker/alm/peer.hh" +#include "broker/alm/stream_transport.hh" +#include "broker/configuration.hh" +#include "broker/defaults.hh" + +using broker::defaults::store::tick_interval; + +using namespace broker; +using namespace broker::alm; + +namespace { + +// -- transport layer ---------------------------------------------------------- + +class testee_state : public stream_transport { +public: + using super = stream_transport; + + static inline const char* name = "testee"; + + testee_state(caf::event_based_actor* self, endpoint_id id) : super(self) { + super::id(std::move(id)); + } + + void publish_locally(const data_message& msg) override { + buf.emplace_back(msg); + super::publish_locally(msg); + } + + std::vector shortest_path(const endpoint_id& to) { + if (auto ptr = alm::shortest_path(tbl(), to)) + return *ptr; + return {}; + } + + std::vector buf; +}; + +using stream_peer_actor = caf::stateful_actor; + +// Our topology: +// 
+// +---+ +// +-----+ D +-----+ +// | +---+ | +// | | +// +---+ +---+ +// +-----+ B | | I +-+ +// | +---+ +---+ | +// | | | | +// | | +---+ | | +// | +-----+ E +-----+ | +// | +---+ | +// +---+ +---+ +// | A +-----------------------+ J | +// +---+ +---+ +// | +---+ | | +// | +-----+ F | | | +// | | +-+-+ | | +// | | | | | +// | +---+ +-+-+ | | +// +-----+ C +---+ G +--------+ | +// +---+ +-+-+ | +// | | | +// | +-+-+ | +// +-----+ H +----------+ +// +---+ +// + +} // namespace + +#define CHECK_UNREACHABLE(src, dst) CHECK(get(src).shortest_path(dst).empty()) + +FIXTURE_SCOPE(stream_peer_tests, fixture) + +TEST(peers can revoke paths) { + connect_peers(); + MESSAGE("after B loses its connection to E, all paths to E go through I"); + anon_send(peers["B"], atom::unpeer_v, peers["E"]); + run(tick_interval); + CHECK_EQUAL(shortest_path(A, E), endpoint_id_list({J, I, E})); + CHECK_EQUAL(shortest_path(B, E), endpoint_id_list({D, I, E})); + CHECK_EQUAL(shortest_path(D, E), endpoint_id_list({I, E})); + MESSAGE("B and E both revoked the path"); + CHECK_EQUAL(get(A).revocations().entries.size(), 2u); + CHECK_EQUAL(get(B).revocations().entries.size(), 1u); + CHECK_EQUAL(get(C).revocations().entries.size(), 2u); + CHECK_EQUAL(get(D).revocations().entries.size(), 2u); + CHECK_EQUAL(get(E).revocations().entries.size(), 1u); + CHECK_EQUAL(get(F).revocations().entries.size(), 2u); + CHECK_EQUAL(get(H).revocations().entries.size(), 2u); + CHECK_EQUAL(get(I).revocations().entries.size(), 2u); + CHECK_EQUAL(get(J).revocations().entries.size(), 2u); + MESSAGE("after I loses its connection to E, no paths to E remain"); + anon_send(peers["I"], atom::unpeer_v, peers["E"]); + run(tick_interval); + CHECK_UNREACHABLE(A, E); + CHECK_UNREACHABLE(B, E); + CHECK_UNREACHABLE(C, E); + CHECK_UNREACHABLE(D, E); + CHECK_UNREACHABLE(F, E); + CHECK_UNREACHABLE(G, E); + CHECK_UNREACHABLE(H, E); + CHECK_UNREACHABLE(I, E); + CHECK_UNREACHABLE(J, E); + MESSAGE("revocationss contain one additional entry 
after I <-> E revocation"); + // Note: we skip E on purpose here. + CHECK_EQUAL(get(A).revocations().entries.size(), 3u); + CHECK_EQUAL(get(B).revocations().entries.size(), 2u); + CHECK_EQUAL(get(C).revocations().entries.size(), 3u); + CHECK_EQUAL(get(D).revocations().entries.size(), 3u); + CHECK_EQUAL(get(F).revocations().entries.size(), 3u); + CHECK_EQUAL(get(H).revocations().entries.size(), 3u); + CHECK_EQUAL(get(I).revocations().entries.size(), 2u); + CHECK_EQUAL(get(J).revocations().entries.size(), 3u); + MESSAGE("after max-age has expired, all peers clear their revocations"); + sched.clock().current_time += defaults::path_revocations::max_age; + for (auto& id : {A, B, C, D, F, H, I, J}) + get(id).age_revocations(); + CHECK_EQUAL(get(A).revocations().entries.size(), 0u); + CHECK_EQUAL(get(B).revocations().entries.size(), 0u); + CHECK_EQUAL(get(C).revocations().entries.size(), 0u); + CHECK_EQUAL(get(D).revocations().entries.size(), 0u); + CHECK_EQUAL(get(F).revocations().entries.size(), 0u); + CHECK_EQUAL(get(H).revocations().entries.size(), 0u); + CHECK_EQUAL(get(I).revocations().entries.size(), 0u); + CHECK_EQUAL(get(J).revocations().entries.size(), 0u); +} + +TEST(only receivers forward messages locally) { + connect_peers(); + MESSAGE("after all links are connected, G subscribes to topic 'foo'"); + anon_send(peers["G"], atom::subscribe_v, filter_type{topic{"foo"}}); + run(tick_interval); + MESSAGE("publishing to foo on A will result in only G having the message"); + anon_send(peers["A"], atom::publish_v, make_data_message("foo", 42)); + run(tick_interval); + CHECK_EQUAL(get(A).buf.size(), 0u); + CHECK_EQUAL(get(B).buf.size(), 0u); + CHECK_EQUAL(get(C).buf.size(), 0u); + CHECK_EQUAL(get(D).buf.size(), 0u); + CHECK_EQUAL(get(E).buf.size(), 0u); + CHECK_EQUAL(get(F).buf.size(), 0u); + CHECK_EQUAL(get(G).buf.size(), 1u); + CHECK_EQUAL(get(H).buf.size(), 0u); + CHECK_EQUAL(get(I).buf.size(), 0u); + CHECK_EQUAL(get(J).buf.size(), 0u); +} + +TEST(disabling 
forwarding turns peers into leaf nodes) { + run(tick_interval); + get(E).disable_forwarding(true); + connect_peers(); + MESSAGE("without forwarding, E only appears as leaf node in routing tables"); + using path_type = std::vector; + std::vector paths; + for (auto& id : {A, B, C, D, F, H, I, J}) + for (auto& kvp : get(id).tbl()) + for (auto& versioned_path : kvp.second.versioned_paths) + paths.emplace_back(versioned_path.first); + auto predicate = [this](auto& path) { + if (path.empty()) + return true; + auto i = std::find(path.begin(), path.end(), E); + return i == path.end() || i == std::prev(path.end()); + }; + CHECK(std::all_of(paths.begin(), paths.end(), predicate)); +} + +FIXTURE_SCOPE_END() diff --git a/tests/cpp/alm/routing_table.cc b/tests/cpp/alm/routing_table.cc new file mode 100644 index 00000000..23cccd38 --- /dev/null +++ b/tests/cpp/alm/routing_table.cc @@ -0,0 +1,236 @@ +#define SUITE alm.routing_table + +#include "broker/alm/routing_table.hh" + +#include "test.hh" + +using namespace broker; +using namespace broker::literals; + +namespace { + +struct fixture : base_fixture { + endpoint_id A; + + endpoint_id B; + + endpoint_id C; + + endpoint_id D; + + endpoint_id E; + + endpoint_id I; + + endpoint_id J; + + fixture() { + A = ids['A']; + B = ids['B']; + C = ids['C']; + D = ids['D']; + E = ids['E']; + I = ids['I']; + J = ids['J']; + // We use the subset of the topology that we use in the peer unit test: + // + // +---+ + // +-----+ D +-----+ + // | +---+ | + // | | + // +---+ +---+ + // +-----+ B | | I +-+ + // | +---+ +---+ | + // | | | | + // | | +---+ | | + // | +-----+ E +-----+ | + // | +---+ | + // +---+ +---+ + // | A +-----------------------+ J | + // +---+ +---+ + // + auto add = [&](endpoint_id id, + std::vector> paths) { + auto& entry = tbl.emplace(id, caf::actor{}).first->second; + for (auto& path : paths) + add_or_update_path(tbl, id, path, alm::vector_timestamp(path.size())); + }; + add(B, {{B}, {J, I, D, B}, {J, I, E, B}}); + add(D, {{B, 
D}, {J, I, D}}); + add(E, {{B, E}, {J, I, E}}); + add(I, {{B, E, I}, {B, D, I}, {J, I}}); + add(J, {{J}, {B, D, I, J}, {B, E, I, J}}); + } + + // Creates a list of IDs (endpoint IDs). + template + auto ls(Ts... xs) { + return std::vector{std::move(xs)...}; + } + + alm::routing_table tbl; +}; + +void nop(const endpoint_id&) { + // nop +} + +} // namespace + +FIXTURE_SCOPE(routing_table_tests, fixture) + +TEST(erase removes all paths that to and from a peer) { + MESSAGE("before removing J, the shortest path to I is: J -> I"); + { + auto path = shortest_path(tbl, I); + REQUIRE(path != nullptr); + CHECK_EQUAL(*path, ls(J, I)); + } + MESSAGE("after removing J, the shortest path to I is: B -> D -> I"); + { + erase(tbl, J, nop); + auto path = shortest_path(tbl, I); + REQUIRE(path != nullptr); + CHECK_EQUAL(*path, ls(B, D, I)); + } +} + +TEST(erase_direct drops the direct path but peers can remain reachable) { + MESSAGE("before calling erase_direct(B), we reach B in one hop"); + { + auto path = shortest_path(tbl, B); + REQUIRE(path != nullptr); + CHECK_EQUAL(*path, ls(B)); + } + MESSAGE("after calling erase_direct(B), we need four hops to reach B"); + { + erase_direct(tbl, B, nop); + auto path = shortest_path(tbl, B); + REQUIRE(path != nullptr); + CHECK_EQUAL(*path, ls(J, I, D, B)); + } +} + +TEST(peers may revoke paths) { + using alm::revoked; + auto path = ls(A, B, C, D); + auto ts = alm::vector_timestamp{{2_lt, 2_lt, 2_lt, 2_lt}}; + MESSAGE("revocations entries for X -> Y with timestamp 3 (newer) hit"); + { + CHECK(revoked(path, ts, A, 3_lt, B)); + CHECK(revoked(path, ts, B, 3_lt, A)); + CHECK(revoked(path, ts, B, 3_lt, C)); + CHECK(revoked(path, ts, C, 3_lt, B)); + CHECK(revoked(path, ts, C, 3_lt, D)); + CHECK(revoked(path, ts, D, 3_lt, C)); + } + MESSAGE("revocations entries for X -> Y with timestamp 2 (same) hit"); + { + CHECK(revoked(path, ts, A, 2_lt, B)); + CHECK(revoked(path, ts, B, 2_lt, A)); + CHECK(revoked(path, ts, B, 2_lt, C)); + CHECK(revoked(path, ts, C, 
2_lt, B)); + CHECK(revoked(path, ts, C, 2_lt, D)); + CHECK(revoked(path, ts, D, 2_lt, C)); + } + MESSAGE("revocations entries for X -> Y with timestamp 1 (oder) do not hit"); + { + CHECK(not revoked(path, ts, A, 1_lt, B)); + CHECK(not revoked(path, ts, B, 1_lt, A)); + CHECK(not revoked(path, ts, B, 1_lt, C)); + CHECK(not revoked(path, ts, C, 1_lt, B)); + CHECK(not revoked(path, ts, C, 1_lt, D)); + CHECK(not revoked(path, ts, D, 1_lt, C)); + } +} + +TEST(revocationsing removes revokes paths) { + MESSAGE("before revoking B -> D, we reach D in two hops"); + { + auto path = shortest_path(tbl, D); + REQUIRE(path != nullptr); + CHECK_EQUAL(*path, ls(B, D)); + } + MESSAGE("after revoking B -> D, we reach D in three hops"); + { + auto callback = [](const auto&) { FAIL("OnRemovePeer callback called"); }; + revoke(tbl, B, alm::lamport_timestamp{2}, D, callback); + auto path = shortest_path(tbl, D); + REQUIRE(path != nullptr); + CHECK_EQUAL(*path, ls(J, I, D)); + } + MESSAGE("after revoking J -> I, we no longer reach D"); + { + std::vector unreachables; + auto callback = [&](const auto& x) { unreachables.emplace_back(x); }; + revoke(tbl, J, alm::lamport_timestamp{2}, I, callback); + CHECK_EQUAL(unreachables, ls(D)); + CHECK_EQUAL(shortest_path(tbl, D), nullptr); + } +} + +TEST(revocationsing does not affect newer paths) { + MESSAGE("set all timestamps to 3"); + { + for (auto& row : tbl) + for (auto& path : row.second.versioned_paths) + for (auto& t : path.second) + t = 3_lt; + } + MESSAGE("revoking B -> D with timestamp 2 has no effect"); + { + auto callback = [](const auto&) { FAIL("OnRemovePeer callback called"); }; + revoke(tbl, B, 2_lt, D, callback); + auto path = shortest_path(tbl, D); + REQUIRE(path != nullptr); + CHECK_EQUAL(*path, ls(B, D)); + } +} + +TEST(inseting into revocationss creates a sorted list) { + using revocations = alm::revocations; + revocations lst; + struct dummy_self { + auto clock() { + struct dummy_clock { + auto now() { + return 
caf::actor_clock::clock_type::now(); + } + }; + return dummy_clock{}; + } + }; + auto emplace = [&](endpoint_id revoker, alm::lamport_timestamp rtime, + endpoint_id hop) { + dummy_self self; + return alm::emplace(lst, &self, revoker, rtime, hop); + }; + auto to_revocations = [](auto range) { + return revocations(range.first, range.second); + }; + MESSAGE("filling the list with new entries inserts"); + CHECK(emplace(A, 1_lt, B).second); + CHECK(emplace(C, 2_lt, A).second); + CHECK(emplace(A, 3_lt, B).second); + CHECK(emplace(C, 1_lt, A).second); + CHECK(emplace(B, 7_lt, A).second); + CHECK(emplace(A, 2_lt, C).second); + MESSAGE("inserting twice is a no-op"); + CHECK(not emplace(A, 1_lt, B).second); + CHECK(not emplace(B, 7_lt, A).second); + MESSAGE("the final list is sorted on revoker, ts, hop"); + CHECK_EQUAL(lst, revocations({{A, 1_lt, B}, + {A, 2_lt, C}, + {A, 3_lt, B}, + {B, 7_lt, A}, + {C, 1_lt, A}, + {C, 2_lt, A}})); + MESSAGE("equal_range allows access to subranges by revoker"); + CHECK_EQUAL(to_revocations(equal_range(lst, A)), + revocations({{A, 1_lt, B}, {A, 2_lt, C}, {A, 3_lt, B}})); + CHECK_EQUAL(to_revocations(equal_range(lst, B)), revocations({{B, 7_lt, A}})); + CHECK_EQUAL(to_revocations(equal_range(lst, C)), + revocations({{C, 1_lt, A}, {C, 2_lt, A}})); +} + +FIXTURE_SCOPE_END() diff --git a/tests/cpp/core.cc b/tests/cpp/core.cc index 6598b88f..f8d0d8a1 100644 --- a/tests/cpp/core.cc +++ b/tests/cpp/core.cc @@ -345,7 +345,11 @@ CAF_TEST(triangle_peering) { self->send(leaf1, atom::get_v); sched.prioritize(leaf1); consume_message(); - self->receive([&](const buf& xs) { CAF_CHECK_EQUAL(xs, buf{}); }); + self->receive( + [&](const buf& xs) { + CAF_REQUIRE(xs.empty()); + } + ); // Shutdown. 
CAF_MESSAGE("Shutdown core actors."); anon_send_exit(core1, caf::exit_reason::user_shutdown); diff --git a/tests/cpp/core_actor.cc b/tests/cpp/core_actor.cc new file mode 100644 index 00000000..3b6016f9 --- /dev/null +++ b/tests/cpp/core_actor.cc @@ -0,0 +1,303 @@ +#define SUITE core_actor + +#include "broker/core_actor.hh" + +#include "test.hh" + +#include "caf/attach_stream_sink.hpp" +#include "caf/attach_stream_source.hpp" + +#include "broker/configuration.hh" +#include "broker/endpoint.hh" +#include "broker/logger.hh" + +using caf::actor; +using caf::node_id; + +// TODO: implement me + +// using namespace broker; +// using namespace broker::detail; +// +// using element_type = endpoint::stream_type::value_type; +// +// namespace { +// +// struct driver_state { +// using buf_type = std::vector; +// bool restartable = false; +// buf_type xs; +// static inline const char* name = "driver"; +// void reset() { +// xs = data_msgs({{"a", 0}, {"b", true}, {"a", 1}, {"a", 2}, {"b", false}, +// {"b", true}, {"a", 3}, {"b", false}, {"a", 4}, {"a", 5}}); +// } +// driver_state() { +// reset(); +// } +// }; +// +// using driver_actor_type = caf::stateful_actor; +// +// caf::behavior driver(driver_actor_type* self, const actor& sink, +// bool restartable) { +// self->state.restartable = restartable; +// auto ptr = caf::attach_stream_source( +// self, +// // Destination. +// sink, +// // Initialize state. +// [](caf::unit_t&) { +// // nop +// }, +// // Get next element. +// [=](caf::unit_t&, caf::downstream& out, size_t num) { +// auto& xs = self->state.xs; +// auto n = std::min(num, xs.size()); +// if (n == 0) +// return; +// for (size_t i = 0u; i < n; ++i) +// out.push(xs[i]); +// xs.erase(xs.begin(), xs.begin() + static_cast(n)); +// }, +// // Did we reach the end? 
+// [=](const caf::unit_t&) { +// auto& st = self->state; +// return !st.restartable && st.xs.empty(); +// } +// ).ptr(); +// return { +// [=](atom::restart) { +// self->state.reset(); +// self->state.restartable = false; +// ptr->push(); +// }, +// }; +// } +// +// struct consumer_state { +// std::vector xs; +// static inline const char* name = "consumer"; +// }; +// +// using consumer_actor_type = caf::stateful_actor; +// +// caf::behavior consumer(consumer_actor_type* self, filter_type filter, +// const actor& src) { +// self->send(self * src, atom::join_v, std::move(filter)); +// return { +// [=](const endpoint::stream_type& in) { +// caf::attach_stream_sink( +// self, +// // Input stream. +// in, +// // Initialize state. +// [](caf::unit_t&) { +// // nop +// }, +// // Process single element. +// [=](caf::unit_t&, element_type x) { +// self->state.xs.emplace_back(std::move(x)); +// }, +// // Cleanup. +// [](caf::unit_t&) { +// // nop +// } +// ); +// }, +// [=](atom::get) { +// return self->state.xs; +// }, +// }; +// } +// +// struct fixture : base_fixture { +// // Returns the core manager for given actor. +// auto& state(caf::actor hdl) { +// return deref(hdl).state; +// } +// +// // Returns the recorded consumer log for given actor. 
+// auto& log(caf::actor hdl) { +// return deref(hdl).state.xs; +// } +// +// auto id(caf::actor hdl) { +// return state(hdl).id(); +// } +// +// fixture() { +// using caf::make_uri; +// auto spawn_core = [&](auto id) { +// broker_options opts; +// opts.disable_ssl = true; +// auto hdl = sys.spawn(filter_type{"a", "b", "c"}); +// anon_send(core1, atom::no_events_v); +// run(); +// state(hdl).id(make_node_id(unbox(id))); +// if (state(hdl).filter() != filter_type{"a", "b", "c"}) +// FAIL("core " << id << " reports wrong filter: " << state(hdl).filter()); +// return hdl; +// }; +// core1 = spawn_core(make_uri("test:core1")); +// core2 = spawn_core(make_uri("test:core2")); +// core3 = spawn_core(make_uri("test:core3")); +// } +// +// ~fixture() { +// for (auto& hdl : {core1, core2, core3}) +// anon_send_exit(hdl, caf::exit_reason::user_shutdown); +// } +// +// template +// void stop(Ts&&... xs) { +// (anon_send_exit(xs, caf::exit_reason::user_shutdown), ...); +// } +// +// caf::actor core1; +// caf::actor core2; +// caf::actor core3; +// }; +// +// static constexpr bool T = true; +// +// static constexpr bool F = false; +// +// } // namespace +// +// FIXTURE_SCOPE(local_tests, fixture) +// +// // Simulates a simple setup with two cores, where data flows from core1 to +// // core2. 
+// TEST(local_peers) { +// MESSAGE("connect a consumer (leaf) to core2"); +// auto leaf = sys.spawn(consumer, filter_type{"b"}, core2); +// MESSAGE("core1: " << to_string(core1)); +// MESSAGE("core2: " << to_string(core2)); +// MESSAGE("leaf: " << to_string(leaf)); +// run(); +// CHECK_EQUAL(state(core1).worker_manager().num_paths(), 0u); +// CHECK_EQUAL(state(core2).worker_manager().num_paths(), 1u); +// MESSAGE("trigger handshake between peers"); +// inject((atom::peer, node_id, actor), +// from(self).to(core1).with(atom::peer_v, id(core2), core2)); +// run(); +// MESSAGE("core1 & core2 should report each other as peered"); +// using actor_list = std::vector; +// CHECK_EQUAL(state(core1).peer_handles(), actor_list({core2})); +// CHECK_EQUAL(state(core1).peer_filter(id(core2)), filter_type({"a", "b", "c"})); +// CHECK_EQUAL(state(core2).peer_handles(), actor_list({core1})); +// CHECK_EQUAL(state(core2).peer_filter(id(core1)), filter_type({"a", "b", "c"})); +// return; +// MESSAGE("spin up driver on core1"); +// auto d1 = sys.spawn(driver, core1, false); +// MESSAGE("driver: " << to_string(d1)); +// run(); +// MESSAGE("check log of the consumer after the driver is done"); +// CHECK_EQUAL(log(leaf), data_msgs({{"b", T}, {"b", F}, {"b", T}, {"b", F}})); +// MESSAGE("send message 'directly' from core1 to core2"); +// inject((atom::publish, endpoint_info, data_message), +// from(self).to(core1).with(atom::publish_v, +// endpoint_info{id(core2), caf::none}, +// make_data_message(topic("b"), data{true}))); +// run(); +// MESSAGE("check log of the consumer again"); +// CHECK_EQUAL(log(leaf), +// data_msgs({{"b", T}, {"b", F}, {"b", T}, {"b", F}, {"b", T}})); +// MESSAGE("unpeer core1 from core2"); +// anon_send(core1, atom::unpeer_v, core2); +// run(); +// MESSAGE("check whether both core1 and core2 report no more peers"); +// CHECK_EQUAL(state(core1).peer_handles().size(), 0u); +// CHECK_EQUAL(state(core2).peer_handles().size(), 0u); +// } +// +// // Simulates a simple 
triangle setup where core1 peers with core2, and core2 +// // peers with core3. Data flows from core1 to core2 and core3. +// TEST(triangle_peering) { +// MESSAGE("connect consumers for topic 'b' to all cores"); +// // The consumer at core1 never receives any data, because data isn't forwarded +// // to local subscribers. +// auto leaf1 = sys.spawn(consumer, filter_type{"b"}, core1); +// auto leaf2 = sys.spawn(consumer, filter_type{"b"}, core2); +// auto leaf3 = sys.spawn(consumer, filter_type{"b"}, core3); +// run(); +// MESSAGE("initiate handshake between core1 and core2"); +// inject((atom::peer, node_id, actor), +// from(self).to(core1).with(atom::peer_v, id(core2), core2)); +// // Check if core1 reports a pending peer. +// CHECK_EQUAL(state(core1).pending_connections().count(id(core2)), 1u); +// run(); +// MESSAGE("initiate handshake between core2 and core3"); +// inject((atom::peer, node_id, actor), +// from(self).to(core2).with(atom::peer_v, id(core3), core3)); +// CHECK_EQUAL(state(core2).pending_connections().count(id(core3)), 1u); +// run(); +// MESSAGE("check if all cores properly report the peering setup"); +// CHECK(state(core1).pending_connections().empty()); +// CHECK(state(core2).pending_connections().empty()); +// CHECK(state(core3).pending_connections().empty()); +// CHECK(state(core1).connected_to(core2)); +// CHECK(state(core2).connected_to(core1)); +// CHECK(state(core2).connected_to(core3)); +// CHECK(state(core3).connected_to(core2)); +// CHECK(not state(core1).connected_to(core3)); +// CHECK(not state(core3).connected_to(core1)); +// MESSAGE("attach and run driver on core1"); +// sys.spawn(driver, core1, false); +// run(); +// MESSAGE("check log of the consumers"); +// CHECK(log(leaf1).empty()); +// CHECK_EQUAL(log(leaf2), data_msgs({{"b", T}, {"b", F}, {"b", T}, {"b", F}})); +// CHECK_EQUAL(log(leaf3), data_msgs({{"b", T}, {"b", F}, {"b", T}, {"b", F}})); +// stop(leaf1, leaf2, leaf3); +// } +// +// // Simulates a simple setup where core1 
peers with core2 and starts sending +// // data. After receiving a couple of messages, core2 terminates and core3 +// // starts peering. Core3 must receive all remaining messages. +// // peers with core3. Data flows from core1 to core2 and core3. +// TEST(sequenced_peering) { +// MESSAGE("connect consumers for topic 'b' to cores 2 and 3"); +// // The consumer at core1 never receives any data, because data isn't forwarded +// // to local subscribers. +// auto leaf1 = sys.spawn(consumer, filter_type{"b"}, core2); +// auto leaf2 = sys.spawn(consumer, filter_type{"b"}, core3); +// run(); +// MESSAGE("peer core1 to core2"); +// inject((atom::peer, node_id, actor), +// from(self).to(core1).with(atom::peer_v, id(core2), core2)); +// run(); +// CHECK(state(core1).connected_to(core2)); +// CHECK(state(core2).connected_to(core1)); +// CHECK(not state(core1).connected_to(core3)); +// CAF_MESSAGE("run the driver and check logs of the consumers"); +// auto d1 = sys.spawn(driver, core1, true); +// run(); +// CHECK_EQUAL(log(leaf1), data_msgs({{"b", T}, {"b", F}, {"b", T}, {"b", F}})); +// CHECK(log(leaf2).empty()); +// CAF_MESSAGE("kill core2 and make sure core1 no longer sees any peers"); +// anon_send_exit(core2, caf::exit_reason::kill); +// run(); +// CHECK(state(core1).tbl().empty()); +// CHECK(state(core1).pending_connections().empty()); +// MESSAGE("peer core3 to core1"); +// inject((atom::peer, node_id, actor), +// from(self).to(core3).with(atom::peer_v, id(core1), core1)); +// run(); +// CHECK(state(core1).connected_to(core3)); +// CHECK(state(core3).connected_to(core1)); +// CHECK(not state(core1).connected_to(core2)); +// CAF_MESSAGE("restart driver and check the logs again"); +// anon_send(d1, atom::restart_v); +// run(); +// CHECK_EQUAL(log(leaf1), data_msgs({{"b", T}, {"b", F}, {"b", T}, {"b", F}})); +// CHECK_EQUAL(log(leaf2), data_msgs({{"b", T}, {"b", F}, {"b", T}, {"b", F}})); +// stop(d1, leaf1, leaf2); +// } +// +// CAF_TEST_FIXTURE_SCOPE_END() + +TEST(todo) { 
+ MESSAGE("implement me"); +} diff --git a/tests/cpp/detail/central_dispatcher.cc b/tests/cpp/detail/central_dispatcher.cc index f062e096..f38de66a 100644 --- a/tests/cpp/detail/central_dispatcher.cc +++ b/tests/cpp/detail/central_dispatcher.cc @@ -4,53 +4,90 @@ #include "test.hh" +#include "broker/detail/unipath_manager.hh" + using namespace broker; namespace { -struct testee_state { - detail::central_dispatcher dispatcher; +struct testee_state : public detail::central_dispatcher { + static inline const char* name = "testee"; + + caf::event_based_actor* self; detail::unipath_manager_ptr in; - detail::unipath_manager_ptr out; - detail::unipath_manager_ptr inout; + detail::unipath_data_sink_ptr out; - testee_state(caf::scheduled_actor* self) : dispatcher(self) { + testee_state(caf::event_based_actor* self) : self(self) { // nop } + + caf::behavior make_behavior() { + using namespace broker::detail; + return { + [this](caf::stream input) -> caf::result { + if (in == nullptr) { + in = make_unipath_source(this, input); + CHECK(!in->blocks_inputs()); + return caf::unit; + } else { + return make_error(caf::sec::runtime_error, "only one input allowed"); + } + }, + [this](atom::join, caf::actor consumer) -> caf::result { + if (out == nullptr) { + out = make_unipath_data_sink(this, {"test"}); + out->add_unchecked_outbound_path(consumer); + return caf::unit; + } else { + return make_error(caf::sec::runtime_error, "only one output allowed"); + } + }, + }; + } + + auto receivers() { + return std::vector{self->node()}; + } + + void dispatch(const data_message& msg) override { + if (out) + out->enqueue(msg); + } + + void dispatch(const command_message&) override { + FAIL("expected a data_msg"); + } + + void dispatch(node_message&&) override { + FAIL("expected a data_msg"); + } + + caf::event_based_actor* this_actor() noexcept override { + return self; + } + + endpoint_id this_endpoint() const override { + return self->node(); + } + + filter_type local_filter() const override { + 
return {}; + } + + alm::lamport_timestamp local_timestamp() const noexcept override { + return {}; + } + + void flush() override { + if (out) + out->push(); + } }; using testee_actor = caf::stateful_actor; -caf::behavior testee_impl(testee_actor* self){ - using namespace broker::detail; - return { - [self](caf::stream handshake) -> caf::result { - auto& st = self->state; - if (st.in == nullptr) { - st.in = make_data_source(&st.dispatcher); - CHECK(!st.in->blocks_inputs()); - st.in->add_unchecked_inbound_path(handshake); - return caf::unit; - } else { - return make_error(caf::sec::runtime_error, "only one input allowed"); - } - }, - [self](atom::join, caf::actor consumer) -> caf::result { - auto& st = self->state; - if (st.out == nullptr) { - st.out = make_peer_manager(&st.dispatcher, nullptr); - st.out->filter({"test"}); - st.out->add_unchecked_outbound_path(consumer); - st.dispatcher.add(st.out); - return caf::unit; - } else { - return make_error(caf::sec::runtime_error, "only one output allowed"); - } - }, - }; -} - struct consumer_state { + static inline const char* name = "consumer"; std::vector buf; }; @@ -59,16 +96,15 @@ using consumer_actor = caf::stateful_actor; caf::behavior consumer_impl(consumer_actor* self, caf::actor testee) { self->send(testee, atom::join_v, self); return { - [self](caf::stream in) { + [self](caf::stream in) { caf::attach_stream_sink( self, in, // Initialization step. [](caf::unit_t&) {}, // Processing step. 
- [self](caf::unit_t&, node_message x) { + [self](caf::unit_t&, data_message x) { auto& buf = self->state.buf; - REQUIRE(is_data_message(x)); - buf.emplace_back(get_data(x)); + buf.emplace_back(move_data(x)); if (buf.size() % 100 == 0) MESSAGE("consumed " << buf.size() << " data message"); }); @@ -76,7 +112,13 @@ caf::behavior consumer_impl(consumer_actor* self, caf::actor testee) { }; } -void producer_impl(consumer_actor* self, caf::actor testee) { +struct producer_state { + static inline const char* name = "producer"; +}; + +using producer_actor = caf::stateful_actor; + +void producer_impl(producer_actor* self, caf::actor testee) { caf::attach_stream_source( self, testee, [self](size_t& pos) { pos = 0; }, [self](size_t& pos, caf::downstream& out, size_t hint) { @@ -100,7 +142,7 @@ struct fixture : test_coordinator_fixture { caf::actor aut; fixture() { - aut = sys.spawn(testee_impl); + aut = sys.spawn(); } ~fixture() { diff --git a/tests/cpp/detail/channel.cc b/tests/cpp/detail/channel.cc new file mode 100644 index 00000000..50571a77 --- /dev/null +++ b/tests/cpp/detail/channel.cc @@ -0,0 +1,592 @@ +#define SUITE detail.channel + +#include "broker/detail/channel.hh" + +#include "test.hh" + +#include +#include +#include + +using namespace broker; + +namespace { + +// -- local types -------------------------------------------------------------- + +using channel_type = detail::channel; + +struct consumer_backend; +struct fixture; + +using consumer_type = channel_type::consumer; +using producer_type = channel_type::producer; + +// -- consumer boilerplate code ------------------------------------------------ + +struct consumer_backend { + std::string id; + std::string input; + std::string output; + caf::event_based_actor* self = nullptr; + fixture* fix = nullptr; + bool closed = false; + bool fail_on_nil = false; + + consumer_backend() = default; + + explicit consumer_backend(std::string id) : id(std::move(id)) { + // nop + } + + void attach(caf::event_based_actor* 
self, fixture*fix) { + this->self = self; + this->fix = fix; + } + + void consume(consumer_type*, std::string x) { + input += x; + } + + error consume_nil(consumer_type*) { + input += '?'; + if (fail_on_nil) + return make_error(ec::unspecified, "I really wanted that data! 😭"); + return nil; + } + + template + void send(consumer_type*, const T& x); + + void close(consumer_type*, error) { + closed = true; + } +}; + +struct consumer_state { + consumer_backend backend; + consumer_type consumer; + consumer_state() : consumer(&backend) { + // nop + } +}; + +caf::behavior consumer_actor(caf::stateful_actor* self, + std::string id, fixture* fix); + +// -- fixture / producer boilerplate code -------------------------------------- + +caf::behavior producer_actor(caf::event_based_actor* self, + producer_type* state); + +struct fixture : base_fixture { + + struct outgoing_message { + caf::actor sender; + caf::actor receiver; + caf::message content; + template + outgoing_message(caf::actor sender, caf::actor receiver, Ts&&... 
xs) + : sender(std::move(sender)), + receiver(std::move(receiver)), + content(caf::make_message(std::forward(xs)...)) { + // nop + } + }; + + std::string producer_log; + + producer_type producer; + + caf::actor producer_hdl; + + fixture() : producer(this), rng(0xC00L) { + // nop + } + + void setup_actors(std::initializer_list consumer_names) { + producer_hdl = sys.spawn(producer_actor, &producer); + for (const auto& name : consumer_names) { + consumers.emplace(name, sys.spawn(consumer_actor, name, this)); + producer.add(name); + } + MESSAGE("setup: " << consumers); + } + + template + std::string render(const Paths& xs) { + if (xs.empty()) + return "[]"; + std::string result = "["; + auto i = xs.begin(); + result += i->hdl; + for (++i; i != xs.end(); ++i) { + result += ", "; + result += i->hdl; + } + result += ']'; + return result; + } + + template + void send(producer_type*, const std::string& dst, const T& x) { + producer_log += '\n'; + producer_log += dst; + producer_log += " <- "; + producer_log += caf::deep_to_string(x); + if (auto i = consumers.find(dst); i != consumers.end()) + outgoing_messages.emplace_back(producer_hdl, i->second, + channel_type::producer_message{x}); + } + + template + void broadcast(producer_type*, const T& x) { + producer_log += '\n'; + producer_log += render(producer.paths()); + producer_log += " <- "; + producer_log += caf::deep_to_string(x); + for (auto& kvp : consumers) + outgoing_messages.emplace_back(producer_hdl, kvp.second, + channel_type::producer_message{x}); + } + + void drop(producer_type*, std::string hdl, ec) { + consumers.erase(hdl); + } + + void handshake_completed(producer_type*, const std::string&) { + // nop + } + + // Uses a simulated transport channel that's beyond terrible. Randomly + // reorders all messages and loses messages according to `loss_rate`. 
+ void ship(double loss_rate = 0) { + assert(loss_rate < 1); + if (outgoing_messages.empty()) + return; + std::shuffle(outgoing_messages.begin(), outgoing_messages.end(), rng); + if (loss_rate > 0) { + auto num_message = outgoing_messages.size(); + auto lost = static_cast(ceil(num_message * loss_rate)); + assert(num_message >= lost); + auto new_size = std::max(num_message - lost, size_t{1}); + auto i = outgoing_messages.begin() + new_size; + outgoing_messages.erase(i, outgoing_messages.end()); + } + for (auto& msg : outgoing_messages) + caf::send_as(msg.sender, msg.receiver, std::move(msg.content)); + outgoing_messages.clear(); + } + + std::string render_buffer(consumer_type& ref) { + std::string result; + for (auto& x : ref.buf()) + if (x.content) + result += *x.content; + else + result += '?'; + return result; + } + + void tick() { + using actor_type = caf::stateful_actor; + producer.tick(); + for (const auto& kvp : consumers) + deref(kvp.second).state.consumer.tick(); + } + + void ship_run_tick(double loss_rate = 0) { + ship(loss_rate); + run(); + tick(); + } + + consumer_type& get(const std::string& id) { + auto i = consumers.find(id); + if (i == consumers.end()) + FAIL("unable to retrieve state for consumer " << id); + using actor_type = caf::stateful_actor; + return deref(i->second).state.consumer; + } + + std::map consumers; + std::vector outgoing_messages; + std::minstd_rand rng; +}; + +// -- actor implementations ---------------------------------------------------- + +struct producer_visitor { + producer_type* ch; + const std::string& src; + + void operator()(channel_type::cumulative_ack& msg) { + ch->handle_ack(src, msg.seq); + } + + void operator()(channel_type::nack& msg) { + ch->handle_nack(src, msg.seqs); + } +}; + +caf::behavior producer_actor(caf::event_based_actor* self, + producer_type* state) { + return { + [state](std::string& src, channel_type::consumer_message& msg) { + producer_visitor f{state, src}; + caf::visit(f, msg); + }, + }; +} + 
+template +void consumer_backend::send(consumer_type*, const T& x) { + if (!output.empty()) + output += '\n'; + output += caf::deep_to_string(x); + if (self && fix) + fix->outgoing_messages.emplace_back(self, fix->producer_hdl, id, + channel_type::consumer_message{x}); +} + +struct consumer_visitor { + consumer_type* ch; + + void operator()(channel_type::handshake& msg) { + ch->handle_handshake(msg.offset, msg.heartbeat_interval); + } + + void operator()(channel_type::heartbeat& msg) { + ch->handle_heartbeat(msg.seq); + } + + void operator()(channel_type::event& msg) { + ch->handle_event(msg.seq, msg.content); + } + + void operator()(channel_type::retransmit_failed& msg) { + ch->handle_retransmit_failed(msg.seq); + } +}; + +caf::behavior consumer_actor(caf::stateful_actor* self, + std::string id, fixture* fix) { + self->state.backend.id = std::move(id); + self->state.backend.attach(self, fix); + return { + [self](channel_type::producer_message& msg) { + auto& st = self->state; + consumer_visitor f{&st.consumer}; + caf::visit(f, msg); + if (st.backend.closed) + self->quit(); + }, + }; +} + +} // namespace + +// -- ye olde tests ------------------------------------------------------------ + +FIXTURE_SCOPE(channel_tests, fixture) + +TEST(adding consumers triggers handshakes) { + producer.add("A"); + CHECK_EQUAL(producer.seq(), 1u); + producer.produce("abc"); + CHECK_EQUAL(producer.seq(), 2u); + producer.produce("def"); + CHECK_EQUAL(producer.seq(), 3u); + producer.add("B"); + producer.produce("ghi"); + CHECK_EQUAL(producer.seq(), 4u); + CHECK_EQUAL(producer.buf().size(), 3u); + CHECK_EQUAL(producer_log, R"( +A <- handshake(1, 5) +[A] <- event(2, "abc") +[A] <- event(3, "def") +B <- handshake(3, 5) +[A, B] <- event(4, "ghi"))"); +} + +TEST(ACKs delete elements from the buffer) { + producer.add("A"); + producer.add("B"); + producer.add("C"); + producer.produce("a"); + CHECK_EQUAL(producer.buf().back().seq, 2u); + producer.produce("b"); + 
CHECK_EQUAL(producer.buf().back().seq, 3u); + producer.produce("c"); + CHECK_EQUAL(producer.buf().back().seq, 4u); + producer.produce("d"); + CHECK_EQUAL(producer.buf().back().seq, 5u); + CHECK_EQUAL(producer.buf().size(), 4u); + producer.handle_ack("A", 3); + CHECK_EQUAL(producer.buf().size(), 4u); + producer.handle_ack("B", 4); + CHECK_EQUAL(producer.buf().size(), 4u); + producer.handle_ack("C", 5); + CHECK_EQUAL(producer.buf().size(), 2u); + CHECK_EQUAL(producer.buf().front().seq, 4u); + producer.handle_ack("A", 5); + CHECK_EQUAL(producer.buf().size(), 1u); + CHECK_EQUAL(producer.buf().front().seq, 5u); + producer.handle_ack("B", 5); + CHECK_EQUAL(producer.buf().size(), 0u); +} + +TEST(NACKs cause the producer to send messages again) { + producer.add("A"); + producer.add("B"); + producer.produce("a"); + producer.produce("b"); + producer.produce("c"); + producer.produce("d"); + CHECK_EQUAL(producer.buf().size(), 4u); + producer_log.clear(); + MESSAGE("sending NACK for 0 re-sends the handshake"); + producer.handle_nack("A", {0}); + CHECK_EQUAL(producer_log, "\nA <- handshake(1, 5)"); + producer_log.clear(); + MESSAGE("sending NACK for sequence number N re-sends the event"); + producer.handle_nack("B", {2, 4}); + CHECK_EQUAL(producer_log, R"( +B <- event(2, "a") +B <- event(4, "c"))"); + producer_log.clear(); + MESSAGE("sending NACK for unknown sequence numbers sends errors"); + producer.handle_ack("A", 5); + producer.handle_ack("B", 5); + CHECK_EQUAL(producer.buf().size(), 0u); + producer.handle_nack("B", {2, 4}); + CHECK_EQUAL(producer_log, R"( +B <- retransmit_failed(2) +B <- retransmit_failed(4))"); +} + +TEST(consumers process events in order) { + consumer_backend cb{"A"}; + consumer_type consumer{&cb}; + consumer.handle_handshake(0, 3); + consumer.handle_event(4, "d"); + CHECK_EQUAL(consumer.buf().size(), 1u); + consumer.handle_event(5, "e"); + CHECK_EQUAL(consumer.buf().size(), 2u); + consumer.handle_event(5, "e"); + CHECK_EQUAL(consumer.buf().size(), 2u); + 
consumer.handle_event(2, "b"); + CHECK_EQUAL(consumer.buf().size(), 3u); + consumer.handle_event(3, "c"); + CHECK_EQUAL(consumer.buf().size(), 4u); + consumer.handle_event(1, "a"); + CHECK_EQUAL(consumer.buf().size(), 0u); + CHECK_EQUAL(cb.input, "abcde"); + consumer.handle_event(1, "a"); + CHECK_EQUAL(consumer.buf().size(), 0u); + CHECK_EQUAL(cb.input, "abcde"); +} + +TEST(consumers process nil events if retransmits fail) { + consumer_backend cb{"A"}; + consumer_type consumer{&cb}; + consumer.handle_handshake(0, 3); + consumer.handle_event(4, "d"); + consumer.handle_event(6, "f"); + CAF_MESSAGE("failed retransmits cause holes in the buffer"); + consumer.handle_retransmit_failed(5); + CAF_CHECK_EQUAL(render_buffer(consumer), "d?f"); + CAF_MESSAGE("retransmit_failed has no effect on already received messages"); + consumer.handle_event(2, "b"); + consumer.handle_retransmit_failed(2); + CAF_CHECK_EQUAL(render_buffer(consumer), "bd?f"); + CAF_MESSAGE("messages that arrive before processing lost messages count"); + consumer.handle_retransmit_failed(3); + CAF_CHECK_EQUAL(render_buffer(consumer), "b?d?f"); + consumer.handle_event(3, "c"); + CAF_CHECK_EQUAL(render_buffer(consumer), "bcd?f"); + CAF_MESSAGE("the consumer calls consume and consume_nil as needed"); + consumer.handle_event(1, "a"); + CHECK_EQUAL(cb.input, "abcd?f"); + CHECK_EQUAL(cb.closed, false); + CAF_MESSAGE("the consumer stops and closes if consume_nil returns an error"); + cb.fail_on_nil = true; + consumer.handle_event(9, "i"); + consumer.handle_retransmit_failed(8); + consumer.handle_event(7, "g"); + CHECK_EQUAL(cb.input, "abcd?fg?"); + CHECK_EQUAL(cb.closed, true); +} + +TEST(consumers buffer events until receiving the handshake) { + consumer_backend cb{"A"}; + consumer_type consumer{&cb}; + consumer.handle_event(3, "a"); + consumer.handle_event(4, "b"); + consumer.handle_event(5, "c"); + consumer.handle_handshake(2, 3); + CHECK_EQUAL(consumer.buf().size(), 0u); + CHECK_EQUAL(cb.input, "abc"); +} + 
+TEST(consumers send cumulative ACK messages) { + consumer_backend cb{"A"}; + consumer_type consumer{&cb}; + consumer.handle_handshake(1, 1); + cb.output.clear(); + MESSAGE("each tick triggers an ACK when setting heartbeat interval to 1"); + consumer.tick(); + CHECK_EQUAL(cb.output, "cumulative_ack(1)"); + consumer.tick(); + CHECK_EQUAL(cb.output, "cumulative_ack(1)\ncumulative_ack(1)"); + cb.output.clear(); + MESSAGE("after some events, the ACK contains the last received seq ID"); + consumer.handle_event(2, "a"); + consumer.handle_event(3, "b"); + consumer.tick(); + CHECK_EQUAL(cb.input, "ab"); + CHECK_EQUAL(cb.output, "cumulative_ack(3)"); +} + +TEST(consumers send NACK messages when receiving incomplete data) { + consumer_backend cb{"A"}; + consumer_type consumer{&cb}; + consumer.nack_timeout(3); + CHECK_EQUAL(consumer.num_ticks(), 0u); + MESSAGE("the consumer sends a NACK after making no progress for two ticks"); + consumer.handle_handshake(1, 5); + CHECK_EQUAL(cb.output, "cumulative_ack(1)"); + cb.output.clear(); + consumer.tick(); + CHECK_EQUAL(consumer.num_ticks(), 1u); + CHECK_EQUAL(consumer.idle_ticks(), 0u); + consumer.handle_event(5, "d"); + consumer.handle_event(3, "b"); + consumer.handle_event(8, "g"); + consumer.tick(); + CHECK_EQUAL(consumer.num_ticks(), 2u); + CHECK_EQUAL(cb.input, ""); + CHECK_EQUAL(consumer.idle_ticks(), 1u); + CHECK_EQUAL(cb.output, ""); + consumer.tick(); + CHECK_EQUAL(consumer.num_ticks(), 3u); + CHECK_EQUAL(cb.input, ""); + CHECK_EQUAL(consumer.idle_ticks(), 2u); + CHECK_EQUAL(cb.output, ""); + consumer.tick(); + CHECK_EQUAL(consumer.num_ticks(), 4u); + CHECK_EQUAL(cb.input, ""); + CHECK_EQUAL(consumer.idle_ticks(), 0u); + CHECK_EQUAL(cb.output, "nack([2, 4, 6, 7])"); + MESSAGE("the consumer sends an ack every five ticks, even without progress"); + cb.output.clear(); + consumer.tick(); + CHECK_EQUAL(consumer.num_ticks(), 5u); + CHECK_EQUAL(cb.input, ""); + CHECK_EQUAL(consumer.idle_ticks(), 1u); + CHECK_EQUAL(cb.output, 
"cumulative_ack(1)"); +} + +TEST(producers become idle after all consumers ACKed all messages) { + setup_actors({"A", "B", "C", "D"}); + producer.produce("a"); + producer.produce("b"); + producer.produce("c"); + producer.produce("d"); + ship_run_tick(); + producer.produce("e"); + producer.produce("f"); + producer.produce("g"); + producer.produce("h"); + ship_run_tick(); + producer.produce("i"); + producer.produce("j"); + producer.produce("k"); + producer.produce("l"); + ship(); + run(); + while (!producer.idle()) { + tick(); + ship(); + run(); + } + CHECK_EQUAL(producer.buf().size(), 0u); + CHECK_EQUAL(get("A").backend().input, "abcdefghijkl"); + CHECK_EQUAL(get("B").backend().input, "abcdefghijkl"); + CHECK_EQUAL(get("C").backend().input, "abcdefghijkl"); + CHECK_EQUAL(get("D").backend().input, "abcdefghijkl"); +} + +TEST(messages arrive eventually - even with 33 percent loss rate) { + producer.connection_timeout_factor(12); + // Essentially the same test as above, but with a loss rate of 33%. 
+ setup_actors({"A", "B", "C", "D"}); + CHECK_EQUAL(get("A").backend().input, ""); + CHECK_EQUAL(get("B").backend().input, ""); + CHECK_EQUAL(get("C").backend().input, ""); + CHECK_EQUAL(get("D").backend().input, ""); + producer.produce("a"); + producer.produce("b"); + producer.produce("c"); + producer.produce("d"); + ship_run_tick(0.33); + producer.produce("e"); + producer.produce("f"); + producer.produce("g"); + producer.produce("h"); + ship_run_tick(0.33); + producer.produce("i"); + producer.produce("j"); + producer.produce("k"); + producer.produce("l"); + ship(0.33); + run(); + for (size_t round = 1; !producer.idle(); ++round) { + if (round == 100) + FAIL("system didn't reach a stable state after 100 rounds"); + tick(); + ship(0.33); + run(); + } + CHECK_EQUAL(producer.buf().size(), 0u); + CHECK_EQUAL(get("A").backend().input, "abcdefghijkl"); + CHECK_EQUAL(get("B").backend().input, "abcdefghijkl"); + CHECK_EQUAL(get("C").backend().input, "abcdefghijkl"); + CHECK_EQUAL(get("D").backend().input, "abcdefghijkl"); +} + +TEST(messages arrive eventually - even with 66 percent loss rate) { + producer.connection_timeout_factor(24); + // Essentially the same test again, but with a loss rate of 66%. 
+ setup_actors({"A", "B", "C", "D"}); + producer.produce("a"); + producer.produce("b"); + producer.produce("c"); + producer.produce("d"); + ship_run_tick(0.66); + producer.produce("e"); + producer.produce("f"); + producer.produce("g"); + producer.produce("h"); + ship_run_tick(0.66); + producer.produce("i"); + producer.produce("j"); + producer.produce("k"); + producer.produce("l"); + ship(0.66); + run(); + for (size_t round = 1; !producer.idle(); ++round) { + if (round == 500) + FAIL("system didn't reach a stable state after 500 rounds"); + tick(); + ship(0.66); + run(); + } + CHECK_EQUAL(producer.buf().size(), 0u); + CHECK_EQUAL(get("A").backend().input, "abcdefghijkl"); + CHECK_EQUAL(get("B").backend().input, "abcdefghijkl"); + CHECK_EQUAL(get("C").backend().input, "abcdefghijkl"); + CHECK_EQUAL(get("D").backend().input, "abcdefghijkl"); +} + +FIXTURE_SCOPE_END() diff --git a/tests/cpp/detail/data_generator.cc b/tests/cpp/detail/data_generator.cc index b93bc5b8..09613554 100644 --- a/tests/cpp/detail/data_generator.cc +++ b/tests/cpp/detail/data_generator.cc @@ -223,7 +223,8 @@ TEST(vector data) { TEST(roundtrip with meta_data_writer) { detail::meta_data_writer writer{sink}; auto x = vector{1, 2, "a", "bc"}; - CHECK_EQUAL(writer(x), caf::none); + CHECK_EQUAL(writer(data{x}), caf::none); + MESSAGE("writer produced " << buf.size() << " Bytes"); auto y_data = generate(); if (!holds_alternative(y_data)) { CAF_ERROR("generator did not produce a vector"); diff --git a/tests/cpp/detail/iterator_range.cc b/tests/cpp/detail/iterator_range.cc new file mode 100644 index 00000000..ad17fd74 --- /dev/null +++ b/tests/cpp/detail/iterator_range.cc @@ -0,0 +1,34 @@ +#define SUITE detail.iterator_range + +#include "broker/detail/iterator_range.hh" + +#include "test.hh" + +using namespace broker::detail; + +namespace { + +struct fixture { + std::vector xs{1, 2, 3}; + + std::vector ys{3, 2, 1}; +}; + +} // namespace + +FIXTURE_SCOPE(iterator_range_tests, fixture) + +TEST(iterator 
ranges wrap iterators) { + CHECK_EQUAL(make_iterator_range(xs).begin(), xs.begin()); + CHECK_EQUAL(make_iterator_range(xs).end(), xs.end()); + CHECK(!make_iterator_range(xs).empty()); +} + +TEST(iterator ranges are comparable) { + CHECK_EQUAL(make_iterator_range(xs), make_iterator_range(xs)); + CHECK_NOT_EQUAL(make_iterator_range(ys), make_iterator_range(xs)); + CHECK_NOT_EQUAL(make_iterator_range(xs), make_iterator_range(ys)); + CHECK_EQUAL(make_iterator_range(ys), make_iterator_range(ys)); +} + +FIXTURE_SCOPE_END() diff --git a/tests/cpp/detail/meta_data_writer.cc b/tests/cpp/detail/meta_data_writer.cc index 5d718fb5..a42089f8 100644 --- a/tests/cpp/detail/meta_data_writer.cc +++ b/tests/cpp/detail/meta_data_writer.cc @@ -36,7 +36,7 @@ struct fixture { buf.size() - read_pos}; T result{}; CHECK_EQUAL(detail::read_value(source, result), caf::none); - read_pos = buf.size() - source.remaining(); + read_pos = static_cast(source.current() - buf.data()); return result; } @@ -51,36 +51,42 @@ CAF_TEST_FIXTURE_SCOPE(meta_data_writer_tests, fixture) CAF_TEST(default constructed data) { push(data{}); + CHECK_EQUAL(buf.size(), 1u); CHECK_EQUAL(pull(), data::type::none); CHECK(at_end()); } CAF_TEST(boolean data) { push(data{true}); + CHECK_EQUAL(buf.size(), 1u); CHECK_EQUAL(pull(), data::type::boolean); CHECK(at_end()); } CAF_TEST(count data) { push(data{count{42}}); + CHECK_EQUAL(buf.size(), 1u); CHECK_EQUAL(pull(), data::type::count); CHECK(at_end()); } CAF_TEST(integer data) { push(data{integer{42}}); + CHECK_EQUAL(buf.size(), 1u); CHECK_EQUAL(pull(), data::type::integer); CHECK(at_end()); } CAF_TEST(real data) { push(data{4.2}); + CHECK_EQUAL(buf.size(), 1u); CHECK_EQUAL(pull(), data::type::real); CHECK(at_end()); } CAF_TEST(string data) { push(data{"hello world"}); + CHECK_EQUAL(buf.size(), 5u); CHECK_EQUAL(pull(), data::type::string); CHECK_EQUAL(pull(), 11u); CHECK(at_end()); @@ -88,36 +94,42 @@ CAF_TEST(string data) { CAF_TEST(address data) { push(data{address{}}); + 
CHECK_EQUAL(buf.size(), 1u); CHECK_EQUAL(pull(), data::type::address); CHECK(at_end()); } CAF_TEST(subnet data) { push(data{subnet{address{}, 24}}); + CHECK_EQUAL(buf.size(), 1u); CHECK_EQUAL(pull(), data::type::subnet); CHECK(at_end()); } CAF_TEST(port data) { push(data{port{8080, port::protocol::tcp}}); + CHECK_EQUAL(buf.size(), 1u); CHECK_EQUAL(pull(), data::type::port); CHECK(at_end()); } CAF_TEST(timestamp data) { push(data{timestamp{}}); + CHECK_EQUAL(buf.size(), 1u); CHECK_EQUAL(pull(), data::type::timestamp); CHECK(at_end()); } CAF_TEST(timespan data) { push(data{timespan{}}); + CHECK_EQUAL(buf.size(), 1u); CHECK_EQUAL(pull(), data::type::timespan); CHECK(at_end()); } CAF_TEST(enum_value data) { push(data{enum_value{"foobar"}}); + CHECK_EQUAL(buf.size(), 5u); CHECK_EQUAL(pull(), data::type::enum_value); CHECK_EQUAL(pull(), 6u); CHECK(at_end()); @@ -129,6 +141,7 @@ CAF_TEST(set data) { xs.emplace(integer{2}); xs.emplace(integer{3}); push(data{xs}); + CHECK_EQUAL(buf.size(), 8u); CHECK_EQUAL(pull(), data::type::set); CHECK_EQUAL(pull(), 3u); CHECK_EQUAL(pull(), data::type::integer); @@ -143,6 +156,7 @@ CAF_TEST(table data) { xs.emplace(integer{2}, "hello world"); xs.emplace(integer{3}, address{}); push(data{xs}); + CHECK_EQUAL(buf.size(), 15u); CHECK_EQUAL(pull(), data::type::table); CHECK_EQUAL(pull(), 3u); CHECK_EQUAL(pull(), data::type::integer); @@ -161,6 +175,7 @@ CAF_TEST(vector data) { xs.emplace_back(std::string{"hello world"}); xs.emplace_back(12.34); push(data{xs}); + CHECK_EQUAL(buf.size(), 12u); CHECK_EQUAL(pull(), data::type::vector); CHECK_EQUAL(pull(), 3u); CHECK_EQUAL(pull(), data::type::integer); @@ -170,4 +185,79 @@ CAF_TEST(vector data) { CHECK(at_end()); } +CAF_TEST(put_command) { + internal_command cmd{0, {}, put_command{data{"hello"}, data{"broker"}, nil}}; + push(cmd); + CHECK_EQUAL(buf.size(), 11u); + CHECK_EQUAL(pull(), + internal_command::type::put_command); + CHECK_EQUAL(pull(), data::type::string); + CHECK_EQUAL(pull(), 5u); + 
CHECK_EQUAL(pull(), data::type::string); + CHECK_EQUAL(pull(), 6u); + CHECK(at_end()); +} + +CAF_TEST(put_unique_command) { + internal_command cmd{0, + {}, + put_unique_command{data{"hello"}, data{"broker"}, nil, + entity_id::nil(), 0}}; + push(cmd); + CHECK_EQUAL(pull(), + internal_command::type::put_unique_command); + // We expect meta data for `key`, `value`, and `req_id`. + CHECK_EQUAL(pull(), data::type::string); + CHECK_EQUAL(pull(), 5u); + CHECK_EQUAL(pull(), data::type::string); + CHECK_EQUAL(pull(), 6u); + CHECK(at_end()); +} + +CAF_TEST(erase_command) { + internal_command cmd{0, {}, erase_command{data{"foobar"}}}; + push(cmd); + CHECK_EQUAL(pull(), + internal_command::type::erase_command); + CHECK_EQUAL(pull(), data::type::string); + CHECK_EQUAL(pull(), 6u); + CHECK(at_end()); +} + +CAF_TEST(add_command) { + internal_command cmd{0, + {}, + add_command{data{"key"}, data{"value"}, + data::type::table, nil, entity_id::nil()}}; + push(cmd); + CHECK_EQUAL(pull(), + internal_command::type::add_command); + CHECK_EQUAL(pull(), data::type::string); + CHECK_EQUAL(pull(), 3u); + CHECK_EQUAL(pull(), data::type::string); + CHECK_EQUAL(pull(), 5u); + CHECK(at_end()); +} + +CAF_TEST(subtract_command) { + internal_command cmd{ + 0, {}, subtract_command{data{"key"}, data{"value"}, nil}}; + push(cmd); + CHECK_EQUAL(pull(), + internal_command::type::subtract_command); + CHECK_EQUAL(pull(), data::type::string); + CHECK_EQUAL(pull(), 3u); + CHECK_EQUAL(pull(), data::type::string); + CHECK_EQUAL(pull(), 5u); + CHECK(at_end()); +} + +CAF_TEST(clear_command) { + internal_command cmd{0, {}, clear_command{}}; + push(cmd); + CHECK_EQUAL(pull(), + internal_command::type::clear_command); + CHECK(at_end()); +} + CAF_TEST_FIXTURE_SCOPE_END() diff --git a/tests/cpp/detail/peer_handshake.cc b/tests/cpp/detail/peer_handshake.cc new file mode 100644 index 00000000..0dd3462d --- /dev/null +++ b/tests/cpp/detail/peer_handshake.cc @@ -0,0 +1,312 @@ +#define SUITE detail.peer_handshake + 
+#include "broker/detail/peer_handshake.hh" + +#include "test.hh" + +using namespace broker; + +namespace { + +broker::endpoint_id make_peer_id(uint8_t num) { + std::array host_id; + host_id.fill(num); + return caf::make_node_id(num, host_id); +} + +class mock_transport : public peer, public detail::unipath_manager::observer { +public: + mock_transport(caf::event_based_actor* self) : self(self), hs(this) { + // nop + } + + caf::event_based_actor* this_actor() noexcept override { + return self; + } + + endpoint_id this_endpoint() const override { + return make_peer_id(1); + } + + caf::actor remote_hdl() const override { + return hdl; + } + + detail::peer_handshake* handshake() noexcept override { + return &hs; + } + + void handshake_failed(error) override { + log.emplace_back("failed"); + } + + bool finalize_handshake() override { + log.emplace_back("finalized"); + return true; + } + + friend void intrusive_ptr_add_ref(mock_transport* ptr) noexcept { + ptr->ref(); + } + + friend void intrusive_ptr_release(mock_transport* ptr) noexcept { + ptr->deref(); + } + + caf::event_based_actor* self; + + caf::actor hdl; + + detail::peer_handshake hs; + + bool has_inbound_path = false; + + bool has_outbound_path = false; + + std::vector log; + +private: + bool make_path(bool& flag, const char* line) { + if (!flag) { + log.emplace_back(line); + flag = true; + return true; + } else { + return false; + } + } + + void add_transport_ref() noexcept override { + ref(); + } + + void release_transport_ref() noexcept override { + deref(); + } +}; + +using mock_transport_ptr = caf::intrusive_ptr; + +struct mock_transport_state { + using message_type = int; + + mock_transport_state(caf::event_based_actor* self) : self_ptr(self) { + transport = caf::make_counted(self); + } + + auto self() const noexcept { + return self_ptr; + } + + endpoint_id id() const { + return make_peer_id(1); + } + + auto& handshake() { + return transport->hs; + } + + caf::behavior make_behavior() { + return { + 
[this](atom::run) { + if (!f) + FAIL("received run but no function was defined by the fixture"); + f(*this); + f = nullptr; + }, + }; + } + + bool originator_start_peering(endpoint_id peer_id, caf::actor peer_hdl) { + transport->hdl = peer_hdl; + return handshake().originator_start_peering(peer_id, {}); + } + + bool responder_start_peering(endpoint_id peer_id, caf::actor peer_hdl) { + transport->hdl = peer_hdl; + return handshake().responder_start_peering(peer_id); + } + + mock_transport_ptr transport; + + caf::event_based_actor* self_ptr; + + std::function f; + + static inline const char* name = "actor-under-test"; +}; + +class aut_type : public caf::stateful_actor { +public: + using super = caf::stateful_actor; + + explicit aut_type(caf::actor_config& cfg) : super(cfg) { + // nop + } + + caf::behavior make_behavior() override { + return state.make_behavior(); + } +}; + +using fsm = detail::peer_handshake::fsm; + +struct fixture : time_aware_fixture> { + endpoint_id A = make_peer_id(1); + endpoint_id B = make_peer_id(2); + + caf::actor aut; + + fixture() { + aut = sys.spawn(); + } + + auto& aut_state() { + return deref(aut).state; + } + + // Utility to run some piece of code inside the AUT. 
+ void aut_exec(std::function f) { + aut_state().f = std::move(f); + inject((atom::run), from(self).to(aut).with(atom::run_v)); + } + + auto& log() { + return aut_state().transport->log; + } + + bool log_includes(std::vector lines) { + auto in_log = [this](const std::string& line) { + auto& log = aut_state().transport->log; + return std::find(log.begin(), log.end(), line) != log.end(); + }; + if (std::all_of(lines.begin(), lines.end(), in_log)) { + return true; + } else { + MESSAGE("log_includes check failed for log " << log()); + return false; + } + } + + bool log_excludes(std::vector lines) { + auto not_in_log = [this](const std::string& line) { + return std::find(log().begin(), log().end(), line) == log().end(); + }; + if (std::all_of(lines.begin(), lines.end(), not_in_log)) { + return true; + } else { + MESSAGE("log_excludes check failed for log " << log()); + return false; + } + } +}; + +caf::behavior dummy_peer() { + return { + [](atom::peer, atom::init, endpoint_id, + caf::actor) -> caf::result { + return {atom::peer_v, atom::ok_v, make_peer_id(2)}; + }, + }; +} + +} // namespace + +#define AUT_EXEC(stmt) \ + aut_exec([&](mock_transport_state& state) { \ + [[maybe_unused]] auto& handshake = state.handshake(); \ + [[maybe_unused]] auto& transport = *state.transport; \ + stmt; \ + }) + +FIXTURE_SCOPE(peer_handshake_tests, fixture) + +TEST(calling start_peering on the originator twice fails the handshake) { + auto responder = sys.spawn(dummy_peer); + AUT_EXEC(CHECK_EQUAL(handshake.state(), fsm::init_state)); + MESSAGE("start_peering transitions to 'started' and sends an init message"); + AUT_EXEC(CHECK(state.originator_start_peering(B, responder))); + AUT_EXEC(CHECK_EQUAL(handshake.state(), fsm::started)); + expect((atom::peer, atom::init, endpoint_id, caf::actor), + from(aut).to(responder).with(_, _, A, aut)); + expect((atom::peer, atom::ok, endpoint_id), + from(responder).to(aut).with(_, _, B)); + AUT_EXEC(CHECK_EQUAL(handshake.state(), fsm::started)); + 
MESSAGE("calling start_peering again is an error"); + AUT_EXEC(CHECK(!state.originator_start_peering(B, responder))); + AUT_EXEC(CHECK_EQUAL(handshake.state(), fsm::fail_state)); + AUT_EXEC(CHECK(handshake.failed())); +} + +TEST(the originator creates both streams in handle_open_stream_msg) { + auto responder = sys.spawn(dummy_peer); + AUT_EXEC(CHECK(state.originator_start_peering(B, responder))); + AUT_EXEC(CHECK(!transport.has_inbound_path)); + AUT_EXEC(CHECK(!transport.has_outbound_path)); + AUT_EXEC(CHECK(handshake.originator_handle_open_stream_msg())); + AUT_EXEC(CHECK(transport.has_inbound_path)); + AUT_EXEC(CHECK(transport.has_outbound_path)); + CHECK(log_includes({"add input slot", "add output slot"})); + CHECK(log_excludes({"finalized"})); +} + +TEST(calling handle_open_stream_msg on the originator twice is an error) { + auto responder = sys.spawn(dummy_peer); + AUT_EXEC(CHECK(state.originator_start_peering(B, responder))); + AUT_EXEC(CHECK(handshake.originator_handle_open_stream_msg())); + AUT_EXEC(CHECK(!handshake.originator_handle_open_stream_msg())); + CHECK(log_includes({"add input slot", "add output slot", "failed"})); +} + +TEST(the originator triggers callbacks on success) { + auto responder = sys.spawn(dummy_peer); + AUT_EXEC(CHECK(state.originator_start_peering(B, responder))); + AUT_EXEC(CHECK(handshake.originator_handle_open_stream_msg())); + AUT_EXEC(CHECK(handshake.handle_ack_open_msg())); + CHECK(log_includes({"add input slot", "add output slot", "finalized"})); + CHECK(log_excludes({"failed"})); +} + +TEST(calling start_peering on the responder twice fails the handshake) { + auto originator = sys.spawn(dummy_peer); + AUT_EXEC(CHECK_EQUAL(handshake.state(), fsm::init_state)); + AUT_EXEC(CHECK(state.responder_start_peering(B, originator))); + AUT_EXEC(CHECK_EQUAL(handshake.state(), fsm::started)); + AUT_EXEC(CHECK(!state.responder_start_peering(B, originator))); + AUT_EXEC(CHECK_EQUAL(handshake.state(), fsm::fail_state)); + 
AUT_EXEC(CHECK(handshake.failed())); + CHECK(log_includes({"failed"})); +} + +TEST(the responder opens the output stream first) { + auto originator = sys.spawn(dummy_peer); + AUT_EXEC(CHECK(state.responder_start_peering(B, originator))); + AUT_EXEC(CHECK(!transport.has_inbound_path)); + AUT_EXEC(CHECK(transport.has_outbound_path)); + AUT_EXEC(CHECK(handshake.responder_handle_open_stream_msg())); + AUT_EXEC(CHECK(transport.has_inbound_path)); + AUT_EXEC(CHECK(transport.has_outbound_path)); + AUT_EXEC(CHECK(handshake.handle_ack_open_msg())); + AUT_EXEC(CHECK(transport.has_inbound_path)); + AUT_EXEC(CHECK(transport.has_outbound_path)); + CHECK(log_includes({"add input slot", "add output slot", "finalized"})); + CHECK(log_excludes({"failed"})); +} + +TEST(the responder accepts messages from the originator in any order) { + // Same test as above, but ack_open_msg and open_stream_msg are swapped. + auto originator = sys.spawn(dummy_peer); + AUT_EXEC(CHECK(state.responder_start_peering(B, originator))); + AUT_EXEC(CHECK(!transport.has_inbound_path)); + AUT_EXEC(CHECK(transport.has_outbound_path)); + AUT_EXEC(CHECK(handshake.handle_ack_open_msg())); + AUT_EXEC(CHECK(!transport.has_inbound_path)); + AUT_EXEC(CHECK(transport.has_outbound_path)); + AUT_EXEC(CHECK(handshake.responder_handle_open_stream_msg())); + AUT_EXEC(CHECK(transport.has_inbound_path)); + AUT_EXEC(CHECK(transport.has_outbound_path)); + CHECK(log_includes({"add input slot", "add output slot", "finalized"})); + CHECK(log_excludes({"failed"})); +} + +FIXTURE_SCOPE_END() diff --git a/tests/cpp/detail/unipath_manager.cc b/tests/cpp/detail/unipath_manager.cc new file mode 100644 index 00000000..393c17ac --- /dev/null +++ b/tests/cpp/detail/unipath_manager.cc @@ -0,0 +1,183 @@ +#define SUITE detail.unipath_manager + +#include "broker/detail/unipath_manager.hh" + +#include "test.hh" + +#include "broker/alm/stream_transport.hh" +#include "broker/detail/peer_handshake.hh" + +using namespace broker; + +using fsm = 
detail::peer_handshake::fsm; + +namespace { + +broker::endpoint_id make_peer_id(uint8_t num) { + std::array host_id; + host_id.fill(num); + return caf::make_node_id(num, host_id); +} + +class testee_state : public alm::stream_transport { +public: + using super = stream_transport; + + static inline const char* name = "testee"; + + testee_state(caf::event_based_actor* self, endpoint_id id) : super(self) { + super::id(std::move(id)); + mgr = detail::make_peer_manager(this, this); + } + + void publish_locally(const data_message& msg) override { + buf.emplace_back(msg); + super::publish_locally(msg); + } + + std::vector shortest_path(const endpoint_id& to) { + if (auto ptr = alm::shortest_path(tbl(), to)) + return *ptr; + return {}; + } + + caf::behavior make_behavior() override { + return caf::message_handler{ + [this](atom::run) { f(*this); }, + } + .or_else(super::make_behavior()); + } + + void make_pending(endpoint_id id) { + pending_.emplace(id, mgr); + } + + bool finalize_handshake(detail::peer_manager* ptr) override { + handshake_callback_invoked = true; + return super::finalize_handshake(ptr); + } + + std::function f; + + detail::peer_manager_ptr mgr; + + std::vector buf; + + bool handshake_callback_invoked = false; +}; + +using testee_actor = caf::stateful_actor; + +struct fixture; + +struct executor { + fixture* fix; + caf::actor hdl; +}; + +struct fixture : time_aware_fixture> { + endpoint_id orig_id; + endpoint_id resp_id; + caf::actor orig_hdl; + caf::actor resp_hdl; + + fixture() { + orig_id = make_peer_id(1); + orig_hdl = sys.spawn(orig_id); + resp_id = make_peer_id(2); + resp_hdl = sys.spawn(resp_id); + REQUIRE(orig_id < resp_id); + sched.run(); + aut_exec(orig_hdl, + [this](testee_state& state) { state.make_pending(resp_id); }); + aut_exec(resp_hdl, + [this](testee_state& state) { state.make_pending(orig_id); }); + } + + ~fixture() { + anon_send_exit(orig_hdl, caf::exit_reason::user_shutdown); + anon_send_exit(resp_hdl, caf::exit_reason::user_shutdown); 
+ } + + auto& state(const caf::actor& hdl) { + return deref(hdl).state; + } + + void aut_exec(caf::actor hdl, std::function f) { + state(hdl).f = std::move(f); + inject((atom::run), from(self).to(hdl).with(atom::run_v)); + } +}; + +template +void operator<<(executor exec, F fun) { + auto g = [f{std::move(fun)}](testee_state& state) { + f(state, state.mgr->handshake()); + }; + exec.fix->aut_exec(exec.hdl, std::move(g)); +} + +} // namespace + +#define ORIG_EXEC executor{this, orig_hdl} << [&](auto& state, auto& handshake) + +#define ORIG_CHECK(stmt) \ + ORIG_EXEC { \ + CHECK(stmt); \ + } + +#define ORIG_CHECK_EQ(lhs, rhs) \ + ORIG_EXEC { \ + CHECK_EQUAL(lhs, rhs); \ + } + +#define RESP_EXEC executor{this, resp_hdl} << [&](auto& state, auto& handshake) + +#define RESP_CHECK(stmt) \ + RESP_EXEC { \ + CHECK(stmt); \ + } + +#define RESP_CHECK_EQ(lhs, rhs) \ + RESP_EXEC { \ + CHECK_EQUAL(lhs, rhs); \ + } + +FIXTURE_SCOPE(unipath_manager_tests, fixture) + +TEST(calling start_peering on the originator twice fails the handshake) { + ORIG_CHECK_EQ(handshake.state(), fsm::init_state); + RESP_CHECK_EQ(handshake.state(), fsm::init_state); + MESSAGE("start_peering transitions to 'started' and sends an init message"); + ORIG_CHECK(handshake.originator_start_peering(resp_id, resp_hdl, {})); + ORIG_CHECK_EQ(handshake.state(), fsm::started); + ORIG_CHECK_EQ(handshake.state(), fsm::started); + MESSAGE("calling start_peering again is an error"); + ORIG_CHECK(!handshake.originator_start_peering(resp_id, resp_hdl, {})); + ORIG_CHECK_EQ(handshake.state(), fsm::fail_state); + ORIG_CHECK(handshake.failed()); +} + +TEST(the originator creates both streams in handle_open_stream_msg) { + ORIG_CHECK(handshake.originator_start_peering(resp_id, resp_hdl, {})); + ORIG_CHECK(!handshake.has_inbound_path()); + ORIG_CHECK(!handshake.has_outbound_path()); + expect((atom::peer, atom::init, endpoint_id, caf::actor), + from(orig_hdl).to(resp_hdl).with(_, _, orig_id, orig_hdl)); + 
expect((caf::open_stream_msg), from(resp_hdl).to(orig_hdl).with(_)); + ORIG_CHECK(handshake.has_inbound_path()); + ORIG_CHECK(handshake.has_outbound_path()); +} + +TEST(peer managers trigger callbacks on success) { + ORIG_CHECK(handshake.originator_start_peering(resp_id, resp_hdl, {})); + expect((atom::peer, atom::init, endpoint_id, caf::actor), + from(orig_hdl).to(resp_hdl).with(_, _, orig_id, orig_hdl)); + sched.run(); + ORIG_CHECK(handshake.done()); + ORIG_CHECK(state.handshake_callback_invoked); + RESP_CHECK(handshake.done()); + RESP_CHECK(state.handshake_callback_invoked); +} + +FIXTURE_SCOPE_END() diff --git a/tests/cpp/domain_options.cc b/tests/cpp/domain_options.cc new file mode 100644 index 00000000..72e92073 --- /dev/null +++ b/tests/cpp/domain_options.cc @@ -0,0 +1,28 @@ +#define SUITE domain_options + +#include "broker/domain_options.hh" + +#include "test.hh" + +#include + +using namespace broker; + +TEST(domain options can save their values in settings) { + using caf::get_or; + caf::settings xs; + domain_options opts; + opts.disable_forwarding = true; + opts.save(xs); + CHECK_EQUAL(get_or(xs, "broker.disable-forwarding", false), true); +} + +TEST(domain options can load their values from settings) { + using caf::get_or; + caf::settings xs; + caf::put(xs, "broker.disable-forwarding", true); + domain_options opts; + CHECK_EQUAL(opts.disable_forwarding, false); + opts.load(xs); + CHECK_EQUAL(opts.disable_forwarding, true); +} diff --git a/tests/cpp/gateway.cc b/tests/cpp/gateway.cc new file mode 100644 index 00000000..a351b515 --- /dev/null +++ b/tests/cpp/gateway.cc @@ -0,0 +1,240 @@ +#define SUITE gateway + +#include "broker/gateway.hh" + +#include "test.hh" + +#include "broker/alm/peer.hh" +#include "broker/alm/stream_transport.hh" + +// TODO: implement me + +// using namespace broker; +// +// namespace { +// +// // -- actor type: peer with stream transport ----------------------------------- +// +// using peer_id = endpoint_id; +// +// class 
peer_manager : public alm::stream_transport { +// public: +// using super = alm::stream_transport; +// +// peer_manager(caf::event_based_actor* self) : super(self) { +// // nop +// } +// +// auto hdl() noexcept { +// return caf::actor_cast(self()); +// } +// +// using super::ship_locally; +// +// void ship_locally(const data_message& msg) { +// buf.emplace_back(msg); +// super::ship_locally(msg); +// } +// +// std::vector shortest_path(const peer_id& to) { +// if (auto ptr = alm::shortest_path(tbl(), to)) +// return *ptr; +// return {}; +// } +// +// std::vector buf; +// }; +// +// struct peer_actor_state { +// caf::intrusive_ptr mgr; +// static inline const char* name = "peer"; +// }; +// +// using peer_actor_type = caf::stateful_actor; +// +// caf::behavior peer_actor(peer_actor_type* self, peer_id id) { +// auto& mgr = self->state.mgr; +// mgr = caf::make_counted(self); +// mgr->id(std::move(id)); +// return mgr->make_behavior(); +// } +// +// // -- fixture ------------------------------------------------------------------ +// +// +// // This fixture configures the following topology: +// // +// // +// // internal external +// // <-------------------------> <-------------------------> +// // +// // +// // +---+ +---+ +---+ +// // +-----+ D +---+ +---+ H +-----------+ K | +// // | +---+ | | +---+ +---+ +// // +-+-+ | | +// // +-----+ B | | +-------+ | +// // | +---+ +--+ +--+ +// // +-+-+ +---+ | | +---+ +// // | A | | E +------+ G +------+ I | +// // +-+-+ +---+ | | +---+ +// // | +---+ +--+ +--+ +// // +-----+ C | | +-------+ | +// // +-+-+ | | +// // | +---+ | | +---+ +---+ +// // +-----+ F +---+ +---+ J +-----------+ L | +// // +---+ +---+ +---+ +// // +// +// #define PEER_ID(var, num) peer_id var = make_peer_id(num) +// #define PEER_EXPAND(var) std::make_pair(std::string{#var}, var) +// +// struct fixture : test_coordinator_fixture<> { +// static endpoint_id make_peer_id(uint8_t num) { +// std::array host_id; +// host_id.fill(num); +// return 
caf::make_node_id(num, host_id); +// } +// +// PEER_ID(A, 1); +// PEER_ID(B, 2); +// PEER_ID(C, 3); +// PEER_ID(D, 4); +// PEER_ID(E, 5); +// PEER_ID(F, 6); +// PEER_ID(G, 7); +// PEER_ID(H, 8); +// PEER_ID(I, 9); +// PEER_ID(J, 10); +// PEER_ID(K, 11); +// PEER_ID(L, 12); +// +// fixture() { +// peers["internal"] = sys.spawn(peer_actor, G); +// peers["external"] = sys.spawn(peer_actor, G); +// gateway::setup(peers["internal"], peers["external"]); +// // Note: skips G on purpose. This ID is used by `internal` and `external`. +// std::vector> cfg{ +// PEER_EXPAND(A), PEER_EXPAND(B), PEER_EXPAND(C), PEER_EXPAND(D), +// PEER_EXPAND(E), PEER_EXPAND(F), PEER_EXPAND(H), PEER_EXPAND(I), +// PEER_EXPAND(J), PEER_EXPAND(K), PEER_EXPAND(L), +// }; +// for (const auto& [name, id] : cfg) { +// names[id] = name; +// peers[name] = sys.spawn(peer_actor, id); +// } +// run(); +// } +// +// ~fixture() { +// for (auto& kvp : peers) +// anon_send_exit(kvp.second, caf::exit_reason::kill); +// } +// +// auto& get(const caf::actor& hdl) { +// return *deref(hdl).state.mgr; +// } +// +// auto& get(const endpoint_id& id) { +// return get(peers[names[id]]); +// } +// +// template +// void for_each_peer(Fun fun) { +// for (const auto& id : {A, B, C, D, E, F, H, I, J, K, L}) +// fun(get(id)); +// run(); +// } +// +// void connect_peers() { +// std::map> connections{ +// {"A", {"B", "C"}}, +// {"B", {"A", "D"}}, +// {"C", {"A", "F"}}, +// {"D", {"B", "internal"}}, +// {"E", {"internal"}}, +// {"F", {"C", "internal"}}, +// {"H", {"K", "external"}}, +// {"I", {"external"}}, +// {"J", {"L", "external"}}, +// {"K", {"H"}}, +// {"L", {"J"}}, +// }; +// auto link_id = [this](const std::string& name) { +// return get(peers[name]).id(); +// }; +// for (auto& [id, links] : connections) +// for (auto& link : links) +// anon_send(peers[id], atom::peer_v, link_id(link), peers[link]); +// run(); +// } +// +// std::map names; +// std::map peers; +// }; +// +// } // namespace +// +// #define 
CHECK_DISTANCE(src, dst, val) \ +// CHECK_EQUAL(alm::distance_to(get(src).tbl(), dst), optional{val}) +// +// FIXTURE_SCOPE(gateway_tests, fixture) +// +// TEST(gateways separate internal and external domain) { +// connect_peers(); +// MESSAGE("peer A only sees peers from the internal domain"); +// CHECK_DISTANCE(A, B, 1); +// CHECK_DISTANCE(A, C, 1); +// CHECK_DISTANCE(A, D, 2); +// CHECK_DISTANCE(A, E, 4); +// CHECK_DISTANCE(A, F, 2); +// CHECK_DISTANCE(A, G, 3); +// CHECK_DISTANCE(A, H, nil); +// CHECK_DISTANCE(A, I, nil); +// CHECK_DISTANCE(A, J, nil); +// CHECK_DISTANCE(A, K, nil); +// CHECK_DISTANCE(A, L, nil); +// MESSAGE("peer I only sees peers from the external domain"); +// CHECK_DISTANCE(I, A, nil); +// CHECK_DISTANCE(I, B, nil); +// CHECK_DISTANCE(I, C, nil); +// CHECK_DISTANCE(I, D, nil); +// CHECK_DISTANCE(I, E, nil); +// CHECK_DISTANCE(I, F, nil); +// CHECK_DISTANCE(I, G, 1); +// CHECK_DISTANCE(I, H, 2); +// CHECK_DISTANCE(I, J, 2); +// CHECK_DISTANCE(I, K, 3); +// CHECK_DISTANCE(I, L, 3); +// } +// +// TEST(gateways forward messages between the domains) { +// for_each_peer([](auto& state) { state.subscribe({"foo", "bar"}); }); +// connect_peers(); +// MESSAGE("publish to 'foo' on A"); +// anon_send(peers["A"], atom::publish_v, make_data_message("foo", 42)); +// run(); +// MESSAGE("publish to 'bar' on I"); +// anon_send(peers["I"], atom::publish_v, make_data_message("bar", 23)); +// run(); +// MESSAGE("all peers must have received messages from both domains"); +// using log_t = std::vector; +// log_t log{make_data_message("foo", 42), make_data_message("bar", 23)}; +// CHECK_EQUAL(get(A).buf, log_t{make_data_message("bar", 23)}); +// CHECK_EQUAL(get(B).buf, log); +// CHECK_EQUAL(get(C).buf, log); +// CHECK_EQUAL(get(D).buf, log); +// CHECK_EQUAL(get(E).buf, log); +// CHECK_EQUAL(get(F).buf, log); +// CHECK_EQUAL(get(H).buf, log); +// CHECK_EQUAL(get(I).buf, log_t{make_data_message("foo", 42)}); +// CHECK_EQUAL(get(J).buf, log); +// 
CHECK_EQUAL(get(K).buf, log); +// CHECK_EQUAL(get(L).buf, log); +// } +// +// FIXTURE_SCOPE_END() + +TEST(todo) { + MESSAGE("implement me"); +} diff --git a/tests/cpp/integration.cc b/tests/cpp/integration.cc index 1c8b6064..73fcfb88 100644 --- a/tests/cpp/integration.cc +++ b/tests/cpp/integration.cc @@ -35,6 +35,7 @@ #include #include "broker/configuration.hh" +#include "broker/core_actor.hh" #include "broker/endpoint.hh" #include "broker/error.hh" #include "broker/peer_info.hh" @@ -53,19 +54,6 @@ using caf::io::connection_handle; namespace { -configuration make_config() { - broker_options options; - options.disable_ssl = true; - configuration cfg(options); - if (auto err = cfg.parse(caf::test::engine::argc(), - caf::test::engine::argv())) - CAF_FAIL("parsing the config failed: " << to_string(err)); - cfg.set("caf.middleman.network-backend", "testing"); - cfg.set("caf.scheduler.policy", "testing"); - cfg.set("caf.logger.inline-output", true); - return cfg; -} - struct peer_fixture; // Holds state shared by all peers. There exists exactly one global fixture. @@ -126,11 +114,17 @@ struct peer_fixture { // Stores the interval between two credit rounds. caf::timespan credit_round_interval; + // Returns the core manager for given core actor. + auto& state(caf::actor hdl) { + auto ptr = caf::actor_cast(hdl); + return dynamic_cast(*ptr).state; + } + // Initializes this peer and registers it at parent. peer_fixture(global_fixture* parent_ptr, std::string peer_name) : parent(parent_ptr), name(std::move(peer_name)), - ep(make_config()), + ep(base_fixture::make_config()), sys(ep.system()), sched(dynamic_cast(sys.scheduler())), mm(sys.middleman()), @@ -139,6 +133,10 @@ struct peer_fixture { parent->peers.emplace(name, this); // Run initialization code exec_loop(); + // Give the core actor a recognizable ID. + state(ep.core()).id(caf::make_node_id(unbox(caf::make_uri("test:" + name)))); + // Register at parent. 
+ parent->peers.emplace(name, this); } ~peer_fixture() { @@ -241,15 +239,25 @@ bool global_fixture::try_exec() { } void global_fixture::exec_loop() { - /* - while (try_exec()) + auto try_trigger_timeout = [this] { + std::vector ts; + for (auto& kvp : peers) { + auto& tac = kvp.second->sched.clock(); + if (!tac.schedule().empty()) + ts.emplace_back(tac.schedule().begin()->first - tac.now()); + } + if (!ts.empty()) { + auto dt = std::min_element(ts.begin(), ts.end()); + for (auto& kvp : peers) + kvp.second->sched.clock().advance_time(*dt); + return true; + } else { + return false; + } + }; + auto exec = [](auto& kvp) { return kvp.second->try_exec(); }; + while (std::any_of(peers.begin(), peers.end(), exec) || try_trigger_timeout()) ; // rinse and repeat - // */ - std::vector xs; - for (auto& kvp : peers) - xs.emplace_back(kvp.second); - exec_all_fixtures(xs.begin(), xs.end()); - // */ } // A fixture for simple setups consisting of three nodes. @@ -269,6 +277,7 @@ struct triangle_fixture : global_fixture { base_fixture::deinit_socket_api(); } + // Connect mercury to venus and earth. 
void connect_peers() { MESSAGE("prepare connections"); auto server_handle = mercury.make_accept_handle(); @@ -313,12 +322,12 @@ CAF_TEST(topic_prefix_matching_async_subscribe) { CAF_REQUIRE_EQUAL(mercury_peers.size(), 2u); CAF_CHECK_EQUAL(mercury_peers.front().status, peer_status::peered); CAF_CHECK_EQUAL(mercury_peers.back().status, peer_status::peered); - MESSAGE("assume one peer for venus"); + MESSAGE("assume two peers for venus"); venus.loop_after_next_enqueue(); auto venus_peers = venus.ep.peers(); CAF_REQUIRE_EQUAL(venus_peers.size(), 1u); CAF_CHECK_EQUAL(venus_peers.front().status, peer_status::peered); - MESSAGE("assume one peer for earth"); + MESSAGE("assume two peers for earth"); earth.loop_after_next_enqueue(); auto earth_peers = earth.ep.peers(); CAF_REQUIRE_EQUAL(earth_peers.size(), 1u); @@ -326,29 +335,28 @@ CAF_TEST(topic_prefix_matching_async_subscribe) { MESSAGE("subscribe to 'zeek/events' on venus"); venus.subscribe_to("zeek/events"); MESSAGE("subscribe to 'zeek/events/failures' on earth"); - earth.subscribe_to("zeek/events/failures"); + earth.subscribe_to("zeek/events/errors"); MESSAGE("verify subscriptions"); - auto filter = [](std::initializer_list xs) -> std::vector { - return xs; - }; mercury.loop_after_next_enqueue(); CAF_CHECK_EQUAL(mercury.ep.peer_subscriptions(), - filter({"zeek/events", "zeek/events/failures"})); + filter_type({"zeek/events"})); venus.loop_after_next_enqueue(); - CAF_CHECK_EQUAL(venus.ep.peer_subscriptions(), filter({})); + CAF_CHECK_EQUAL(venus.ep.peer_subscriptions(), + filter_type({"zeek/events/errors"})); earth.loop_after_next_enqueue(); - CAF_CHECK_EQUAL(earth.ep.peer_subscriptions(), filter({})); - MESSAGE("publish to 'zeek/events/(logging|failures)' on mercury"); - mercury.publish("zeek/events/failures", "oops", "sorry!"); - mercury.publish("zeek/events/logging", 123, 456); + CAF_CHECK_EQUAL(earth.ep.peer_subscriptions(), + filter_type({"zeek/events"})); + MESSAGE("publish to 'zeek/events/(data|errors)' on 
mercury"); + mercury.publish("zeek/events/errors", "oops", "sorry!"); + mercury.publish("zeek/events/data", 123, 456); MESSAGE("verify published data"); CAF_CHECK_EQUAL(mercury.data, data_msgs({})); - CAF_CHECK_EQUAL(venus.data, data_msgs({{"zeek/events/failures", "oops"}, - {"zeek/events/failures", "sorry!"}, - {"zeek/events/logging", 123}, - {"zeek/events/logging", 456}})); - CAF_CHECK_EQUAL(earth.data, data_msgs({{"zeek/events/failures", "oops"}, - {"zeek/events/failures", "sorry!"}})); + CAF_CHECK_EQUAL(venus.data, data_msgs({{"zeek/events/errors", "oops"}, + {"zeek/events/errors", "sorry!"}, + {"zeek/events/data", 123}, + {"zeek/events/data", 456}})); + CAF_CHECK_EQUAL(earth.data, data_msgs({{"zeek/events/errors", "oops"}, + {"zeek/events/errors", "sorry!"}})); venus.loop_after_next_enqueue(); venus.ep.unpeer("mercury", 4040); earth.loop_after_next_enqueue(); @@ -365,12 +373,12 @@ CAF_TEST(topic_prefix_matching_make_subscriber) { CAF_REQUIRE_EQUAL(mercury_peers.size(), 2u); CAF_CHECK_EQUAL(mercury_peers.front().status, peer_status::peered); CAF_CHECK_EQUAL(mercury_peers.back().status, peer_status::peered); - MESSAGE("assume one peer for venus"); + MESSAGE("assume two peers for venus"); venus.loop_after_next_enqueue(); auto venus_peers = venus.ep.peers(); CAF_REQUIRE_EQUAL(venus_peers.size(), 1u); CAF_CHECK_EQUAL(venus_peers.front().status, peer_status::peered); - MESSAGE("assume one peer for earth"); + MESSAGE("assume two peers for earth"); earth.loop_after_next_enqueue(); auto earth_peers = earth.ep.peers(); CAF_REQUIRE_EQUAL(earth_peers.size(), 1u); @@ -381,43 +389,41 @@ CAF_TEST(topic_prefix_matching_make_subscriber) { venus_s1.set_rate_calculation(false); venus_s2.set_rate_calculation(false); exec_loop(); - MESSAGE("subscribe to 'zeek/events/failures' on earth"); - auto earth_s1 = earth.ep.make_subscriber({"zeek/events/failures"}); - auto earth_s2 = earth.ep.make_subscriber({"zeek/events/failures"}); + MESSAGE("subscribe to 'zeek/events/errors' on earth"); 
+ auto earth_s1 = earth.ep.make_subscriber({"zeek/events/errors"}); + auto earth_s2 = earth.ep.make_subscriber({"zeek/events/errors"}); earth_s1.set_rate_calculation(false); earth_s2.set_rate_calculation(false); exec_loop(); MESSAGE("verify subscriptions"); - auto filter = [](std::initializer_list xs) -> std::vector { - return xs; - }; mercury.loop_after_next_enqueue(); CAF_CHECK_EQUAL(mercury.ep.peer_subscriptions(), - filter({"zeek/events", "zeek/events/failures"})); + filter_type({"zeek/events"})); venus.loop_after_next_enqueue(); - CAF_CHECK_EQUAL(venus.ep.peer_subscriptions(), filter({})); + CAF_CHECK_EQUAL(venus.ep.peer_subscriptions(), + filter_type({"zeek/events/errors"})); earth.loop_after_next_enqueue(); - CAF_CHECK_EQUAL(earth.ep.peer_subscriptions(), filter({})); - MESSAGE("publish to 'zeek/events/(logging|failures)' on mercury"); - mercury.publish("zeek/events/failures", "oops", "sorry!"); - mercury.publish("zeek/events/logging", 123, 456); + CAF_CHECK_EQUAL(earth.ep.peer_subscriptions(), filter_type({"zeek/events"})); + MESSAGE("publish to 'zeek/events/(data|errors)' on mercury"); + mercury.publish("zeek/events/errors", "oops", "sorry!"); + mercury.publish("zeek/events/data", 123, 456); MESSAGE("verify published data"); CAF_CHECK_EQUAL(venus_s1.poll(), - data_msgs({{"zeek/events/failures", "oops"}, - {"zeek/events/failures", "sorry!"}, - {"zeek/events/logging", 123}, - {"zeek/events/logging", 456}})); + data_msgs({{"zeek/events/errors", "oops"}, + {"zeek/events/errors", "sorry!"}, + {"zeek/events/data", 123}, + {"zeek/events/data", 456}})); CAF_CHECK_EQUAL(venus_s2.poll(), - data_msgs({{"zeek/events/failures", "oops"}, - {"zeek/events/failures", "sorry!"}, - {"zeek/events/logging", 123}, - {"zeek/events/logging", 456}})); + data_msgs({{"zeek/events/errors", "oops"}, + {"zeek/events/errors", "sorry!"}, + {"zeek/events/data", 123}, + {"zeek/events/data", 456}})); CAF_CHECK_EQUAL(earth_s1.poll(), - data_msgs({{"zeek/events/failures", "oops"}, - 
{"zeek/events/failures", "sorry!"}})); + data_msgs({{"zeek/events/errors", "oops"}, + {"zeek/events/errors", "sorry!"}})); CAF_CHECK_EQUAL(earth_s2.poll(), - data_msgs({{"zeek/events/failures", "oops"}, - {"zeek/events/failures", "sorry!"}})); + data_msgs({{"zeek/events/errors", "oops"}, + {"zeek/events/errors", "sorry!"}})); exec_loop(); venus.loop_after_next_enqueue(); venus.ep.unpeer("mercury", 4040); @@ -462,11 +468,24 @@ std::vector event_log(std::initializer_list xs) { return {xs}; } -std::vector event_log(const std::vector& xs) { +std::vector event_log(const std::vector& xs, + bool make_unique = false) { + // For the purpose of this test, we only care about the peer_* statuses. + auto predicate = [](const auto& x) { + if constexpr (std::is_same, status>::value) { + auto c = x.code(); + return c == sc::peer_added || c == sc::peer_removed || c == sc::peer_lost; + } else { + return true; + } + }; std::vector ys; ys.reserve(xs.size()); for (auto& x : xs) - ys.emplace_back(x); + if (caf::visit(predicate, x)) + ys.emplace_back(x); + if (make_unique) + ys.erase(std::unique(ys.begin(), ys.end()), ys.end()); return ys; } @@ -539,7 +558,7 @@ CAF_TEST(connection_retry) { venus.ep.peer_nosync("mercury", 4040, std::chrono::seconds(1)); MESSAGE("spawn helper that starts listening on mercury:4040 eventually"); mercury.sys.spawn([&](caf::event_based_actor* self) -> caf::behavior { - self->delayed_send(self, std::chrono::seconds(2), broker::atom::ok_v); + self->delayed_send(self, std::chrono::seconds(2), caf::ok_atom_v); return { [&](caf::ok_atom) { MESSAGE("start listening on mercury:4040"); @@ -561,7 +580,7 @@ CAF_TEST(connection_retry) { exec_loop(); MESSAGE("check event logs"); CAF_CHECK_EQUAL(event_log(mercury_es.poll()), event_log({sc::peer_added})); - CAF_CHECK_EQUAL(event_log(venus_es.poll()), + CAF_CHECK_EQUAL(event_log(venus_es.poll(), true), event_log({ec::peer_unavailable, sc::peer_added})); MESSAGE("disconnect venus from mercury"); 
venus.loop_after_next_enqueue(); diff --git a/tests/cpp/master.cc b/tests/cpp/master.cc index 137ce46f..a2c9fd3c 100644 --- a/tests/cpp/master.cc +++ b/tests/cpp/master.cc @@ -218,8 +218,8 @@ TEST(master_with_clone) { CHECK_EQUAL(value_of(ds_earth.get("test")), data{123}); // --- phase 5: peer from earth to mars -------------------------------------- auto foo_master = "foo" / topics::master_suffix; -// Initiate handshake between core1 and core2. - earth.self->send(core1, atom::peer_v, core2_proxy); + // Initiate handshake between core1 and core2. + earth.self->send(core1, atom::peer_v, core2_proxy.node(), core2_proxy); run(tick_interval); // --- phase 6: attach a clone on mars --------------------------------------- mars.sched.inline_next_enqueue(); @@ -236,8 +236,16 @@ TEST(master_with_clone) { expect_on(mars, (atom::local, internal_command), from(_).to(ds_mars.frontend())); auto run_until_idle = [&] { - // Calls idle() on masters and clones after switching to the ALM backend. - run(tick_interval); + auto idle = [&] { + return earth.deref(ms_earth).state.idle() + && mars.deref(cl_mars).state.idle(); + }; + size_t iteration = 0; + do { + if (++iteration == 100) + FAIL("system reached no idle state within 100 ticks"); + run(tick_interval); + } while (!idle()); }; run_until_idle(); MESSAGE("once clone and master are idle, they are in sync"); diff --git a/tests/cpp/mixin/connector.cc b/tests/cpp/mixin/connector.cc new file mode 100644 index 00000000..7d8b3816 --- /dev/null +++ b/tests/cpp/mixin/connector.cc @@ -0,0 +1,147 @@ +#define SUITE mixin.connector + +#include "broker/mixin/connector.hh" + +#include "test.hh" + +#include + +using namespace std::string_literals; + +using namespace broker; + +namespace { + +using str_set = std::set; + +struct connector_mock_base { + connector_mock_base(caf::event_based_actor* self) : self_(self) { + // nop + } + + virtual ~connector_mock_base() { + // nop + } + + auto self() { + return self_; + } + + auto id() { + return 
self_->node(); + } + + void start_peering(const endpoint_id& remote_id, const caf::actor& hdl, + caf::response_promise promise) { + promise.deliver(remote_id, hdl); + } + + template + caf::behavior make_behavior(Fs... fs) { + return {std::move(fs)...}; + } + + template + bool dispatch_to(Ts&&...) { + CAF_FAIL("connector_mock_base::" << __func__ << " called"); + return false; + } + + template + void unpeer(Ts&&...) { + CAF_FAIL("connector_mock_base::" << __func__ << " called"); + } + + template + void cannot_remove_peer(Ts&&...) { + CAF_FAIL("connector_mock_base::" << __func__ << " called"); + } + + template + void peer_unavailable(const Ts&...) { + // nop + } + + virtual void peer_disconnected(const endpoint_id&, const caf::actor&, + const error&) { + // nop + } + + virtual void peer_removed(const endpoint_id&, const caf::actor&) { + // nop + } + + virtual caf::behavior make_behavior() { + return {}; + } + + caf::event_based_actor* self_; +}; + +caf::behavior dummy_peer() { + return { + [](atom::get, atom::id) { return "dummy"s; }, + }; +} + +struct dummy_mm_state { + size_t i = 0; + static inline const char* name = "dummy-mm"; +}; + +caf::behavior dummy_mm(caf::stateful_actor* self, size_t n, + caf::actor peer) { + return { + [=](caf::connect_atom, const std::string&, uint16_t) -> caf::message { + if (++self->state.i >= n) + return caf::make_message(peer.node(), + caf::actor_cast(peer), + str_set{}); + return caf::make_message(make_error(caf::sec::cannot_connect_to_node)); + }, + }; +} + +struct aut_state : mixin::connector { + using super = mixin::connector; + + aut_state(caf::event_based_actor* self) : super(self) { + // nop + } +}; + +using aut_type = caf::stateful_actor; + +caf::behavior aut_impl(aut_type* self) { + return self->state.make_behavior(); +} + +struct fixture : test_coordinator_fixture<> { + fixture() { + aut = sys.spawn(aut_impl); + peer = sys.spawn(dummy_peer); + } + + void set_mm(size_t tries_before_success) { + mm = sys.spawn(dummy_mm, 
tries_before_success, peer); + deref(aut).state.cache().mm(mm); + } + + caf::actor aut; + caf::actor mm; + caf::actor peer; +}; + +} // namespace + +FIXTURE_SCOPE(connector_tests, fixture) + +TEST(the connector asks the middleman for actor handles) { + set_mm(0); + self->send(aut, atom::peer_v, network_info{"localhost", 8080}); + expect((atom::peer, network_info), from(self).to(aut)); + expect((atom::connect, std::string, uint16_t), from(aut).to(mm)); + expect((endpoint_id, caf::strong_actor_ptr, str_set), from(mm).to(aut)); +} + +FIXTURE_SCOPE_END() diff --git a/tests/cpp/mixin/data_store_manager.cc b/tests/cpp/mixin/data_store_manager.cc new file mode 100644 index 00000000..98f2ca04 --- /dev/null +++ b/tests/cpp/mixin/data_store_manager.cc @@ -0,0 +1,176 @@ +#define SUITE mixin.data_store_manager + +#include "broker/mixin/data_store_manager.hh" + +#include "test.hh" + +#include + +#include "broker/alm/peer.hh" +#include "broker/alm/stream_transport.hh" + +// TODO: implement me + +// using broker::alm::peer; +// using broker::alm::stream_transport; +// +// using namespace broker; +// +// namespace { +// +// using peer_id = endpoint_id; +// +// using message_type = generic_node_message; +// +// using clone_actor_type = caf::stateful_actor; +// +// endpoint_id operator""_e(const char* cstr, size_t len) { +// auto res = caf::make_uri(caf::string_view{cstr, len}); +// return caf::make_node_id(unbox(res)); +// } +// +// class peer_manager : public mixin::data_store_manager { +// public: +// using super = mixin::data_store_manager; +// +// peer_manager(endpoint::clock* clock, caf::event_based_actor* self) +// : super(self, clock) { +// // nop +// } +// +// using super::ship_locally; +// +// void ship_locally(const data_message& msg) { +// buf.emplace_back(msg); +// super::ship_locally(msg); +// } +// +// std::vector buf; +// }; +// +// struct peer_actor_state { +// caf::intrusive_ptr mgr; +// }; +// +// using peer_actor_type = caf::stateful_actor; +// +// caf::behavior 
peer_actor(peer_actor_type* self, endpoint::clock* clock, +// peer_id id) { +// auto& mgr = self->state.mgr; +// mgr = caf::make_counted(clock, self); +// mgr->id(std::move(id)); +// return mgr->make_behavior(); +// } +// +// struct fixture : test_coordinator_fixture<> { +// using peer_ids = std::vector; +// +// endpoint_id A; +// +// endpoint_id B; +// +// auto& get(const peer_id& id) { +// return *deref(peers[id]).state.mgr; +// } +// +// fixture() : clock(&sys, true) { +// A = "node:a"_e; +// B = "node:b"_e; +// for (auto& id : peer_ids{A, B}) +// peers[id] = sys.spawn(peer_actor, &clock, id); +// anon_send(peers[A], atom::peer_v, peer_id{B}, peers[B]); +// run(); +// BROKER_ASSERT(get(A).connected_to(peers[B])); +// } +// +// ~fixture() { +// for (auto& kvp : peers) +// anon_send_exit(kvp.second, caf::exit_reason::user_shutdown); +// run(); +// } +// +// endpoint::clock clock; +// +// std::map peers; +// }; +// +// } // namespace +// +// FIXTURE_SCOPE(data_store_manager_tests, fixture) +// +// TEST(peers propagate new masters) { +// auto res = self->request(peers[A], caf::infinite, atom::store_v, +// atom::master_v, atom::attach_v, "kono", +// backend::memory, backend_options({{"foo", 4.2}})); +// consume_messages(); +// MESSAGE("data store managers respond with the actor handle of the master"); +// caf::actor kono_master; +// res.receive( +// [&](const caf::actor& hdl) { +// auto i = get(A).masters().find("kono"); +// REQUIRE_NOT_EQUAL(i, get(A).masters().end()); +// CHECK_EQUAL(i->second, hdl); +// kono_master = hdl; +// }, +// [&](const caf::error& err) { FAIL(err); }); +// MESSAGE("repeated attach messages return the original actor handle"); +// inject((atom::store, atom::master, atom::attach, std::string, backend, +// backend_options), +// from(self).to(peers[A]).with(atom::store_v, atom::master_v, +// atom::attach_v, "kono", backend::memory, +// backend_options({{"foo", 4.2}}))); +// expect((caf::actor), from(peers[A]).to(self).with(kono_master)); +// 
MESSAGE("data store managers respond to get messages"); +// inject((atom::store, atom::master, atom::get, std::string), +// from(self).to(peers[A]).with(atom::store_v, atom::master_v, +// atom::get_v, "kono")); +// expect((caf::actor), from(peers[A]).to(self).with(kono_master)); +// MESSAGE("only node A stores a handle to the master"); +// CHECK_EQUAL(get(A).masters().count("kono"), 1u); +// CHECK_EQUAL(get(B).masters().count("kono"), 0u); +// CHECK_EQUAL(get(A).clones().count("kono"), 0u); +// CHECK_EQUAL(get(B).clones().count("kono"), 0u); +// MESSAGE("node B has access to the remote master"); +// CHECK_EQUAL(get(A).has_remote_master("kono"), false); +// CHECK_EQUAL(get(B).has_remote_master("kono"), true); +// } +// +// TEST(clones wait for remote masters to appear) { +// auto res +// = self->request(peers[B], caf::infinite, atom::store_v, atom::clone_v, +// atom::attach_v, "kono", 1.0, 1.0, 1.0); +// consume_messages(); +// caf::actor clone; +// res.receive( +// [&](const caf::actor& hdl) { +// clone = hdl; +// REQUIRE(clone); +// CHECK_EQUAL(deref(clone).state.input.producer(), +// entity_id::nil()); +// }, +// [&](const caf::error& err) { CHECK(err == ec::no_such_master); }); +// MESSAGE("initially, no master exists and the clone waits for one to appear"); +// CHECK_EQUAL(get(A).masters().count("kono"), 0u); +// CHECK_EQUAL(get(B).masters().count("kono"), 0u); +// CHECK_EQUAL(get(A).clones().count("kono"), 0u); +// CHECK_EQUAL(get(B).clones().count("kono"), 1u); +// CHECK_EQUAL(get(A).has_remote_master("kono"), false); +// CHECK_EQUAL(get(B).has_remote_master("kono"), false); +// MESSAGE("after spawning a master, the clone attaches to it eventually"); +// caf::anon_send(peers[A], atom::store_v, atom::master_v, atom::attach_v, +// "kono", backend::memory, backend_options({{"foo", 4.2}})); +// consume_messages(); +// auto& state = deref(clone).state; +// for (size_t round = 0; round < 100 && !state.has_master(); ++round) { +// trigger_timeouts(); +// 
consume_messages(); +// } +// CHECK(state.has_master()); +// CHECK_NOT_EQUAL(state.input.producer(), entity_id::nil()); +// } +// +// FIXTURE_SCOPE_END() + +TEST(todo) { + MESSAGE("implement me"); +} diff --git a/tests/cpp/mixin/notifier.cc b/tests/cpp/mixin/notifier.cc new file mode 100644 index 00000000..9700d32a --- /dev/null +++ b/tests/cpp/mixin/notifier.cc @@ -0,0 +1,186 @@ +#define SUITE mixin.notifier + +#include "broker/mixin/notifier.hh" + +#include "test.hh" + +#include "broker/alm/peer.hh" +#include "broker/alm/stream_transport.hh" + +using broker::alm::peer; +using broker::alm::stream_transport; + +// TODO: implement me + +// using namespace broker; +// +// namespace { +// +// using peer_id = caf::node_id; +// +// struct dummy_cache { +// template +// void fetch(const caf::actor&, OnSuccess f, OnError g) { +// if (enabled) +// f(network_info{"localhost", 8080}); +// else +// g(make_error(caf::sec::cannot_connect_to_node)); +// } +// +// optional find(const caf::actor&) { +// if (enabled) +// return network_info{"localhost", 8080}; +// return nil; +// } +// +// bool enabled = true; +// }; +// +// template +// class cache_holder : public Base { +// public: +// template +// explicit cache_holder(Ts&&... xs) : Base(std::forward(xs)...) 
{ +// // nop +// } +// +// auto& cache() noexcept { +// return cache_; +// } +// +// private: +// dummy_cache cache_; +// }; +// +// class stream_peer_manager +// : public mixin::notifier> { +// public: +// using super = extended_base; +// +// stream_peer_manager(caf::event_based_actor* self) : super(self) { +// // nop +// } +// +// using super::ship_locally; +// +// void ship_locally(const data_message& msg) { +// if (is_internal(get_topic(msg))) { +// if (auto status = status_view::make(get_data(msg))) +// log.emplace_back(to_string(status.code())); +// else if (auto err = error_view::make(get_data(msg))) +// log.emplace_back(to_string(err.code())); +// } +// super::ship_locally(msg); +// } +// +// std::vector log; +// }; +// +// struct stream_peer_actor_state { +// static inline const char* name = "stream_peer_actor"; +// caf::intrusive_ptr mgr; +// bool connected_to(const caf::actor& hdl) const noexcept { +// return mgr->connected_to(hdl); +// } +// const auto& log() const noexcept { +// return mgr->log; +// } +// }; +// +// using stream_peer_actor_type = caf::stateful_actor; +// +// caf::behavior stream_peer_actor(stream_peer_actor_type* self, caf::node_id id) { +// auto& mgr = self->state.mgr; +// mgr = caf::make_counted(self); +// mgr->id(std::move(id)); +// return mgr->make_behavior(); +// } +// +// struct subscriber_state { +// static inline const char* name = "subscriber"; +// std::vector log; +// }; +// +// using subscriber_type = caf::stateful_actor; +// +// caf::behavior subscriber(subscriber_type* self, caf::actor aut) { +// return { +// [=](atom::local, status& x) { +// if (self->current_sender() == aut) +// self->state.log.emplace_back(to_string(x.code())); +// }, +// }; +// } +// +// struct fixture : test_coordinator_fixture<> { +// using peer_ids = std::vector; +// +// auto make_id(caf::string_view str) { +// return caf::make_node_id(unbox(caf::make_uri(str))); +// } +// +// fixture() { +// id_a = make_id("test:a"); +// id_b = make_id("test:b"); 
+// for (auto& id : peer_ids{id_a, id_b}) +// peers[id] = sys.spawn(stream_peer_actor, id); +// run(); +// } +// +// ~fixture() { +// for (auto& kvp : peers) +// anon_send_exit(kvp.second, caf::exit_reason::user_shutdown); +// anon_send_exit(logger, caf::exit_reason::user_shutdown); +// } +// +// auto& get(const peer_id& id) { +// return deref(peers[id]).state; +// } +// +// auto& log(const peer_id& id) { +// return get(id).log(); +// } +// +// template +// std::vector make_log(Ts&&... xs) { +// return {std::forward(xs)...}; +// } +// +// peer_id id_a; +// +// peer_id id_b; +// +// std::map peers; +// +// caf::actor logger; +// }; +// +// } // namespace +// +// FIXTURE_SCOPE(notifier_tests, fixture) +// +// TEST(connect and graceful disconnect emits peer_added and peer_lost) { +// anon_send(peers[id_a], atom::peer_v, id_b, peers[id_b]); +// run(); +// CHECK_EQUAL(log(id_a), make_log("endpoint_discovered", "peer_added")); +// anon_send_exit(peers[id_b], caf::exit_reason::user_shutdown); +// run(); +// CHECK_EQUAL(log(id_a), make_log("endpoint_discovered", "peer_added", +// "peer_lost", "endpoint_unreachable")); +// } +// +// TEST(connect and ungraceful disconnect emits peer_added and peer_lost) { +// anon_send(peers[id_a], atom::peer_v, id_b, peers[id_b]); +// run(); +// CHECK_EQUAL(log(id_a), make_log("endpoint_discovered", "peer_added")); +// anon_send_exit(peers[id_b], caf::exit_reason::kill); +// run(); +// CHECK_EQUAL(log(id_a), make_log("endpoint_discovered", "peer_added", +// "peer_lost", "endpoint_unreachable")); +// } +// +// FIXTURE_SCOPE_END() + +TEST(todo) { + MESSAGE("implement me"); +} diff --git a/tests/cpp/publisher.cc b/tests/cpp/publisher.cc index ded7d4b4..4c82eefe 100644 --- a/tests/cpp/publisher.cc +++ b/tests/cpp/publisher.cc @@ -41,12 +41,15 @@ using stream_type = stream; namespace { +using buf_type = std::vector; + struct consumer_state { - std::vector xs; + buf_type xs; }; -behavior consumer(stateful_actor* self, - filter_type ts, const 
actor& src) { +using consumer_actor = stateful_actor; + +behavior consumer(consumer_actor* self, filter_type ts, const actor& src) { self->send(self * src, atom::join_v, std::move(ts)); return { [=](const stream_type& in) { @@ -68,93 +71,104 @@ behavior consumer(stateful_actor* self, } ); }, - [=](atom::get) { return self->state.xs; }, }; } +struct fixture : base_fixture { + // Returns the core manager for given actor. + auto& state(caf::actor hdl) { + return deref(hdl).state; + } + + fixture() { + core1 = ep.core(); + core2 = sys.spawn(filter_type{"z"}); + anon_send(core1, atom::no_events_v); + anon_send(core2, atom::no_events_v); + run(); + core1_id = caf::make_node_id(unbox(caf::make_uri("test:core1"))); + core2_id = caf::make_node_id(unbox(caf::make_uri("test:core2"))); + state(core1).id(core1_id); + state(core2).id(core2_id); + } + + ~fixture() { + anon_send_exit(core1, exit_reason::user_shutdown); + anon_send_exit(core2, exit_reason::user_shutdown); + } + + caf::node_id core1_id; + caf::node_id core2_id; + caf::actor core1; + caf::actor core2; +}; + } // namespace -CAF_TEST_FIXTURE_SCOPE(publisher_tests, base_fixture) +CAF_TEST_FIXTURE_SCOPE(publisher_tests, fixture) CAF_TEST(blocking_publishers) { // Spawn/get/configure core actors. - broker_options options; - options.disable_ssl = true; - auto core1 = ep.core(); - auto core2 = sys.spawn(filter_type{"a"}, options, nullptr); - anon_send(core1, atom::subscribe_v, filter_type{"a"}); - anon_send(core1, atom::no_events_v); - anon_send(core2, atom::no_events_v); - self->send(core1, atom::peer_v, core2); + anon_send(core2, atom::subscribe_v, filter_type{"a"}); + run(); + inject((atom::peer, caf::node_id, caf::actor), + from(self).to(core1).with(atom::peer_v, core2_id, core2)); // Connect a consumer (leaf) to core2, which receives only a subset of 'a'. auto leaf = sys.spawn(consumer, filter_type{"a/b"}, core2); run(); - { // Lifetime scope of our publishers. 
- // Spin up two publishers: one for "a" and one for "a/b". - auto pub1 = ep.make_publisher("a"); - auto pub2 = ep.make_publisher("a/b"); - pub1.drop_all_on_destruction(); - pub2.drop_all_on_destruction(); - auto d1 = pub1.worker(); - auto d2 = pub2.worker(); - run(); - // Data flows from our publishers to core1 to core2 and finally to leaf. - using buf = std::vector; - // First, set of published messages gets filtered out at core2. - pub1.publish(0); - run(); - // Second, set of published messages gets delivered to leaf. - pub2.publish(true); - run(); - // Third, set of published messages gets again filtered out at core2. - pub1.publish({1, 2, 3}); - run(); - // Fourth, set of published messages gets delivered to leaf again. - pub2.publish({false, true}); - run(); - // Check log of the consumer. - self->send(leaf, atom::get_v); - sched.prioritize(leaf); - consume_message(); - self->receive( - [](const buf& xs) { - auto expected = data_msgs({{"a/b", true}, {"a/b", false}, - {"a/b", true}}); - CAF_REQUIRE_EQUAL(xs, expected); - } - ); - } - // Shutdown. - CAF_MESSAGE("Shutdown core actors."); - anon_send_exit(core1, exit_reason::user_shutdown); - anon_send_exit(core2, exit_reason::user_shutdown); + // Spin up two publishers: one for "a" and one for "a/b". + auto pub1 = ep.make_publisher("a"); + auto pub2 = ep.make_publisher("a/b"); + pub1.drop_all_on_destruction(); + pub2.drop_all_on_destruction(); + auto d1 = pub1.worker(); + auto d2 = pub2.worker(); + run(); + // Data flows from our publishers to core1 to core2 and finally to leaf. + using buf = std::vector; + // First, set of published messages gets filtered out at core2. + pub1.publish(0); + run(); + // Second, set of published messages gets delivered to leaf. + pub2.publish(true); + run(); + // Third, set of published messages gets again filtered out at core2. + pub1.publish({1, 2, 3}); + run(); + // Fourth, set of published messages gets delivered to leaf again. 
+ pub2.publish({false, true}); + run(); + // Check log of the consumer. + auto expected = data_msgs({{"a/b", true}, {"a/b", false}, {"a/b", true}}); + CAF_CHECK_EQUAL(deref(leaf).state.xs, expected); anon_send_exit(leaf, exit_reason::user_shutdown); } CAF_TEST(nonblocking_publishers) { // Spawn/get/configure core actors. - broker_options options; - options.disable_ssl = true; - auto core1 = ep.core(); - auto core2 = sys.spawn(filter_type{"a", "b", "c"}, - options, nullptr); - anon_send(core1, atom::subscribe_v, filter_type{"a", "b", "c"}); - anon_send(core1, atom::no_events_v); - anon_send(core2, atom::no_events_v); - self->send(core1, atom::peer_v, core2); + anon_send(core2, atom::subscribe_v, filter_type{"a", "b", "c"}); + run(); + inject((atom::peer, caf::node_id, caf::actor), + from(self).to(core1).with(atom::peer_v, core2_id, core2)); // Connect a consumer (leaf) to core2. auto leaf = sys.spawn(consumer, filter_type{"b"}, core2); run(); // publish_all uses thread communication which would deadlock when using our // test_scheduler. We avoid this by pushing the call to publish_all to its // own thread. - using buf_type = std::vector; ep.publish_all_nosync( // Initialize send buffer with 10 elements. [](buf_type& xs) { - xs = data_msgs({{"a", 0}, {"b", true}, {"a", 1}, {"a", 2}, - {"b", false}, {"b", true}, {"a", 3}, - {"b", false}, {"a", 4}, {"a", 5}}); + xs = data_msgs({{"a", 0}, + {"b", true}, + {"a", 1}, + {"a", 2}, + {"b", false}, + {"b", true}, + {"a", 3}, + {"b", false}, + {"a", 4}, + {"a", 5}}); }, // Get next element. [](buf_type& xs, downstream& out, size_t num) { @@ -164,27 +178,13 @@ CAF_TEST(nonblocking_publishers) { xs.erase(xs.begin(), xs.begin() + static_cast(n)); }, // Did we reach the end?. - [](const buf_type& xs) { - return xs.empty(); - } - ); + [](const buf_type& xs) { return xs.empty(); }); // Communication is identical to the driver-driven test in test/cpp/core.cc run(); // Check log of the consumer. 
- self->send(leaf, atom::get_v); - sched.prioritize(leaf); - consume_message(); - self->receive( - [](const buf_type& xs) { - auto expected = data_msgs({{"b", true}, {"b", false}, - {"b", true}, {"b", false}}); - CAF_REQUIRE_EQUAL(xs, expected); - } - ); - // Shutdown. - CAF_MESSAGE("Shutdown core actors."); - anon_send_exit(core1, exit_reason::user_shutdown); - anon_send_exit(core2, exit_reason::user_shutdown); + auto expected + = data_msgs({{"b", true}, {"b", false}, {"b", true}, {"b", false}}); + CAF_CHECK_EQUAL(deref(leaf).state.xs, expected); anon_send_exit(leaf, exit_reason::user_shutdown); } diff --git a/tests/cpp/publisher_id.cc b/tests/cpp/publisher_id.cc deleted file mode 100644 index 80e708d6..00000000 --- a/tests/cpp/publisher_id.cc +++ /dev/null @@ -1,23 +0,0 @@ -#define SUITE publisher_id - -#include "broker/publisher_id.hh" - -#include "test.hh" - -using namespace broker; - -namespace { - -struct fixture { - -}; - -} // namespace - -FIXTURE_SCOPE(publisher_id_tests, fixture) - -TEST(todo) { - // implement me -} - -FIXTURE_SCOPE_END() diff --git a/tests/cpp/ssl.cc b/tests/cpp/ssl.cc index 7ce07549..d92685b3 100644 --- a/tests/cpp/ssl.cc +++ b/tests/cpp/ssl.cc @@ -19,25 +19,19 @@ using namespace broker; namespace { -#if CAF_VERSION < 1800 -#define CFG_PREFIX -#else -#define CFG_PREFIX "caf." -#endif - configuration make_config(std::string cert_id) { configuration cfg; if (auto err = cfg.parse(caf::test::engine::argc(), caf::test::engine::argv())) CAF_FAIL("parsing the config failed: " << to_string(err)); - cfg.set(CFG_PREFIX "logger.inline-output", true); + cfg.set("caf.logger.inline-output", true); if (cert_id.size()) { auto test_dir = getenv("BROKER_TEST_DIR"); CAF_REQUIRE(test_dir); auto cd = std::string(test_dir) + "/cpp/certs/"; - cfg.set(CFG_PREFIX "openssl.cafile", cd + "ca.pem"); - cfg.set(CFG_PREFIX "openssl.certificate", cd + "cert." + cert_id + ".pem"); - cfg.set(CFG_PREFIX "openssl.key", cd + "key." 
+ cert_id + ".pem"); + cfg.set("caf.openssl.cafile", cd + "ca.pem"); + cfg.set("caf.openssl.certificate", cd + "cert." + cert_id + ".pem"); + cfg.set("caf.openssl.key", cd + "key." + cert_id + ".pem"); MESSAGE("using certififcate " << cfg.openssl_certificate << ", key " << cfg.openssl_key); } @@ -78,15 +72,19 @@ struct ssl_auth_fixture { CAF_TEST_FIXTURE_SCOPE(ssl_auth_use_cases, ssl_auth_fixture) CAF_TEST(authenticated_session) { -MESSAGE("prepare authenticated connection"); + MESSAGE("prepare authenticated connection"); + MESSAGE("mercury: " << mercury_auth.ep.node_id()); + MESSAGE("venus: " << venus_auth.ep.node_id()); auto mercury_auth_es = mercury_auth.ep.make_subscriber({"/broker/test"}); auto venus_auth_es = venus_auth.ep.make_subscriber({"/broker/test"}); MESSAGE("mercury_auth listen"); auto p = mercury_auth.ep.listen("127.0.0.1", 0); MESSAGE("venus_auth peer with mecury_auth on port " << p); - auto b = venus_auth.ep.peer("127.0.0.1", p); - CAF_REQUIRE(b); + auto venus_peered = venus_auth.ep.peer("127.0.0.1", p); + CAF_REQUIRE(venus_peered); + auto mercury_peered = mercury_auth.ep.await_peer(venus_auth.ep.node_id()); + CAF_REQUIRE(mercury_peered); data_message ping{"/broker/test", "ping"}; data_message pong{"/broker/test", "pong"}; @@ -130,4 +128,3 @@ CAF_TEST(authenticated_failure_wrong_ssl_peer) { } CAF_TEST_FIXTURE_SCOPE_END() - diff --git a/tests/cpp/store.cc b/tests/cpp/store.cc index f45726c6..012dcafa 100644 --- a/tests/cpp/store.cc +++ b/tests/cpp/store.cc @@ -127,6 +127,11 @@ TEST(proxy) { REQUIRE_EQUAL(resp.answer, error{ec::no_such_key}); auto key_id = proxy.keys(); auto key_resp = proxy.receive(); - CAF_REQUIRE_EQUAL(key_resp.id, key_id); - CAF_REQUIRE_EQUAL(value_of(key_resp.answer), data(set{"foo"})); + REQUIRE_EQUAL(key_resp.id, key_id); + REQUIRE_EQUAL(value_of(key_resp.answer), data(set{"foo"})); + MESSAGE("master: put unique and receive the response"); + auto put_unique_req = proxy.put_unique("foobar", 123); + auto put_unique_res = 
proxy.receive(); + CHECK_EQUAL(put_unique_res.id, put_unique_req); + CHECK_EQUAL(value_of(put_unique_res.answer), data{true}); } diff --git a/tests/cpp/store_event.cc b/tests/cpp/store_event.cc index b7e78dc4..366a5b8c 100644 --- a/tests/cpp/store_event.cc +++ b/tests/cpp/store_event.cc @@ -55,7 +55,7 @@ TEST(insert events consist of key value and expiry) { CHECK_EQUAL(view.key(), "foo"s); CHECK_EQUAL(view.value(), "bar"s); CHECK_EQUAL(view.expiry(), 500_ns); - CHECK_EQUAL(view.publisher(), publisher_id{}); + CHECK_EQUAL(view.publisher(), entity_id{}); } MESSAGE("nil as fifth element is interpreted as no expiry"); { @@ -66,7 +66,7 @@ TEST(insert events consist of key value and expiry) { CHECK_EQUAL(view.key(), "foo"s); CHECK_EQUAL(view.value(), "bar"s); CHECK_EQUAL(view.expiry(), nil); - CHECK_EQUAL(view.publisher(), publisher_id{}); + CHECK_EQUAL(view.publisher(), entity_id{}); } MESSAGE("elements six and seven denote the publisher"); { @@ -77,7 +77,7 @@ TEST(insert events consist of key value and expiry) { CHECK_EQUAL(view.key(), "foo"s); CHECK_EQUAL(view.value(), "bar"s); CHECK_EQUAL(view.expiry(), nil); - CHECK_EQUAL(view.publisher(), (publisher_id{node, 42})); + CHECK_EQUAL(view.publisher(), (entity_id{node, 42})); } MESSAGE("make returns an invalid view for malformed data"); { @@ -98,7 +98,7 @@ TEST(update events consist of key value and expiry) { CHECK_EQUAL(view.old_value(), "bar"s); CHECK_EQUAL(view.new_value(), "baz"s); CHECK_EQUAL(view.expiry(), 500_ns); - CHECK_EQUAL(view.publisher(), publisher_id{}); + CHECK_EQUAL(view.publisher(), entity_id{}); } MESSAGE("nil as sixth element is interpreted as no expiry"); { @@ -110,7 +110,7 @@ TEST(update events consist of key value and expiry) { CHECK_EQUAL(view.old_value(), "bar"s); CHECK_EQUAL(view.new_value(), "baz"s); CHECK_EQUAL(view.expiry(), nil); - CHECK_EQUAL(view.publisher(), publisher_id{}); + CHECK_EQUAL(view.publisher(), entity_id{}); } MESSAGE("elements six and seven denote the publisher"); { @@ -122,7 
+122,7 @@ TEST(update events consist of key value and expiry) { CHECK_EQUAL(view.old_value(), "bar"s); CHECK_EQUAL(view.new_value(), "baz"s); CHECK_EQUAL(view.expiry(), nil); - CHECK_EQUAL(view.publisher(), (publisher_id{node, 42})); + CHECK_EQUAL(view.publisher(), (entity_id{node, 42})); } MESSAGE("make returns an invalid view for malformed data"); { @@ -140,7 +140,7 @@ TEST(erase events contain the key and optionally a publisher ID) { REQUIRE(view); CHECK_EQUAL(view.store_id(), "x"s); CHECK_EQUAL(view.key(), "foo"s); - CHECK_EQUAL(view.publisher(), publisher_id{}); + CHECK_EQUAL(view.publisher(), entity_id{}); } MESSAGE("elements three and four denote the publisher"); { @@ -149,7 +149,7 @@ TEST(erase events contain the key and optionally a publisher ID) { REQUIRE(view); CHECK_EQUAL(view.store_id(), "x"s); CHECK_EQUAL(view.key(), "foo"s); - CHECK_EQUAL(view.publisher(), (publisher_id{node, 42})); + CHECK_EQUAL(view.publisher(), (entity_id{node, 42})); } MESSAGE("make returns an invalid view for malformed data"); { diff --git a/tests/cpp/subscriber.cc b/tests/cpp/subscriber.cc index c0157352..09e969c2 100644 --- a/tests/cpp/subscriber.cc +++ b/tests/cpp/subscriber.cc @@ -57,20 +57,42 @@ void driver(event_based_actor* self, const actor& sink) { [](const buf_type& xs) { return xs.empty(); }); } +struct fixture : base_fixture { + // Returns the core manager for given actor. 
+ auto& state(caf::actor hdl) { + return deref(hdl).state; + } + + fixture() { + core1 = sys.spawn(filter_type{"a", "b", "c"}); + core2 = ep.core(); + anon_send(core1, atom::no_events_v); + anon_send(core2, atom::no_events_v); + run(); + core1_id = caf::make_node_id(unbox(caf::make_uri("test:core1"))); + core2_id = caf::make_node_id(unbox(caf::make_uri("test:core2"))); + state(core1).id(core1_id); + state(core2).id(core2_id); + } + + ~fixture() { + anon_send_exit(core1, exit_reason::user_shutdown); + anon_send_exit(core2, exit_reason::user_shutdown); + } + + caf::node_id core1_id; + caf::node_id core2_id; + caf::actor core1; + caf::actor core2; +}; + } // namespace -CAF_TEST_FIXTURE_SCOPE(subscriber_tests, base_fixture) +CAF_TEST_FIXTURE_SCOPE(subscriber_tests, fixture) CAF_TEST(blocking_subscriber) { - // Spawn/get/configure core actors. - broker_options options; - options.disable_ssl = true; - auto core1 = sys.spawn(filter_type{"a", "b", "c"}, - options, nullptr); - auto core2 = ep.core(); anon_send(core2, atom::subscribe_v, filter_type{"a", "b", "c"}); - anon_send(core1, atom::no_events_v); - anon_send(core2, atom::no_events_v); + self->send(core1, atom::peer_v, core2_id, core2); run(); // Connect a consumer (leaf) to core2. // auto leaf = sys.spawn(consumer, filter_type{"b"}, core2); @@ -81,7 +103,6 @@ CAF_TEST(blocking_subscriber) { CAF_MESSAGE("core2: " << to_string(core2)); CAF_MESSAGE("leaf: " << to_string(leaf)); // Initiate handshake between core1 and core2. - self->send(core1, atom::peer_v, core2); run(); // Spin up driver on core1. auto d1 = sys.spawn(driver, core1); @@ -92,25 +113,11 @@ CAF_TEST(blocking_subscriber) { auto expected = data_msgs({{"b", true}, {"b", false}, {"b", true}, {"b", false}}); CAF_CHECK_EQUAL(sub.poll(), expected); - // Shutdown. 
- CAF_MESSAGE("Shutdown core actors."); - anon_send_exit(core1, exit_reason::user_shutdown); - anon_send_exit(core2, exit_reason::user_shutdown); - anon_send_exit(leaf, exit_reason::user_shutdown); - anon_send_exit(d1, exit_reason::user_shutdown); } CAF_TEST(nonblocking_subscriber) { - // Spawn/get/configure core actors. - broker_options options; - options.disable_ssl = true; - auto core1 = sys.spawn(filter_type{"a", "b", "c"}, - options, nullptr); - auto core2 = ep.core(); - anon_send(core1, atom::no_events_v); - anon_send(core2, atom::no_events_v); anon_send(core2, atom::subscribe_v, filter_type{"a", "b", "c"}); - self->send(core1, atom::peer_v, core2); + self->send(core1, atom::peer_v, core2_id, core2); run(); // Connect a subscriber (leaf) to core2. using buf = std::vector; @@ -134,10 +141,6 @@ CAF_TEST(nonblocking_subscriber) { auto expected = data_msgs({{"b", true}, {"b", false}, {"b", true}, {"b", false}}); CAF_REQUIRE_EQUAL(result, expected); - // Shutdown. - CAF_MESSAGE("Shutdown core actors."); - anon_send_exit(core1, exit_reason::user_shutdown); - anon_send_exit(core2, exit_reason::user_shutdown); } CAF_TEST_FIXTURE_SCOPE_END() diff --git a/tests/cpp/system/README b/tests/cpp/system/README new file mode 100644 index 00000000..152bf4e7 --- /dev/null +++ b/tests/cpp/system/README @@ -0,0 +1,5 @@ +Unlike the other test suites bundled in `broker-test`, the suites in this +directory perform system testing. + +All suites use `broker::endpoint` directly and there is some level of +nondeterminism due to threading. diff --git a/tests/cpp/system/shutdown.cc b/tests/cpp/system/shutdown.cc new file mode 100644 index 00000000..0b6c4243 --- /dev/null +++ b/tests/cpp/system/shutdown.cc @@ -0,0 +1,252 @@ +// Checks whether Broker instances shut down gracefully, i.e., Broker endpoints +// ship queued events before closing remote connections. 
+ +#define SUITE system.shutdown + +#include "test.hh" + +#include "broker/endpoint.hh" + +#include +#include +#include +#include +#include +#include + +using namespace broker; + +namespace { + +using string_list = std::vector; + +// Creates the configuration for a test endpoint: limits the scheduler to two +// threads, sets the console log verbosity to "quiet" and stores +// `file_path_template` as the log file path. +configuration make_config(std::string file_path_template) { + configuration cfg; +#if CAF_VERSION < 1800 + using caf::atom; + cfg.set("scheduler.max-threads", 2); + cfg.set("logger.console-verbosity", caf::atom("quiet")); + cfg.set("logger.file-name", std::move(file_path_template)); +#else + cfg.set("caf.scheduler.max-threads", 2); + cfg.set("caf.logger.console.verbosity", "quiet"); + cfg.set("caf.logger.file.path", std::move(file_path_template)); + //cfg.set("caf.logger.file.verbosity", "trace"); + //cfg.set("caf.logger.file.excluded-components", std::vector{}); + +#endif + return cfg; +} + +// Writes all arguments followed by a newline to `std::cerr` and then aborts +// the process. +template +[[noreturn]] void hard_error(const Ts&... xs) { + (std::cerr << ... << xs) << '\n'; + abort(); +} + +// Drop-in replacement for std::barrier (based on the TS API as of 2020). +// `arrive_and_wait` blocks until `num_threads` threads have called it. The +// arrival counter never resets, so each instance is good for one rendezvous. +class barrier { +public: + explicit barrier(ptrdiff_t num_threads) + : num_threads_(num_threads), count_(0) { + // nop + } + + void arrive_and_wait() { + std::unique_lock guard{mx_}; + if (++count_ == num_threads_) { + cv_.notify_all(); + return; + } + cv_.wait(guard, [this] { return count_.load() == num_threads_; }); + } + +private: + size_t num_threads_; + std::mutex mx_; + std::atomic count_; + std::condition_variable cv_; +}; + +// Allows threads to wait on a boolean condition. Unlike promise, allows +// calling `set_true` multiple times without side effects.
+class beacon { +public: + beacon() : value_(false) { + // nop + } + + void set_true() { + std::unique_lock guard{mx_}; + value_ = true; + cv_.notify_all(); + } + + void wait() { + std::unique_lock guard{mx_}; + cv_.wait(guard, [this] { return value_.load(); }); + } + +private: + std::mutex mx_; + std::atomic value_; + std::condition_variable cv_; +}; + +// Returns the code of `err` as Broker error code or `ec::unspecified` if the +// category of `err` is not the Broker category. +auto code_of(const error& err) { +#if CAF_VERSION<1800 + if (err.category() != caf::atom("broker")) + return ec::unspecified; +#else + if (err.category() != caf::type_id_v) + return ec::unspecified; +#endif + return static_cast(err.code()); +} + +// Renders each message in `xs` as one string: the name of the code if the +// payload converts to an error (`err`) or to a status (`stat`), and +// "topic: data" for any other message. +auto normalize_status_log(const std::vector& xs) { + auto stringify = [](const data_message& msg) { + std::string result = get_topic(msg).string(); + result += ": "; + result += to_string(get_data(msg)); + return result; + }; + std::vector lines; + lines.reserve(xs.size()); + for (auto& x : xs) { + if (auto err = to(get_data(x))) + lines.emplace_back(to_string(code_of(*err))); + else if (auto stat = to(get_data(x))) + lines.emplace_back(to_string(stat->code())); + else + lines.emplace_back(stringify(x)); + } + return lines; +} + +// Stores the time at which the fixture was constructed in order to embed it +// into the log file names of the endpoints. +struct fixture { + fixture() { + t0 = broker::now(); + } + + // Returns `t0`, `test_name` and `endpoint_name` joined by single spaces, + // followed by ".log". + std::string log_path_template(const char* test_name, + const char* endpoint_name) { + std::string result; + result += broker::to_string(t0); + result += ' '; + result += test_name; + result += ' '; + result += endpoint_name; + result += ".log"; + return result; + } + + broker::timestamp t0; +}; + +} // namespace + +FIXTURE_SCOPE(system_shutdown_tests, fixture) + +// Spins up two Broker endpoints, attaches subscribers for status and error +// events and shuts both endpoints down immediately after peering. The +// subscribers should receive all events (from discovery to disconnecting) of +// the short-lived endpoints.
+TEST(status listeners receive peering events) { + MESSAGE("status subscribers receive discovery and peering events"); + auto ep1_log = std::make_shared>(); + auto ep2_log = std::make_shared>(); + auto port_promise = std::promise{}; + auto port_future = port_promise.get_future(); + barrier checkpoint{2}; // Makes sure that the endpoint in t2 shuts down first. + auto t1 = std::thread{[&]() mutable { + endpoint ep{make_config(log_path_template("peering-events", "ep1"))}; + ep.subscribe_nosync( + {topics::statuses}, [](caf::unit_t&) {}, + [ep1_log](caf::unit_t&, data_message msg) { + ep1_log->emplace_back(std::move(msg)); + }, + [](caf::unit_t&, const error&) {}); + auto port = ep.listen("127.0.0.1", 0); + if (port == 0) + hard_error("endpoint::listen failed"); + MESSAGE("first endpoint listening on port " << port); + port_promise.set_value(port); + checkpoint.arrive_and_wait(); + }}; + auto t2 = std::thread{[&, port{port_future.get()}] { + /*lifetime scope of ep*/ { + endpoint ep{make_config(log_path_template("peering-events", "ep2"))}; + ep.subscribe_nosync( + {topics::statuses}, [](caf::unit_t&) {}, + [ep2_log](caf::unit_t&, data_message msg) { + ep2_log->emplace_back(std::move(msg)); + }, + [](caf::unit_t&, const error&) {}); + if (!ep.peer("127.0.0.1", port)) + hard_error("endpoint::peer failed"); + MESSAGE("second endpoint peered to 127.0.0.1:" << port); + } + checkpoint.arrive_and_wait(); + }}; + t1.join(); + t2.join(); + MESSAGE("both endpoints were shut down"); + // Now, ep2 actively closed the peering and should report `peer_removed`, + // whereas ep1 should report `peer_lost` instead.
+ CHECK_EQUAL(normalize_status_log(*ep1_log), string_list({ + "endpoint_discovered", + "peer_added", + "peer_lost", + "endpoint_unreachable", + })); + CHECK_EQUAL(normalize_status_log(*ep2_log), string_list({ + "endpoint_discovered", + "peer_added", + "peer_removed", + "endpoint_unreachable", + })); +} + +TEST(endpoints send published data before terminating) { + MESSAGE("status subscribers receive discovery and peering events"); + auto ep1_log = std::make_shared>(); + auto port_promise = std::promise{}; + auto port_future = port_promise.get_future(); + auto beacon_ptr = std::make_shared(); // Blocks t1 until data arrived. + auto t1 = std::thread{[&]() mutable { + endpoint ep{make_config(log_path_template("publish", "ep1"))}; + ep.subscribe_nosync( + {"/foo/bar"}, [](caf::unit_t&) {}, + [ep1_log, beacon_ptr](caf::unit_t&, data_message msg) { + ep1_log->emplace_back(std::move(msg)); + beacon_ptr->set_true(); + }, + [](caf::unit_t&, const error&) {}); + auto port = ep.listen("127.0.0.1", 0); + if (port == 0) + hard_error("endpoint::listen failed"); + MESSAGE("first endpoint listening on port " << port); + port_promise.set_value(port); + beacon_ptr->wait(); + }}; + auto t2 = std::thread{[&, port{port_future.get()}] { + /*lifetime scope of ep*/ { + endpoint ep{make_config(log_path_template("publish", "ep2"))}; + if (!ep.peer("127.0.0.1", port)) + hard_error("endpoint::listen failed"); + MESSAGE("second endpoint peered to 127.0.0.1:" << port); + ep.publish("/foo/bar", data{"hello world"}); + } + }}; + t1.join(); + t2.join(); + MESSAGE("both endpoints were shut down"); + // Now, ep2 actively closed the peering and should report `peer_removed`, + // whereas ep1 should `peer_lost` instead. 
+ CHECK_EQUAL(normalize_status_log(*ep1_log), string_list({ + "/foo/bar: hello world", + })); +} + +FIXTURE_SCOPE_END() diff --git a/tests/cpp/test.cc b/tests/cpp/test.cc index 6346dbdd..18fd8e60 100644 --- a/tests/cpp/test.cc +++ b/tests/cpp/test.cc @@ -2,6 +2,8 @@ #include "test.hh" +#include + #include #include @@ -13,18 +15,44 @@ #include "broker/core_actor.hh" #ifdef BROKER_WINDOWS +#undef ERROR // The Windows headers fail if this macro is predefined. #include "Winsock2.h" #endif using namespace caf; using namespace broker; +namespace { + +// Builds an endpoint ID from values drawn from `rng`: one per element of the +// host ID array plus one that is passed first to `caf::make_node_id`. +template +broker::endpoint_id random_endpoint_id(RandomNumberGenerator& rng) { + using array_type = caf::hashed_node_id::host_id_type; + using value_type = array_type::value_type; + std::uniform_int_distribution<> d{0, std::numeric_limits::max()}; + array_type result; + for (auto& x : result) + x = static_cast(d(rng)); + return caf::make_node_id(d(rng), result); +} + +} // namespace + base_fixture::base_fixture() : ep(make_config()), sys(ep.system()), self(sys), sched(dynamic_cast(sys.scheduler())) { init_socket_api(); + // This somewhat convoluted way to fill the ids map makes sure that we fill + // up the map in a way that the values are sorted by their ID.
+ std::minstd_rand rng{0xDEADC0DE}; + std::vector id_list; + for (auto i = 0; i < 'Z' - 'A'; ++i) + id_list.emplace_back(random_endpoint_id(rng)); + std::sort(id_list.begin(), id_list.end()); + char id = 'A'; + for (auto& val : id_list) + ids[id++] = val; } base_fixture::~base_fixture() { @@ -51,6 +79,13 @@ void base_fixture::deinit_socket_api() { #endif } +char base_fixture::id_by_value(const broker::endpoint_id& value) { + for (const auto& [key, val] : ids) + if (val == value) + return key; + FAIL("value not found: " << value); +} + configuration base_fixture::make_config() { broker_options options; options.disable_ssl = true; @@ -73,6 +108,7 @@ void base_fixture::consume_message() { } int main(int argc, char** argv) { + caf::init_global_meta_objects(); broker::configuration::init_global_state(); //if (! broker::logger::file(broker::logger::debug, "broker-unit-test.log")) // return 1; diff --git a/tests/cpp/test.hh b/tests/cpp/test.hh index 6ce32dd3..975e2a28 100644 --- a/tests/cpp/test.hh +++ b/tests/cpp/test.hh @@ -14,9 +14,13 @@ #include #include "broker/configuration.hh" +#include "broker/detail/channel.hh" #include "broker/endpoint.hh" +#include "broker/fwd.hh" #include +#include +#include // -- test setup macros -------------------------------------------------------- @@ -50,6 +54,52 @@ #define CHECK_FAIL CAF_CHECK_FAIL #define FAIL CAF_FAIL +// -- custom message types for channel.cc -------------------------------------- + +using string_channel = broker::detail::channel; + +struct producer_msg { + std::string source; + string_channel::producer_message content; +}; + +struct consumer_msg { + std::string source; + string_channel::consumer_message content; +}; + +// -- ID block for all message types in test suites ---------------------------- + +CAF_BEGIN_TYPE_ID_BLOCK(broker_test, caf::id_block::broker::end) + + CAF_ADD_TYPE_ID(broker_test, (consumer_msg)) + CAF_ADD_TYPE_ID(broker_test, (producer_msg)) + CAF_ADD_TYPE_ID(broker_test, (std::vector)) + 
CAF_ADD_TYPE_ID(broker_test, (string_channel::consumer_message)) + CAF_ADD_TYPE_ID(broker_test, (string_channel::cumulative_ack)) + CAF_ADD_TYPE_ID(broker_test, (string_channel::event)) + CAF_ADD_TYPE_ID(broker_test, (string_channel::handshake)) + CAF_ADD_TYPE_ID(broker_test, (string_channel::heartbeat)) + CAF_ADD_TYPE_ID(broker_test, (string_channel::nack)) + CAF_ADD_TYPE_ID(broker_test, (string_channel::producer_message)) + CAF_ADD_TYPE_ID(broker_test, (string_channel::retransmit_failed)) + +CAF_END_TYPE_ID_BLOCK(broker_test) + +// -- inspection support ------------------------------------------------------- + +template +bool inspect(Inspector& f, producer_msg& x) { + return f.object(x).fields(f.field("source", x.source), + f.field("content", x.content)); +} + +template +bool inspect(Inspector& f, consumer_msg& x) { + return f.object(x).fields(f.field("source", x.source), + f.field("content", x.content)); +} + // -- fixtures ----------------------------------------------------------------- struct empty_fixture_base {}; @@ -108,6 +158,7 @@ public: caf::actor_system& sys; caf::scoped_actor self; scheduler_type& sched; + std::map ids; using super::run; @@ -119,6 +170,8 @@ public: static void deinit_socket_api(); + char id_by_value(const broker::endpoint_id& value); + /// Dereferences `hdl` and downcasts it to `T`. 
template T& deref(const Handle& hdl) { @@ -128,7 +181,6 @@ public: return dynamic_cast(*ptr); } -private: static broker::configuration make_config(); }; diff --git a/tests/foo.txt b/tests/foo.txt new file mode 100644 index 00000000..5716ca59 --- /dev/null +++ b/tests/foo.txt @@ -0,0 +1 @@ +bar diff --git a/tests/micro-benchmark/CMakeLists.txt b/tests/micro-benchmark/CMakeLists.txt new file mode 100644 index 00000000..0de37a18 --- /dev/null +++ b/tests/micro-benchmark/CMakeLists.txt @@ -0,0 +1,18 @@ +find_package(benchmark REQUIRED) + +add_executable(micro-benchmark + "src/main.cc" + "src/routing-table.cc" + "src/serialization.cc" + "src/streaming.cc" +) + +target_include_directories(micro-benchmark PRIVATE "include") + +target_link_libraries(micro-benchmark PRIVATE benchmark::benchmark_main) + +if (ENABLE_STATIC) + target_link_libraries(micro-benchmark PRIVATE broker_static) +else () + target_link_libraries(micro-benchmark PRIVATE broker) +endif () diff --git a/tests/micro-benchmark/include/main.hh b/tests/micro-benchmark/include/main.hh new file mode 100644 index 00000000..65f0f1f5 --- /dev/null +++ b/tests/micro-benchmark/include/main.hh @@ -0,0 +1,349 @@ +#pragma once + +#include "broker/data.hh" +#include "broker/fwd.hh" +#include "broker/message.hh" +#include "broker/time.hh" + +#include +#include + +#include + +// -- forward declarations ----------------------------------------------------- + +struct legacy_node_message; + +using uuid = caf::uuid; + +struct uuid_multipath_tree; + +class uuid_multipath; +class uuid_multipath_group; +class uuid_multipath_node; + +using uuid_node_message + = caf::cow_tuple; + +// -- type IDs ----------------------------------------------------------------- + +#define MICRO_BENCH_ADD_TYPE(type) CAF_ADD_TYPE_ID(micro_benchmarks, type) + +CAF_BEGIN_TYPE_ID_BLOCK(micro_benchmarks, caf::id_block::broker::end) + + MICRO_BENCH_ADD_TYPE((caf::stream)) + MICRO_BENCH_ADD_TYPE((legacy_node_message)) + MICRO_BENCH_ADD_TYPE((std::vector)) + 
+ MICRO_BENCH_ADD_TYPE((caf::stream)) + MICRO_BENCH_ADD_TYPE((std::vector)) + MICRO_BENCH_ADD_TYPE((uuid_multipath)) + MICRO_BENCH_ADD_TYPE((uuid_node_message)) + +CAF_END_TYPE_ID_BLOCK(micro_benchmarks) + +// -- custom types ------------------------------------------------------------- + +/// A `node_message` as it used to be pre-ALM. +struct legacy_node_message { + + /// Content of the message. + broker::node_message_content content; + + /// Time-to-live counter. + uint16_t ttl; +}; + +template +bool inspect(Inspector& f, legacy_node_message& x) { + return f.object(x).fields(f.field("content", x.content), + f.field("ttl", x.ttl)); +} + +using uuid = caf::uuid; + +class uuid_multipath_node; + +struct uuid_multipath_tree { + uuid_multipath_tree(uuid id, bool is_receiver); + ~uuid_multipath_tree(); + uuid_multipath_node* root; + broker::detail::monotonic_buffer_resource mem; +}; + +class uuid_multipath_group { +public: + friend class uuid_multipath; + friend class uuid_multipath_node; + + using iterator = broker::alm::node_iterator; + + using const_iterator = broker::alm::node_iterator; + + uuid_multipath_group() noexcept = default; + + uuid_multipath_group(const uuid_multipath_group&) = delete; + + uuid_multipath_group& operator=(const uuid_multipath_group&) = delete; + + ~uuid_multipath_group(); + + size_t size() const noexcept { + return size_; + } + + bool empty() const noexcept { + return size_ == 0; + } + + iterator begin() noexcept { + return iterator{first_}; + } + + const_iterator begin() const noexcept { + return const_iterator{first_}; + } + + iterator end() noexcept { + return iterator{nullptr}; + } + + const_iterator end() const noexcept { + return const_iterator{nullptr}; + } + + bool equals(const uuid_multipath_group& other) const noexcept; + + bool contains(uuid what) const noexcept; + + std::pair + emplace(broker::detail::monotonic_buffer_resource& mem, uuid id, + bool is_receiver); + + bool emplace(uuid_multipath_node* node); + +private: + template 
+ std::pair emplace_impl(uuid id, + MakeNewNode make_new_node); + + size_t size_ = 0; + uuid_multipath_node* first_ = nullptr; +}; + + +class uuid_multipath_node { +public: + friend class uuid_multipath; + friend class uuid_multipath_group; + friend class broker::alm::node_iterator; + friend class broker::alm::node_iterator; + friend struct uuid_multipath_tree; + + uuid_multipath_node(uuid id, bool is_receiver) noexcept + : id_(id), is_receiver_(is_receiver) { + // nop + } + + uuid_multipath_node() = delete; + + uuid_multipath_node(const uuid_multipath_node&) = delete; + + uuid_multipath_node& operator=(const uuid_multipath_node&) = delete; + + ~uuid_multipath_node(); + + const uuid& id() const noexcept { + return id_; + } + + bool is_receiver() const noexcept { + return is_receiver_; + } + + auto& nodes() noexcept { + return down_; + } + + const auto& nodes() const noexcept { + return down_; + } + + bool equals(const uuid_multipath_node& other) const noexcept; + + bool contains(uuid what) const noexcept; + +private: + template + bool save_children(Inspector& f) { + if (f.begin_sequence(down_.size())) + for (auto& child : down_) + if (!child.save(f)) + return false; + return f.end_sequence(); + } + + template + bool save(Inspector& f) { + // We are lying to the inspector about the type, because multipath_node and + // multipath_group are internal implementation details. + return f.begin_object(caf::type_id_v, + caf::type_name_v) + && f.begin_field("id") // + && f.apply(id_) // [...] + && f.end_field() // + && f.begin_field("is_receiver") // + && f.apply(is_receiver_) // [...] + && f.end_field() // + && f.begin_field("nodes") // + && save_children(f) // [...] 
+ && f.end_field() // + && f.end_object(); + } + + template + bool load_children(broker::detail::monotonic_buffer_resource& mem, + Inspector& f) { + size_t n = 0; + if (f.begin_sequence(n)) { + for (size_t i = 0; i < n; ++i) { + auto child = broker::detail::new_instance(mem, uuid{}, false); + if (!child->load(mem, f)) { + return false; + } else if (!down_.emplace(child)) { + f.emplace_error(caf::sec::field_invariant_check_failed, + "a multipath may not contain duplicates"); + return false; + } + } + } + return f.end_sequence(); + } + + template + bool load(broker::detail::monotonic_buffer_resource& mem, Inspector& f) { + return f.begin_object(caf::type_id_v, + caf::type_name_v) + && f.begin_field("id") // + && f.apply(id_) // [...] + && f.end_field() // + && f.begin_field("is_receiver") // + && f.apply(is_receiver_) // [...] + && f.end_field() // + && f.begin_field("nodes") // + && load_children(mem, f) // [...] + && f.end_field() // + && f.end_object(); + } + + uuid id_; + bool is_receiver_; + uuid_multipath_node* right_ = nullptr; + uuid_multipath_group down_; +}; + +class uuid_multipath { +public: + using tree_ptr = std::shared_ptr; + + uuid_multipath(); + + uuid_multipath(uuid id, bool is_receiver); + + uuid_multipath(const tree_ptr& t, uuid_multipath_node* h); + + explicit uuid_multipath(const tree_ptr& tptr) + : uuid_multipath(tptr, tptr->root) { + // nop + } + + uuid_multipath(uuid_multipath&& other) noexcept = default; + + uuid_multipath(const uuid_multipath& other) = default; + + uuid_multipath& operator=(uuid_multipath&& other) noexcept = default; + + uuid_multipath& operator=(const uuid_multipath& other) = default; + + auto id() const noexcept { + return head_->id_; + } + + auto is_receiver() const noexcept { + return head_->is_receiver_; + } + + bool equals(const uuid_multipath& other) const noexcept; + + bool contains(uuid what) const noexcept; + + size_t num_nodes() const noexcept { + return head_->down_.size(); + } + + template + friend bool 
inspect(Inspector& f, uuid_multipath& x) { + if constexpr (Inspector::is_loading) + return x.head_->load(x.tree_->mem, f); + else + return x.head_->save(f); + } + +private: + auto emplace(uuid id, bool is_receiver) { + return head_->down_.emplace(tree_->mem, id, is_receiver); + } + + std::shared_ptr tree_; + + uuid_multipath_node* head_; +}; + +/// @relates uuid_multipath +inline bool operator==(const uuid_multipath& x, const uuid_multipath& y) { + return x.equals(y); +} + +/// @relates uuid_multipath +inline bool operator!=(const uuid_multipath& x, const uuid_multipath& y) { + return !(x == y); +} + +// -- benchmark utilities ------------------------------------------------------ + +class generator { +public: + generator(); + + broker::endpoint_id next_endpoint_id(); + + static auto make_endpoint_id() { + generator g; + return g.next_endpoint_id(); + } + + broker::count next_count(); + + caf::uuid next_uuid(); + + std::string next_string(size_t length); + + broker::timestamp next_timestamp(); + + // Generates events for one of three possible types: + // 1. Trivial data consisting of a number and a string. + // 2. More complex data that resembles a line in a conn.log. + // 3. Large tables of size 100 by 10, filled with random strings. 
+ broker::data next_data(size_t event_type); + + static auto make_data(size_t event_type) { + generator g; + return g.next_data(event_type); + } + +private: + std::minstd_rand rng_; + broker::timestamp ts_; +}; + +void run_streaming_benchmark(); diff --git a/tests/micro-benchmark/src/main.cc b/tests/micro-benchmark/src/main.cc new file mode 100644 index 00000000..c0c72d53 --- /dev/null +++ b/tests/micro-benchmark/src/main.cc @@ -0,0 +1,263 @@ +#include "main.hh" + +#include "broker/configuration.hh" +#include "broker/data.hh" + +#include + +#include + +#include + +using namespace broker; +using namespace std::literals; + +// -- custom types ------------------------------------------------------------- + +uuid_multipath_tree::uuid_multipath_tree(uuid id, bool is_receiver) { + root = broker::detail::new_instance(mem, id, is_receiver); +} + +uuid_multipath_tree::~uuid_multipath_tree() { + // nop; we can simply "wink out" the tree structure. +} + +uuid_multipath_group::~uuid_multipath_group() { + delete first_; +} + +bool +uuid_multipath_group::equals(const uuid_multipath_group& other) const noexcept { + auto eq = [](const auto& lhs, const auto& rhs) { return lhs.equals(rhs); }; + return std::equal(begin(), end(), other.begin(), other.end(), eq); +} + +bool uuid_multipath_group::contains(uuid id) const noexcept { + auto pred = [&id](const uuid_multipath_node& node) { + return node.contains(id); + }; + return std::any_of(begin(), end(), pred); +} + +template +std::pair +uuid_multipath_group::emplace_impl(uuid id, MakeNewNode make_new_node) { + if (size_ == 0) { + first_ = make_new_node(); + size_ = 1; + return {first_, true}; + } else { + // Insertion sorts by ID. 
+ BROKER_ASSERT(first_ != nullptr); + if (first_->id_ == id) { + return {first_, false}; + } else if (first_->id_ > id) { + ++size_; + auto new_node = make_new_node(); + new_node->right_ = first_; + first_ = new_node; + return {new_node, true}; + } + auto pos = first_; + auto next = pos->right_; + while (next != nullptr) { + if (next->id_ == id) { + return {next, false}; + } else if (next->id_ > id) { + ++size_; + auto new_node = make_new_node(); + pos->right_ = new_node; + new_node->right_ = next; + return {new_node, true}; + } else { + pos = next; + next = next->right_; + } + } + ++size_; + auto new_node = make_new_node(); + BROKER_ASSERT(pos->right_ == nullptr); + pos->right_ = new_node; + return {new_node, true}; + } +} + +std::pair +uuid_multipath_group::emplace(detail::monotonic_buffer_resource& mem, uuid id, + bool is_receiver) { + auto make_new_node = [&mem, id, is_receiver] { + return broker::detail::new_instance(mem, id, + is_receiver); + }; + return emplace_impl(id, make_new_node); +} + +bool uuid_multipath_group::emplace(uuid_multipath_node* new_node) { + auto make_new_node = [new_node] { return new_node; }; + return emplace_impl(new_node->id_, make_new_node).second; +} + +uuid_multipath_node::~uuid_multipath_node() { + delete right_; +} + +bool +uuid_multipath_node::equals(const uuid_multipath_node& other) const noexcept { + return id_ == other.id_ && down_.equals(other.down_); +} + +bool uuid_multipath_node::contains(uuid what) const noexcept { + return id_ == what || down_.contains(what); +} + +uuid_multipath::uuid_multipath() { + tree_ = std::make_shared(uuid{}, false); + head_ = tree_->root; +} + +uuid_multipath::uuid_multipath(uuid id, bool is_receiver) { + tree_ = std::make_shared(id, is_receiver); + head_ = tree_->root; +} + +uuid_multipath::uuid_multipath(const tree_ptr& t, uuid_multipath_node* h) + : tree_(t), head_(h) { + // nop +} + +bool uuid_multipath::equals(const uuid_multipath& other) const noexcept { + return 
head_->equals(*other.head_); +} + +bool uuid_multipath::contains(uuid what) const noexcept { + return head_->contains(what); +} + +// -- benchmark utilities ------------------------------------------------------ + +namespace { + +struct vector_builder { + vector* vec; +}; + +template +vector_builder&& operator<<(vector_builder&& builder, T&& value) { + builder.vec->emplace_back(std::forward(value)); + return std::move(builder); +} + +template +vector_builder& operator<<(vector_builder& builder, T&& value) { + builder.vec->emplace_back(std::forward(value)); + return builder; +} + +auto add_to(vector& vec) { + return vector_builder{&vec}; +} + +timestamp brokergenesis() { + // Broker started its life on Jul 9, 2014, 5:16 PM GMT+2 with the first commit + // by Jon Siwek. This function returns a UNIX timestamp for that time. + return clock::from_time_t(1404918960); +} + +} // namespace + +generator::generator() : rng_(0xB7E57), ts_(brokergenesis()) { + // nop +} + +endpoint_id generator::next_endpoint_id() { + using array_type = caf::hashed_node_id::host_id_type; + using value_type = array_type::value_type; + std::uniform_int_distribution<> d{0, std::numeric_limits::max()}; + array_type result; + for (auto& x : result) + x = static_cast(d(rng_)); + return caf::make_node_id(d(rng_), result); +} + +count generator::next_count() { + std::uniform_int_distribution d; + return d(rng_); +} + +caf::uuid generator::next_uuid() { + return caf::uuid::random(rng_()); +} + +std::string generator::next_string(size_t length) { + std::string_view charset + = "0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz"; + std::uniform_int_distribution d{0, charset.size() - 1}; + std::string result; + result.resize(length); + for (auto& c : result) + c = charset[d(rng_)]; + return result; +} + +timestamp generator::next_timestamp() { + std::uniform_int_distribution d{1, 100}; + ts_ += std::chrono::seconds(d(rng_)); + return ts_; +} + +data generator::next_data(size_t 
event_type) { + vector result; + switch (event_type) { + case 1: { + add_to(result) << 42 << "test"s; + break; + } + case 2: { + address a1; + address a2; + convert("1.2.3.4", a1); + convert("3.4.5.6", a2); + add_to(result) << next_timestamp() << next_string(10) + << vector{a1, port(4567, port::protocol::tcp), a2, + port(80, port::protocol::tcp)} + << enum_value("tcp") << next_string(10) + << std::chrono::duration_cast(3140ms) + << next_count() << next_count() << next_string(5) << true + << false << next_count() << next_string(10) << next_count() + << next_count() << next_count() << next_count() + << set({next_string(10), next_string(10)}); + break; + } + case 3: { + table m; + for (int i = 0; i < 100; ++i) { + set s; + for (int j = 0; j < 10; ++j) + s.insert(next_string(5)); + m[next_string(15)] = std::move(s); + } + add_to(result) << next_timestamp() << std::move(m); + break; + } + default: { + std::cerr << "event type must be 1, 2, or 3; got " << event_type << '\n'; + throw std::logic_error("invalid event type"); + } + } + return data{std::move(result)}; +} + +int main(int argc, char** argv) { + caf::init_global_meta_objects(); + configuration::init_global_state(); + run_streaming_benchmark(); + benchmark::Initialize(&argc, argv); + if (benchmark::ReportUnrecognizedArguments(argc, argv)) { + return EXIT_FAILURE; + } else { + benchmark::RunSpecifiedBenchmarks(); + return EXIT_SUCCESS; + } +} diff --git a/tests/micro-benchmark/src/routing-table.cc b/tests/micro-benchmark/src/routing-table.cc new file mode 100644 index 00000000..f330fa4f --- /dev/null +++ b/tests/micro-benchmark/src/routing-table.cc @@ -0,0 +1,398 @@ +#include "broker/alm/multipath.hh" +#include "broker/alm/routing_table.hh" +#include "broker/endpoint.hh" + +#include + +#include +#include +#include + +using broker::endpoint_id; +using broker::alm::vector_timestamp; + +namespace { + +struct linear_routing_table_row { + using path_type = std::vector; + + using versioned_path_type = std::pair; + + 
endpoint_id id; + + caf::actor hdl; + + std::vector versioned_paths; + + linear_routing_table_row() = default; + linear_routing_table_row(linear_routing_table_row&&) = default; + linear_routing_table_row(const linear_routing_table_row&) = default; + linear_routing_table_row& operator=(linear_routing_table_row&&) = default; + linear_routing_table_row& operator=(const linear_routing_table_row&) = default; + + explicit linear_routing_table_row(endpoint_id id) : id(std::move(id)) { + versioned_paths.reserve(32); + } + + linear_routing_table_row(endpoint_id id, caf::actor hdl) + : id(std::move(id)), hdl(std::move(hdl)) { + versioned_paths.reserve(32); + } +}; + +struct linear_routing_table { + std::vector rows; + + const std::vector* shortest_path(const endpoint_id& peer) const { + auto pred = [&peer](const auto& row) { return row.id == peer; }; + if (auto i = std::find_if(rows.begin(), rows.end(), pred); i != rows.end()) + return std::addressof(i->versioned_paths.front().first); + else + return nullptr; + } + + linear_routing_table_row* find_row(const endpoint_id& peer) { + auto pred = [&peer](const auto& row) { return row.id == peer; }; + if (auto i = std::find_if(rows.begin(), rows.end(), pred); i != rows.end()) + return std::addressof(*i); + else + return nullptr; + } + + linear_routing_table_row& operator[](const endpoint_id& peer) { + auto pred = [&peer](const auto& row) { return row.id == peer; }; + if (auto i = std::find_if(rows.begin(), rows.end(), pred); + i != rows.end()) { + return *i; + } else { + rows.emplace_back(peer); + return rows.back(); + } + } +}; + +auto shortest_path(const linear_routing_table& tbl, const endpoint_id& peer) { + return tbl.shortest_path(peer); +} + +struct sorted_linear_routing_table { + std::vector rows; + + struct row_less_t { + bool operator()(const endpoint_id& x, + const linear_routing_table_row& y) const noexcept { + return x < y.id; + } + + bool operator()(const linear_routing_table_row& x, + const endpoint_id& y) const 
noexcept { + return x.id < y; + } + }; + + static constexpr auto row_less = row_less_t{}; + + const std::vector* shortest_path(const endpoint_id& peer) const { + if (auto i = std::lower_bound(rows.begin(), rows.end(), peer, row_less); + i != rows.end() && i->id == peer) + return std::addressof(i->versioned_paths.front().first); + else + return nullptr; + } + + linear_routing_table_row* find_row(const endpoint_id& peer) { + if (auto i = std::lower_bound(rows.begin(), rows.end(), peer, row_less); + i != rows.end() && i->id == peer) + return std::addressof(*i); + else + return nullptr; + } + + linear_routing_table_row& operator[](const endpoint_id& peer) { + if (auto i = std::lower_bound(rows.begin(), rows.end(), peer, row_less); + i != rows.end()) { + if (i->id == peer) { + return *i; + } else { + return *rows.emplace(i, peer); + } + } else { + rows.emplace_back(peer); + return rows.back(); + } + } +}; + +auto shortest_path(const sorted_linear_routing_table& tbl, + const endpoint_id& peer) { + return tbl.shortest_path(peer); +} + +template +bool add_or_update_path_impl(TableType& tbl, const endpoint_id& peer, + std::vector&& path, + vector_timestamp&& ts) { + auto& row = tbl[peer]; + auto& paths = row.versioned_paths; + auto i = std::lower_bound(paths.begin(), paths.end(), path, + broker::alm::path_less); + if (i == paths.end()) { + paths.emplace_back(std::move(path), std::move(ts)); + return true; + } else if (i->first != path) { + paths.insert(i, std::make_pair(std::move(path), std::move(ts))); + return true; + } else { + if (i->second < ts) + i->second = std::move(ts); + return false; + } +} + +bool add_or_update_path(linear_routing_table& tbl, const endpoint_id& peer, + std::vector path, vector_timestamp ts) { + return add_or_update_path_impl(tbl, peer, std::move(path), std::move(ts)); +} + +bool add_or_update_path(sorted_linear_routing_table& tbl, + const endpoint_id& peer, std::vector path, + vector_timestamp ts) { + return add_or_update_path_impl(tbl, peer, 
std::move(path), std::move(ts)); +} + +// For the benchmarking, we use a simple tree structure topology: +// +// . . . .. . . . +// \ / \ / \ / \ / +// [n = 2] O --- O O --- O +// \ / \ / +// \ / \ / +// [n = 1] O ------- O +// \ / +// \ / +// \ / +// \ / +// [this node] O +// +// This results in a "network" with (0 < n < 11): +// - nodes: f(1) = 2, f(n) = f(n-1) + 2^n (2, 6, 14, ...) +// - connections: f(1) = 3, f(n) = f(n-1) + 2^(n-1)*3 (3, 9, 21, ...) + +using path_type = std::vector; + +struct id_generator { + using array_type = caf::hashed_node_id::host_id_type; + + id_generator() : rng(0xB7E57) { + // nop + } + + endpoint_id next() { + using value_type = array_type::value_type; + std::uniform_int_distribution<> d{0, + std::numeric_limits::max()}; + array_type result; + for (auto& x : result) + x = static_cast(d(rng)); + return caf::make_node_id(d(rng), result); + } + + std::minstd_rand rng; +}; + +template +class routing_table : public benchmark::Fixture { +public: + using table_type = TableType; + + routing_table() { + topologies.resize(10); + //v2_topologies.resize(10); + ids.resize(10); + receivers_10p.resize(10); + id_generator g; + auto on_new_node = [this](size_t level, const endpoint_id& leaf_id) { + ids[level].emplace_back(leaf_id); + }; + for (size_t index = 0; index < 10; ++index) + fill_tbl(topologies[index], g, {}, 0, index, on_new_node); + // for (size_t index = 0; index < 10; ++index) + // fill_tbl(v2_topologies[index], g, {}, 0, index, [](auto&&...) 
{}); + // Sanity checking +#ifndef NDEBUG + for (size_t index = 0; index < 10; ++index) + assert(ids[index].size() == (1 << (index + 1))); +#endif + std::vector flat_ids; + for (size_t index = 0; index < 10; ++index) { + auto& vec = ids[index]; + flat_ids.insert(flat_ids.end(), vec.begin(), vec.end()); + std::shuffle(flat_ids.begin(), flat_ids.end(), g.rng); + auto p10 = std::max(flat_ids.size() / 10, size_t{1}); + receivers_10p[index].assign(flat_ids.begin(), flat_ids.begin() + p10); + } + } + + // Topologies by level. + std::vector topologies; + + // IDs by level. + std::vector> ids; + + // Receivers for the different generate_paths setup (10 percent). + std::vector> receivers_10p; + + static auto make_vt(size_t n) { + broker::alm::vector_timestamp result; + result.resize(n); + return result; + } + + static auto make_vt(const path_type& p) { + return make_vt(p.size()); + } + + template + static void fill_tbl(Table& tbl, id_generator& g, const path_type& p, + size_t level, size_t max_level, OnNewNode on_new_node) { + auto next_path = [](const auto& src, auto id) { + auto result = src; + result.emplace_back(id); + return result; + }; + auto add_entry = [&](const auto& id, const path_type& new_path) { + add_or_update_path(tbl, id, new_path, make_vt(new_path)); + }; + // Add first leaf node. + auto leaf1_id = g.next(); + auto leaf1_path = next_path(p, leaf1_id); + on_new_node(level, leaf1_id); + add_entry(leaf1_id, leaf1_path); + // Add second leaf node. + auto leaf2_id = g.next(); + auto leaf2_path = next_path(p, leaf2_id); + on_new_node(level, leaf2_id); + add_entry(leaf2_id, leaf2_path); + // Add paths between leaf1 and leaf2. + add_entry(leaf2_id, next_path(leaf1_path, leaf2_id)); + add_entry(leaf1_id, next_path(leaf2_path, leaf1_id)); + // Enter next level. 
+ if (level < max_level) { + fill_tbl(tbl, g, leaf1_path, level + 1, max_level, on_new_node); + fill_tbl(tbl, g, leaf2_path, level + 1, max_level, on_new_node); + } + } + + void add_or_update_path_bench(benchmark::State& state) { + auto max_level = static_cast(state.range(0)); + for (auto _ : state) { + id_generator g; + table_type tbl; + fill_tbl(tbl, g, {}, 0, max_level, [](auto&&...) {}); + benchmark::ClobberMemory(); + benchmark::DoNotOptimize(tbl); + } + } + + void shortest_path_bench(benchmark::State& state) { + auto index = static_cast(state.range(0)); + const auto& tbl = topologies[index]; + const auto& id = ids[index].front(); + for (auto _ : state) { + auto sp = shortest_path(tbl, id); + benchmark::DoNotOptimize(sp); + assert(sp != nullptr); + assert(sp->size() == index + 1); + } + } + + void generate_paths_1_bench(benchmark::State& state) { + auto index = static_cast(state.range(0)); + const auto& tbl = topologies[index]; + const auto& id = ids[index].front(); + std::vector receivers{id}; + for (auto _ : state) { + std::vector routes; + std::vector unreachables; + broker::alm::multipath::generate(receivers, tbl, routes, unreachables); + benchmark::ClobberMemory(); + benchmark::DoNotOptimize(routes); + assert(unreachables.empty()); + } + } + + void generate_paths_10_bench(benchmark::State& state) { + auto index = static_cast(state.range(0)); + const auto& tbl = topologies[index]; + const auto& receivers = receivers_10p[index]; + for (auto _ : state) { + std::vector routes; + std::vector unreachables; + broker::alm::multipath::generate(receivers, tbl, routes, unreachables); + benchmark::ClobberMemory(); + benchmark::DoNotOptimize(routes); + assert(unreachables.empty()); + } + } + + void erase_front_bench(benchmark::State& state) { + auto index = static_cast(state.range(0)); + const auto& id = ids[0].front(); + for (auto _ : state) { + state.PauseTiming(); + auto cpy = topologies[index]; + state.ResumeTiming(); + erase(cpy, id, [](auto&&...) 
{}); + benchmark::ClobberMemory(); + benchmark::DoNotOptimize(cpy); + } + } + + void erase_back_bench(benchmark::State& state) { + auto index = static_cast(state.range(0)); + const auto& id = ids[index].front(); + for (auto _ : state) { + state.PauseTiming(); + auto cpy = topologies[index]; + state.ResumeTiming(); + erase(cpy, id, [](auto&&...) {}); + benchmark::ClobberMemory(); + benchmark::DoNotOptimize(cpy); + } + } +}; + +} // namespace + +using default_routing_table = broker::alm::routing_table; + +#define BENCH_SETUP(TableType, Algorithm) \ + using TableType##_impl = routing_table; \ + BENCHMARK_DEFINE_F(TableType##_impl, Algorithm) \ + (benchmark::State & state) { \ + Algorithm##_bench(state); \ + } \ + BENCHMARK_REGISTER_F(TableType##_impl, Algorithm)->DenseRange(0, 9, 1); + +// -- adding entries to a routing table ---------------------------------------- + +BENCH_SETUP(default_routing_table, add_or_update_path) +BENCH_SETUP(default_routing_table, shortest_path) +BENCH_SETUP(default_routing_table, generate_paths_1) +BENCH_SETUP(default_routing_table, generate_paths_10) +BENCH_SETUP(default_routing_table, erase_front) +BENCH_SETUP(default_routing_table, erase_back) + +BENCH_SETUP(linear_routing_table, add_or_update_path) +BENCH_SETUP(linear_routing_table, shortest_path) +// BENCH_SETUP(linear_routing_table, generate_paths_1) +// BENCH_SETUP(linear_routing_table, generate_paths_10) +// BENCH_SETUP(linear_routing_table, erase_front) +// BENCH_SETUP(linear_routing_table, erase_back) + +BENCH_SETUP(sorted_linear_routing_table, add_or_update_path) +BENCH_SETUP(sorted_linear_routing_table, shortest_path) +// BENCH_SETUP(sorted_linear_routing_table, generate_paths_1) +// BENCH_SETUP(sorted_linear_routing_table, generate_paths_10) +// BENCH_SETUP(sorted_linear_routing_table, erase_front) +// BENCH_SETUP(sorted_linear_routing_table, erase_back) diff --git a/tests/micro-benchmark/src/serialization.cc b/tests/micro-benchmark/src/serialization.cc new file mode 100644 
index 00000000..bc394a7b --- /dev/null +++ b/tests/micro-benchmark/src/serialization.cc @@ -0,0 +1,184 @@ +#include "main.hh" + +#include "broker/alm/multipath.hh" +#include "broker/endpoint.hh" +#include "broker/fwd.hh" +#include "broker/message.hh" + +#include + +#include +#include + +#include +#include +#include + +using namespace broker; + +namespace { + +using buffer_type = caf::binary_serializer::container_type; + +size_t max_size(size_t init) { + return init; +} + +template +size_t max_size(size_t init, const T& x, const Ts&... xs) { + auto combinator = [](size_t init, const buffer_type& buf) { + return std::max(init, buf.size()); + }; + return max_size(std::accumulate(x.begin(), x.end(), init, combinator), xs...); +} + +class serialization : public benchmark::Fixture { +public: + static constexpr size_t num_message_types = 3; + + template + using array_t = std::array; + + serialization() { + generator g; + dst = g.next_endpoint_id(); + for (size_t index = 0; index < num_message_types; ++index) { + dmsg[index] = make_data_message("/micro/benchmark", + g.next_data(index + 1)); + to_bytes(dmsg[index], dmsg_buf[index]); + nmsg[index] = make_node_message(dmsg[index], alm::multipath{dst}); + to_bytes(nmsg[index], nmsg_buf[index]); + legacy_nmsg[index] = legacy_node_message{dmsg[index], 20}; + to_bytes(legacy_nmsg[index], legacy_nmsg_buf[index]); + } + sink_buf.reserve(max_size(0, dmsg_buf, nmsg_buf, legacy_nmsg_buf)); + } + + template + void to_bytes(T&& what, buffer_type& storage) { + caf::binary_serializer sink{nullptr, storage}; + std::ignore = sink.apply(what); + } + + template + void from_bytes(const buffer_type& storage, T& what) { + caf::binary_deserializer source{nullptr, storage}; + std::ignore = source.apply(what); + } + + // Dummy node ID for a receiver. + endpoint_id dst; + + // One data message per type. + array_t dmsg; + + // Serialized versions of dmsg; + array_t dmsg_buf; + + // One node message per type. 
+ array_t nmsg; + + // Serialized versions of nmsg; + array_t nmsg_buf; + + // One legacy node message per type. + array_t legacy_nmsg; + + // Serialized versions of legacy_nmsg; + array_t legacy_nmsg_buf; + + // A pre-allocated buffer for the benchmarks to serialize into. + buffer_type sink_buf; + + template + auto& get_msg(int signed_index) { + auto index = static_cast(signed_index); + if constexpr (std::is_same_v) { + return dmsg[index]; + } else if constexpr (std::is_same_v) { + return nmsg[index]; + } else { + static_assert(std::is_same_v); + return legacy_nmsg[index]; + } + } + + template + const buffer_type& get_buf(int signed_index) const { + auto index = static_cast(signed_index); + if constexpr (std::is_same_v) { + return dmsg_buf[index]; + } else if constexpr (std::is_same_v) { + return nmsg_buf[index]; + } else { + static_assert(std::is_same_v); + return legacy_nmsg_buf[index]; + } + } + + template + void run_serialization_bench(benchmark::State& state) { + const auto& msg = get_msg(state.range(0)); + caf::binary_serializer sink{nullptr, sink_buf}; + for (auto _ : state) { + sink.seek(0); + std::ignore = sink.apply(msg); + benchmark::DoNotOptimize(sink_buf); + } + } + + template + void run_deserialization_bench(benchmark::State& state) { + const auto& buf = get_buf(state.range(0)); + for (auto _ : state) { + T msg; + caf::binary_deserializer source{nullptr, buf}; + std::ignore = source.apply(msg); + benchmark::DoNotOptimize(msg); + } + } +}; + +} // namespace + +// -- saving and loading data messages ----------------------------------------- + +BENCHMARK_DEFINE_F(serialization, save_data_message)(benchmark::State& state) { + run_serialization_bench(state); +} + +BENCHMARK_REGISTER_F(serialization, save_data_message)->DenseRange(0, 2, 1); + +BENCHMARK_DEFINE_F(serialization, load_data_message)(benchmark::State& state) { + run_deserialization_bench(state); +} + +BENCHMARK_REGISTER_F(serialization, load_data_message)->DenseRange(0, 2, 1); + +// -- saving 
and loading node messages ----------------------------------------- + +BENCHMARK_DEFINE_F(serialization, save_node_message)(benchmark::State& state) { + run_serialization_bench(state); +} + +BENCHMARK_REGISTER_F(serialization, save_node_message)->DenseRange(0, 2, 1); + +BENCHMARK_DEFINE_F(serialization, load_node_message)(benchmark::State& state) { + run_deserialization_bench(state); +} + +BENCHMARK_REGISTER_F(serialization, load_node_message)->DenseRange(0, 2, 1); + +// -- saving and loading legacy node messages ---------------------------------- + +BENCHMARK_DEFINE_F(serialization, save_legacy_node_message)(benchmark::State& state) { + run_serialization_bench(state); +} + +BENCHMARK_REGISTER_F(serialization, save_legacy_node_message)->DenseRange(0, 2, 1); + +BENCHMARK_DEFINE_F(serialization, load_legacy_node_message)(benchmark::State& state) { + run_deserialization_bench(state); +} + +BENCHMARK_REGISTER_F(serialization, load_legacy_node_message)->DenseRange(0, 2, 1); diff --git a/tests/micro-benchmark/src/streaming.cc b/tests/micro-benchmark/src/streaming.cc new file mode 100644 index 00000000..c17e1492 --- /dev/null +++ b/tests/micro-benchmark/src/streaming.cc @@ -0,0 +1,427 @@ +#include "main.hh" + +#include "broker/message.hh" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +namespace { + +// -- print utility ------------------------------------------------------------ + +struct stats { + std::string_view benchmark_name; + std::vector runtime_measurements; // in seconds + + explicit stats(std::string_view name) : benchmark_name(name) { + // nop + } + + auto min() const noexcept { + if (runtime_measurements.empty()) + return 0.0; + else + return *std::min_element(runtime_measurements.begin(), + runtime_measurements.end()); + } + + auto max() const noexcept { + if (runtime_measurements.empty()) + return 0.0; + else + return 
*std::max_element(runtime_measurements.begin(), + runtime_measurements.end()); + } + + auto avg() const noexcept { + if (runtime_measurements.empty()) + return 0.0; + else + return std::accumulate(runtime_measurements.begin(), + runtime_measurements.end(), 0.0) + / runtime_measurements.size(); + } +}; + +struct setlw_t { + int n; +}; + +auto setlw(int n) { + return setlw_t{n}; +} + +std::ostream& operator<<(std::ostream& out, setlw_t token) { + return out << std::left << std::setw(token.n); +} + +struct setrw_t { + int n; +}; + +auto setrw(int n) { + return setrw_t{n}; +} + +std::ostream& operator<<(std::ostream& out, setrw_t token) { + return out << std::right << std::setw(token.n); +} + +struct padding { + size_t n; + char c; +}; + +std::ostream& operator<<(std::ostream& out, padding x) { + for (size_t i = 0; i < x.n; ++i) + out.put(x.c); + return out; +} + +using std::cout; + +struct layout { + static constexpr size_t col_size[] = {45, 15, 15, 15}; + + static size_t line_size() noexcept { + return std::accumulate(std::begin(col_size), std::end(col_size), size_t{0}); + } +}; + +void print_sep() { + cout << padding{layout::line_size(), '-'} << '\n'; +} + +void print_header() { + print_sep(); + cout << setlw(layout::col_size[0]) << "Benchmark" // Col 1. + << setrw(layout::col_size[1]) << "Time AVG" // Col 2. + << setrw(layout::col_size[2]) << "Time MIN" // Col 3. + << setrw(layout::col_size[3]) << "Time MAX" // Col 4. + << '\n'; + print_sep(); +} + +void print_result(const stats& xs) { + cout << setlw(layout::col_size[0]) << xs.benchmark_name // Col 1. + << setrw(layout::col_size[1] - 2) << xs.avg() << " s" // Col 2. + << setrw(layout::col_size[2] - 2) << xs.min() << " s" // Col 3. + << setrw(layout::col_size[3] - 2) << xs.max() << " s" // Col 4. 
+ << '\n'; +} + +void print_footer() { + print_sep(); + cout.put('\n'); +} + +// -- synchronization ---------------------------------------------------------- + +// Drop-in replacement for std::barrier (based on the TS API as of 2020). +// TODO: tests/cpp/system/shutdown.cc also contains a barrier implementation, we +// could consolidate the two (or just wait for std::barrier). +class barrier { +public: + explicit barrier(ptrdiff_t num_threads) + : num_threads_(num_threads), count_(0) { + // nop + } + + void arrive_and_wait() { + std::unique_lock guard{mx_}; + auto new_count = ++count_; + if (new_count == num_threads_) { + cv_.notify_all(); + } else if (new_count > num_threads_) { + count_ = 1; + cv_.wait(guard, [this] { return count_.load() == num_threads_; }); + } else { + cv_.wait(guard, [this] { return count_.load() == num_threads_; }); + } + } + +private: + ptrdiff_t num_threads_; + std::mutex mx_; + std::atomic count_; + std::condition_variable cv_; +}; + +struct synchronizer { + using clock_type = std::chrono::steady_clock; + using time_point = clock_type::time_point; + using fractional_duration = std::chrono::duration; + time_point init; + time_point fin; + + stats& recorder; + + synchronizer(stats& recorder) : recorder(recorder) { + // nop + } + + auto runtime() const { + return std::chrono::duration_cast(fin - init); + } + + void start() { + init = clock_type::now(); + } + + void stop() { + fin = clock_type::now(); + recorder.runtime_measurements.emplace_back(runtime().count()); + } +}; + +// -- worker code -------------------------------------------------------------- + +template +struct impl { + // -- member types ----------------------------------------------------------- + + using value_type = ValueType; + + // -- consumer --------------------------------------------------------------- + + struct consumer_state { + consumer_state(caf::event_based_actor* self, synchronizer* sync, size_t num) + : self(self), sync(sync), num(num) { + // nop + } + + 
caf::event_based_actor* self; + + synchronizer* sync; + + size_t num; + + static inline const char* name = "broker.consumer"; + + caf::behavior make_behavior() { + return { + [this](caf::stream in) { + self->unbecome(); + return caf::attach_stream_sink( + self, in, + // Initializer. + [](size_t& received) { received = 0; }, + // Processing step. + [](size_t& received, value_type) { ++received; }, + // Finalizer. + [this]([[maybe_unused]] size_t& received, const caf::error&) { + assert(num == received); + self->quit(); + sync->stop(); + }); + }, + }; + } + }; + + using consumer_actor = caf::stateful_actor; + + // -- producer --------------------------------------------------------------- + + struct producer_state { + producer_state(caf::event_based_actor* self, synchronizer* sync, + caf::actor consumer, size_t num, value_type msg) + : self(self), + sync(sync), + consumer(std::move(consumer)), + num(num), + msg(std::move(msg)) { + // nop + } + + caf::event_based_actor* self; + synchronizer* sync; + caf::actor consumer; + size_t num; + value_type msg; + + static inline const char* name = "broker.producer"; + + caf::behavior make_behavior() { + sync->start(); + caf::attach_stream_source( + self, std::move(consumer), + // Initializer. + [](size_t& shipped) { shipped = 0; }, + // Generator. + [this](size_t& shipped, caf::downstream& out, size_t hint) { + if (auto n = std::min(hint, num - shipped); n > 0) { + for (size_t pushed = 0; pushed < n; ++pushed) + out.push(msg); + shipped += n; + } + }, + // Done predicate. 
+ [this](const size_t& shipped) { return shipped >= num; }); + return {}; + } + }; + + using producer_actor = caf::stateful_actor; +}; + +// -- CAF setup ---------------------------------------------------------------- + +struct config : caf::actor_system_config { + config() { + config_file_path.clear(); + set("caf.scheduler.max-threads", 2); + set("caf.logger.console.verbosity", "quiet"); + set("caf.logger.file.verbosity", "quiet"); + } +}; + +// -- benchmarking utility ----------------------------------------------------- + +template +void run_single_system(std::string benchmark_name, size_t num, ValueType msg) { + using impl_type = impl; + using consumer_actor = typename impl_type::consumer_actor; + using producer_actor = typename impl_type::producer_actor; + stats recorder{benchmark_name}; + for (int i = 0; i < 10; ++i) { + config cfg; + synchronizer sync{recorder}; + caf::actor_system sys{cfg}; + auto consumer = sys.spawn(&sync, num); + sys.spawn(&sync, consumer, num, msg); + } + print_result(recorder); +} + +#if defined(CAF_MACOS) || defined(CAF_IOS) || defined(CAF_BSD) +constexpr int no_sigpipe_io_flag = 0; +#else +constexpr int no_sigpipe_io_flag = MSG_NOSIGNAL; +#endif + +std::pair make_socket_pair() { + int sockets[2]; + if (auto res = socketpair(AF_UNIX, SOCK_STREAM, 0, sockets); res != 0) { + perror("socketpair"); + abort(); + } else { + return {sockets[0], sockets[1]}; + } +} + +// Note: this setup uses some private CAF APIs and is bound to break eventually! +// Unfortunately, there's no way to set this up with the public API at the +// moment. Once there is an officially supported way we'll switch to that API +// instead. 
+template +void run_distributed(std::string benchmark_name, size_t num, ValueType msg) { + using impl_type = impl; + using consumer_actor = typename impl_type::consumer_actor; + using producer_actor = typename impl_type::producer_actor; + stats recorder{benchmark_name}; + for (int i = 0; i < 10; ++i) { + synchronizer sync{recorder}; + // Note: we use tie instead of `auto [fd0, fd1]` for lambda captures. + int fd0; + int fd1; + std::tie(fd0, fd1) = make_socket_pair(); + // Spin up the "server" with a consumer. + auto t0 = std::thread{[fd0, sptr{&sync}, num] { + config cfg; + cfg.load(); + cfg.set("caf.middleman.workers", 0); + caf::actor_system sys{cfg}; + auto consumer = sys.spawn(sptr, num); + std::set dummy; + auto scribe = sys.middleman().backend().new_scribe(fd0); + auto basp = sys.middleman().get_named_broker("BASP"); + auto cptr = caf::actor_cast(consumer); + caf::anon_send(basp, caf::publish_atom_v, std::move(scribe), + uint16_t{1234}, std::move(cptr), std::move(dummy)); + }}; + // Spin up the "client" with the producer. 
+ auto t1 = std::thread{[fd1, sptr{&sync}, num, msg] { + config cfg; + cfg.load(); + caf::actor_system sys{cfg}; + std::set dummy; + auto scribe = sys.middleman().backend().new_scribe(fd1); + auto basp = sys.middleman().get_named_broker("BASP"); + caf::actor consumer; + { + caf::scoped_actor self{sys}; + self + ->request(basp, caf::infinite, caf::connect_atom_v, std::move(scribe), + uint16_t{1234}) + .receive( + [&consumer](caf::node_id&, caf::strong_actor_ptr& ptr, + std::set&) { + if (ptr) { + consumer = caf::actor_cast(ptr); + } else { + std::cerr << "*** CAF returned an invalid consumer handle\n"; + abort(); + } + }, + [](caf::error& err) { + std::cerr << "*** failed to connect to the consumer: " + << caf::to_string(err) << '\n'; + abort(); + }); + } + sys.spawn(sptr, consumer, num, msg); + }}; + t0.join(); + t1.join(); + } + print_result(recorder); +} + +} // namespace + +void run_streaming_benchmark() { + using namespace broker; + constexpr size_t n = 100'000; + cout << std::fixed << std::setprecision(6); // Microsecond resolution. + generator g; + auto nid = g.next_endpoint_id(); + auto uid = g.next_uuid(); + print_header(); + // TODO: allow users to specify the index range. We currently only enable + // message type 1 since it's reasonably fast. 
+ for (size_t index = 0; index < 1; ++index) { + auto suffixed = [index](std::string str) { + str += std::to_string(index); + return str; + }; + auto dmsg = make_data_message("/micro/benchmark", g.next_data(index + 1)); + auto nmsg = make_node_message(dmsg, alm::multipath{nid}); + auto lmsg = legacy_node_message{dmsg, 20}; + auto umsg = uuid_node_message{dmsg, uuid_multipath{uid, true}}; + run_single_system(suffixed("/single-system/data-message/"), n, dmsg); + run_single_system(suffixed("/single-system/node-message/"), n, nmsg); + run_single_system(suffixed("/single-system/legacy-node-message/"), n, lmsg); + run_single_system(suffixed("/single-system/uuid-node-message/"), n, umsg); + run_distributed(suffixed("/distributed/data-message/"), n, dmsg); + run_distributed(suffixed("/distributed/node-message/"), n, nmsg); + run_distributed(suffixed("/distributed/legacy-node-message/"), n, lmsg); + run_distributed(suffixed("/distributed/uuid-node-message/"), n, umsg); + } + print_footer(); +} diff --git a/tests/python/communication.py b/tests/python/communication.py index 42a6a9e3..020f7d4f 100644 --- a/tests/python/communication.py +++ b/tests/python/communication.py @@ -14,9 +14,11 @@ def test_ping(self): broker.Endpoint() as ep2, \ ep1.make_subscriber("/test") as s1, \ ep2.make_subscriber("/test") as s2: - port = ep1.listen("127.0.0.1", 0) - ep2.peer("127.0.0.1", port, 1.0) + self.assertTrue(ep2.peer("127.0.0.1", port, 1.0)) + + ep1.await_peer(ep2.node_id()) + ep2.await_peer(ep1.node_id()) # --peer-end @@ -50,7 +52,10 @@ def test_messages(self): ep1.make_subscriber("/test") as s1: port = ep1.listen("127.0.0.1", 0) - ep2.peer("127.0.0.1", port, 1.0) + self.assertTrue(ep2.peer("127.0.0.1", port, 1.0)) + + ep1.await_peer(ep2.node_id()) + ep2.await_peer(ep1.node_id()) msg0 = ("/test/1", ()) ep2.publish(*msg0) @@ -75,7 +80,10 @@ def test_publisher(self): ep2.make_publisher("/test") as p2: port = ep1.listen("127.0.0.1", 0) - ep2.peer("127.0.0.1", port, 1.0) + 
self.assertTrue(ep2.peer("127.0.0.1", port, 1.0)) + + ep1.await_peer(ep2.node_id()) + ep2.await_peer(ep1.node_id()) p2.publish([1, 2, 3]) p2.publish_batch(["a", "b", "c"], [True, False]) @@ -95,22 +103,29 @@ def test_status_subscriber(self): ep2.make_status_subscriber(True) as es2: port = ep1.listen("127.0.0.1", 0) - ep2.peer("127.0.0.1", port, 1.0) - st1 = es1.get() - st2 = es2.get() - # st1.code() == broker.SC.PeerAdded, st2.code() == broker.SC.PeerAdded + self.assertEqual(ep2.peer("127.0.0.1", port, 1.0), True) + + ep1.await_peer(ep2.node_id()) + ep2.await_peer(ep1.node_id()) + + st1 = es1.get(2) + st2 = es2.get(2) + # st1.code() == [broker.SC.EndpointDiscovered, broker.SC.PeerAdded] + # st2.code() == [broker.SC.EndpointDiscovered, broker.SC.PeerAdded] # --status-end - self.assertEqual(st1.code(), broker.SC.PeerAdded) - self.assertEqual(st1.context().network.get().address, "127.0.0.1") - self.assertEqual(st2.code(), broker.SC.PeerAdded) - self.assertEqual(st2.context().network.get().address, "127.0.0.1") + self.assertEqual(len(st1), 2) + self.assertEqual(st1[0].code(), broker.SC.EndpointDiscovered) + self.assertEqual(st1[1].code(), broker.SC.PeerAdded) + self.assertEqual(len(st2), 2) + self.assertEqual(st2[0].code(), broker.SC.EndpointDiscovered) + self.assertEqual(st2[1].code(), broker.SC.PeerAdded) + self.assertEqual(st2[1].context().network.get().address, "127.0.0.1") def test_status_subscriber_error(self): # --error-start with broker.Endpoint() as ep1, \ ep1.make_status_subscriber() as es1: - r = ep1.peer("127.0.0.1", 1947, 0.0) # Try unavailable port, no retry self.assertEqual(r, False) # Not shown in docs. 
st1 = es1.get() diff --git a/tests/python/forwarding.py b/tests/python/forwarding.py index 08ca047c..a113540f 100644 --- a/tests/python/forwarding.py +++ b/tests/python/forwarding.py @@ -39,10 +39,13 @@ def cfg(opts): return ((ep1, ep2, ep3, ep4), (s1, s2, s3, s4)) class TestCommunication(unittest.TestCase): - def test_two_hops(self): - # Two hops that are subscribed, so they'll forward. + def test_two_subscribed_hops(self): + # Two hops that are subscribed. ((ep1, ep2, ep3, ep4), (s1, s2, s3, s4)) = setup_peers() + ep1.await_peer(ep4.node_id()) + ep4.await_peer(ep1.node_id()) + ep1.publish("/test/foo", "Foo!") ep4.publish("/test/bar", "Bar!") @@ -52,13 +55,13 @@ def test_two_hops(self): self.assertEqual(x, ('/test/bar', 'Bar!')) cleanup((ep1, ep2, ep3, ep4), (s1, s2, s3, s4)) - def test_two_hops_with_forward(self): - # Two hops that are not subscribed, but configured to forward. + def test_two_unsubscribed_hops(self): + # Two hops that are not subscribed, but still forward due to the source + # routing. ((ep1, ep2, ep3, ep4), (s1, s2, s3, s4)) = setup_peers(create_s2=False, create_s3=False) - ep2.forward("/test/"); - ep3.forward("/test/"); - time.sleep(1) # give time to take effect. + ep1.await_peer(ep4.node_id()) + ep4.await_peer(ep1.node_id()) ep1.publish("/test/foo", "Foo!") ep4.publish("/test/bar", "Bar!") @@ -69,42 +72,6 @@ def test_two_hops_with_forward(self): self.assertEqual(x, ('/test/bar', 'Bar!')) cleanup((ep1, ep2, ep3, ep4), (s1, s2, s3, s4)) - def test_two_hops_forwarding_disabled(self): - # Two hops that are subscribed, so they would forward but we disable. 
- no_forward = broker.BrokerOptions() - no_forward.forward = False - - ((ep1, ep2, ep3, ep4), (s1, s2, s3, s4)) = setup_peers(opts2=no_forward) - - ep1.publish("/test/foo", "Foo!") # Shouldn't arrive - x = s4.get(1.0) - self.assertEqual(x, None) - cleanup((ep1, ep2, ep3, ep4), (s1, s2, s3, s4)) - - def test_two_hops_without_forward(self): - # Two hops that are not subscribed, and hence don't forward. - ((ep1, ep2, ep3, ep4), (s1, s2, s3, s4)) = setup_peers(create_s2=False, create_s3=False) - - ep1.publish("/test/foo", "Foo!") - x = s4.get(1.0) - cleanup((ep1, ep2, ep3, ep4), (s1, s2, s3, s4)) - self.assertEqual(x, None) - - def test_two_hops_ttl(self): - ttl1 = broker.BrokerOptions() - ttl1.ttl = 2 - ((ep1, ep2, ep3, ep4), (s1, s2, s3, s4)) = setup_peers(opts1=ttl1) - - ep1.publish("/test/foo", "Foo!") - - x = s2.get(1.0) - self.assertEqual(x, ('/test/foo', 'Foo!')) - x = s3.get(1.0) - self.assertEqual(x, ('/test/foo', 'Foo!')) - x = s4.get(1.0) - self.assertEqual(x, None) # Doesn't get here anymore. 
- cleanup((ep1, ep2, ep3, ep4), (s1, s2, s3, s4)) - if __name__ == '__main__': #TestCommunication().test_two_hops() unittest.main(verbosity=3) diff --git a/tests/python/store.py b/tests/python/store.py index e53c8b04..c3585add 100644 --- a/tests/python/store.py +++ b/tests/python/store.py @@ -5,7 +5,9 @@ import broker -def create_stores(): +from inspect import currentframe, getframeinfo + +def create_stores(self): ep0 = broker.Endpoint() ep1 = broker.Endpoint() ep2 = broker.Endpoint() @@ -34,6 +36,40 @@ def create_stores(): return (ep0, ep1, ep2, m, c1, c2) +# Runs a test with one master and two clones +# --tri-setup-start +def run_tri_setup(self, f): + with broker.Endpoint() as ep0, \ + broker.Endpoint() as ep1, \ + broker.Endpoint() as ep2, \ + ep0.attach_master("test", broker.Backend.Memory) as m, \ + ep1.attach_clone("test") as c1, \ + ep2.attach_clone("test") as c2: + + # connect the nodes + port = ep0.listen("127.0.0.1", 0) + self.assertTrue(ep1.peer("127.0.0.1", port)) + self.assertTrue(ep2.peer("127.0.0.1", port)) + + # wait until the nodes are fully connected + self.assertTrue(ep0.await_peer(ep1.node_id())) + self.assertTrue(ep1.await_peer(ep0.node_id())) + self.assertTrue(ep0.await_peer(ep2.node_id())) + self.assertTrue(ep2.await_peer(ep0.node_id())) + self.assertTrue(ep1.await_peer(ep2.node_id())) + self.assertTrue(ep2.await_peer(ep1.node_id())) + + # wait until the clones have connected to the master + self.assertTrue(c1.await_idle()) + self.assertTrue(c2.await_idle()) + + f(m, c1, c2) +# --tri-setup-end + +def await_idle(self, *argv): + for store in argv: + self.assertTrue(store.await_idle()); + class TestStore(unittest.TestCase): def test_basic(self): # --master-start @@ -48,176 +84,209 @@ def test_basic(self): self.assertEqual(m.name(), "test") def test_from_master(self): - (ep0, ep1, ep2, m, c1, c2) = create_stores() - - v1 = "A" - v2 = {"A", "B", "C"} - v3 = {1: "A", 2: "B", 3: "C"} - v4 = ("A", "B", "C") - - m.put("a", v1) - m.put("b", v2) - 
m.put("c", v3) - m.put("d", v4) - self.assertEqual(c2.put_unique("e", "first"), True) - self.assertEqual(c2.put_unique("e", "second"), False) - self.assertEqual(c2.put_unique("e", "third"), False) - time.sleep(.5) - - def checkAccessors(x): - self.assertEqual(x.get("a"), v1) - self.assertEqual(x.get("b"), v2) - self.assertEqual(x.get("c"), v3) - self.assertEqual(x.get("d"), v4) - self.assertEqual(x.get("e"), "first") - self.assertEqual(x.get("X"), None) - self.assertEqual(x.exists("d"), True) - self.assertEqual(x.exists("X"), False) - self.assertEqual(x.get_index_from_value("b", "A"), True) - self.assertEqual(x.get_index_from_value("b", "X"), False) - self.assertEqual(x.get_index_from_value("c", 1), "A") - self.assertEqual(x.get_index_from_value("c", 10), None) - self.assertEqual(x.get_index_from_value("d", 1), "B") - self.assertEqual(x.get_index_from_value("d", 10), None) - self.assertEqual(x.keys(), {'a', 'b', 'c', 'd', 'e'}) - - checkAccessors(m) - checkAccessors(c1) - checkAccessors(c2) - - v5 = 5 - m.put("e", v5) - m.put("f", v5) - m.put("g", v5, 0.1) - m.put("h", v5, 2) - m.put("str", "b") - m.put("vec", (1, 2)) - m.put("set", set([1, 2])) - m.put("table", {1: "A", "2": "C"}) - - # --ops-start - m.increment("e", 1) - m.decrement("f", 1) - m.append("str", "ar") - m.insert_into("set", 3) - m.remove_from("set", 1) - m.insert_into("table", 3, "D") - m.remove_from("table", 1) - m.push("vec", 3) - m.push("vec", 4) - m.pop("vec") - # --ops-end - - time.sleep(.5) - - def checkModifiers(x): - self.assertEqual(x.get("e"), v5 + 1) - self.assertEqual(x.get("f"), v5 - 1) - self.assertEqual(x.get("g"), None) # Expired - self.assertEqual(x.get("h"), v5) # Not Expired - self.assertEqual(x.get("str"), "bar") - self.assertEqual(x.get("set"), set([2, 3])) - self.assertEqual(x.get("table"), {3: "D", "2": "C"}) - self.assertEqual(x.get("vec"), (1, 2, 3)) - - checkModifiers(m) - checkModifiers(c1) - checkModifiers(c2) - - m.clear() - time.sleep(.5) - self.assertEqual(m.keys(), 
set()) - self.assertEqual(c1.keys(), set()) - self.assertEqual(c2.keys(), set()) - - ep1.shutdown() - ep2.shutdown() - - def test_from_clones(self): - (ep0, ep1, ep2, m, c1, c2) = create_stores() - - v1 = "A" - v2 = {"A", "B", "C"} - v3 = {1: "A", 2: "B", 3: "C"} - v4 = ("A", "B", "C") - - c1.put("a", v1) - c1.put("b", v2) - c2.put("c", v3) - c2.put("d", v4) - self.assertEqual(c2.put_unique("e", "first"), True) - self.assertEqual(c2.put_unique("e", "second"), False) - self.assertEqual(c2.put_unique("e", "third"), False) - time.sleep(.5) - - def checkAccessors(x): - self.assertEqual(x.get("a"), v1) - self.assertEqual(x.get("b"), v2) - self.assertEqual(x.get("c"), v3) - self.assertEqual(x.get("d"), v4) - self.assertEqual(x.get("e"), "first") - self.assertEqual(x.get("X"), None) - self.assertEqual(x.exists("d"), True) - self.assertEqual(x.exists("X"), False) - self.assertEqual(x.get_index_from_value("b", "A"), True) - self.assertEqual(x.get_index_from_value("b", "X"), False) - self.assertEqual(x.get_index_from_value("c", 1), "A") - self.assertEqual(x.get_index_from_value("c", 10), None) - self.assertEqual(x.get_index_from_value("d", 1), "B") - self.assertEqual(x.get_index_from_value("d", 10), None) - self.assertEqual(x.keys(), {'a', 'b', 'c', 'd', 'e'}) - - checkAccessors(m) - checkAccessors(c1) - checkAccessors(c2) - - v5 = 5 - c1.put("e", v5) - c2.put("f", v5) - c1.put("g", v5, 0.1) - c2.put("h", v5, 2) - m.put("str", "b") - m.put("vec", [1, 2]) - m.put("set", set([1, 2])) - m.put("table", {1: "A", "2": "C"}) - - time.sleep(.5) - c2.increment("e", 1) - c1.decrement("f", 1) - c2.append("str", "ar") - c1.insert_into("set", 3) - c2.remove_from("set", 1) - c1.insert_into("table", 3, "D") - c2.remove_from("table", 1) - c1.push("vec", 3) - time.sleep(.5) - c2.push("vec", 4) - c2.pop("vec") - time.sleep(.5) - - def checkModifiers(x): - self.assertEqual(x.get("e"), v5 + 1) - self.assertEqual(x.get("f"), v5 - 1) - self.assertEqual(x.get("g"), None) # Expired - 
self.assertEqual(x.get("h"), v5) # Not Expired - self.assertEqual(x.get("str"), "bar") - self.assertEqual(x.get("set"), set([2, 3])) - self.assertEqual(x.get("table"), {3: "D", "2": "C"}) - self.assertEqual(x.get("vec"), (1, 2, 3)) - - checkModifiers(m) - checkModifiers(c1) - checkModifiers(c2) - - m.clear() - time.sleep(.5) - self.assertEqual(m.keys(), set()) - self.assertEqual(c1.keys(), set()) - self.assertEqual(c2.keys(), set()) - - ep1.shutdown() - ep2.shutdown() + def impl(m, c1, c2): + v1 = "A" + v2 = {"A", "B", "C"} + v3 = {1: "A", 2: "B", 3: "C"} + v4 = ("A", "B", "C") + + m.put("a", v1) + m.put("b", v2) + m.put("c", v3) + m.put("d", v4) + self.assertEqual(c2.put_unique("e", "first"), True) + self.assertEqual(c2.put_unique("e", "second"), False) + self.assertEqual(c2.put_unique("e", "third"), False) + time.sleep(.5) + + def checkAccessors(x): + self.assertEqual(x.get("a"), v1) + self.assertEqual(x.get("b"), v2) + self.assertEqual(x.get("c"), v3) + self.assertEqual(x.get("d"), v4) + self.assertEqual(x.get("e"), "first") + self.assertEqual(x.get("X"), None) + self.assertEqual(x.exists("d"), True) + self.assertEqual(x.exists("X"), False) + self.assertEqual(x.get_index_from_value("b", "A"), True) + self.assertEqual(x.get_index_from_value("b", "X"), False) + self.assertEqual(x.get_index_from_value("c", 1), "A") + self.assertEqual(x.get_index_from_value("c", 10), None) + self.assertEqual(x.get_index_from_value("d", 1), "B") + self.assertEqual(x.get_index_from_value("d", 10), None) + self.assertEqual(x.keys(), {'a', 'b', 'c', 'd', 'e'}) + + checkAccessors(m) + checkAccessors(c1) + checkAccessors(c2) + + v5 = 5 + m.put("e", v5) + m.put("f", v5) + m.put("g", v5, 0.1) + m.put("h", v5, 2) + m.put("str", "b") + m.put("vec", (1, 2)) + m.put("set", set([1, 2])) + m.put("table", {1: "A", "2": "C"}) + + # --ops-start + m.increment("e", 1) + m.decrement("f", 1) + m.append("str", "ar") + m.insert_into("set", 3) + m.remove_from("set", 1) + m.insert_into("table", 3, "D") + 
m.remove_from("table", 1) + m.push("vec", 3) + m.push("vec", 4) + m.pop("vec") + # --ops-end + + time.sleep(0.15) # Make sure 'g' expires. + + await_idle(self, c2, c1, m) + + def checkModifiers(x): + self.assertEqual(x.get("e"), v5 + 1) + self.assertEqual(x.get("f"), v5 - 1) + self.assertEqual(x.get("g"), None) # Expired + self.assertEqual(x.get("h"), v5) # Not Expired + self.assertEqual(x.get("str"), "bar") + self.assertEqual(x.get("set"), set([2, 3])) + self.assertEqual(x.get("table"), {3: "D", "2": "C"}) + self.assertEqual(x.get("vec"), (1, 2, 3)) + + checkModifiers(m) + checkModifiers(c1) + checkModifiers(c2) + + m.clear() + + await_idle(self, c2, c1, m) + + self.assertEqual(m.keys(), set()) + self.assertEqual(c1.keys(), set()) + self.assertEqual(c2.keys(), set()) + + run_tri_setup(self, impl) + + def test_from_one_clone(self): + with broker.Endpoint() as ep0, \ + broker.Endpoint() as ep1, \ + ep0.attach_master("test", broker.Backend.Memory) as m, \ + ep1.attach_clone("test") as c1: + + port = ep0.listen("127.0.0.1", 0) + + self.assertTrue(ep1.peer("127.0.0.1", port)) + + ep0.await_peer(ep1.node_id()) + ep1.await_peer(ep0.node_id()) + + v1 = "A" + v2 = {"A", "B", "C"} + v3 = {1: "A", 2: "B", 3: "C"} + v4 = ("A", "B", "C") + + c1.put("a", v1) + c1.put("b", v2) + + await_idle(self, c1, m) + + self.assertEqual(c1.put_unique("e", "first"), True) + + + def test_from_two_clones(self): + def impl(m, c1, c2): + + v1 = "A" + v2 = {"A", "B", "C"} + v3 = {1: "A", 2: "B", 3: "C"} + v4 = ("A", "B", "C") + + c1.put("a", v1) + c1.put("b", v2) + c2.put("c", v3) + c2.put("d", v4) + self.assertEqual(c2.put_unique("e", "first"), True) + self.assertEqual(c2.put_unique("e", "second"), False) + self.assertEqual(c2.put_unique("e", "third"), False) + + await_idle(self, c1, c2) + + def checkAccessors(x): + self.assertEqual(x.get("a"), v1) + self.assertEqual(x.get("b"), v2) + self.assertEqual(x.get("c"), v3) + self.assertEqual(x.get("d"), v4) + self.assertEqual(x.get("e"), "first") + 
self.assertEqual(x.get("X"), None) + self.assertEqual(x.exists("d"), True) + self.assertEqual(x.exists("X"), False) + self.assertEqual(x.get_index_from_value("b", "A"), True) + self.assertEqual(x.get_index_from_value("b", "X"), False) + self.assertEqual(x.get_index_from_value("c", 1), "A") + self.assertEqual(x.get_index_from_value("c", 10), None) + self.assertEqual(x.get_index_from_value("d", 1), "B") + self.assertEqual(x.get_index_from_value("d", 10), None) + self.assertEqual(x.keys(), {'a', 'b', 'c', 'd', 'e'}) + + checkAccessors(m) + checkAccessors(c1) + checkAccessors(c2) + + v5 = 5 + c1.put("e", v5) + c2.put("f", v5) + c1.put("g", v5, 0.1) + c2.put("h", v5, 20) + m.put("str", "b") + m.put("vec", [1, 2]) + m.put("set", set([1, 2])) + m.put("table", {1: "A", "2": "C"}) + + await_idle(self, c1, c2, m) + + c2.increment("e", 1) + c1.decrement("f", 1) + c2.append("str", "ar") + c1.insert_into("set", 3) + c2.remove_from("set", 1) + c1.insert_into("table", 3, "D") + c2.remove_from("table", 1) + c1.push("vec", 3) + await_idle(self, c1, c2, m) + + c2.push("vec", 4) + c2.pop("vec") + + await_idle(self, c1, c2, m) + + def checkModifiers(x): + self.assertEqual(x.get("e"), v5 + 1) + self.assertEqual(x.get("f"), v5 - 1) + self.assertEqual(x.get("g"), None) # Expired + self.assertEqual(x.get("h"), v5) # Not Expired + self.assertEqual(x.get("str"), "bar") + self.assertEqual(x.get("set"), set([2, 3])) + self.assertEqual(x.get("table"), {3: "D", "2": "C"}) + self.assertEqual(x.get("vec"), (1, 2, 3)) + + checkModifiers(m) + checkModifiers(c1) + checkModifiers(c2) + + m.clear() + + await_idle(self, c1, c2, m) + + self.assertEqual(m.keys(), set()) + self.assertEqual(c1.keys(), set()) + self.assertEqual(c2.keys(), set()) + + run_tri_setup(self, impl) if __name__ == '__main__': unittest.main(verbosity=3)