From 7358b7d5a0d5eddd804ac99177a24b4e6fd68c20 Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Mon, 29 Feb 2016 19:38:42 -0800 Subject: [PATCH 01/67] Serialize writes --- tchannel/tornado/connection.py | 20 +++++++++++-- tchannel/tornado/util.py | 51 ---------------------------------- 2 files changed, 17 insertions(+), 54 deletions(-) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index 234d7da6..b0781497 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -53,7 +53,6 @@ from ..messages.types import Types from .message_factory import build_raw_error_message from .message_factory import MessageFactory -from .util import chain from .tombstone import Cemetery log = logging.getLogger('tchannel') @@ -138,6 +137,9 @@ def __init__(self, connection, tchannel=None, direction=None): self.tchannel = tchannel self._close_cb = None + self._write_queue = tornado.queues.Queue() + self._drain_write_queue() + connection.set_close_callback(self._on_close) def set_close_callback(self, cb): @@ -337,7 +339,12 @@ def write(self, message): fragments = message_factory.fragment(message) - return chain(fragments, self._write) + for fragment in fragments: + future = self._write(fragment) + + # We're done writing the message once our last future + # resolves. + return future def _write(self, message): """Writes the given message up the wire. @@ -358,8 +365,15 @@ def _write(self, message): payload=payload ) body = frame.frame_rw.write(f, BytesIO()).getvalue() + return self._write_queue.put(body) - return self.connection.write(body) + @tornado.gen.coroutine + def _drain_write_queue(self): + """Ensures ordering of our writes.""" + + while not self.closed: + body = yield self._write_queue.get() + yield self.connection.write(body) def close(self): if not self.closed: diff --git a/tchannel/tornado/util.py b/tchannel/tornado/util.py index 3c2197e1..a2953a69 100644 --- a/tchannel/tornado/util.py +++ b/tchannel/tornado/util.py @@ -39,54 +39,3 @@ def get_arg(context, index): raise tornado.gen.Return(arg) else: raise TChannelError() - - -def chain(iterable, func): - """Apply an asynchronous function to a list of items in order. - - Where previously you would write - - .. code-block:: python - - @gen.coroutine - def bar(): - for thing in things: - yield async_operation(thing) - - You can instead write: - - .. code-block:: python - - future = chain(things, async_operation) - - This is most useful in coroutine that are called frequently, as this is - significantly faster. - - Returns a future that resolves when all of the given futures have - completed. - - If any future raises an exception, the remainder of the chain will not be - processed, and the exception is propagated to the returned future. - """ - - all_done_future = tornado.concurrent.Future() - - generator = iter(iterable) - - def handle(future): - if future.exception(): - all_done_future.set_exc_info(future.exc_info()) - else: - go() - - def go(): - try: - arg = generator.next() - except StopIteration: - all_done_future.set_result(None) - else: - tornado.ioloop.IOLoop.current().add_future(func(arg), handle) - - go() - - return all_done_future From 7300b2ff77e8119e78a556ef88ed26f4c1ea69ad Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Thu, 3 Mar 2016 15:39:36 -0800 Subject: [PATCH 02/67] Add a failing test --- examples/sync/fanout/client.py | 59 ++++++++++++++++++++++++++++++++++ examples/sync/fanout/server.py | 44 +++++++++++++++++++++++++ tests/test_examples.py | 1 + 3 files changed, 104 insertions(+) create mode 100644 examples/sync/fanout/client.py create mode 100644 examples/sync/fanout/server.py diff --git a/examples/sync/fanout/client.py b/examples/sync/fanout/client.py new file mode 100644 index 00000000..4b441825 --- /dev/null +++ b/examples/sync/fanout/client.py @@ -0,0 +1,59 @@ +# Copyright (c) 2015 Uber Technologies, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +import json + +from tchannel import thrift +from tchannel.sync import TChannel + +tchannel = TChannel('thrift-client') +service = thrift.load( + path='tests/data/idls/ThriftTest.thrift', + service='thrift-server', + hostport='localhost:54498', +) + + +def make_requests(): + + # Fan-out + futures = [tchannel.thrift( + request=service.ThriftTest.testString(thing="req"), + headers={ + 'req': 'header', + }, + ) for _ in xrange(20)] + + # Fan-in + for future in futures: + response = future.result() + + return response + + +resp = make_requests() + +assert resp.headers == { + 'resp': 'header', +} +assert resp.body == 'resp' * 100000 + +print resp.body[:4] +print json.dumps(resp.headers) diff --git a/examples/sync/fanout/server.py b/examples/sync/fanout/server.py new file mode 100644 index 00000000..aa5c83ea --- /dev/null +++ b/examples/sync/fanout/server.py @@ -0,0 +1,44 @@ +# Copyright (c) 2015 Uber Technologies, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +from __future__ import absolute_import + +from tornado import gen, ioloop +from tchannel import TChannel, Response, thrift + +tchannel = TChannel('thrift-server', hostport='localhost:54498') +service = thrift.load('tests/data/idls/ThriftTest.thrift') + + +@tchannel.thrift.register(service.ThriftTest) +@gen.coroutine +def testString(request): + + assert request.headers == {'req': 'header'} + assert request.body.thing == 'req' + + return Response('resp' * 100000, headers={'resp': 'header'}) + + +tchannel.listen() + +print tchannel.hostport + +ioloop.IOLoop.current().start() diff --git a/tests/test_examples.py b/tests/test_examples.py index b9cfe359..01a77630 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -58,6 +58,7 @@ def popen(path, wait_for_listen=False): ('thrift', 'simple/thrift/'), ('bench', 'benchmark/thrift/'), ('guide', 'guide/keyvalue/keyvalue/'), + ('thrift', 'sync/fanout/'), ) ) def test_example(scheme, path): From bcc7239b7243f2fc7bf0eb522955af21def9e562 Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Fri, 4 Mar 2016 21:31:54 -0800 Subject: [PATCH 03/67] Add a workaround for handshake coordination --- tchannel/tornado/connection.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index b0781497..e1d3038a 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -138,7 +138,7 @@ def __init__(self, connection, tchannel=None, direction=None): self._close_cb = None self._write_queue = tornado.queues.Queue() - self._drain_write_queue() + self._draining = False connection.set_close_callback(self._on_close) @@ -243,6 +243,7 @@ def _loop(self): # # Must be started only after the handshake has been performed. self._loop_running = True + while not self.closed: message = yield self._recv() @@ -365,15 +366,24 @@ def _write(self, message): payload=payload ) body = frame.frame_rw.write(f, BytesIO()).getvalue() - return self._write_queue.put(body) + done_writing_future = tornado.gen.Future() + + self._write_queue.put((body, done_writing_future)) + + if not self._draining: + self._drain_write_queue() + + return done_writing_future @tornado.gen.coroutine def _drain_write_queue(self): """Ensures ordering of our writes.""" + self._draining = True while not self.closed: - body = yield self._write_queue.get() - yield self.connection.write(body) + body, done = yield self._write_queue.get() + result = yield self.connection.write(body) + done.set_result(result) def close(self): if not self.closed: @@ -389,7 +399,7 @@ def initiate_handshake(self, headers): A future that resolves (with a value of None) when the handshake is complete. """ - self._write(messages.InitRequestMessage( + yield self._write(messages.InitRequestMessage( version=PROTOCOL_VERSION, headers=headers )) @@ -423,7 +433,7 @@ def expect_handshake(self, headers): ) self._extract_handshake_headers(init_req) - self._write( + yield self._write( messages.InitResponseMessage( PROTOCOL_VERSION, headers, init_req.id), ) From 9cbcb57cd04bd45d258142f4eada58d2ade17a8f Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Sat, 5 Mar 2016 00:23:18 -0800 Subject: [PATCH 04/67] Break out reader/writer classes, fix tests --- tchannel/tornado/connection.py | 272 ++++++++++++++--------- tchannel/tornado/peer.py | 4 +- tests/conftest.py | 2 +- tests/integration/test_error_handling.py | 2 +- tests/tornado/conftest.py | 2 +- tests/tornado/test_connection.py | 41 ++-- 6 files changed, 192 insertions(+), 131 deletions(-) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index e1d3038a..692d93c9 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -111,9 +111,6 @@ def __init__(self, connection, tchannel=None, direction=None): self.remote_process_name = None self.requested_version = PROTOCOL_VERSION - # Tracks message IDs for this connection. - self._id_sequence = 0 - # We need to use two separate message factories to avoid message ID # collision while assembling fragmented messages. self.request_message_factory = MessageFactory(self.remote_host, @@ -137,8 +134,8 @@ def __init__(self, connection, tchannel=None, direction=None): self.tchannel = tchannel self._close_cb = None - self._write_queue = tornado.queues.Queue() - self._draining = False + self.reader = Reader(self.connection) + self.writer = Writer(self.connection) connection.set_close_callback(self._on_close) @@ -154,10 +151,6 @@ def set_close_callback(self, cb): ) self._close_cb = stack_context.wrap(cb) - def next_message_id(self): - self._id_sequence = (self._id_sequence + 1) % MAX_MESSAGE_ID - return self._id_sequence - def _on_close(self): self.closed = True self._request_tombstones.clear() @@ -188,53 +181,7 @@ def await(self): if self._loop_running: return self._messages.get() else: - return self._recv() - - def _recv(self): - """Receive the next message off the wire. - - :returns: - A Future that produces a Context object containing the next - message off the wire. - """ - - # This is the message_future we'll return for any inbound messages. - message_future = tornado.gen.Future() - io_loop = IOLoop.current() - - def on_body(read_body_future, size): - if read_body_future.exception(): - return on_error(read_body_future) - - body = read_body_future.result() - f = frame.frame_rw.read(BytesIO(body), size=size) - message_rw = messages.RW[f.header.message_type] - message = message_rw.read(BytesIO(f.payload)) - message.id = f.header.message_id - message_future.set_result(message) - - def on_read_size(read_size_future): - if read_size_future.exception(): - return on_error(read_size_future) - - size_bytes = read_size_future.result() - size = frame.frame_rw.size_rw.read(BytesIO(size_bytes)) - read_body_future = self.connection.read_bytes(size - size_width) - - io_loop.add_future( - read_body_future, - lambda future: on_body(future, size) - ) - - return read_body_future - - def on_error(future): - log.info("Failed to read data: %s", future.exception()) - - size_width = frame.frame_rw.size_rw.width() - read_bytes_future = self.connection.read_bytes(size_width) - io_loop.add_future(read_bytes_future, on_read_size) - return message_future + return self.reader.get() @tornado.gen.coroutine def _loop(self): @@ -245,7 +192,7 @@ def _loop(self): self._loop_running = True while not self.closed: - message = yield self._recv() + message = yield self.reader.get() # TODO: There should probably be a try-catch on the yield. if message.message_type in self.CALL_REQ_TYPES: @@ -313,7 +260,7 @@ def send(self, message): "Message '%s' can't use send" % repr(message) ) - message.id = message.id or self.next_message_id() + message.id = message.id or self.writer.next_message_id() assert message.id not in self._outstanding, ( "Message ID '%d' already being used" % message.id ) @@ -331,7 +278,7 @@ def write(self, message): :param message: Message to write. """ - message.id = message.id or self.next_message_id() + message.id = message.id or self.writer.next_message_id() if message.message_type in self.CALL_REQ_TYPES: message_factory = self.request_message_factory @@ -341,50 +288,12 @@ def write(self, message): fragments = message_factory.fragment(message) for fragment in fragments: - future = self._write(fragment) + future = self.writer.put(fragment) # We're done writing the message once our last future # resolves. return future - def _write(self, message): - """Writes the given message up the wire. - - The message must be small enough to fit in a single frame. - """ - message.id = message.id or self.next_message_id() - - payload = messages.RW[message.message_type].write( - message, BytesIO() - ).getvalue() - - f = frame.Frame( - header=frame.FrameHeader( - message_type=message.message_type, - message_id=message.id, - ), - payload=payload - ) - body = frame.frame_rw.write(f, BytesIO()).getvalue() - done_writing_future = tornado.gen.Future() - - self._write_queue.put((body, done_writing_future)) - - if not self._draining: - self._drain_write_queue() - - return done_writing_future - - @tornado.gen.coroutine - def _drain_write_queue(self): - """Ensures ordering of our writes.""" - self._draining = True - - while not self.closed: - body, done = yield self._write_queue.get() - result = yield self.connection.write(body) - done.set_result(result) - def close(self): if not self.closed: self.connection.close() @@ -399,11 +308,11 @@ def initiate_handshake(self, headers): A future that resolves (with a value of None) when the handshake is complete. """ - yield self._write(messages.InitRequestMessage( + self.writer.put(messages.InitRequestMessage( version=PROTOCOL_VERSION, headers=headers )) - init_res = yield self._recv() + init_res = yield self.reader.get() if init_res.message_type != Types.INIT_RES: raise errors.InvalidMessageError( "Expected handshake response, got %s" % repr(init_res) @@ -426,14 +335,14 @@ def expect_handshake(self, headers): A future that resolves (with a value of None) when the handshake is complete. """ - init_req = yield self._recv() + init_req = yield self.reader.get() if init_req.message_type != Types.INIT_REQ: raise errors.InvalidMessageError( "You need to shake my hand first. Got %s" % repr(init_req) ) self._extract_handshake_headers(init_req) - yield self._write( + self.writer.put( messages.InitResponseMessage( PROTOCOL_VERSION, headers, init_req.id), ) @@ -544,7 +453,7 @@ def send_error(self, error): """ error_message = build_raw_error_message(error) - write_future = self._write(error_message) + write_future = self.writer.put(error_message) write_future.add_done_callback( lambda f: self.tchannel.event_emitter.fire( EventType.after_send_error, @@ -554,10 +463,10 @@ def send_error(self, error): return write_future def ping(self): - return self._write(messages.PingRequestMessage()) + return self.writer.put(messages.PingRequestMessage()) def pong(self): - return self._write(messages.PingResponseMessage()) + return self.writer.put(messages.PingResponseMessage()) class StreamConnection(TornadoConnection): @@ -725,3 +634,156 @@ def _request_timed_out(self, req_id, req_ttl, future): # while. future.set_exception(errors.TimeoutError()) self._request_tombstones.add(req_id, req_ttl) + + +class Reader(object): + + def __init__(self, io_stream): + self.queue = tornado.queues.Queue() + self.filling = False + self.io_stream = io_stream + + def fill(self): + self.filling = True + + io_loop = IOLoop.current() + + def keep_reading(f): + if f.exception(): + return log(f.exception()) + # connect these two in the case when put blocks + self.queue.put(f.result()) + io_loop.spawn_callback(self.fill) + + io_loop.add_future(self._dequeue(), keep_reading) + + def get(self): + """Receive the next message off the wire. + + :returns: + A Future that produces a Context object containing the next + message off the wire. + """ + if self.filling is False: + self.fill() + + return self.queue.get() + + def _dequeue(self): + # This is the message_future we'll return for any inbound messages. + message_future = tornado.gen.Future() + io_loop = IOLoop.current() + + def on_body(read_body_future, size): + if read_body_future.exception(): + return on_error(read_body_future) + + body = read_body_future.result() + f = frame.frame_rw.read(BytesIO(body), size=size) + message_rw = messages.RW[f.header.message_type] + message = message_rw.read(BytesIO(f.payload)) + message.id = f.header.message_id + message_future.set_result(message) + + def on_read_size(read_size_future): + if read_size_future.exception(): + return on_error(read_size_future) + + size_bytes = read_size_future.result() + size = frame.frame_rw.size_rw.read(BytesIO(size_bytes)) + read_body_future = self.io_stream.read_bytes(size - size_width) + + io_loop.add_future( + read_body_future, + lambda future: on_body(future, size) + ) + + return read_body_future + + def on_error(future): + log.info("Failed to read data: %s", future.exception()) + + size_width = frame.frame_rw.size_rw.width() + read_bytes_future = self.io_stream.read_bytes(size_width) + io_loop.add_future(read_bytes_future, on_read_size) + + return message_future + + +class Writer(object): + + def __init__(self, io_stream): + self.queue = tornado.queues.Queue() + self.draining = False + self.io_stream = io_stream + # Tracks message IDs for this connection. + self._id_sequence = 0 + + def drain(self): + self.draining = True + + io_loop = IOLoop.current() + + def on_write(f, done): + if f.exception(): + log.error(f.exception()) + done.set_exception(f.exception()) + else: + done.set_result(f.result()) + + io_loop.spawn_callback(next_write) + + def on_message(f): + if f.exception(): + io_loop.spawn_callback(next_write) + log.error(f.exception()) + return + message, done = f.result() + io_loop.add_future( + self.io_stream.write(message), + lambda f: on_write(f, done), + ) + + def next_write(): + if self.io_stream.closed(): + return + + io_loop.add_future(self.queue.get(), on_message) + + io_loop.spawn_callback(next_write) + + def put(self, message): + """Enqueues the given message for writing to the wire. + + The message must be small enough to fit in a single frame. + """ + if self.draining is False: + self.drain() + + return self._enqueue(message) + + def next_message_id(self): + self._id_sequence = (self._id_sequence + 1) % MAX_MESSAGE_ID + return self._id_sequence + + def _enqueue(self, message): + message.id = message.id or self.next_message_id() + + payload = messages.RW[message.message_type].write( + message, BytesIO() + ).getvalue() + + f = frame.Frame( + header=frame.FrameHeader( + message_type=message.message_type, + message_id=message.id, + ), + payload=payload + ) + body = frame.frame_rw.write(f, BytesIO()).getvalue() + + done_writing_future = tornado.gen.Future() + + self.queue.put((body, done_writing_future)) + + return done_writing_future diff --git a/tchannel/tornado/peer.py b/tchannel/tornado/peer.py index 61aad961..71323957 100644 --- a/tchannel/tornado/peer.py +++ b/tchannel/tornado/peer.py @@ -357,7 +357,7 @@ def send( request = Request( service=self.service, argstreams=[InMemStream(endpoint), arg2, arg3], - id=connection.next_message_id(), + id=connection.writer.next_message_id(), headers=headers, endpoint=endpoint, ttl=ttl, @@ -475,7 +475,7 @@ def prepare_next_request(self, request, blacklist): connection = yield peer.connect() # roll back request - request.rewind(connection.next_message_id()) + request.rewind(connection.writer.next_message_id()) raise gen.Return((peer, connection)) diff --git a/tests/conftest.py b/tests/conftest.py index d5313ef6..193dcf64 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -47,7 +47,7 @@ def connection(): @pytest.yield_fixture -def mock_server(): +def mock_server(io_loop): with MockServer() as server: yield server diff --git a/tests/integration/test_error_handling.py b/tests/integration/test_error_handling.py index e7723213..879520b5 100644 --- a/tests/integration/test_error_handling.py +++ b/tests/integration/test_error_handling.py @@ -114,7 +114,7 @@ def test_invalid_message_during_streaming(mock_server): # bypass the default checksum calculation # set a wrong checksum callreqcontinue.checksum = (ChecksumType.crc32c, 1) - yield connection._write(callreqcontinue) + yield connection.writer.put(callreqcontinue) with pytest.raises(FatalProtocolError) as e: resp = yield resp_future diff --git a/tests/tornado/conftest.py b/tests/tornado/conftest.py index cebd4551..b6a80005 100644 --- a/tests/tornado/conftest.py +++ b/tests/tornado/conftest.py @@ -29,7 +29,7 @@ @pytest.yield_fixture -def tornado_pair(): +def tornado_pair(io_loop): server, client = socket.socketpair() server_stream = tornado.iostream.IOStream(server) diff --git a/tests/tornado/test_connection.py b/tests/tornado/test_connection.py index edf501a9..5fe9adaf 100644 --- a/tests/tornado/test_connection.py +++ b/tests/tornado/test_connection.py @@ -23,7 +23,6 @@ import mock import pytest import tornado.ioloop -import tornado.testing from tornado import gen from tchannel import TChannel @@ -39,33 +38,33 @@ def dummy_headers(): } -class ConnectionTestCase(tornado.testing.AsyncTestCase): - @pytest.fixture(autouse=True) - def make_server_client(self, tornado_pair): - self.server, self.client = tornado_pair +@pytest.mark.gen_test +def test_handshake(tornado_pair): + """Verify we handshake in an async manner.""" + server, client = tornado_pair + headers = dummy_headers() + + print 'IOLOOP from test is', tornado.ioloop.IOLoop.current() - @tornado.testing.gen_test - def test_handshake(self): - """Verify we handshake in an async manner.""" - headers = dummy_headers() + client.initiate_handshake(headers=headers) + yield server.expect_handshake(headers=headers) - self.client.initiate_handshake(headers=headers) - yield self.server.expect_handshake(headers=headers) + assert client.requested_version == server.requested_version - assert self.client.requested_version == self.server.requested_version - @tornado.testing.gen_test - def test_pings(self): - """Verify calls are sent to handler properly.""" - self.client.ping() +@pytest.mark.gen_test +def test_pings(tornado_pair): + """Verify calls are sent to handler properly.""" + server, client = tornado_pair + client.ping() - ping = yield self.server.await() - assert ping.message_type == Types.PING_REQ + ping = yield server.await() + assert ping.message_type == Types.PING_REQ - self.server.pong() + server.pong() - pong = yield self.client.await() - assert pong.message_type == Types.PING_RES + pong = yield client.await() + assert pong.message_type == Types.PING_RES @pytest.mark.gen_test From 67e249105a7b988f129a7603d53b9016c9f906f8 Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Sat, 5 Mar 2016 16:49:22 -0800 Subject: [PATCH 05/67] Catch an error condition with queue.put --- tchannel/tornado/connection.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index 692d93c9..135f5724 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -290,8 +290,7 @@ def write(self, message): for fragment in fragments: future = self.writer.put(fragment) - # We're done writing the message once our last future - # resolves. + # We're done writing the message once our last future resolves. return future def close(self): @@ -784,6 +783,12 @@ def _enqueue(self, message): done_writing_future = tornado.gen.Future() - self.queue.put((body, done_writing_future)) + def on_queue_error(f): + if f.exception(): + done_writing_future.set_exception(f.exception()) + + self.queue.put( + (body, done_writing_future) + ).add_done_callback(on_queue_error) return done_writing_future From 17f906d0aed73c64e837bf44c7c388866dc16fc3 Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Sun, 6 Mar 2016 09:42:50 -0800 Subject: [PATCH 06/67] Add some backpressure --- tchannel/tornado/connection.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index 135f5724..f3e7bf6c 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -637,8 +637,8 @@ def _request_timed_out(self, req_id, req_ttl, future): class Reader(object): - def __init__(self, io_stream): - self.queue = tornado.queues.Queue() + def __init__(self, io_stream, capacity=64): + self.queue = tornado.queues.Queue(capacity) self.filling = False self.io_stream = io_stream @@ -711,8 +711,8 @@ def on_error(future): class Writer(object): - def __init__(self, io_stream): - self.queue = tornado.queues.Queue() + def __init__(self, io_stream, capacity=64): + self.queue = tornado.queues.Queue(capacity) self.draining = False self.io_stream = io_stream # Tracks message IDs for this connection. From ea023d630c63181ec60b97879d84fd07ed7f300c Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Sun, 6 Mar 2016 10:30:11 -0800 Subject: [PATCH 07/67] Add a roundtrip benchmark --- .travis.yml | 1 + Makefile | 4 +++ benchmarks/test_roundtrip.py | 57 ++++++++++++++++++++++++++++++++++++ tox.ini | 3 ++ 4 files changed, 65 insertions(+) create mode 100644 benchmarks/test_roundtrip.py diff --git a/.travis.yml b/.travis.yml index 3ce97ba2..0328e5d4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,6 +19,7 @@ env: - TOX_ENV=cover - TOX_ENV=flake8 - TOX_ENV=docs + - TOX_ENV=benchmark install: - make install diff --git a/Makefile b/Makefile index e1257765..7565c692 100644 --- a/Makefile +++ b/Makefile @@ -48,6 +48,10 @@ test: clean test_ci: clean tox -e $(TOX_ENV) -- tests +.PHONY: benchmark +benchmark: + py.test benchmarks --benchmark-autosave --benchmark-save-data --benchmark-warmup --benchmark-disable-gc --benchmark-histogram + .PHONY: testhtml testhtml: clean $(pytest) $(html_report) && open htmlcov/index.html diff --git a/benchmarks/test_roundtrip.py b/benchmarks/test_roundtrip.py new file mode 100644 index 00000000..c1eb7d87 --- /dev/null +++ b/benchmarks/test_roundtrip.py @@ -0,0 +1,57 @@ +# Copyright (c) 2015 Uber Technologies, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +from tornado import ioloop, gen + +from tchannel import TChannel, thrift + + +def test_roundtrip(benchmark): + loop = ioloop.IOLoop.current() + + server = TChannel('benchmark-server') + server.listen() + + clients = [TChannel('benchmark-client') for _ in range(10)] + + service = thrift.load( + path='examples/guide/keyvalue/service.thrift', + service='benchmark-server', + ) + service.hostport = server.hostport + + @server.thrift.register(service.KeyValue) + def getValue(request): + return 'bar' + + def roundtrip(): + @gen.coroutine + def doit(): + yield [ + client.thrift(service.KeyValue.getValue("foo")) + for client in clients + ] + + return loop.run_sync(doit) + + # Establish initial connection + roundtrip() + + benchmark(roundtrip) diff --git a/tox.ini b/tox.ini index 76649d9c..83941bf6 100644 --- a/tox.ini +++ b/tox.ini @@ -23,6 +23,9 @@ basepython = python commands = py.test --cov tchannel --cov-report=xml --cov-report=term-missing {posargs} +[testenv:benchmark] +basepython = python +commands = make benchmark [testenv:docs] basepython = python From 3097de22ca2e22b6b28ca008258c94a35a7bf49d Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Sun, 6 Mar 2016 21:05:51 -0800 Subject: [PATCH 08/67] Add fail_to decorator to help with future-callback-style Tornado code. --- tchannel/_future.py | 78 ++++++++++++++++++++++++++++++++++++++++++++ tests/test_future.py | 72 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 150 insertions(+) create mode 100644 tchannel/_future.py create mode 100644 tests/test_future.py diff --git a/tchannel/_future.py b/tchannel/_future.py new file mode 100644 index 00000000..cf11d96c --- /dev/null +++ b/tchannel/_future.py @@ -0,0 +1,78 @@ +# Copyright (c) 2015 Uber Technologies, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +from __future__ import ( + absolute_import, unicode_literals, division, print_function +) + +import sys +from functools import wraps + +from tornado.gen import is_future + + +def fail_to(future): + """A decorator for function callbacks to catch uncaught non-async + exceptions and forward them to the given future. + + The primary use for this is to catch exceptions in async callbacks and + propagate them to futures. For example, consider, + + .. code-block:: python + + answer = Future() + + def on_done(future): + foo = bar() + answer.set_result(foo) + + some_async_operation().add_done_callback(on_done) + + If ``bar()`` fails, ``answer`` will never get filled with an exception or + a result. Now if we change ``on_done`` to, + + .. code-block:: python + + @fail_to(answer) + def on_done(future): + foo = bar() + answer.set_result(foo) + + Uncaught exceptions in ``on_done`` will be caught and propagated to + ``answer``. Note that ``on_done`` will return None if an exception was + caught. + + :param answer: + Future to which the result will be written. + """ + assert is_future(future), 'you forgot to pass a future' + + def decorator(f): + + @wraps(f) + def new_f(*args, **kwargs): + try: + return f(*args, **kwargs) + except Exception: + future.set_exc_info(sys.exc_info()) + + return new_f + + return decorator diff --git a/tests/test_future.py b/tests/test_future.py new file mode 100644 index 00000000..01dfd8bd --- /dev/null +++ b/tests/test_future.py @@ -0,0 +1,72 @@ +# Copyright (c) 2015 Uber Technologies, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +from __future__ import ( + absolute_import, unicode_literals, division, print_function +) + +from tornado import gen + +import pytest + +from tchannel._future import fail_to + + +def test_fail_to_no_failure(): + answer = gen.Future() + + @fail_to(answer) + def f(): + return 42 + + assert f() == 42 + assert answer.running() + + +@pytest.mark.gen_test +def test_fail_to_failure(): + answer = gen.Future() + + @fail_to(answer) + def f(): + raise GreatSadness + + assert f() is None + with pytest.raises(GreatSadness): + yield answer + + +@pytest.mark.gen_test +@pytest.mark.gen_test +def test_fail_to_failure_in_coroutine(): + answer = gen.Future() + + @fail_to(answer) + @gen.coroutine + def f(): + raise GreatSadness + + with pytest.raises(GreatSadness): + yield f() + assert answer.running() + + +class GreatSadness(Exception): + pass From 4eaa2922092893ec87a9983ce911956437b12792 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Sun, 6 Mar 2016 21:17:34 -0800 Subject: [PATCH 09/67] Move message reading logic to separate function. --- tchannel/tornado/connection.py | 98 +++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 42 deletions(-) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index f3e7bf6c..24923bea 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -24,7 +24,7 @@ import os import socket import sys - +from functools import partial import tornado.gen import tornado.iostream @@ -37,6 +37,7 @@ from .. import errors from .. import frame from .. import messages +from .._future import fail_to from ..errors import NetworkError from ..errors import TChannelError from ..event import EventType @@ -654,7 +655,7 @@ def keep_reading(f): self.queue.put(f.result()) io_loop.spawn_callback(self.fill) - io_loop.add_future(self._dequeue(), keep_reading) + io_loop.add_future(read_message(self.io_stream), keep_reading) def get(self): """Receive the next message off the wire. @@ -668,46 +669,6 @@ def get(self): return self.queue.get() - def _dequeue(self): - # This is the message_future we'll return for any inbound messages. - message_future = tornado.gen.Future() - io_loop = IOLoop.current() - - def on_body(read_body_future, size): - if read_body_future.exception(): - return on_error(read_body_future) - - body = read_body_future.result() - f = frame.frame_rw.read(BytesIO(body), size=size) - message_rw = messages.RW[f.header.message_type] - message = message_rw.read(BytesIO(f.payload)) - message.id = f.header.message_id - message_future.set_result(message) - - def on_read_size(read_size_future): - if read_size_future.exception(): - return on_error(read_size_future) - - size_bytes = read_size_future.result() - size = frame.frame_rw.size_rw.read(BytesIO(size_bytes)) - read_body_future = self.io_stream.read_bytes(size - size_width) - - io_loop.add_future( - read_body_future, - lambda future: on_body(future, size) - ) - - return read_body_future - - def on_error(future): - log.info("Failed to read data: %s", future.exception()) - - size_width = frame.frame_rw.size_rw.width() - read_bytes_future = self.io_stream.read_bytes(size_width) - io_loop.add_future(read_bytes_future, on_read_size) - - return message_future - class Writer(object): @@ -792,3 +753,56 @@ def on_queue_error(f): ).add_done_callback(on_queue_error) return done_writing_future + +############################################################################## + + +FRAME_SIZE_WIDTH = frame.frame_rw.size_rw.width() + + +def read_message(stream): + """Reads a message from the given IOStream. + + :param IOStream stream: + IOStream to read from. + """ + answer = tornado.gen.Future() + io_loop = IOLoop.current() + + def on_error(future): + log.info('Failed to read data: %s', future.exception()) + return answer.set_exc_info(future.exc_info()) + + @fail_to(answer) + def on_body(size, future): + if future.exception(): + return on_error(future) + + body = future.result() + f = frame.frame_rw.read(BytesIO(body), size=size) + message_type = f.header.message_type + message_rw = messages.RW.get(message_type) + if not message_rw: + exc = errors.FatalProtocolError( + 'Unknown message type %s', str(message_type) + ) + return answer.set_exception(exc) + + message = message_rw.read(BytesIO(f.payload)) + message.id = f.header.message_id + answer.set_result(message) + + @fail_to(answer) + def on_read_size(future): + if future.exception(): + return answer.set_exc_info(future.exc_info()) + + size_bytes = future.result() + size = frame.frame_rw.size_rw.read(BytesIO(size_bytes)) + io_loop.add_future( + stream.read_bytes(size - FRAME_SIZE_WIDTH), + partial(on_body, size), + ) + + io_loop.add_future(stream.read_bytes(FRAME_SIZE_WIDTH), on_read_size) + return answer From 5622497de087dbf626cf2b22af0dada9aa00e256 Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Mon, 7 Mar 2016 17:00:57 -0800 Subject: [PATCH 10/67] Use exc_info in a couple spots --- tchannel/tornado/connection.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index 24923bea..b035c42f 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -601,9 +601,9 @@ def adapt_result(self, f, request, response_future): return if f.exception(): - protocol_exception = f.exception() + cls, protocol_exception, tb = f.exc_info() protocol_exception.tracing = request.tracing - response_future.set_exception(protocol_exception) + response_future.set_exc_info((cls, protocol_exception, tb)) else: response = f.result() @@ -650,6 +650,7 @@ def fill(self): def keep_reading(f): if f.exception(): + self.filling = False return log(f.exception()) # connect these two in the case when put blocks self.queue.put(f.result()) @@ -687,7 +688,7 @@ def drain(self): def on_write(f, done): if f.exception(): log.error(f.exception()) - done.set_exception(f.exception()) + done.set_exc_info(f.exc_info()) else: done.set_result(f.result()) @@ -746,7 +747,7 @@ def _enqueue(self, message): def on_queue_error(f): if f.exception(): - done_writing_future.set_exception(f.exception()) + done_writing_future.set_exc_info(f.exc_info()) self.queue.put( (body, done_writing_future) From 0a280effd864dbfc41afb3e480cf45bb67a3e41a Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Mon, 7 Mar 2016 17:03:03 -0800 Subject: [PATCH 11/67] Changelog --- CHANGES.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index f1c3e5d5..fa7cbffb 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,6 +6,8 @@ Changes by Version - Tornado 4.2 was listed as a requirement but this was corrected to be 4.3 which introduced the locks module. +- Fixed in issue where clients could incorrectly time out when reading large + response bodies. 0.21.4 (2016-02-15) From 66cba92babe715222fc53221d4ea3e55b7f1f1b9 Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Mon, 7 Mar 2016 17:45:56 -0800 Subject: [PATCH 12/67] Generate 10 requests per client --- benchmarks/test_roundtrip.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/benchmarks/test_roundtrip.py b/benchmarks/test_roundtrip.py index c1eb7d87..73c60b66 100644 --- a/benchmarks/test_roundtrip.py +++ b/benchmarks/test_roundtrip.py @@ -23,6 +23,12 @@ from tchannel import TChannel, thrift +service = thrift.load( + path='examples/guide/keyvalue/service.thrift', + service='benchmark-server', +) + + def test_roundtrip(benchmark): loop = ioloop.IOLoop.current() @@ -31,12 +37,6 @@ def test_roundtrip(benchmark): clients = [TChannel('benchmark-client') for _ in range(10)] - service = thrift.load( - path='examples/guide/keyvalue/service.thrift', - service='benchmark-server', - ) - service.hostport = server.hostport - @server.thrift.register(service.KeyValue) def getValue(request): return 'bar' @@ -44,10 +44,17 @@ def getValue(request): def roundtrip(): @gen.coroutine def doit(): - yield [ - client.thrift(service.KeyValue.getValue("foo")) - for client in clients - ] + futures = [] + # 10 clients send 10 requests concurrently + for client in clients: + for _ in range(10): + futures.append( + client.thrift( + service.KeyValue.getValue("foo"), + hostport=server.hostport, + ) + ) + yield futures return loop.run_sync(doit) From 713c91e43d2af6d18e199992bb9dac84f1050a6a Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Tue, 8 Mar 2016 13:06:26 -0800 Subject: [PATCH 13/67] abg feedback --- tchannel/tornado/connection.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index b035c42f..4e6bf992 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -638,7 +638,9 @@ def _request_timed_out(self, req_id, req_ttl, future): class Reader(object): - def __init__(self, io_stream, capacity=64): + def __init__(self, io_stream, capacity=None): + capacity = capacity or 64 + self.queue = tornado.queues.Queue(capacity) self.filling = False self.io_stream = io_stream @@ -653,8 +655,9 @@ def keep_reading(f): self.filling = False return log(f.exception()) # connect these two in the case when put blocks - self.queue.put(f.result()) - io_loop.spawn_callback(self.fill) + self.queue.put(f.result()).add_done_callback( + lambda f: io_loop.spawn_callback(self.fill), + ) io_loop.add_future(read_message(self.io_stream), keep_reading) @@ -662,10 +665,9 @@ def get(self): """Receive the next message off the wire. :returns: - A Future that produces a Context object containing the next - message off the wire. + A Future that resolves to the next message off the wire. """ - if self.filling is False: + if not self.filling: self.fill() return self.queue.get() @@ -673,7 +675,9 @@ def get(self): class Writer(object): - def __init__(self, io_stream, capacity=64): + def __init__(self, io_stream, capacity=None): + capacity = capacity or 64 + self.queue = tornado.queues.Queue(capacity) self.draining = False self.io_stream = io_stream @@ -748,6 +752,8 @@ def _enqueue(self, message): def on_queue_error(f): if f.exception(): done_writing_future.set_exc_info(f.exc_info()) + else: + done_writing_future.set_result(None) self.queue.put( (body, done_writing_future) From debf2848b5afa1ccd4d5f6aa7972a66b86797ecb Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Tue, 8 Mar 2016 13:16:49 -0800 Subject: [PATCH 14/67] More changelog --- CHANGES.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index fa7cbffb..d03d5fca 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -7,7 +7,8 @@ Changes by Version - Tornado 4.2 was listed as a requirement but this was corrected to be 4.3 which introduced the locks module. - Fixed in issue where clients could incorrectly time out when reading large - response bodies. + response bodies. This was due to response fragments being dropped due to + out-of-order writes; writes are now serialized on a per-connection basis. 0.21.4 (2016-02-15) From c79bd974d36568063376066ce12a70f8c6198542 Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Tue, 8 Mar 2016 13:18:45 -0800 Subject: [PATCH 15/67] Remove a print statement --- tests/tornado/test_connection.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/tornado/test_connection.py b/tests/tornado/test_connection.py index 5fe9adaf..bd5fb7dc 100644 --- a/tests/tornado/test_connection.py +++ b/tests/tornado/test_connection.py @@ -44,8 +44,6 @@ def test_handshake(tornado_pair): server, client = tornado_pair headers = dummy_headers() - print 'IOLOOP from test is', tornado.ioloop.IOLoop.current() - client.initiate_handshake(headers=headers) yield server.expect_handshake(headers=headers) From 40413752a8d885b0a1b4886c1b13f4cb6443e651 Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Tue, 8 Mar 2016 13:42:57 -0800 Subject: [PATCH 16/67] Remove some dead code --- tchannel/tornado/connection.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index 4e6bf992..21a4210b 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -752,8 +752,6 @@ def _enqueue(self, message): def on_queue_error(f): if f.exception(): done_writing_future.set_exc_info(f.exc_info()) - else: - done_writing_future.set_result(None) self.queue.put( (body, done_writing_future) From ee9b8ce0571e9e296b90bb2f347d056c95db7b28 Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Tue, 8 Mar 2016 15:55:57 -0800 Subject: [PATCH 17/67] Preparing release 0.21.5 --- CHANGES.rst | 2 +- tchannel/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index d03d5fca..10cab5ac 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,7 +1,7 @@ Changes by Version ================== -0.21.5 (unreleased) +0.21.5 (2016-03-08) ------------------- - Tornado 4.2 was listed as a requirement but this was corrected to be 4.3 diff --git a/tchannel/__init__.py b/tchannel/__init__.py index a14967a9..66d58160 100644 --- a/tchannel/__init__.py +++ b/tchannel/__init__.py @@ -22,7 +22,7 @@ absolute_import, division, print_function, unicode_literals ) -__version__ = '0.21.5.dev0' +__version__ = '0.21.5' # Update setup.py when changing this. zest.releaser doesn't support updating # both of them yet. From 8d1c78afcceb8f1f66192ae8f9cc4bc50cdc397e Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Tue, 8 Mar 2016 15:57:08 -0800 Subject: [PATCH 18/67] Back to development: 0.21.6 --- CHANGES.rst | 6 ++++++ tchannel/__init__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 10cab5ac..f4224722 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,12 @@ Changes by Version ================== +0.21.6 (unreleased) +------------------- + +- Nothing changed yet. + + 0.21.5 (2016-03-08) ------------------- diff --git a/tchannel/__init__.py b/tchannel/__init__.py index 66d58160..ebd8ac86 100644 --- a/tchannel/__init__.py +++ b/tchannel/__init__.py @@ -22,7 +22,7 @@ absolute_import, division, print_function, unicode_literals ) -__version__ = '0.21.5' +__version__ = '0.21.6.dev0' # Update setup.py when changing this. zest.releaser doesn't support updating # both of them yet. From c756c145fac740a67b846d370b8fc4fd62af33e5 Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Tue, 8 Mar 2016 16:08:21 -0800 Subject: [PATCH 19/67] Fix a logging error on failed reads --- CHANGES.rst | 2 +- tchannel/tornado/connection.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index f4224722..29ee6f7b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,7 +4,7 @@ Changes by Version 0.21.6 (unreleased) ------------------- -- Nothing changed yet. +- Fixed a logging error on failed reads. 0.21.5 (2016-03-08) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index 21a4210b..5823cc01 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -653,7 +653,7 @@ def fill(self): def keep_reading(f): if f.exception(): self.filling = False - return log(f.exception()) + return log.exception(f.exception()) # connect these two in the case when put blocks self.queue.put(f.result()).add_done_callback( lambda f: io_loop.spawn_callback(self.fill), From 5bf6a78c72e40d261d991f19d09a32c06175bcf6 Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Tue, 8 Mar 2016 16:20:21 -0800 Subject: [PATCH 20/67] Preparing release 0.21.6 --- CHANGES.rst | 2 +- tchannel/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 29ee6f7b..0008edf4 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,7 +1,7 @@ Changes by Version ================== -0.21.6 (unreleased) +0.21.6 (2016-03-08) ------------------- - Fixed a logging error on failed reads. diff --git a/tchannel/__init__.py b/tchannel/__init__.py index ebd8ac86..600511e5 100644 --- a/tchannel/__init__.py +++ b/tchannel/__init__.py @@ -22,7 +22,7 @@ absolute_import, division, print_function, unicode_literals ) -__version__ = '0.21.6.dev0' +__version__ = '0.21.6' # Update setup.py when changing this. zest.releaser doesn't support updating # both of them yet. From e5aaed332890037de9d655f55e7c6a9991dbab8d Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Tue, 8 Mar 2016 16:22:12 -0800 Subject: [PATCH 21/67] Back to development: 0.21.7 --- CHANGES.rst | 6 ++++++ tchannel/__init__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 0008edf4..b408d1fb 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,12 @@ Changes by Version ================== +0.21.7 (unreleased) +------------------- + +- Nothing changed yet. + + 0.21.6 (2016-03-08) ------------------- diff --git a/tchannel/__init__.py b/tchannel/__init__.py index 600511e5..8b429f03 100644 --- a/tchannel/__init__.py +++ b/tchannel/__init__.py @@ -22,7 +22,7 @@ absolute_import, division, print_function, unicode_literals ) -__version__ = '0.21.6' +__version__ = '0.21.7.dev0' # Update setup.py when changing this. zest.releaser doesn't support updating # both of them yet. From cae8d2632590984cc1f453dbbdd47cec8f809665 Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Tue, 8 Mar 2016 17:03:00 -0800 Subject: [PATCH 22/67] Fix an unhelpful error message --- CHANGES.rst | 2 +- tchannel/tornado/connection.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index b408d1fb..54254221 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,7 +4,7 @@ Changes by Version 0.21.7 (unreleased) ------------------- -- Nothing changed yet. +- Fixed an unhelpful stack trace on failed reads. 0.21.6 (2016-03-08) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index 5823cc01..d1e59ef7 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -653,7 +653,7 @@ def fill(self): def keep_reading(f): if f.exception(): self.filling = False - return log.exception(f.exception()) + return log.error("read error", exc_info=f.exc_info()) # connect these two in the case when put blocks self.queue.put(f.result()).add_done_callback( lambda f: io_loop.spawn_callback(self.fill), From 7e244302f4e0cae8058d6ff5dc1b3541b1f4f8bb Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Tue, 8 Mar 2016 17:11:01 -0800 Subject: [PATCH 23/67] Preparing release 0.21.7 --- CHANGES.rst | 2 +- tchannel/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 54254221..0746e954 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,7 +1,7 @@ Changes by Version ================== -0.21.7 (unreleased) +0.21.7 (2016-03-08) ------------------- - Fixed an unhelpful stack trace on failed reads. diff --git a/tchannel/__init__.py b/tchannel/__init__.py index 8b429f03..c1647fae 100644 --- a/tchannel/__init__.py +++ b/tchannel/__init__.py @@ -22,7 +22,7 @@ absolute_import, division, print_function, unicode_literals ) -__version__ = '0.21.7.dev0' +__version__ = '0.21.7' # Update setup.py when changing this. zest.releaser doesn't support updating # both of them yet. From 42531e59523404280a7c67d246016183c271678c Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Tue, 8 Mar 2016 17:13:19 -0800 Subject: [PATCH 24/67] Back to development: 0.21.8 --- CHANGES.rst | 6 ++++++ tchannel/__init__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 0746e954..a849a3e7 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,12 @@ Changes by Version ================== +0.21.8 (unreleased) +------------------- + +- Nothing changed yet. + + 0.21.7 (2016-03-08) ------------------- diff --git a/tchannel/__init__.py b/tchannel/__init__.py index c1647fae..8694aeab 100644 --- a/tchannel/__init__.py +++ b/tchannel/__init__.py @@ -22,7 +22,7 @@ absolute_import, division, print_function, unicode_literals ) -__version__ = '0.21.7' +__version__ = '0.21.8.dev0' # Update setup.py when changing this. zest.releaser doesn't support updating # both of them yet. From e32bef6a5a9d8aab03fc7f208217a9d41394d189 Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Tue, 8 Mar 2016 18:09:32 -0800 Subject: [PATCH 25/67] Reduce read errors to info --- tchannel/tornado/connection.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index d1e59ef7..b30fee3e 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -653,7 +653,8 @@ def fill(self): def keep_reading(f): if f.exception(): self.filling = False - return log.error("read error", exc_info=f.exc_info()) + # This is usually StreamClosed due to a client disconnecting. + return log.info("read error", exc_info=f.exc_info()) # connect these two in the case when put blocks self.queue.put(f.result()).add_done_callback( lambda f: io_loop.spawn_callback(self.fill), From b96e492e66424145656183963cfaa3ec9930b0fa Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Tue, 8 Mar 2016 18:18:14 -0800 Subject: [PATCH 26/67] Changes --- CHANGES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index a849a3e7..a887374d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,7 +4,7 @@ Changes by Version 0.21.8 (unreleased) ------------------- -- Nothing changed yet. +- Reduce read errors due to clients disconnecting to INFO from ERROR. 0.21.7 (2016-03-08) From aadd3eba5b423cc86f299e7088e996a4767dd7d8 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Wed, 9 Mar 2016 16:37:03 -0800 Subject: [PATCH 27/67] Lower read errors to INFO on StreamClosedError only. --- tchannel/tornado/connection.py | 5 ++++- tests/tornado/test_connection.py | 33 ++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index b30fee3e..40239fae 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -654,7 +654,10 @@ def keep_reading(f): if f.exception(): self.filling = False # This is usually StreamClosed due to a client disconnecting. - return log.info("read error", exc_info=f.exc_info()) + if isinstance(f.exception(), StreamClosedError): + return log.info("read error", exc_info=f.exc_info()) + else: + return log.error("read error", exc_info=f.exc_info()) # connect these two in the case when put blocks self.queue.put(f.result()).add_done_callback( lambda f: io_loop.spawn_callback(self.fill), diff --git a/tests/tornado/test_connection.py b/tests/tornado/test_connection.py index bd5fb7dc..26860c5f 100644 --- a/tests/tornado/test_connection.py +++ b/tests/tornado/test_connection.py @@ -24,6 +24,7 @@ import pytest import tornado.ioloop from tornado import gen +from datetime import timedelta from tchannel import TChannel from tchannel.errors import TimeoutError @@ -113,3 +114,35 @@ def hello(request): yield gen.sleep(0.03) assert mock_warn.call_count == 0 + + +@pytest.mark.gen_test +def test_stream_closed_error_on_read(tornado_pair): + # Test that we don't log an error for StreamClosedErrors while reading. + server, client = tornado_pair + future = server.await() + client.close() + + with mock.patch.object(connection, 'log') as mock_log: # :( + with pytest.raises(gen.TimeoutError): + yield gen.with_timeout(timedelta(milliseconds=100), future) + + assert mock_log.error.call_count == 0 + assert mock_log.info.call_count == 1 + + +@pytest.mark.gen_test +def test_other_error_on_read(tornado_pair): + # Test that we do log errors for non-StreamClosedError failures while + # reading. + server, client = tornado_pair + + future = server.await() + yield client.connection.write(b'\x00\x02\x00\x00') # bad payload + + with mock.patch.object(connection, 'log') as mock_log: # :( + with pytest.raises(gen.TimeoutError): + yield gen.with_timeout(timedelta(milliseconds=100), future) + + assert mock_log.error.call_count == 1 + assert mock_log.info.call_count == 0 From bbfc6cfaed58cce2b33b00ddee8a85871219c2a9 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Thu, 10 Mar 2016 10:22:33 -0800 Subject: [PATCH 28/67] Preparing release 0.21.8 --- CHANGES.rst | 2 +- tchannel/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index a887374d..ec92f7df 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,7 +1,7 @@ Changes by Version ================== -0.21.8 (unreleased) +0.21.8 (2016-03-10) ------------------- - Reduce read errors due to clients disconnecting to INFO from ERROR. diff --git a/tchannel/__init__.py b/tchannel/__init__.py index 8694aeab..acb50087 100644 --- a/tchannel/__init__.py +++ b/tchannel/__init__.py @@ -22,7 +22,7 @@ absolute_import, division, print_function, unicode_literals ) -__version__ = '0.21.8.dev0' +__version__ = '0.21.8' # Update setup.py when changing this. zest.releaser doesn't support updating # both of them yet. From 50249f4b9e34eb1ede89079b83c780cb9a76c835 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Thu, 10 Mar 2016 10:23:44 -0800 Subject: [PATCH 29/67] Back to development: 0.21.9 --- CHANGES.rst | 6 ++++++ tchannel/__init__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index ec92f7df..0a4ca54e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,12 @@ Changes by Version ================== +0.21.9 (unreleased) +------------------- + +- Nothing changed yet. + + 0.21.8 (2016-03-10) ------------------- diff --git a/tchannel/__init__.py b/tchannel/__init__.py index acb50087..9cb34aee 100644 --- a/tchannel/__init__.py +++ b/tchannel/__init__.py @@ -22,7 +22,7 @@ absolute_import, division, print_function, unicode_literals ) -__version__ = '0.21.8' +__version__ = '0.21.9.dev0' # Update setup.py when changing this. zest.releaser doesn't support updating # both of them yet. From 07e5f15c79c5d9851793c4fe3d684063b7a4e770 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Fri, 11 Mar 2016 10:50:42 -0800 Subject: [PATCH 30/67] connection: log stack traces on write failures --- tchannel/tornado/connection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index 40239fae..32d8074d 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -695,7 +695,7 @@ def drain(self): def on_write(f, done): if f.exception(): - log.error(f.exception()) + log.error("write failed", exc_info=f.exc_info()) done.set_exc_info(f.exc_info()) else: done.set_result(f.result()) @@ -705,7 +705,7 @@ def on_write(f, done): def on_message(f): if f.exception(): io_loop.spawn_callback(next_write) - log.error(f.exception()) + log.error("queue get failed", exc_info=f.exc_info()) return message, done = f.result() io_loop.add_future( From 09510f8858f358fe722dd467563a8a31f9bca585 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Fri, 11 Mar 2016 10:51:02 -0800 Subject: [PATCH 31/67] fix Response class used by advertise test --- tests/test_tchannel.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_tchannel.py b/tests/test_tchannel.py index be332e30..967ef0f1 100644 --- a/tests/test_tchannel.py +++ b/tests/test_tchannel.py @@ -325,6 +325,7 @@ def test_event_hook_register(): @pytest.mark.gen_test def test_advertise_should_take_a_router_file(): + from tchannel.tornado.response import Response as TornadoResponse host_path = os.path.join( os.path.dirname(os.path.realpath(__file__)), @@ -343,7 +344,7 @@ def test_advertise_should_take_a_router_file(): ) as mock_advertise: f = gen.Future() mock_advertise.return_value = f - f.set_result(Response()) + f.set_result(TornadoResponse()) tchannel.advertise(router_file=host_path) mock_advertise.assert_called_once_with(ANY, routers=routers, From d8c5c2accdc2661f6666e5ac3b84f3d36c6c3ff3 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Fri, 11 Mar 2016 11:06:00 -0800 Subject: [PATCH 32/67] fix hypothesis warnings while running rw tests --- tests/test_rw.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/tests/test_rw.py b/tests/test_rw.py index 024bd2f0..22f4100a 100644 --- a/tests/test_rw.py +++ b/tests/test_rw.py @@ -30,13 +30,13 @@ from doubles import expect from hypothesis import assume from hypothesis import given -from hypothesis import specifiers +from hypothesis import strategies as st from tchannel import rw from tchannel.errors import ReadError from tchannel.io import BytesIO -number_width = specifiers.sampled_from([1, 2, 4, 8]) +number_width = st.sampled_from((1, 2, 4, 8)) def bio(bs): @@ -47,19 +47,19 @@ def roundtrip(value, v_rw): return v_rw.read(bio(v_rw.write(value, BytesIO()).getvalue())) -@given(int, number_width) +@given(st.integers(), number_width) def test_number_roundtrip(num, width): num = num % (2 ** width - 1) assert roundtrip(num, rw.number(width)) == num -@given(unicode, number_width) +@given(st.text(), number_width) def test_len_prefixed_string_roundtrip(s, len_width): assume(len(s.encode('utf-8')) <= 2 ** len_width - 1) assert roundtrip(s, rw.len_prefixed_string(rw.number(len_width))) == s -@given(str, number_width) +@given(st.binary(), number_width) def test_len_prefixed_string_binary_roundtrip(s, len_width): assume(len(s) <= 2 ** len_width - 1) assert roundtrip( @@ -67,12 +67,10 @@ def test_len_prefixed_string_binary_roundtrip(s, len_width): ) == s -@given(str) +@given(st.binary()) def test_none_r(bs): stream = bio(bs) assert rw.none().read(stream) is None - stream.read() == 'a b c' - assert rw.none().width() == 0 From e09e9153ec7986a8cb01ed3a1d30a0fcd340c3b5 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Fri, 11 Mar 2016 11:39:21 -0800 Subject: [PATCH 33/67] Remove usages of BaseException.message. That's been deprecated since 2.6 --- tchannel/event.py | 5 +++-- tchannel/rw.py | 8 ++------ tchannel/testing/vcr/patch.py | 3 +-- tchannel/testing/vcr/server.py | 4 ++-- tchannel/tornado/connection.py | 5 +++-- tchannel/tornado/dispatch.py | 2 +- tests/integration/test_error_handling.py | 5 ++--- 7 files changed, 14 insertions(+), 18 deletions(-) diff --git a/tchannel/event.py b/tchannel/event.py index 92864a64..51d9b34e 100644 --- a/tchannel/event.py +++ b/tchannel/event.py @@ -20,6 +20,7 @@ from __future__ import absolute_import +import sys import collections import functools import logging @@ -144,8 +145,8 @@ def fire(self, event, *args, **kwargs): for hook in self.hooks[event]: try: hook(*args, **kwargs) - except Exception as e: - log.error(e.message) + except Exception: + log.error("error calling hook", exc_info=sys.exc_info()) class EventRegistrar(object): diff --git a/tchannel/rw.py b/tchannel/rw.py index efda9762..cb773b0e 100644 --- a/tchannel/rw.py +++ b/tchannel/rw.py @@ -496,9 +496,7 @@ def read(self, stream): if name != skip: result[name] = value except ReadError as e: - raise ReadError( - "Failed to read %s: %s" % (name, e.message) - ) + raise ReadError("Failed to read %s: %s" % (name, e)) return result def write(self, obj, stream): @@ -538,9 +536,7 @@ def read(self, stream): if attr != skip: kwargs[attr] = value except ReadError as e: - raise ReadError( - "Failed to read %s: %s" % (self._cls, e.message) - ) + raise ReadError("Failed to read %s: %s" % (self._cls, e)) return self._cls(**kwargs) diff --git a/tchannel/testing/vcr/patch.py b/tchannel/testing/vcr/patch.py index 3ddce727..9eebd7c2 100644 --- a/tchannel/testing/vcr/patch.py +++ b/tchannel/testing/vcr/patch.py @@ -111,8 +111,7 @@ def send(self, arg1, arg2, arg3, raise TChannelError.from_code( e.code, description=( - "The remote service threw a protocol error: %s" % - e.message + "The remote service threw a protocol error: %s" % (e,) ) ) diff --git a/tchannel/testing/vcr/server.py b/tchannel/testing/vcr/server.py index ddf16579..ee5cf430 100644 --- a/tchannel/testing/vcr/server.py +++ b/tchannel/testing/vcr/server.py @@ -53,7 +53,7 @@ def new_f(*args, **kwargs): # TODO maybe use traceback.format_exc to also send a # traceback? raise e - raise proxy.VCRServiceError(e.message) + raise proxy.VCRServiceError(str(e)) else: raise gen.Return(result) @@ -150,7 +150,7 @@ def send(self, request): except TChannelError as e: raise proxy.RemoteServiceError( code=e.code, - message=e.message, + message=str(e), ) response_headers = yield response.get_header() response_body = yield response.get_body() diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index 32d8074d..abc548c7 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -532,9 +532,10 @@ def _stream(self, context, message_factory): yield self.write(message) context.state = StreamState.completed # Stop streamming immediately if exception occurs on the handler side - except TChannelError as e: + except TChannelError: # raise by tchannel intentionally - log.info("Stop Outgoing Streams because of error: %s", e.message) + log.info("Stopped outgoing streams because of an error", + exc_info=sys.exc_info()) @tornado.gen.coroutine def post_response(self, response): diff --git a/tchannel/tornado/dispatch.py b/tchannel/tornado/dispatch.py index 8232a740..486437f7 100644 --- a/tchannel/tornado/dispatch.py +++ b/tchannel/tornado/dispatch.py @@ -225,7 +225,7 @@ def handle_call(self, request, connection): connection.send_error(e) except Exception as e: error = UnexpectedError( - description="Unexpected Error: '%s'" % e.message, + description="Unexpected Error: '%s'" % e, id=request.id, tracing=request.tracing, ) diff --git a/tests/integration/test_error_handling.py b/tests/integration/test_error_handling.py index 879520b5..edb9392a 100644 --- a/tests/integration/test_error_handling.py +++ b/tests/integration/test_error_handling.py @@ -121,7 +121,7 @@ def test_invalid_message_during_streaming(mock_server): yield resp.get_header() yield resp.get_body() - assert e.value.message == u"Checksum does not match!" + assert 'Checksum does not match' in str(e) @pytest.mark.gen_test @@ -143,5 +143,4 @@ def test_continue_message_error(mock_server): with pytest.raises(FatalProtocolError) as e: yield connection.send(callreqcontinue) - assert (e.value.message == - u"missing call message after receiving continue message") + assert 'missing call message after receiving continue message' in str(e) From 3627f6a2a225a98cd3baa01ea39d88786db9bbfe Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Fri, 11 Mar 2016 11:27:28 -0800 Subject: [PATCH 34/67] connection.Writer: re-raise StreamClosedError The future returned by Writer.put() will never resolve if the IOStream was already closed by the time put was called. This is because IOStream.write raises a StreamClosedError synchronously rather than inside a future if it's already closed at the time write() is called. --- tchannel/tornado/connection.py | 14 ++++++++++---- tests/tornado/test_connection.py | 24 +++++++++++++++++++++--- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index abc548c7..0b2cf7dc 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -708,11 +708,17 @@ def on_message(f): io_loop.spawn_callback(next_write) log.error("queue get failed", exc_info=f.exc_info()) return + message, done = f.result() - io_loop.add_future( - self.io_stream.write(message), - lambda f: on_write(f, done), - ) + try: + # write() may raise if the stream was closed while we were + # waiting for an entry in the queue. + write_future = self.io_stream.write(message) + except Exception: + io_loop.spawn_callback(next_write) + done.set_exc_info(sys.exc_info()) + else: + io_loop.add_future(write_future, lambda f: on_write(f, done)) def next_write(): if self.io_stream.closed(): diff --git a/tests/tornado/test_connection.py b/tests/tornado/test_connection.py index 26860c5f..06825d2e 100644 --- a/tests/tornado/test_connection.py +++ b/tests/tornado/test_connection.py @@ -22,14 +22,16 @@ import mock import pytest +import socket import tornado.ioloop from tornado import gen from datetime import timedelta +from tornado.iostream import IOStream, StreamClosedError from tchannel import TChannel +from tchannel import messages from tchannel.errors import TimeoutError from tchannel.tornado import connection -from tchannel.messages import Types def dummy_headers(): @@ -58,12 +60,12 @@ def test_pings(tornado_pair): client.ping() ping = yield server.await() - assert ping.message_type == Types.PING_REQ + assert ping.message_type == messages.Types.PING_REQ server.pong() pong = yield client.await() - assert pong.message_type == Types.PING_RES + assert pong.message_type == messages.Types.PING_RES @pytest.mark.gen_test @@ -146,3 +148,19 @@ def test_other_error_on_read(tornado_pair): assert mock_log.error.call_count == 1 assert mock_log.info.call_count == 0 + + +@pytest.mark.gen_test +def test_writer_write_error(): + server, client = socket.socketpair() + reader = connection.Reader(IOStream(server)) + writer = connection.Writer(IOStream(client)) + + # one successful message first + yield writer.put(messages.PingRequestMessage()) + ping = yield reader.get() + assert isinstance(ping, messages.PingRequestMessage) + + writer.io_stream.close() + with pytest.raises(StreamClosedError): + yield writer.put(messages.PingResponseMessage()) From 63c46022589b3b634c18e715e745d57ade4fb671 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Fri, 11 Mar 2016 13:20:20 -0800 Subject: [PATCH 35/67] Update CHANGELOG --- CHANGES.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 0a4ca54e..2a51ffc6 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,7 +4,8 @@ Changes by Version 0.21.9 (unreleased) ------------------- -- Nothing changed yet. +- Fixed a bug that caused silent failures when a write attempt was made to a + closed connection. 0.21.8 (2016-03-10) From 42c6f1884e1838a5e39e941baf28378c8e960296 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Fri, 11 Mar 2016 11:50:35 -0800 Subject: [PATCH 36/67] Ensure Connection.stream_request always consumes its exceptions --- tchannel/tornado/connection.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index 0b2cf7dc..b4bf195b 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -555,13 +555,12 @@ def stream_request(self, request): """send the given request and response is not required""" request.close_argstreams() - stream_future = self._stream(request, self.request_message_factory) - - IOLoop.current().add_future( - stream_future, - lambda f: request.close_argstreams(force=True) - ) + def on_done(future): + future.exception() # < to avoid "unconsumed exception" logs + request.close_argstreams(force=True) + stream_future = self._stream(request, self.request_message_factory) + stream_future.add_done_callback(on_done) return stream_future def send_request(self, request): From 4f098192543f8384c5d0e42a9dc1d850366a4dca Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Fri, 11 Mar 2016 14:11:14 -0800 Subject: [PATCH 37/67] Fix unconsumed StreamClosedError logs The tests were logging unconsumed StreamClosedErrors a lot. The IOLoop was getting stopped before it could tell the future to call the callback, so the future exception was never getting consumed. I think this will solve the same problem for uses of `IOLoop.run_sync` that time out as well. --- CHANGES.rst | 1 + tchannel/tornado/connection.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 2a51ffc6..1b1ec97c 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,6 +6,7 @@ Changes by Version - Fixed a bug that caused silent failures when a write attempt was made to a closed connection. +- Reduce ``StreamClosedError`` log noisiness for certain scenarios. 0.21.8 (2016-03-10) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index b4bf195b..70764f1a 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -663,7 +663,7 @@ def keep_reading(f): lambda f: io_loop.spawn_callback(self.fill), ) - io_loop.add_future(read_message(self.io_stream), keep_reading) + read_message(self.io_stream).add_done_callback(keep_reading) def get(self): """Receive the next message off the wire. From 41c22c721425544ab9c76e0e27edc4ec306bc486 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Fri, 11 Mar 2016 17:00:19 -0800 Subject: [PATCH 38/67] Make TChannel.advertise() idempotent and thread safe. Resloves #319. --- CHANGES.rst | 1 + tchannel/tchannel.py | 55 +++++++++++++++++++++------- tests/test_tchannel.py | 81 +++++++++++++++++++++++++++++++++++++----- 3 files changed, 117 insertions(+), 20 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 1b1ec97c..91d93700 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -7,6 +7,7 @@ Changes by Version - Fixed a bug that caused silent failures when a write attempt was made to a closed connection. - Reduce ``StreamClosedError`` log noisiness for certain scenarios. +- Make ``TChannel.advertise`` idempotent and thread-safe. 0.21.8 (2016-03-10) diff --git a/tchannel/tchannel.py b/tchannel/tchannel.py index b060d846..f8e2110f 100644 --- a/tchannel/tchannel.py +++ b/tchannel/tchannel.py @@ -121,6 +121,11 @@ def __init__(self, name, hostport=None, process_name=None, # register default health endpoint self.thrift.register(Meta)(health) + # advertise_response is the Future containing the response of calling + # advertise(). + self._advertise_response = None + self._advertise_lock = Lock() + def is_listening(self): return self._dep_tchannel.is_listening() @@ -260,7 +265,6 @@ def decorator(fn): else: return decorator(handler) - @gen.coroutine def advertise(self, routers=None, name=None, timeout=None, router_file=None): """Advertise with Hyperbahn. @@ -274,6 +278,9 @@ def advertise(self, routers=None, name=None, timeout=None, minute). Hyperbahn will eject us from the network if it doesn't get a re-advertise from us after 5 minutes. + This function may be called multiple times if it fails. If it + succeeds, all consecutive calls are ignored. + :param list routers: A seed list of known Hyperbahn addresses to attempt contact with. Entries should be of the form ``"host:port"``. @@ -315,14 +322,38 @@ def advertise(self, routers=None, name=None, timeout=None, log.exception('Failed to read seed routers list.') raise - dep_result = yield self._dep_tchannel.advertise( - routers=routers, - name=name, - timeout=timeout - ) - - body = yield dep_result.get_body() - headers = yield dep_result.get_header() - response = Response(json.loads(body), headers or {}) - - raise gen.Return(response) + @gen.coroutine + def _advertise(): + result = yield self._dep_tchannel.advertise( + routers=routers, + name=name, + timeout=timeout, + ) + body = yield result.get_body() + headers = yield result.get_header() + response = Response(json.loads(body), headers or {}) + raise gen.Return(response) + + def _on_advertise(future): + if not future.exception(): + return + + # If the request failed, clear the response so that we can try + # again. + with self._advertise_lock: + # `is` comparison to ensure we're not deleting another Future. + if self._advertise_response is future: + self._advertise_response = None + + with self._advertise_lock: + if self._advertise_response is not None: + return self._advertise_response + future = self._advertise_response = _advertise() + + # We call add_done_callback here rather than when we call _advertise() + # because if the future has already resolved by the time we call + # add_done_callback, the callback will immediately be executed. The + # callback will try to acquire the advertise_lock which we already + # hold and end up in a deadlock. + future.add_done_callback(_on_advertise) + return future diff --git a/tests/test_tchannel.py b/tests/test_tchannel.py index 967ef0f1..a480656c 100644 --- a/tests/test_tchannel.py +++ b/tests/test_tchannel.py @@ -33,6 +33,7 @@ import pytest from tornado import gen +from tchannel.tornado.stream import InMemStream from tchannel import TChannel, Request, Response, schemes, errors, thrift from tchannel.errors import AlreadyListeningError, TimeoutError from tchannel.event import EventHook @@ -323,17 +324,18 @@ def test_event_hook_register(): mock_register.called +@pytest.fixture +def router_file(): + return os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data/hosts.json') + + @pytest.mark.gen_test -def test_advertise_should_take_a_router_file(): +def test_advertise_should_take_a_router_file(router_file): from tchannel.tornado.response import Response as TornadoResponse - host_path = os.path.join( - os.path.dirname(os.path.realpath(__file__)), - 'data/hosts.json', - ) - tchannel = TChannel(name='client') - with open(host_path, 'r') as json_data: + with open(router_file, 'r') as json_data: routers = json.load(json_data) with ( @@ -345,7 +347,7 @@ def test_advertise_should_take_a_router_file(): f = gen.Future() mock_advertise.return_value = f f.set_result(TornadoResponse()) - tchannel.advertise(router_file=host_path) + tchannel.advertise(router_file=router_file) mock_advertise.assert_called_once_with(ANY, routers=routers, name=ANY, timeout=ANY) @@ -362,6 +364,69 @@ def test_advertise_should_raise_on_invalid_router_file(): yield tchannel.advertise(routers='lala', router_file='?~~lala') +@pytest.mark.gen_test +def test_advertise_is_idempotent(router_file): + from tchannel.tornado.response import Response as TornadoResponse + + def new_advertise(*args, **kwargs): + f = gen.Future() + f.set_result(TornadoResponse( + argstreams=[closed_stream(b'{}') for i in range(3)], + )) + return f + + tchannel = TChannel(name='client') + with patch( + 'tchannel.tornado.TChannel.advertise', autospec=True + ) as mock_advertise: + mock_advertise.side_effect = new_advertise + + yield tchannel.advertise(router_file=router_file) + yield tchannel.advertise(router_file=router_file) + yield tchannel.advertise(router_file=router_file) + + assert mock_advertise.call_count == 1 + + +@pytest.mark.gen_test +def test_advertise_is_retryable(router_file): + from tchannel.tornado.response import Response as TornadoResponse + + def new_advertise(*args, **kwargs): + f = gen.Future() + f.set_result(TornadoResponse( + argstreams=[closed_stream(b'{}') for i in range(3)], + )) + return f + + tchannel = TChannel(name='client') + with patch( + 'tchannel.tornado.TChannel.advertise', autospec=True + ) as mock_advertise: + f = gen.Future() + f.set_exception(Exception('great sadness')) + mock_advertise.return_value = f + + with pytest.raises(Exception) as e: + yield tchannel.advertise(router_file=router_file) + assert 'great sadness' in str(e) + assert mock_advertise.call_count == 1 + + mock_advertise.side_effect = new_advertise + yield tchannel.advertise(router_file=router_file) + yield tchannel.advertise(router_file=router_file) + yield tchannel.advertise(router_file=router_file) + yield tchannel.advertise(router_file=router_file) + + assert mock_advertise.call_count == 2 + + +def closed_stream(body): + stream = InMemStream(body) + stream.close() + return stream + + def test_listen_different_ports(): server = TChannel(name='test_server') server.listen() From d93c68016108c8bd924264beaf35ceef5dba37cb Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Mon, 14 Mar 2016 15:33:56 -0700 Subject: [PATCH 39/67] Preparing release 0.21.9 --- CHANGES.rst | 2 +- tchannel/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 91d93700..09c67e71 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,7 +1,7 @@ Changes by Version ================== -0.21.9 (unreleased) +0.21.9 (2016-03-14) ------------------- - Fixed a bug that caused silent failures when a write attempt was made to a diff --git a/tchannel/__init__.py b/tchannel/__init__.py index 9cb34aee..4806ab7c 100644 --- a/tchannel/__init__.py +++ b/tchannel/__init__.py @@ -22,7 +22,7 @@ absolute_import, division, print_function, unicode_literals ) -__version__ = '0.21.9.dev0' +__version__ = '0.21.9' # Update setup.py when changing this. zest.releaser doesn't support updating # both of them yet. From a9dce498fcbe3ebfa1fb826e1f8f452b9c43d0c6 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Mon, 14 Mar 2016 15:34:21 -0700 Subject: [PATCH 40/67] Back to development: 0.21.10 --- CHANGES.rst | 6 ++++++ tchannel/__init__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 09c67e71..0df7391d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,12 @@ Changes by Version ================== +0.21.10 (unreleased) +-------------------- + +- Nothing changed yet. + + 0.21.9 (2016-03-14) ------------------- diff --git a/tchannel/__init__.py b/tchannel/__init__.py index 4806ab7c..0eedec9e 100644 --- a/tchannel/__init__.py +++ b/tchannel/__init__.py @@ -22,7 +22,7 @@ absolute_import, division, print_function, unicode_literals ) -__version__ = '0.21.9' +__version__ = '0.21.10.dev0' # Update setup.py when changing this. zest.releaser doesn't support updating # both of them yet. From 94200202129d3a2f1591645b287e007cc08a218e Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Fri, 11 Mar 2016 11:51:54 -0800 Subject: [PATCH 41/67] connection: read_message should propagate StreamClosedError If the IOStream has already been closed by the time read_message is called, it will fail with a synchronous StreamClosedError which read_message needs to catch. --- tchannel/tornado/connection.py | 11 ++++++++--- tests/tornado/test_connection.py | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index 70764f1a..993bd412 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -24,7 +24,6 @@ import os import socket import sys -from functools import partial import tornado.gen import tornado.iostream @@ -816,8 +815,14 @@ def on_read_size(future): size = frame.frame_rw.size_rw.read(BytesIO(size_bytes)) io_loop.add_future( stream.read_bytes(size - FRAME_SIZE_WIDTH), - partial(on_body, size), + lambda f: on_body(size, f) ) - io_loop.add_future(stream.read_bytes(FRAME_SIZE_WIDTH), on_read_size) + try: + # read_bytes may fail if the stream has already been closed + read_size_future = stream.read_bytes(FRAME_SIZE_WIDTH) + except Exception: + answer.set_exc_info(sys.exc_info()) + else: + read_size_future.add_done_callback(on_read_size) return answer diff --git a/tests/tornado/test_connection.py b/tests/tornado/test_connection.py index 06825d2e..6ecc32f6 100644 --- a/tests/tornado/test_connection.py +++ b/tests/tornado/test_connection.py @@ -164,3 +164,23 @@ def test_writer_write_error(): writer.io_stream.close() with pytest.raises(StreamClosedError): yield writer.put(messages.PingResponseMessage()) + + +@pytest.mark.gen_test +def test_reader_read_error(): + server, client = socket.socketpair() + reader = connection.Reader(IOStream(server)) + writer = connection.Writer(IOStream(client)) + + yield writer.put(messages.PingRequestMessage()) + ping = yield reader.get() + assert isinstance(ping, messages.PingRequestMessage) + + reader.io_stream.close() + future = reader.get() + with pytest.raises(gen.TimeoutError): + yield gen.with_timeout(timedelta(milliseconds=100), future) + + # TODO(abg): Need to fix Reader.get to actually raise StreamClosedError + # rather than freezing forever. Then this can be changed to assert that a + # StreamClosedError is raised. From 5ff6eaf27b8ccd6e90da8feecad1b37b4b5b5cc9 Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Tue, 15 Mar 2016 18:33:23 -0700 Subject: [PATCH 42/67] Include a cn annotation for all spans --- CHANGES.rst | 2 +- tchannel/zipkin/zipkin_trace.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 0df7391d..aa51a13b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,7 +4,7 @@ Changes by Version 0.21.10 (unreleased) -------------------- -- Nothing changed yet. +- Zipkin traces now include a server-side 'cn' annotation to identify callers. 0.21.9 (2016-03-14) diff --git a/tchannel/zipkin/zipkin_trace.py b/tchannel/zipkin/zipkin_trace.py index 59d4db3b..c5d60ba4 100644 --- a/tchannel/zipkin/zipkin_trace.py +++ b/tchannel/zipkin/zipkin_trace.py @@ -82,8 +82,13 @@ def before_receive_request(self, request): if not request.tracing.traceflags: return - ann = annotation.server_recv() - request.tracing.annotations.append(ann) + request.tracing.annotations.append(annotation.server_recv()) + + caller_name = request.headers.get('cn') + if caller_name: + request.tracing.annotations.append( + annotation.string('cn', caller_name), + ) def after_send_response(self, response): if not response.tracing.traceflags: From b2e6d1543dff69fa81edea917743a0cc6e2ffb47 Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Wed, 16 Mar 2016 08:41:00 -0700 Subject: [PATCH 43/67] reduce unconsumed messages to info --- CHANGES.rst | 3 +++ tchannel/tornado/connection.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index aa51a13b..7a8cdc15 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,6 +5,9 @@ Changes by Version -------------------- - Zipkin traces now include a server-side 'cn' annotation to identify callers. +- Reduced "unconsumed message" warnings to INFO. These are typically generated + when Hyperbahn garbage collects your process due to a timed-out + advertisement. 0.21.9 (2016-03-14) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index 993bd412..594c9cd2 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -241,7 +241,7 @@ def _loop(self): # Recently timed out. Safe to ignore. continue - log.warn('Unconsumed message %s', message) + log.info('Unconsumed message %s', message) # Basically, the only difference between send and write is that send # sets up a Future to get the response. That's ideal for peers making From 51cca951173b2900ffcefdc318cbe8ddecb06472 Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Thu, 17 Mar 2016 13:14:56 -0700 Subject: [PATCH 44/67] catch StreamClosedError on handshake --- CHANGES.rst | 2 ++ tchannel/tornado/connection.py | 22 ++++++++++++---------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 7a8cdc15..f7b182cb 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,6 +8,8 @@ Changes by Version - Reduced "unconsumed message" warnings to INFO. These are typically generated when Hyperbahn garbage collects your process due to a timed-out advertisement. +- Handshake timeouts were incorrectly being surfaced as StreamClosedError but + are now raised as NetworkError. 0.21.9 (2016-03-14) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index 594c9cd2..45354c25 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -399,22 +399,24 @@ def outgoing(cls, hostport, process_name=None, serve_hostport=None, log.debug("Connecting to %s", hostport) try: yield stream.connect((host, int(port))) + + connection = cls(stream, tchannel, direction=OUTGOING) + + log.debug("Performing handshake with %s", hostport) + + yield connection.initiate_handshake(headers={ + 'host_port': serve_hostport, + 'process_name': process_name, + 'tchannel_language': TCHANNEL_LANGUAGE, + 'tchannel_language_version': TCHANNEL_LANGUAGE_VERSION, + 'tchannel_version': TCHANNEL_VERSION, + }) except (StreamClosedError, socket.error) as e: log.warn("Couldn't connect to %s", hostport) raise NetworkError( "Couldn't connect to %s" % hostport, e ) - connection = cls(stream, tchannel, direction=OUTGOING) - log.debug("Performing handshake with %s", hostport) - yield connection.initiate_handshake(headers={ - 'host_port': serve_hostport, - 'process_name': process_name, - 'tchannel_language': TCHANNEL_LANGUAGE, - 'tchannel_language_version': TCHANNEL_LANGUAGE_VERSION, - 'tchannel_version': TCHANNEL_VERSION, - }) - if handler: connection.serve(handler) From 1d3b5f807d1f9f6ae89727d97f15baf24cf98761 Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Thu, 17 Mar 2016 13:19:03 -0700 Subject: [PATCH 45/67] reduce default sample rate --- CHANGES.rst | 1 + tchannel/zipkin/zipkin_trace.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index f7b182cb..83817f77 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -10,6 +10,7 @@ Changes by Version advertisement. - Handshake timeouts were incorrectly being surfaced as StreamClosedError but are now raised as NetworkError. +- Reduced default tracing sample rate from 100% to 1%. 0.21.9 (2016-03-14) diff --git a/tchannel/zipkin/zipkin_trace.py b/tchannel/zipkin/zipkin_trace.py index c5d60ba4..ecd96c69 100644 --- a/tchannel/zipkin/zipkin_trace.py +++ b/tchannel/zipkin/zipkin_trace.py @@ -29,7 +29,7 @@ class ZipkinTraceHook(EventHook): """generate zipkin-style span for tracing""" - DEFAULT_RATE = 1.0 + DEFAULT_RATE = 0.01 def __init__(self, tchannel=None, dst=None, sample_rate=None): """Log zipkin style trace. From 08a7e6686266548f0b2e79bc20987ca9d11a1dbc Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Thu, 17 Mar 2016 14:45:37 -0700 Subject: [PATCH 46/67] Log all response write errors except StreamClosedErorrs This fixes yet another "Future exception was never retrieved" issue. --- tchannel/tornado/dispatch.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tchannel/tornado/dispatch.py b/tchannel/tornado/dispatch.py index 486437f7..b578f48f 100644 --- a/tchannel/tornado/dispatch.py +++ b/tchannel/tornado/dispatch.py @@ -26,6 +26,7 @@ import tornado import tornado.gen from tornado import gen +from tornado.iostream import StreamClosedError from tchannel.request import Request from tchannel.request import TransportHeaders @@ -170,7 +171,18 @@ def handle_call(self, request, connection): serializer=handler.resp_serializer, ) - connection.post_response(response) + def _on_post_response(future): + if not future.exception(): + return + + # Failed to write response because client disappeared. Nothing to + # do. + if isinstance(future.exception(), StreamClosedError): + return + + log.error('failed to write response', exc_info=future.exc_info()) + + connection.post_response(response).add_done_callback(_on_post_response) try: # New impl - the handler takes a request and returns a response From 7943b7e01123fce071ac6e97bb9ecf2125b517ed Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Thu, 17 Mar 2016 15:54:31 -0700 Subject: [PATCH 47/67] Preparing release 0.21.10 --- CHANGES.rst | 2 +- tchannel/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 83817f77..a1044158 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,7 +1,7 @@ Changes by Version ================== -0.21.10 (unreleased) +0.21.10 (2016-03-17) -------------------- - Zipkin traces now include a server-side 'cn' annotation to identify callers. diff --git a/tchannel/__init__.py b/tchannel/__init__.py index 0eedec9e..094892b7 100644 --- a/tchannel/__init__.py +++ b/tchannel/__init__.py @@ -22,7 +22,7 @@ absolute_import, division, print_function, unicode_literals ) -__version__ = '0.21.10.dev0' +__version__ = '0.21.10' # Update setup.py when changing this. zest.releaser doesn't support updating # both of them yet. From 9d68797cbc380a298e6703b858d1b24ddd269c4f Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Thu, 17 Mar 2016 15:55:00 -0700 Subject: [PATCH 48/67] Back to development: 0.21.11 --- CHANGES.rst | 6 ++++++ tchannel/__init__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index a1044158..b15c426e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,12 @@ Changes by Version ================== +0.21.11 (unreleased) +-------------------- + +- Nothing changed yet. + + 0.21.10 (2016-03-17) -------------------- diff --git a/tchannel/__init__.py b/tchannel/__init__.py index 094892b7..31cffaa2 100644 --- a/tchannel/__init__.py +++ b/tchannel/__init__.py @@ -22,7 +22,7 @@ absolute_import, division, print_function, unicode_literals ) -__version__ = '0.21.10' +__version__ = '0.21.11.dev0' # Update setup.py when changing this. zest.releaser doesn't support updating # both of them yet. From dbbe2c1f8b4c4b55d14ee5aeb72c80e57998e6e1 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Mon, 14 Mar 2016 19:30:14 -0700 Subject: [PATCH 49/67] Swap out tornado Queue for custom implementation. --- CHANGES.rst | 3 +- tchannel/_queue.py | 195 +++++++++++++++++++++ tchannel/tornado/connection.py | 13 +- tests/test_queue.py | 282 +++++++++++++++++++++++++++++++ tests/tornado/test_connection.py | 19 +-- 5 files changed, 494 insertions(+), 18 deletions(-) create mode 100644 tchannel/_queue.py create mode 100644 tests/test_queue.py diff --git a/CHANGES.rst b/CHANGES.rst index b15c426e..20a188ef 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,7 +4,8 @@ Changes by Version 0.21.11 (unreleased) -------------------- -- Nothing changed yet. +- Fixed a bug where certain errors while reading requests would propagate as + TimeoutErrors. 0.21.10 (2016-03-17) diff --git a/tchannel/_queue.py b/tchannel/_queue.py new file mode 100644 index 00000000..7806f519 --- /dev/null +++ b/tchannel/_queue.py @@ -0,0 +1,195 @@ +# Copyright (c) 2015 Uber Technologies, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +from __future__ import ( + absolute_import, unicode_literals, division, print_function +) + +from collections import deque + +from tornado.gen import TimeoutError +from tornado.ioloop import IOLoop +from tornado.queues import QueueEmpty +from tornado.concurrent import Future + +__all__ = ['Queue', 'QueueEmpty', 'TimeoutError'] + + +class Queue(object): + """A specialized version of Tornado's Queue class. + + This class is an almost drop-in replacement for Tornado's Queue class. It + behaves similar to Tornado's Queue except it provides a ``terminate()`` + function to fail all outstanding operations. + + :param int maxsize: + If specified, this is the buffer size for the queue. Once the capacity + is reached, we will start applying backpressure on putters. If + unspecified or None, the queue is unbuffered. + """ + + __slots__ = ('_getters', '_putters', '_surplus', 'maxsize') + + # How this works: + # + # Reads: + # - Check if we have a value sitting in surplus. If yes, use that. + # Otherwise, + # - Check if we have a putter waiting to provide a value. If yes, use + # that. Otherwise, + # - Store the future in getters for later. + # + # Writes: + # - Check if we have a future waiting for a value in getters. If yes, use + # that. Othrewise, + # - Check if we have room in surplus. If yes, use that. Otherwise, + # - Store the value and future in putters for later. + # + # Invariants: + # - Either getters is empty or both, surplus and putters are empty. + # - If putters is non-empty, surplus is maxsize (which is more than 0). + + def __init__(self, maxsize=None): + if maxsize is None: + maxsize = 0 + self.maxsize = maxsize + + # collection of futures waiting for values + self._getters = deque() + + # collection of (value, future) pairs waiting to put values. + self._putters = deque() + + # collection of values that have not yet been consumed + self._surplus = deque() + + def terminate(self, exc): + """Terminate all outstanding get requests with the given exception. + + :param exc: + An exception or an exc_info triple. + """ + if isinstance(exc, tuple): + fail = (lambda f: f.set_exc_info(exc)) + else: + fail = (lambda f: f.set_exception(exc)) + + while self._putters: + _, future = self._putters.popleft() + if future.running(): + fail(future) + + while self._getters: + future = self._getters.popleft() + if future.running(): + fail(future) + + def __receive_put(self): + """Receive a value from a waiting putter.""" + while self._putters: + value, future = self._putters.popleft() + if future.running(): + self._surplus.append(value) + future.set_result(None) + return + + def get(self, timeout=None): + """Get the next item from the queue. + + Returns a future that resolves to the next item. + + :param timeout: + If set, the future will resolve to a TimeoutError if a value is + not received within the given time. The value for ``timeout`` may + be anything accepted by ``IOLoop.add_timeout`` (a ``timedelta`` or + an **absolute** time relative to ``IOLoop.time``). + """ + self.__receive_put() + + answer = Future() + if self._surplus: + answer.set_result(self._surplus.popleft()) + return answer + + # Wait for a value + if timeout is not None: + _add_timeout(timeout, answer) + self._getters.append(answer) + return answer + + def get_nowait(self): + """Returns a value from the queue without waiting. + + Raises ``QueueEmpty`` if no values are available right now. + """ + self.__receive_put() + + if self._surplus: + return self._surplus.popleft() + raise QueueEmpty() + + def put(self, value, timeout=None): + """Puts an item into the queue. + + Returns a future that resolves to None once the value has been + accepted by the queue. + + The value is accepted immediately if there is room in the queue or + maxsize was not specified. + + :param timeout: + If set, the future will resolve to a TimeoutError if a value is + not accepted within the given time. The value for ``timeout`` may + be anything accepted by ``IOLoop.add_timeout`` (a ``timedelta`` or + an **absolute** time relative to ``IOLoop.time``). + """ + + answer = Future() + + # If there's a getter waiting, send it the result. + while self._getters: + future = self._getters.popleft() + if future.running(): + future.set_result(value) + answer.set_result(None) + return answer + + # We have room. Put the value into surplus. + if self.maxsize < 1 or len(self._surplus) < self.maxsize: + self._surplus.append(value) + answer.set_result(None) + return answer + + # Wait until there is room. + if timeout is not None: + _add_timeout(timeout, answer) + self._putters.append((value, answer)) + return answer + + +def _add_timeout(timeout, future): + io_loop = IOLoop.current() + + def on_timeout(): + if future.running(): + future.set_exception(TimeoutError("timed out")) + + t = io_loop.add_timeout(timeout, on_timeout) + future.add_done_callback(lambda _: io_loop.remove_timeout(t)) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index 45354c25..dbdcc904 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -27,7 +27,6 @@ import tornado.gen import tornado.iostream -import tornado.queues as queues from tornado import stack_context from tornado.ioloop import IOLoop @@ -36,6 +35,7 @@ from .. import errors from .. import frame from .. import messages +from .. import _queue as queues from .._future import fail_to from ..errors import NetworkError from ..errors import TChannelError @@ -192,7 +192,10 @@ def _loop(self): self._loop_running = True while not self.closed: - message = yield self.reader.get() + try: + message = yield self.reader.get() + except StreamClosedError: + break # TODO: There should probably be a try-catch on the yield. if message.message_type in self.CALL_REQ_TYPES: @@ -642,7 +645,7 @@ class Reader(object): def __init__(self, io_stream, capacity=None): capacity = capacity or 64 - self.queue = tornado.queues.Queue(capacity) + self.queue = queues.Queue(capacity) self.filling = False self.io_stream = io_stream @@ -654,7 +657,7 @@ def fill(self): def keep_reading(f): if f.exception(): self.filling = False - # This is usually StreamClosed due to a client disconnecting. + self.queue.terminate(f.exc_info()) if isinstance(f.exception(), StreamClosedError): return log.info("read error", exc_info=f.exc_info()) else: @@ -683,7 +686,7 @@ class Writer(object): def __init__(self, io_stream, capacity=None): capacity = capacity or 64 - self.queue = tornado.queues.Queue(capacity) + self.queue = queues.Queue(capacity) self.draining = False self.io_stream = io_stream # Tracks message IDs for this connection. diff --git a/tests/test_queue.py b/tests/test_queue.py new file mode 100644 index 00000000..b81cae90 --- /dev/null +++ b/tests/test_queue.py @@ -0,0 +1,282 @@ +# Copyright (c) 2015 Uber Technologies, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +from __future__ import ( + absolute_import, unicode_literals, division, print_function +) + +import sys +import pytest +from datetime import timedelta +from six.moves import range +from tornado import gen + +from tchannel._queue import Queue, QueueEmpty, TimeoutError + + +def assert_invariant(q): + assert not (q._getters and (q._surplus or q._putters)) + assert (not q._putters or len(q._surplus) == q.maxsize) + + +@pytest.fixture +def items(): + return list(range(100)) + + +@pytest.mark.gen_test +def test_unbounded_put_then_get(items): + queue = Queue() + assert_invariant(queue) + + for item in items: + yield queue.put(item) + assert_invariant(queue) + + got_futures = [] + for i in range(len(items)): + got_futures.append(queue.get()) + assert_invariant(queue) + + got = yield got_futures + assert got == items + + with pytest.raises(TimeoutError): + yield queue.get(timedelta(seconds=0.1)) + + +@pytest.mark.gen_test +def test_bounded_put_then_get(items): + queue = Queue(5) + assert_invariant(queue) + + for item in items[:5]: + yield queue.put(item) + assert_invariant(queue) + + put_futures = [] + for item in items[5:]: + put_futures.append(queue.put(item)) + + got = [] + for i in range(len(items)): + item = yield queue.get() + assert_invariant(queue) + got.append(item) + + assert got == items + yield put_futures + + +@pytest.mark.gen_test +def test_bounded_put_timeout_then_get(items): + queue = Queue(1) + assert_invariant(queue) + + yield queue.put(items[0]) + + put_futures = [] + for item in range(len(items)): + put_futures.append(queue.put(item, timedelta(seconds=0.001))) + + yield gen.sleep(0.01) + + get_futures = [] + for item in range(len(items)): + get_futures.append(queue.get()) + + for future in put_futures: + with pytest.raises(TimeoutError): + yield future + + for item in items[1:]: + yield queue.put(item) + + got = yield get_futures + assert got == items + + +@pytest.mark.gen_test +def test_unbounded_put_then_get_nowait(items): + queue = Queue() + + for item in items: + yield queue.put(item) + assert_invariant(queue) + + got = [] + for i in range(len(items)): + got.append(queue.get_nowait()) + assert_invariant(queue) + + assert got == items + + with pytest.raises(QueueEmpty): + queue.get_nowait() + + +@pytest.mark.gen_test +def test_unbounded_get_then_put(items): + queue = Queue() + + got_futures = [] + for i in range(len(items)): + got_futures.append(queue.get()) + assert_invariant(queue) + + for item in items: + yield queue.put(item) + assert_invariant(queue) + + got = yield got_futures + assert got == items + + +@pytest.mark.gen_test +def test_unbounded_expired_gets_then_get_first(): + queue = Queue() + + expired_gets = [] + for i in range(99): + expired_gets.append(queue.get(timedelta(seconds=0.001))) + assert_invariant(queue) + + yield gen.sleep(0.01) # 99 * 0.001 = 0.099 + + # get the value before putting it + real_get = queue.get() + yield queue.put(42) + assert 42 == (yield real_get) + assert_invariant(queue) + + for future in expired_gets: + with pytest.raises(TimeoutError): + yield future + + +@pytest.mark.gen_test +def test_unbounded_expired_gets_then_put_first(): + queue = Queue() + + expired_gets = [] + for i in range(99): + expired_gets.append(queue.get(timedelta(seconds=0.001))) + assert_invariant(queue) + + yield gen.sleep(0.01) + + # put the value first, get afterwards + yield queue.put(42) + assert 42 == (yield queue.get()) + assert_invariant(queue) + + for future in expired_gets: + with pytest.raises(TimeoutError): + yield future + + +@pytest.mark.gen_test +def test_terminate_get_with_exception(): + + class GreatSadness(Exception): + pass + + queue = Queue() + expired_gets = [] + terminated_gets = [] + + flag = True + for i in range(99): + if flag: + expired_gets.append(queue.get(timedelta(seconds=0.001))) + else: + terminated_gets.append(queue.get(timedelta(seconds=1))) + flag = not flag + + yield gen.sleep(0.01) + + queue.terminate(GreatSadness()) + + for future in expired_gets: + with pytest.raises(TimeoutError): + yield future + + for future in terminated_gets: + with pytest.raises(GreatSadness): + yield future + + +@pytest.mark.gen_test +def test_terminate_put_with_exception(): + + class GreatSadness(Exception): + pass + + queue = Queue(10) + expired_puts = [] + terminated_puts = [] + + for i in range(10): + yield queue.put(i) + + flag = True + for i in range(99): + if flag: + expired_puts.append(queue.put(i, timedelta(seconds=0.001))) + else: + terminated_puts.append(queue.put(i, timedelta(seconds=1))) + flag = not flag + + yield gen.sleep(0.01) + + try: + raise GreatSadness() + except GreatSadness: + queue.terminate(sys.exc_info()) + else: + assert False, "expected failure" + + for future in expired_puts: + with pytest.raises(TimeoutError): + yield future + + for future in terminated_puts: + with pytest.raises(GreatSadness): + yield future + + +@pytest.mark.gen_test +def test_get_timeout_too_late(): + queue = Queue() + future = queue.get(timedelta(seconds=0.01)) + yield queue.put(42) + yield gen.sleep(0.01) + assert 42 == (yield future) + + +@pytest.mark.gen_test +def test_put_timeout_too_late(): + queue = Queue(1) + yield queue.put(1) + future = queue.put(2, timedelta(seconds=0.01)) + assert (yield queue.get()) == 1 + assert (yield queue.get()) == 2 + yield gen.sleep(0.01) + yield future diff --git a/tests/tornado/test_connection.py b/tests/tornado/test_connection.py index 6ecc32f6..c1cc69c8 100644 --- a/tests/tornado/test_connection.py +++ b/tests/tornado/test_connection.py @@ -25,12 +25,11 @@ import socket import tornado.ioloop from tornado import gen -from datetime import timedelta from tornado.iostream import IOStream, StreamClosedError from tchannel import TChannel from tchannel import messages -from tchannel.errors import TimeoutError +from tchannel.errors import TimeoutError, ReadError from tchannel.tornado import connection @@ -126,8 +125,8 @@ def test_stream_closed_error_on_read(tornado_pair): client.close() with mock.patch.object(connection, 'log') as mock_log: # :( - with pytest.raises(gen.TimeoutError): - yield gen.with_timeout(timedelta(milliseconds=100), future) + with pytest.raises(StreamClosedError): + yield future assert mock_log.error.call_count == 0 assert mock_log.info.call_count == 1 @@ -143,8 +142,8 @@ def test_other_error_on_read(tornado_pair): yield client.connection.write(b'\x00\x02\x00\x00') # bad payload with mock.patch.object(connection, 'log') as mock_log: # :( - with pytest.raises(gen.TimeoutError): - yield gen.with_timeout(timedelta(milliseconds=100), future) + with pytest.raises(ReadError): + yield future assert mock_log.error.call_count == 1 assert mock_log.info.call_count == 0 @@ -178,9 +177,5 @@ def test_reader_read_error(): reader.io_stream.close() future = reader.get() - with pytest.raises(gen.TimeoutError): - yield gen.with_timeout(timedelta(milliseconds=100), future) - - # TODO(abg): Need to fix Reader.get to actually raise StreamClosedError - # rather than freezing forever. Then this can be changed to assert that a - # StreamClosedError is raised. + with pytest.raises(StreamClosedError): + yield future From 393d3074e8a6f15de88df1241e26349db218cf6d Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Wed, 16 Mar 2016 14:10:33 -0700 Subject: [PATCH 50/67] connection/reader: Remove need for terminate() --- tchannel/tornado/connection.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index dbdcc904..867458b0 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -655,15 +655,17 @@ def fill(self): io_loop = IOLoop.current() def keep_reading(f): + put_future = self.queue.put(f) + if f.exception(): self.filling = False - self.queue.terminate(f.exc_info()) if isinstance(f.exception(), StreamClosedError): return log.info("read error", exc_info=f.exc_info()) else: return log.error("read error", exc_info=f.exc_info()) + # connect these two in the case when put blocks - self.queue.put(f.result()).add_done_callback( + put_future.add_done_callback( lambda f: io_loop.spawn_callback(self.fill), ) @@ -678,7 +680,20 @@ def get(self): if not self.filling: self.fill() - return self.queue.get() + answer = tornado.gen.Future() + + def _on_result(future): + if future.exception(): + return answer.set_exc_info(future.exc_info()) + answer.set_result(future.result()) + + def _on_item(future): + if future.exception(): + return answer.set_exc_info(future.exc_info()) + future.result().add_done_callback(_on_result) + + self.queue.get().add_done_callback(_on_item) + return answer class Writer(object): From 923b9d89fcdf1262917c81f37d815ea605e62e09 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Wed, 16 Mar 2016 11:39:41 -0700 Subject: [PATCH 51/67] queue: much simpler queue implementation. This implementation does not support terminate() or backpressure, but it's a much simpler implementation and easier to make thread-safe. --- tchannel/_queue.py | 240 ++++++++++++++--------------- tchannel/tornado/connection.py | 4 +- tests/test_queue.py | 270 ++++++++++++--------------------- 3 files changed, 213 insertions(+), 301 deletions(-) diff --git a/tchannel/_queue.py b/tchannel/_queue.py index 7806f519..cbf7101f 100644 --- a/tchannel/_queue.py +++ b/tchannel/_queue.py @@ -22,116 +22,105 @@ absolute_import, unicode_literals, division, print_function ) -from collections import deque +import threading +from collections import namedtuple -from tornado.gen import TimeoutError from tornado.ioloop import IOLoop from tornado.queues import QueueEmpty from tornado.concurrent import Future -__all__ = ['Queue', 'QueueEmpty', 'TimeoutError'] +__all__ = ['Queue', 'QueueEmpty'] -class Queue(object): - """A specialized version of Tornado's Queue class. +Node = namedtuple('Node', 'value next') - This class is an almost drop-in replacement for Tornado's Queue class. It - behaves similar to Tornado's Queue except it provides a ``terminate()`` - function to fail all outstanding operations. - :param int maxsize: - If specified, this is the buffer size for the queue. Once the capacity - is reached, we will start applying backpressure on putters. If - unspecified or None, the queue is unbuffered. - """ +class Queue(object): + """An unbounded, thread-safe asynchronous queue.""" - __slots__ = ('_getters', '_putters', '_surplus', 'maxsize') + __slots__ = ('_get', '_put', '_lock') # How this works: # - # Reads: - # - Check if we have a value sitting in surplus. If yes, use that. - # Otherwise, - # - Check if we have a putter waiting to provide a value. If yes, use - # that. Otherwise, - # - Store the future in getters for later. + # _get and _put are futures maintaining pointers to a linked list of + # futures. The linked list is implemented as Node objects holding the + # value and the next future. + # + # Node + # +---+---+ +---+---+ E: Empty future + # | 1 | F-|-->| 2 | E | F: Filled future + # +---+---+ +---+---+ + # ^ ^ + # +---+ | +---+ | + # | F-|-+ | F-|-+ + # +---+ +---+ + # _get _put + # + # When there's a put, we fill the current empty future with a Node + # containing the value and a pointer to the next, newly created empty + # future. + # + # +---+---+ +---+---+ +---+---+ + # | 1 | F-|-->| 2 | F-|-->| 3 | E | + # +---+---+ +---+---+ +---+---+ + # ^ ^ + # +---+ | +---+ | + # | F-|-+ | F-|-+ + # +---+ +---+ + # _get _put # - # Writes: - # - Check if we have a future waiting for a value in getters. If yes, use - # that. Othrewise, - # - Check if we have room in surplus. If yes, use that. Otherwise, - # - Store the value and future in putters for later. + # When there's a get, we read the value from the current Node, and move + # _get to the next future. # - # Invariants: - # - Either getters is empty or both, surplus and putters are empty. - # - If putters is non-empty, surplus is maxsize (which is more than 0). + # +---+---+ +---+---+ + # | 2 | F-|-->| 3 | E | + # +---+---+ +---+---+ + # ^ ^ + # +---+ | +---+ | + # | F-|-+ | F-|-+ + # +---+ +---+ + # _get _put + + def __init__(self): + self._lock = threading.Lock() + + # Space for the next Node. + hole = Future() + + # Pointer to the Future that will contain the next Node. + self._get = Future() + self._get.set_result(hole) + + # Pointer to the next empty Future that should be filled with a Node. + self._put = Future() + self._put.set_result(hole) + + def put(self, value): + """Puts an item into the queue. - def __init__(self, maxsize=None): - if maxsize is None: - maxsize = 0 - self.maxsize = maxsize + Returns a Future that resolves to None once the value has been + accepted by the queue. + """ + io_loop = IOLoop.current() + new_hole = Future() - # collection of futures waiting for values - self._getters = deque() + new_put = Future() + new_put.set_result(new_hole) - # collection of (value, future) pairs waiting to put values. - self._putters = deque() + with self._lock: + self._put, put = new_put, self._put - # collection of values that have not yet been consumed - self._surplus = deque() + answer = Future() - def terminate(self, exc): - """Terminate all outstanding get requests with the given exception. + def _on_put(future): + if future.exception(): # pragma: no cover (never happens) + return answer.set_exc_info(future.exc_info()) - :param exc: - An exception or an exc_info triple. - """ - if isinstance(exc, tuple): - fail = (lambda f: f.set_exc_info(exc)) - else: - fail = (lambda f: f.set_exception(exc)) - - while self._putters: - _, future = self._putters.popleft() - if future.running(): - fail(future) - - while self._getters: - future = self._getters.popleft() - if future.running(): - fail(future) - - def __receive_put(self): - """Receive a value from a waiting putter.""" - while self._putters: - value, future = self._putters.popleft() - if future.running(): - self._surplus.append(value) - future.set_result(None) - return - - def get(self, timeout=None): - """Get the next item from the queue. - - Returns a future that resolves to the next item. - - :param timeout: - If set, the future will resolve to a TimeoutError if a value is - not received within the given time. The value for ``timeout`` may - be anything accepted by ``IOLoop.add_timeout`` (a ``timedelta`` or - an **absolute** time relative to ``IOLoop.time``). - """ - self.__receive_put() + old_hole = put.result() + old_hole.set_result(Node(value, new_hole)) + answer.set_result(None) - answer = Future() - if self._surplus: - answer.set_result(self._surplus.popleft()) - return answer - - # Wait for a value - if timeout is not None: - _add_timeout(timeout, answer) - self._getters.append(answer) + io_loop.add_future(put, _on_put) return answer def get_nowait(self): @@ -139,57 +128,50 @@ def get_nowait(self): Raises ``QueueEmpty`` if no values are available right now. """ - self.__receive_put() + new_get = Future() - if self._surplus: - return self._surplus.popleft() - raise QueueEmpty() + with self._lock: + if not self._get.done(): + raise QueueEmpty + get, self._get = self._get, new_get - def put(self, value, timeout=None): - """Puts an item into the queue. + hole = get.result() + if not hole.done(): + # Restore the unfinished hole. + new_get.set_result(hole) + raise QueueEmpty - Returns a future that resolves to None once the value has been - accepted by the queue. + value, new_hole = hole.result() + new_get.set_result(new_hole) + return value - The value is accepted immediately if there is room in the queue or - maxsize was not specified. + def get(self): + """Gets the next item from the queue. - :param timeout: - If set, the future will resolve to a TimeoutError if a value is - not accepted within the given time. The value for ``timeout`` may - be anything accepted by ``IOLoop.add_timeout`` (a ``timedelta`` or - an **absolute** time relative to ``IOLoop.time``). + Returns a Future that resolves to the next item once it is available. """ + io_loop = IOLoop.current() + new_get = Future() + + with self._lock: + get, self._get = self._get, new_get answer = Future() - # If there's a getter waiting, send it the result. - while self._getters: - future = self._getters.popleft() - if future.running(): - future.set_result(value) - answer.set_result(None) - return answer - - # We have room. Put the value into surplus. - if self.maxsize < 1 or len(self._surplus) < self.maxsize: - self._surplus.append(value) - answer.set_result(None) - return answer + def _on_node(future): + if future.exception(): # pragma: no cover (never happens) + return answer.set_exc_info(future.exc_info()) - # Wait until there is room. - if timeout is not None: - _add_timeout(timeout, answer) - self._putters.append((value, answer)) - return answer + value, new_hole = future.result() + new_get.set_result(new_hole) + answer.set_result(value) + def _on_get(future): + if future.exception(): # pragma: no cover (never happens) + return answer.set_exc_info(future.exc_info()) -def _add_timeout(timeout, future): - io_loop = IOLoop.current() + hole = future.result() + io_loop.add_future(hole, _on_node) - def on_timeout(): - if future.running(): - future.set_exception(TimeoutError("timed out")) - - t = io_loop.add_timeout(timeout, on_timeout) - future.add_done_callback(lambda _: io_loop.remove_timeout(t)) + io_loop.add_future(get, _on_get) + return answer diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index 867458b0..1ef9bda3 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -645,7 +645,7 @@ class Reader(object): def __init__(self, io_stream, capacity=None): capacity = capacity or 64 - self.queue = queues.Queue(capacity) + self.queue = queues.Queue() self.filling = False self.io_stream = io_stream @@ -701,7 +701,7 @@ class Writer(object): def __init__(self, io_stream, capacity=None): capacity = capacity or 64 - self.queue = queues.Queue(capacity) + self.queue = queues.Queue() self.draining = False self.io_stream = io_stream # Tracks message IDs for this connection. diff --git a/tests/test_queue.py b/tests/test_queue.py index b81cae90..ed0d156b 100644 --- a/tests/test_queue.py +++ b/tests/test_queue.py @@ -22,18 +22,13 @@ absolute_import, unicode_literals, division, print_function ) -import sys import pytest -from datetime import timedelta -from six.moves import range +import threading +from collections import deque, defaultdict from tornado import gen +from tornado.ioloop import IOLoop -from tchannel._queue import Queue, QueueEmpty, TimeoutError - - -def assert_invariant(q): - assert not (q._getters and (q._surplus or q._putters)) - assert (not q._putters or len(q._surplus) == q.maxsize) +from tchannel._queue import Queue, QueueEmpty @pytest.fixture @@ -42,241 +37,176 @@ def items(): @pytest.mark.gen_test -def test_unbounded_put_then_get(items): +def test_put_then_get(items): queue = Queue() - assert_invariant(queue) for item in items: yield queue.put(item) - assert_invariant(queue) got_futures = [] for i in range(len(items)): got_futures.append(queue.get()) - assert_invariant(queue) got = yield got_futures assert got == items - with pytest.raises(TimeoutError): - yield queue.get(timedelta(seconds=0.1)) - - -@pytest.mark.gen_test -def test_bounded_put_then_get(items): - queue = Queue(5) - assert_invariant(queue) - - for item in items[:5]: - yield queue.put(item) - assert_invariant(queue) - - put_futures = [] - for item in items[5:]: - put_futures.append(queue.put(item)) - - got = [] - for i in range(len(items)): - item = yield queue.get() - assert_invariant(queue) - got.append(item) - - assert got == items - yield put_futures - @pytest.mark.gen_test -def test_bounded_put_timeout_then_get(items): - queue = Queue(1) - assert_invariant(queue) - - yield queue.put(items[0]) - - put_futures = [] - for item in range(len(items)): - put_futures.append(queue.put(item, timedelta(seconds=0.001))) - - yield gen.sleep(0.01) - - get_futures = [] - for item in range(len(items)): - get_futures.append(queue.get()) - - for future in put_futures: - with pytest.raises(TimeoutError): - yield future - - for item in items[1:]: - yield queue.put(item) - - got = yield get_futures - assert got == items - - -@pytest.mark.gen_test -def test_unbounded_put_then_get_nowait(items): +def test_put_then_get_nowait(items): queue = Queue() for item in items: yield queue.put(item) - assert_invariant(queue) got = [] for i in range(len(items)): got.append(queue.get_nowait()) - assert_invariant(queue) assert got == items with pytest.raises(QueueEmpty): queue.get_nowait() + future = queue.get() + yield queue.put(42) + assert 42 == (yield future) + @pytest.mark.gen_test -def test_unbounded_get_then_put(items): +def test_get_then_put(items): queue = Queue() got_futures = [] for i in range(len(items)): got_futures.append(queue.get()) - assert_invariant(queue) for item in items: yield queue.put(item) - assert_invariant(queue) got = yield got_futures assert got == items @pytest.mark.gen_test -def test_unbounded_expired_gets_then_get_first(): - queue = Queue() +@pytest.mark.concurrency_test +def test_concurrent_producers_single_consumer(): + num_threads = 1000 + num_items = 10 + q = Queue() - expired_gets = [] - for i in range(99): - expired_gets.append(queue.get(timedelta(seconds=0.001))) - assert_invariant(queue) + def producer(i): - yield gen.sleep(0.01) # 99 * 0.001 = 0.099 + @gen.coroutine + def run(): + for j in range(num_items): + yield q.put((i, j)) - # get the value before putting it - real_get = queue.get() - yield queue.put(42) - assert 42 == (yield real_get) - assert_invariant(queue) + IOLoop(make_current=False).run_sync(run) - for future in expired_gets: - with pytest.raises(TimeoutError): - yield future + producers = [ + threading.Thread(target=producer, args=(i,)) + for i in range(num_threads) + ] + for p in producers: + p.start() -@pytest.mark.gen_test -def test_unbounded_expired_gets_then_put_first(): - queue = Queue() + items = defaultdict(lambda: []) + for x in range(num_items * num_threads): + i, j = yield q.get() + items[i].append(j) - expired_gets = [] - for i in range(99): - expired_gets.append(queue.get(timedelta(seconds=0.001))) - assert_invariant(queue) + for v in items.values(): + assert v == list(range(num_items)) - yield gen.sleep(0.01) + for p in producers: + p.join() - # put the value first, get afterwards - yield queue.put(42) - assert 42 == (yield queue.get()) - assert_invariant(queue) - for future in expired_gets: - with pytest.raises(TimeoutError): - yield future +@pytest.mark.gen_test +@pytest.mark.concurrency_test +def test_concurrent_consumers_single_producer(): + num_threads = 1000 + num_items = 10 + q = Queue() + items = deque() -@pytest.mark.gen_test -def test_terminate_get_with_exception(): + def consumer(): - class GreatSadness(Exception): - pass + @gen.coroutine + def run(): + for i in range(num_items): + x = yield q.get() + items.append(x) - queue = Queue() - expired_gets = [] - terminated_gets = [] + IOLoop(make_current=False).run_sync(run) - flag = True - for i in range(99): - if flag: - expired_gets.append(queue.get(timedelta(seconds=0.001))) - else: - terminated_gets.append(queue.get(timedelta(seconds=1))) - flag = not flag + consumers = [ + threading.Thread(target=consumer) for i in range(num_threads) + ] - yield gen.sleep(0.01) + for c in consumers: + c.start() - queue.terminate(GreatSadness()) + for x in range(num_items * num_threads): + yield q.put(x) - for future in expired_gets: - with pytest.raises(TimeoutError): - yield future + for c in consumers: + c.join() - for future in terminated_gets: - with pytest.raises(GreatSadness): - yield future + assert len(items) == num_items * num_threads + assert set(items) == set(range(num_items * num_threads)) + # Order for concurrent consumers cannot be guaranteed because we don't + # know in which order consumers will wake up. The only guarantee we have + # is that none of the items were lost. @pytest.mark.gen_test -def test_terminate_put_with_exception(): - - class GreatSadness(Exception): - pass +@pytest.mark.concurrency_test +def test_concurrent_producers_and_consumers(): + num_threads = 1000 + num_items = 10 - queue = Queue(10) - expired_puts = [] - terminated_puts = [] + q = Queue() + items = defaultdict(lambda: []) - for i in range(10): - yield queue.put(i) + def producer(i): - flag = True - for i in range(99): - if flag: - expired_puts.append(queue.put(i, timedelta(seconds=0.001))) - else: - terminated_puts.append(queue.put(i, timedelta(seconds=1))) - flag = not flag + @gen.coroutine + def run(): + for j in range(num_items): + yield q.put((i, j)) - yield gen.sleep(0.01) + IOLoop(make_current=False).run_sync(run) - try: - raise GreatSadness() - except GreatSadness: - queue.terminate(sys.exc_info()) - else: - assert False, "expected failure" + def consumer(): - for future in expired_puts: - with pytest.raises(TimeoutError): - yield future + @gen.coroutine + def run(): + for x in range(num_items): + i, j = yield q.get() + items[i].append(j) - for future in terminated_puts: - with pytest.raises(GreatSadness): - yield future + IOLoop(make_current=False).run_sync(run) + num_producers = int(num_threads / 2) + threads = [ + threading.Thread(target=producer, args=(i,)) + for i in range(num_producers) + ] + [ + threading.Thread(target=consumer) + for i in range(num_threads - num_producers) + ] -@pytest.mark.gen_test -def test_get_timeout_too_late(): - queue = Queue() - future = queue.get(timedelta(seconds=0.01)) - yield queue.put(42) - yield gen.sleep(0.01) - assert 42 == (yield future) + for t in threads: + t.start() + for t in threads: + t.join() -@pytest.mark.gen_test -def test_put_timeout_too_late(): - queue = Queue(1) - yield queue.put(1) - future = queue.put(2, timedelta(seconds=0.01)) - assert (yield queue.get()) == 1 - assert (yield queue.get()) == 2 - yield gen.sleep(0.01) - yield future + for i in range(num_producers): + assert len(items[i]) == num_items + assert set(items[i]) == set(range(num_items)) + # As with the previous concurrent consumers case, we cannot guarantee + # ordering, only that the items are all present. From 3f0ea8a4c68d3feab352a05e867328b9a4bf33e3 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Thu, 17 Mar 2016 15:18:27 -0700 Subject: [PATCH 52/67] Lower number of threads for queue concurrency test This allows it to run in less than a second --- tests/test_queue.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_queue.py b/tests/test_queue.py index ed0d156b..76503ec6 100644 --- a/tests/test_queue.py +++ b/tests/test_queue.py @@ -90,7 +90,7 @@ def test_get_then_put(items): @pytest.mark.gen_test @pytest.mark.concurrency_test def test_concurrent_producers_single_consumer(): - num_threads = 1000 + num_threads = 100 num_items = 10 q = Queue() @@ -126,7 +126,7 @@ def run(): @pytest.mark.gen_test @pytest.mark.concurrency_test def test_concurrent_consumers_single_producer(): - num_threads = 1000 + num_threads = 100 num_items = 10 q = Queue() @@ -165,7 +165,7 @@ def run(): @pytest.mark.gen_test @pytest.mark.concurrency_test def test_concurrent_producers_and_consumers(): - num_threads = 1000 + num_threads = 100 num_items = 10 q = Queue() From 218d167448bd958f8fe138683363bfb3e0608a62 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Fri, 18 Mar 2016 13:01:53 -0700 Subject: [PATCH 53/67] Lower "cannot register on sync TChannel" to info. --- CHANGES.rst | 3 ++- tchannel/sync/client.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index b15c426e..218bc1d3 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,7 +4,8 @@ Changes by Version 0.21.11 (unreleased) -------------------- -- Nothing changed yet. +- Attempting to register endpoints against a synchronous TChannel now logs an + INFO level message. 0.21.10 (2016-03-17) diff --git a/tchannel/sync/client.py b/tchannel/sync/client.py index c65b6ad3..b4117a86 100644 --- a/tchannel/sync/client.py +++ b/tchannel/sync/client.py @@ -141,7 +141,7 @@ def register(self, *args, **kwargs): def _register(*args, **kwargs): - log.warning("Registration not yet supported for sync tchannel.") + log.info("Registration not yet supported for sync tchannel.") def decorator(fn): return fn From baa3dec0272ea17d92203c27e9e52017b8b69a2d Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Wed, 23 Mar 2016 15:15:07 -0700 Subject: [PATCH 54/67] Calls should reraise with the origianl traceback when retries run out --- tchannel/tornado/peer.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/tchannel/tornado/peer.py b/tchannel/tornado/peer.py index 71323957..c6a6c1be 100644 --- a/tchannel/tornado/peer.py +++ b/tchannel/tornado/peer.py @@ -22,6 +22,7 @@ absolute_import, division, print_function, unicode_literals ) +import sys import logging import random from collections import deque @@ -431,19 +432,23 @@ def send_with_retry(self, request, peer, retry_limit, connection): try: response = yield self._send(connection, request) raise gen.Return(response) - except TChannelError as error: - blacklist.add(peer.hostport) - (peer, connection) = yield self._prepare_for_retry( - request=request, - connection=connection, - protocol_error=error, - blacklist=blacklist, - num_of_attempt=num_of_attempt, - max_retry_limit=retry_limit, - ) - - if not connection: - raise error + except TChannelError: + (typ, error, tb) = sys.exc_info() + try: + blacklist.add(peer.hostport) + (peer, connection) = yield self._prepare_for_retry( + request=request, + connection=connection, + protocol_error=error, + blacklist=blacklist, + num_of_attempt=num_of_attempt, + max_retry_limit=retry_limit, + ) + + if not connection: + raise typ, error, tb + finally: + del tb # for GC @gen.coroutine def _prepare_for_retry( From c0f62f5aec94f04b64e717a8b2c69b71674e2f3c Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Thu, 24 Mar 2016 08:22:28 -0700 Subject: [PATCH 55/67] improve retry_limit docs --- tchannel/schemes/json.py | 11 +++++++++-- tchannel/schemes/raw.py | 11 +++++++++-- tchannel/schemes/thrift.py | 11 +++++++++-- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/tchannel/schemes/json.py b/tchannel/schemes/json.py index 51d7bad0..f957b64d 100644 --- a/tchannel/schemes/json.py +++ b/tchannel/schemes/json.py @@ -82,8 +82,15 @@ def __call__( :param string retry_on: What events to retry on - valid values can be found in ``tchannel.retry``. - :param string retry_limit: - How many times to retry before + :param int retry_limit: + How many attempts should be made (in addition to the initial + attempt) to re-send this request when retryable error conditions + (specified by ``retry_on``) are encountered. + + Defaults to ``tchannel.retry.DEFAULT_RETRY_LIMIT`` (4). + + Note that the maximum possible time elapsed for a request is thus + ``(retry_limit + 1) * timeout``. :param string hostport: A 'host:port' value to use when making a request directly to a TChannel service, bypassing Hyperbahn. diff --git a/tchannel/schemes/raw.py b/tchannel/schemes/raw.py index ffdb12b0..f49113af 100644 --- a/tchannel/schemes/raw.py +++ b/tchannel/schemes/raw.py @@ -80,8 +80,15 @@ def __call__( :param string retry_on: What events to retry on - valid values can be found in ``tchannel.retry``. - :param string retry_limit: - How many times to retry before + :param int retry_limit: + How many attempts should be made (in addition to the initial + attempt) to re-send this request when retryable error conditions + (specified by ``retry_on``) are encountered. + + Defaults to ``tchannel.retry.DEFAULT_RETRY_LIMIT`` (4). + + Note that the maximum possible time elapsed for a request is thus + ``(retry_limit + 1) * timeout``. :param string hostport: A 'host:port' value to use when making a request directly to a TChannel service, bypassing Hyperbahn. diff --git a/tchannel/schemes/thrift.py b/tchannel/schemes/thrift.py index c1fbdf32..5b11df0b 100644 --- a/tchannel/schemes/thrift.py +++ b/tchannel/schemes/thrift.py @@ -102,8 +102,15 @@ def __call__( :param string retry_on: What events to retry on - valid values can be found in ``tchannel.retry``. - :param string retry_limit: - How many times to retry before + :param int retry_limit: + How many attempts should be made (in addition to the initial + attempt) to re-send this request when retryable error conditions + (specified by ``retry_on``) are encountered. + + Defaults to ``tchannel.retry.DEFAULT_RETRY_LIMIT`` (4). + + Note that the maximum possible time elapsed for a request is thus + ``(retry_limit + 1) * timeout``. :param routing_delegate: Name of a service to which the request router should forward the request instead of the service specified in the call req. From 00f4a0692cba04096bce9e659a9fb663d9e4bd90 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Wed, 23 Mar 2016 15:15:07 -0700 Subject: [PATCH 56/67] Calls should reraise with the origianl traceback when retries run out --- tchannel/tornado/peer.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/tchannel/tornado/peer.py b/tchannel/tornado/peer.py index 71323957..c6a6c1be 100644 --- a/tchannel/tornado/peer.py +++ b/tchannel/tornado/peer.py @@ -22,6 +22,7 @@ absolute_import, division, print_function, unicode_literals ) +import sys import logging import random from collections import deque @@ -431,19 +432,23 @@ def send_with_retry(self, request, peer, retry_limit, connection): try: response = yield self._send(connection, request) raise gen.Return(response) - except TChannelError as error: - blacklist.add(peer.hostport) - (peer, connection) = yield self._prepare_for_retry( - request=request, - connection=connection, - protocol_error=error, - blacklist=blacklist, - num_of_attempt=num_of_attempt, - max_retry_limit=retry_limit, - ) - - if not connection: - raise error + except TChannelError: + (typ, error, tb) = sys.exc_info() + try: + blacklist.add(peer.hostport) + (peer, connection) = yield self._prepare_for_retry( + request=request, + connection=connection, + protocol_error=error, + blacklist=blacklist, + num_of_attempt=num_of_attempt, + max_retry_limit=retry_limit, + ) + + if not connection: + raise typ, error, tb + finally: + del tb # for GC @gen.coroutine def _prepare_for_retry( From dadf4fb33bf38c3152fd654208689ce8fedf4b64 Mon Sep 17 00:00:00 2001 From: junchaowu Date: Thu, 3 Dec 2015 15:56:37 -0800 Subject: [PATCH 57/67] Cherry-pick #320 Original message: Merge pull request #320 from uber/pending Add list to track outbound pending req/res --- tchannel/tornado/connection.py | 44 +++++++++++++++++++++--------- tchannel/tornado/peer.py | 9 ++++++ tests/tornado/test_connection.py | 47 ++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 13 deletions(-) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index 1ef9bda3..7854e281 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -122,7 +122,13 @@ def __init__(self, connection, tchannel=None, direction=None): self._messages = queues.Queue() # Map from message ID to futures for responses of outgoing calls. - self._outstanding = {} + self._outbound_pending_call = {} + + # Map from message ID to outgoing request that is being sent. + self._outbound_pending_req = {} + + # Map from message ID to outgoing response that is being sent. + self._outbound_pending_res = {} # Collection of request IDs known to have timed out. self._request_tombstones = Cemetery() @@ -139,6 +145,13 @@ def __init__(self, connection, tchannel=None, direction=None): connection.set_close_callback(self._on_close) + @property + def total_outbound_pendings(self): + """Returns the total number of pending outbound requests and + responses.""" + return (len(self._outbound_pending_res) + + len(self._outbound_pending_req)) + def set_close_callback(self, cb): """Specify a function to be called when this connection is closed. @@ -155,13 +168,13 @@ def _on_close(self): self.closed = True self._request_tombstones.clear() - for message_id, future in self._outstanding.iteritems(): + for message_id, future in self._outbound_pending_call.iteritems(): future.set_exception( NetworkError( "canceling outstanding request %d" % message_id ) ) - self._outstanding = {} + self._outbound_pending_call = {} try: while True: @@ -202,10 +215,10 @@ def _loop(self): self._messages.put(message) continue - elif message.id in self._outstanding: + elif message.id in self._outbound_pending_call: # set exception if receive error message if message.message_type == Types.ERROR: - future = self._outstanding.pop(message.id) + future = self._outbound_pending_call.pop(message.id) if future.running(): error = TChannelError.from_code( message.code, @@ -232,9 +245,9 @@ def _loop(self): if (message.message_type in self.CALL_RES_TYPES and message.flags == FlagsType.fragment): # still streaming, keep it for record - future = self._outstanding.get(message.id) + future = self._outbound_pending_call.get(message.id) else: - future = self._outstanding.pop(message.id) + future = self._outbound_pending_call.pop(message.id) if response and future.running(): future.set_result(response) @@ -264,12 +277,12 @@ def send(self, message): ) message.id = message.id or self.writer.next_message_id() - assert message.id not in self._outstanding, ( + assert message.id not in self._outbound_pending_call, ( "Message ID '%d' already being used" % message.id ) future = tornado.gen.Future() - self._outstanding[message.id] = future + self._outbound_pending_call[message.id] = future self.write(message) return future @@ -544,6 +557,7 @@ def _stream(self, context, message_factory): @tornado.gen.coroutine def post_response(self, response): try: + self._outbound_pending_res[response.id] = response # TODO: before_send_response yield self._stream(response, self.response_message_factory) @@ -553,6 +567,7 @@ def post_response(self, response): response, ) finally: + self._outbound_pending_res.pop(response.id, None) response.close_argstreams(force=True) def stream_request(self, request): @@ -579,13 +594,16 @@ def send_request(self, request): """ assert self._loop_running, "Perform a handshake first." - assert request.id not in self._outstanding, ( + assert request.id not in self._outbound_pending_call, ( "Message ID '%d' already being used" % request.id ) future = tornado.gen.Future() - self._outstanding[request.id] = future - self.stream_request(request) + self._outbound_pending_call[request.id] = future + self._outbound_pending_req[request.id] = request + self.stream_request(request).add_done_callback( + lambda f: self._outbound_pending_req.pop(request.id, None) + ) if request.ttl: self._add_timeout(request, future) @@ -616,7 +634,7 @@ def adapt_result(self, f, request, response_future): def remove_outstanding_request(self, request): """Remove request from pending request list""" - self._outstanding.pop(request.id, None) + self._outbound_pending_call.pop(request.id, None) def _add_timeout(self, request, future): """Adds a timeout for the given request to the given future.""" diff --git a/tchannel/tornado/peer.py b/tchannel/tornado/peer.py index c6a6c1be..9dc70ac8 100644 --- a/tchannel/tornado/peer.py +++ b/tchannel/tornado/peer.py @@ -177,6 +177,15 @@ def incoming_connections(self): takewhile(lambda c: c.direction == INCOMING, self._connections) ) + @property + def total_outbound_pendings(self): + """Return the total number of out pending req/res among connections""" + num = 0 + for con in self.connections: + num += con.total_outbound_pendings + + return num + @property def is_ephemeral(self): """Whether this Peer is ephemeral.""" diff --git a/tests/tornado/test_connection.py b/tests/tornado/test_connection.py index c1cc69c8..f53aa533 100644 --- a/tests/tornado/test_connection.py +++ b/tests/tornado/test_connection.py @@ -31,6 +31,7 @@ from tchannel import messages from tchannel.errors import TimeoutError, ReadError from tchannel.tornado import connection +from tchannel.tornado.message_factory import MessageFactory def dummy_headers(): @@ -149,6 +150,52 @@ def test_other_error_on_read(tornado_pair): assert mock_log.info.call_count == 0 +@pytest.mark.gen_test +def test_pending_outgoing(): + server = TChannel('server') + server.listen() + + @server.raw.register + def hello(request): + assert server._dep_tchannel.peers.peers[0].total_outbound_pendings == 1 + return 'hi' + + client = TChannel('client') + yield client.raw( + hostport=server.hostport, + body='work', + endpoint='hello', + service='server' + ) + + client_peer = client._dep_tchannel.peers.peers[0] + server_peer = server._dep_tchannel.peers.peers[0] + assert client_peer.total_outbound_pendings == 0 + assert server_peer.total_outbound_pendings == 0 + + class FakeMessageFactory(MessageFactory): + def build_raw_message(self, context, args, is_completed=True): + assert client_peer.total_outbound_pendings == 1 + return super(FakeMessageFactory, self).build_raw_message( + context, args, is_completed, + ) + + client_conn = client_peer.connections[0] + client_conn.request_message_factory = FakeMessageFactory( + client_conn.remote_host, + client_conn.remote_host_port, + ) + yield client.raw( + hostport=server.hostport, + body='work', + endpoint='hello', + service='server' + ) + + assert client_peer.total_outbound_pendings == 0 + assert server_peer.total_outbound_pendings == 0 + + @pytest.mark.gen_test def test_writer_write_error(): server, client = socket.socketpair() From ad1a60acc50fd953aa74504363a8c9f45e6954cb Mon Sep 17 00:00:00 2001 From: junchaowu Date: Thu, 21 Jan 2016 15:15:25 -0800 Subject: [PATCH 58/67] Cherry-pick heap-based peer selection (#355). Original message: Merge pull request #355 from uber/heap Heap selection branch merged into mash develop branch. --- benchmarks/test_concurrent_requests.py | 99 +++++++++++++ tchannel/container/__init__.py | 0 tchannel/container/heap.py | 179 +++++++++++++++++++++++ tchannel/peer_heap.py | 157 ++++++++++++++++++++ tchannel/peer_strategy.py | 60 ++++++++ tchannel/tornado/connection.py | 38 ++++- tchannel/tornado/dispatch.py | 13 +- tchannel/tornado/peer.py | 110 ++++++++++---- tchannel/tornado/tchannel.py | 4 +- tests/conftest.py | 6 + tests/container/test_heap.py | 187 ++++++++++++++++++++++++ tests/test_event.py | 2 +- tests/test_peer_heap.py | 192 +++++++++++++++++++++++++ tests/test_peer_strategy.py | 78 ++++++++++ tests/tornado/test_connection.py | 50 +++++++ tests/tornado/test_peer.py | 124 +++++++++++++++- 16 files changed, 1262 insertions(+), 37 deletions(-) create mode 100644 benchmarks/test_concurrent_requests.py create mode 100644 tchannel/container/__init__.py create mode 100644 tchannel/container/heap.py create mode 100644 tchannel/peer_heap.py create mode 100644 tchannel/peer_strategy.py create mode 100644 tests/container/test_heap.py create mode 100644 tests/test_peer_heap.py create mode 100644 tests/test_peer_strategy.py diff --git a/benchmarks/test_concurrent_requests.py b/benchmarks/test_concurrent_requests.py new file mode 100644 index 00000000..09898702 --- /dev/null +++ b/benchmarks/test_concurrent_requests.py @@ -0,0 +1,99 @@ +# Copyright (c) 2015 Uber Technologies, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +from __future__ import ( + absolute_import, unicode_literals, print_function, division +) + +from tornado import gen, ioloop +from tchannel import TChannel + + +servers = None +client = None + + +def setUpServer(num): + servers = [] + for i in xrange(num): + server = TChannel('server' + str(i)) + + @server.raw.register + def hello(request): + return 'hello' + server.listen() + servers.append(server) + + return servers + + +@gen.coroutine +def setUpClient(servers): + router = [] + for i in xrange(1000): + router.append('1.1.1.1:'+str(i)) + client = TChannel('client', known_peers=router) + + @client.raw.register + def hello(request): + return 'hello' + + client.listen() + + for i in xrange(100): + server = servers[i] + yield server.raw( + service='server', + endpoint='hello', + body='hi', + hostport=client.hostport, + ) + + raise gen.Return(client) + + +@gen.coroutine +def peer_test(): + global servers, client + fs = [] + for _ in xrange(100): + fs.append(client.raw( + service='server', + endpoint='hello', + body='hi', + timeout=1000 + )) + yield fs + + +def stress_test(): + ioloop.IOLoop.current().run_sync(peer_test) + + +@gen.coroutine +def setup(): + global servers, client + servers = setUpServer(100) + client = yield setUpClient(servers) + + +def test_peer_heap(benchmark): + ioloop.IOLoop.current().run_sync(setup) + benchmark(stress_test) diff --git a/tchannel/container/__init__.py b/tchannel/container/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tchannel/container/heap.py b/tchannel/container/heap.py new file mode 100644 index 00000000..5eeb96ac --- /dev/null +++ b/tchannel/container/heap.py @@ -0,0 +1,179 @@ +# Copyright (c) 2015 Uber Technologies, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +from __future__ import absolute_import +from __future__ import division + +import math +import six +from collections import deque + + +class NoMatchError(Exception): + pass + + +class HeapOperation(object): + """HeapOperation defines the interface on how to manipulate the heap. + Any extension of heap class should implement all the heap operations in + order to have complete heap functions. + """ + + def lt(self, i, j): + """Compare the items in position i and j of the heap. + + :param i: the first item's position of the heap list + :param j: the second item's position of the heap list + :return true if items[i] < items[j], otherwise false. + """ + raise NotImplementedError() + + def push(self, x): + """Push a new item into heap""" + raise NotImplementedError() + + def pop(self): + """Pop an item from the heap""" + raise NotImplementedError() + + def peek(self, i): + """Peek at the item at the given position without removing it from the + heap. + + :param i: + 0-indexed position of the iteam in the heap + """ + raise NotImplementedError() + + def swap(self, i, j): + """swap items between position i and j of the heap""" + raise NotImplementedError() + + def size(self): + """Return length of the heap.""" + raise NotImplementedError() + + +def init(h): + """Initialize existing object into the heap.""" + # heapify + n = h.size() + for i in six.moves.range(int(math.floor(n/2)) - 1, -1, -1): + down(h, i, n) + + +def push(h, x): + """Push a new value into heap.""" + h.push(x) + up(h, h.size()-1) + + +def pop(h): + """Pop the heap value from the heap.""" + n = h.size() - 1 + h.swap(0, n) + down(h, 0, n) + return h.pop() + + +def remove(h, i): + """Remove the item at position i of the heap.""" + n = h.size() - 1 + if n != i: + h.swap(i, n) + down(h, i, n) + up(h, i) + + return h.pop() + + +def fix(h, i): + """Rearrange the heap after the item at position i got updated.""" + down(h, i, h.size()) + up(h, i) + + +def up(h, child): + while child > 0: + parent = int(math.floor((child - 1) / 2)) + if not h.lt(child, parent): + break + + h.swap(parent, child) + child = parent + + +def down(h, parent, n): + while True: + child1 = 2 * parent + 1 + if child1 >= n or child1 < 0: + break + + min_child = child1 + child2 = child1 + 1 + if child2 < n and not h.lt(child1, child2): + min_child = child2 + + if not h.lt(min_child, parent): + break + + h.swap(parent, min_child) + parent = min_child + + +def smallest(heap, predicate): + """Finds the index of the smallest item in the heap that matches the given + predicate. + + :param heap: + Heap on which this search is being performed. + :param predicate: + Function that accepts an item from the heap and returns true or false. + :returns: + Index of the first item for which ``predicate`` returned true. + :raises NoMatchError: + If no matching items were found. + """ + n = heap.size() + + # items contains indexes of items yet to be checked. + items = deque([0]) + while items: + current = items.popleft() + if current >= n: + continue + + if predicate(heap.peek(current)): + return current + + child1 = 2 * current + 1 + child2 = child1 + 1 + + if child1 < n and child2 < n and heap.lt(child2, child1): + # make sure we check the smaller child first. + child1, child2 = child2, child1 + + if child1 < n: + items.append(child1) + + if child2 < n: + items.append(child2) + + raise NoMatchError() diff --git a/tchannel/peer_heap.py b/tchannel/peer_heap.py new file mode 100644 index 00000000..34152c7d --- /dev/null +++ b/tchannel/peer_heap.py @@ -0,0 +1,157 @@ +# Copyright (c) 2015 Uber Technologies, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +from __future__ import absolute_import + +import random + +from .container import heap +from .container.heap import HeapOperation +from .container.heap import NoMatchError +from .container.heap import smallest + + +class PeerHeap(HeapOperation): + """PeerHeap maintains a min-heap of peers based on their ranks. + + Peer in the heap will be arranged based on the peer's rank and peer's + order. Order is equal to peer heap's current order number plus random value + within the current peer size. It solves two problems when peers are in + same rank: + + Dead peers: If the order is completely random, then an unlucky peer + with a very bad assigned order may never get selected. + + Determinism: If the insertion order is used as-is, then all TChannel + instances would follow the same selection pattern, causing load + imbalance. For example if they get the same static list of peers, they + will all pick the first one, then the second one, and so on, cycling + between which host gets overloaded with requests. + + All in all, it will keep certain level randomization but at the same + time make the peer rank not deterministic among different tchannel + instances. + + """ + + __slots__ = ('peers',) + + def __init__(self): + self.peers = [] + self.order = 0 + + def size(self): + return len(self.peers) + + def lt(self, i, j): + """Compare the priority of two peers. + + Primary comparator will be the rank of each peer. If the ``rank`` is + same then compare the ``order``. The ``order`` attribute of the peer + tracks the heap push order of the peer. This help solve the imbalance + problem caused by randomization when deal with same rank situation. + + :param i: ith peer + :param j: jth peer + :return: True or False + """ + if self.peers[i].rank == self.peers[j].rank: + return self.peers[i].order < self.peers[j].order + + return self.peers[i].rank < self.peers[j].rank + + def peek(self, i): + return self.peers[i] + + def push(self, x): + x.index = len(self.peers) + self.peers.append(x) + + def pop(self): + item = self.peers.pop() + return item + + def swap(self, i, j): + self.peers[i], self.peers[j] = self.peers[j], self.peers[i] + self.peers[i].index = i + self.peers[j].index = j + + def update_peer(self, peer): + """Update the peer's position in the heap after peer's rank changed""" + heap.fix(self, peer.index) + + def pop_peer(self): + """Pop the top peer from the heap + + :return + return the top peer and remove it from the heap if heap is not + empty.Otherwise return None. + """ + if not self.peers: + return None + return heap.pop(self) + + def push_peer(self, peer): + """Push a new peer into the heap""" + + self.order += 1 + peer.order = self.order + random.randint(0, self.size()) + heap.push(self, peer) + + def peek_peer(self): + """Return the top peer of the heap + :return + return the top peer if heap is not empty. Otherwise return None. + """ + + if not self.peers: + return None + + return self.peers[0] + + def remove_peer(self, peer): + """Remove the peer from the heap. + + Return: removed peer if peer exists. If peer's index is out of range, + raise IndexError. + """ + if peer.index < 0 or peer.index >= self.size(): + raise IndexError('Peer index is out of range') + + assert peer is self.peers[peer.index], "peer is not in the heap" + + return heap.remove(self, peer.index) + + def smallest_peer(self, predicate): + """Return the smallest peer in the heap that matches the given + predicate. + + :param predicate: + Function that accepts an item from the heap and returns + true or false. + :returns: + The first peer for which ``predicate`` returned true. + """ + try: + return self.peek( + smallest(self, predicate), + ) + except NoMatchError: + return None diff --git a/tchannel/peer_strategy.py b/tchannel/peer_strategy.py new file mode 100644 index 00000000..5e6042e7 --- /dev/null +++ b/tchannel/peer_strategy.py @@ -0,0 +1,60 @@ +# Copyright (c) 2015 Uber Technologies, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +from __future__ import absolute_import + +import sys + + +class RankCalculator(object): + """RankCalculator calculates the rank of a peer.""" + + def get_rank(self, peer): + raise NotImplementedError() + + +class PreferIncomingCalculator(RankCalculator): + + # TIERS lists three ranges for three different kinds of peers. + # 0: ephemeral peers or unconnected peers + # 1: peers with only outgoing connections + # 2: peers with incoming connections + TIERS = [sys.maxint, sys.maxint / 2, 0] + + def get_rank(self, peer): + """Calculate the peer rank based on connections. + + If the peer has no incoming connections, it will have largest rank. + In our peer selection strategy, the largest number has least priority + in the heap. + + If the peer has incoming connections, we will return number of outbound + pending requests and responses. + + :param peer: instance of `tchannel.tornado.peer.Peer` + :return: rank of the peer + """ + if peer.is_ephemeral or not peer.connections: + return self.TIERS[0] + + if not peer.incoming_connections: + return self.TIERS[1] + peer.total_outbound_pendings + + return self.TIERS[2] + peer.total_outbound_pendings diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index 7854e281..9383d13a 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -139,6 +139,9 @@ def __init__(self, connection, tchannel=None, direction=None): self.tchannel = tchannel self._close_cb = None + # callback that will be called when there is a change in the outbound + # pending request/response lists. + self._outbound_pending_change_cb = None self.reader = Reader(self.connection) self.writer = Writer(self.connection) @@ -152,6 +155,12 @@ def total_outbound_pendings(self): return (len(self._outbound_pending_res) + len(self._outbound_pending_req)) + def set_outbound_pending_change_callback(self, cb): + """Specify a function to be called when outbound pending request or + response list changed. + """ + self._outbound_pending_change_cb = cb + def set_close_callback(self, cb): """Specify a function to be called when this connection is closed. @@ -485,6 +494,27 @@ def ping(self): def pong(self): return self.writer.put(messages.PingResponseMessage()) + def _outbound_pending_change(self): + """Called when there is a change in the outbound pending list.""" + if self._outbound_pending_change_cb: + self._outbound_pending_change_cb() + + def add_outbound_pending_res(self, res): + self._outbound_pending_res[res.id] = res + self._outbound_pending_change() + + def add_outbound_pending_req(self, req): + self._outbound_pending_req[req.id] = req + self._outbound_pending_change() + + def remove_outbound_pending_res(self, res): + self._outbound_pending_res.pop(res.id, None) + self._outbound_pending_change() + + def remove_outbound_pending_req(self, req): + self._outbound_pending_req.pop(req.id, None) + self._outbound_pending_change() + class StreamConnection(TornadoConnection): """Streaming request/response into protocol messages and sent by tornado @@ -557,7 +587,7 @@ def _stream(self, context, message_factory): @tornado.gen.coroutine def post_response(self, response): try: - self._outbound_pending_res[response.id] = response + self.add_outbound_pending_res(response) # TODO: before_send_response yield self._stream(response, self.response_message_factory) @@ -567,7 +597,7 @@ def post_response(self, response): response, ) finally: - self._outbound_pending_res.pop(response.id, None) + self.remove_outbound_pending_res(response) response.close_argstreams(force=True) def stream_request(self, request): @@ -600,9 +630,9 @@ def send_request(self, request): future = tornado.gen.Future() self._outbound_pending_call[request.id] = future - self._outbound_pending_req[request.id] = request + self.add_outbound_pending_req(request) self.stream_request(request).add_done_callback( - lambda f: self._outbound_pending_req.pop(request.id, None) + lambda f: self.remove_outbound_pending_req(request) ) if request.ttl: diff --git a/tchannel/tornado/dispatch.py b/tchannel/tornado/dispatch.py index b578f48f..dc0fb3a3 100644 --- a/tchannel/tornado/dispatch.py +++ b/tchannel/tornado/dispatch.py @@ -138,10 +138,7 @@ def handle_call(self, request, connection): request.tracing.name = request.endpoint tchannel.event_emitter.fire(EventType.before_receive_request, request) - handler = self.handlers.get(request.endpoint) - - if handler is None: - handler = self.handlers[self.FALLBACK] + handler = self.get_endpoint(request.endpoint) requested_as = request.headers.get('as', None) expected_as = handler.req_serializer.name @@ -250,6 +247,14 @@ def _on_post_response(future): raise gen.Return(response) + def get_endpoint(self, name): + handler = self.handlers.get(name) + + if handler is None: + handler = self.handlers[self.FALLBACK] + + return handler + def register( self, rule, diff --git a/tchannel/tornado/peer.py b/tchannel/tornado/peer.py index 9dc70ac8..5d87e96b 100644 --- a/tchannel/tornado/peer.py +++ b/tchannel/tornado/peer.py @@ -24,9 +24,11 @@ import sys import logging -import random +import sys + from collections import deque from itertools import takewhile, dropwhile + from tornado import gen from tornado.iostream import StreamClosedError @@ -40,6 +42,8 @@ from ..errors import NetworkError from ..event import EventType from ..glossary import DEFAULT_TIMEOUT +from ..peer_heap import PeerHeap +from ..peer_strategy import PreferIncomingCalculator from ..zipkin.annotation import Endpoint from ..zipkin.trace import Trace from .connection import StreamConnection @@ -60,8 +64,15 @@ class Peer(object): 'host', 'port', + 'rank', + 'index', + 'order', + 'chosen_count', + 'on_conn_change', + '_connections', '_connecting', + '_on_conn_change_cb', ) # Class used to create new outgoing connections. @@ -69,13 +80,19 @@ class Peer(object): # It must support a .outgoing method. connection_class = StreamConnection - def __init__(self, tchannel, hostport): + def __init__(self, tchannel, hostport, rank=None, on_conn_change=None): """Initialize a Peer :param tchannel: TChannel through which requests will be made. :param hostport: Host-port this Peer is for. + :param rank: + The rank of a peer will affect the chance that the peer gets + selected when the client sends outbound requests. + :param on_conn_change: + A callback method takes Peer object as input and is called whenever + there are connection changes in the peer. """ assert hostport, "hostport is required" @@ -93,6 +110,25 @@ def __init__(self, tchannel, hostport): # helps avoid making multiple outgoing connections. self._connecting = None + # rank is used to measure the performance of the peer. + # It will be used in the peer heap. + if rank is not None: + self.rank = rank + else: + self.rank = sys.maxint + # index records the position of the peer in the peer heap + self.index = -1 + # order maintains the push order of the peer in the heap. + self.order = 0 + # for debug purpose, count the number of times the peer gets selected. + self.chosen_count = 0 + + # callback is called when there is a change in connections. + self._on_conn_change_cb = on_conn_change + + def set_on_conn_change_callback(self, cb): + self._on_conn_change_cb = cb + def connect(self): """Get a connection to this peer. @@ -128,8 +164,7 @@ def on_connect(_): # We don't actually need to handle the exception. That's on # the caller. connection = conn_future.result() - self._connections.append(connection) - self._set_on_close_cb(connection) + self.register_outgoing_conn(connection) self._connecting = None conn_future.add_done_callback(on_connect) @@ -139,13 +174,30 @@ def _set_on_close_cb(self, conn): def on_close(): self._connections.remove(conn) + self._on_conn_change() conn.set_close_callback(on_close) - def register_incoming(self, conn): + def register_outgoing_conn(self, conn): + """Add outgoing connection into the heap.""" + assert conn, "conn is required" + conn.set_outbound_pending_change_callback(self._on_conn_change) + self._connections.append(conn) + self._set_on_close_cb(conn) + self._on_conn_change() + + def register_incoming_conn(self, conn): + """Add incoming connection into the heap.""" assert conn, "conn is required" + conn.set_outbound_pending_change_callback(self._on_conn_change) self._connections.appendleft(conn) self._set_on_close_cb(conn) + self._on_conn_change() + + def _on_conn_change(self): + """Function will be called any time there is connection changes.""" + if self._on_conn_change_cb: + self._on_conn_change_cb(self) @property def hostport(self): @@ -519,6 +571,8 @@ class PeerGroup(object): __slots__ = ( 'tchannel', + 'peer_heap', + 'rank_calculator', '_peers', '_resetting', '_reset_condition', @@ -539,10 +593,8 @@ def __init__(self, tchannel): # to block on the same reset. self._resetting = False - # We'll create a Condition here later. We want to avoid it right now - # because it has a side-effect of scheduling some dummy work on the - # ioloop, which prevents us from forking (if you're into that). - self._reset_condition = None + self.peer_heap = PeerHeap() + self.rank_calculator = PreferIncomingCalculator() def __str__(self): return "" % str(self._peers) @@ -572,7 +624,8 @@ def get(self, hostport): """ assert hostport, "hostport is required" if hostport not in self._peers: - self._peers[hostport] = self.peer_class(self.tchannel, hostport) + self.add(hostport) + return self._peers[hostport] def lookup(self, hostport): @@ -591,7 +644,10 @@ def remove(self, hostport): :returns: The removed Peer """ assert hostport, "hostport is required" - return self._peers.pop(hostport, None) + peer = self._peers.pop(hostport, None) + if peer: + self.peer_heap.remove_peer(peer) + return peer def add(self, peer): """Add an existing Peer to this group. @@ -602,13 +658,27 @@ def add(self, peer): if isinstance(peer, basestring): # Assume strings are host-ports - peer = self.peer_class(self.tchannel, peer) + peer = self.peer_class( + tchannel=self.tchannel, + hostport=peer, + on_conn_change=self._update_heap, + ) assert peer.hostport not in self._peers, ( "%s already has a peer" % peer.hostport ) - + peer.rank = self.rank_calculator.get_rank(peer) self._peers[peer.hostport] = peer + self.peer_heap.push_peer(peer) + + def _update_heap(self, peer): + """Recalculate the peer's rank and update itself in the peer heap.""" + rank = self.rank_calculator.get_rank(peer) + if rank == peer.rank: + return + + peer.rank = rank + self.peer_heap.update_peer(peer) @property def hosts(self): @@ -659,14 +729,6 @@ def choose(self, hostport=None, blacklist=None): if hostport: return self.get(hostport) - hosts = self._peers.viewkeys() - blacklist - if not hosts: - return None - - peers = list(self._connected_peers(hosts)) - if peers: - return peers[random.randint(0, len(peers)-1)] - else: - hosts = list(hosts) - host = hosts[random.randint(0, len(hosts)-1)] - return self.get(host) + return self.peer_heap.smallest_peer( + (lambda p: p.hostport not in blacklist), + ) diff --git a/tchannel/tornado/tchannel.py b/tchannel/tornado/tchannel.py index ddbf57f2..64e4b790 100644 --- a/tchannel/tornado/tchannel.py +++ b/tchannel/tornado/tchannel.py @@ -238,7 +238,7 @@ def request(self, Defaults to 'c'. """ # TODO disallow certain parameters or don't propagate them backwards. - # For example, blacklist and score threshold aren't really + # For example, blacklist and rank threshold aren't really # user-configurable right now. return self.peers.request(hostport=hostport, service=service, @@ -454,7 +454,7 @@ def handle_stream(self, stream, address): self.tchannel.peers.get( "%s:%s" % (conn.remote_host, conn.remote_host_port) - ).register_incoming(conn) + ).register_incoming_conn(conn) yield conn.serve(handler=self._handle) diff --git a/tests/conftest.py b/tests/conftest.py index 193dcf64..1bc985d4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -39,6 +39,12 @@ def write(self, payload, callback=None): def getvalue(self): return self.buff + def set_outbound_pending_change_callback(self, cb): + pass + + def set_close_callback(self, cb): + pass + @pytest.fixture def connection(): diff --git a/tests/container/test_heap.py b/tests/container/test_heap.py new file mode 100644 index 00000000..e56cb3ae --- /dev/null +++ b/tests/container/test_heap.py @@ -0,0 +1,187 @@ +# Copyright (c) 2015 Uber Technologies, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +from __future__ import absolute_import +import random +import sys +import math +import pytest +import six +from hypothesis import given +from hypothesis import strategies as st + +from tchannel.container import heap +from tchannel.container.heap import HeapOperation, NoMatchError + + +class IntHeap(HeapOperation): + def __init__(self): + self.values = [] + + def size(self): + return len(self.values) + + def peek(self, i): + return self.values[i] + + def lt(self, i, j): + return self.values[i] < self.values[j] + + def push(self, x): + self.values.append(x) + + def pop(self): + return self.values.pop() + + def swap(self, i, j): + self.values[i], self.values[j] = self.values[j], self.values[i] + + +@pytest.fixture +def int_heap(): + return IntHeap() + + +@pytest.fixture(params=['unique', 'duplicate', 'reverse']) +def values(request): + n = 1000 + v = [] + + if request.param == "unique": + for i in six.moves.range(n): + v.append(i) + random.shuffle(v) + elif request.param == "duplicate": + for _ in six.moves.range(n): + v.append(random.randint(0, math.floor(n/20))) + elif request.param == "reverse": + for i in six.moves.range(n, -1, -1): + v.append(i) + + return v + + +def verify(ph, parent): + child1 = 2 * parent + 1 + child2 = 2 * parent + 2 + if child2 < ph.size(): + assert not ph.lt(child1, parent) + verify(ph, child1) + + if child2 < ph.size(): + assert not ph.lt(child2, parent) + verify(ph, child2) + + +def test_init(int_heap, values): + int_heap.values = values + heap.init(int_heap) + verify(int_heap, 0) + + +def test_push(int_heap, values): + for value in values: + heap.push(int_heap, value) + verify(int_heap, 0) + + assert int_heap.size() == len(values) + + +def test_pop(int_heap, values): + for value in values: + heap.push(int_heap, value) + verify(int_heap, 0) + + for i in sorted(values): + assert i == heap.pop(int_heap) + verify(int_heap, 0) + + assert int_heap.size() == 0 + + +def test_remove(int_heap, values): + for value in values: + heap.push(int_heap, value) + verify(int_heap, 0) + + # random remove item from the heap + n = len(values) + for i in six.moves.range(n - 1, -1, -1): + heap.remove(int_heap, random.randint(0, i)) + verify(int_heap, 0) + + +def test_smallest_basic(int_heap, values): + for value in values: + heap.push(int_heap, value) + verify(int_heap, 0) + + assert heap.smallest(int_heap, (lambda _: True)) == 0 + + with pytest.raises(NoMatchError): + heap.smallest(int_heap, (lambda _: False)) + + +def test_smallest_empty(int_heap): + with pytest.raises(NoMatchError): + heap.smallest(int_heap, (lambda _: True)) + + +def test_smallest_unordered_children(int_heap): + int_heap.values = [1, 4, 2] + verify(int_heap, 0) + + assert heap.smallest(int_heap, (lambda x: x % 2 == 0)) == 2 + + +@given(st.lists(st.integers(), min_size=1)) +def test_smallest_random(values): + int_heap = IntHeap() + for v in values: + heap.push(int_heap, v) + + target = random.choice(int_heap.values) + valid = [i for (i, v) in enumerate(int_heap.values) if v == target] + assert heap.smallest(int_heap, (lambda x: x == target)) in valid + + +@pytest.mark.heapfuzz +@pytest.mark.skipif(True, reason='stress test for the value heap operations') +def test_heap_fuzz(int_heap): + for _ in six.moves.range(random.randint(1, 100000)): + ops = random.randint(0, 1) + if ops == 0: # push + heap.push(int_heap, random.randint(0, sys.maxint)) + elif ops == 1: # pop + if int_heap.size(): + heap.pop(int_heap) + + if int_heap.size(): + assert smallest(int_heap.values) == int_heap.values[0] + + verify(int_heap, 0) + + +def smallest(vs): + m = sys.maxint + for value in vs: + if value < m: + m = value + return m diff --git a/tests/test_event.py b/tests/test_event.py index 6d84ec1f..e68d27e2 100644 --- a/tests/test_event.py +++ b/tests/test_event.py @@ -80,7 +80,7 @@ def test_after_send_error_event_called(): service='test', arg1='endpoint', hostport=tchannel.hostport, - timeout=0.02, + timeout=0.3, ) mock_fire.assert_any_call( mock.ANY, EventType.after_send_error, mock.ANY, diff --git a/tests/test_peer_heap.py b/tests/test_peer_heap.py new file mode 100644 index 00000000..9d337850 --- /dev/null +++ b/tests/test_peer_heap.py @@ -0,0 +1,192 @@ +# Copyright (c) 2015 Uber Technologies, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +from __future__ import absolute_import +import sys +import math + +import mock +import random +import pytest +import six +from tchannel.peer_heap import PeerHeap + + +@pytest.fixture +def peer_heap(): + return PeerHeap() + + +@pytest.fixture(params=['unique', 'duplicate', 'reverse']) +def peers(request): + n = 100 + v = [] + + if request.param == "unique": + for i in six.moves.range(n): + v.append(mock_peer(i)) + random.shuffle(v) + elif request.param == "duplicate": + for _ in six.moves.range(n): + v.append(mock_peer(random.randint(0, math.floor(n/20)))) + elif request.param == "reverse": + for i in six.moves.range(n, -1, -1): + v.append(mock_peer(i)) + + return v + + +def mock_peer(rank=None): + peer = mock.MagicMock() + peer.index = -1 + peer.rank = rank if rank is not None else random.randint(0, sys.maxint) + return peer + + +def verify(ph, parent): + """Assert the peer is still well-maintained.""" + child1 = 2*parent + 1 + child2 = 2*parent + 2 + if child2 < ph.size(): + assert not ph.lt(child1, parent) + verify(ph, child1) + + if child2 < ph.size(): + assert not ph.lt(child2, parent) + verify(ph, child2) + + +def test_push(peer_heap, peers): + for peer in peers: + peer_heap.push_peer(peer) + verify(peer_heap, 0) + + assert peer_heap.size() == len(peers) + + +def test_pop(peer_heap, peers): + for peer in peers: + peer_heap.push_peer(peer) + verify(peer_heap, 0) + + n = len(peers) + for _ in six.moves.range(n): + peer_heap.pop_peer() + verify(peer_heap, 0) + + assert peer_heap.size() == 0 + + +def test_update(peer_heap, peers): + for peer in peers: + peer_heap.push_peer(peer) + verify(peer_heap, 0) + + p = peer_heap.peers[len(peers) - 1] + p.rank = -1 + peer_heap.update_peer(p) + verify(peer_heap, 0) + assert peer_heap.peek_peer().rank == -1 + assert peer_heap.peek_peer() == p + + +def test_remove(peer_heap, peers): + for peer in peers: + peer_heap.push_peer(peer) + verify(peer_heap, 0) + + n = len(peers) + for _ in six.moves.range(n): + p = peer_heap.peers[random.randint(0, peer_heap.size() - 1)] + assert p is peer_heap.remove_peer(p) + verify(peer_heap, 0) + verify_peer_not_in_heap(peer_heap, p) + + +def verify_peer_not_in_heap(peer_heap, p): + for peer in peer_heap.peers: + assert peer is not p + + +def test_remove_duplicate(peer_heap, peers): + for peer in peers: + peer_heap.push_peer(peer) + verify(peer_heap, 0) + + n = random.randint(0, len(peers)) + + for _ in six.moves.range(n): + p = peer_heap.peers[random.randint(0, peer_heap.size() - 1)] + assert p is peer_heap.remove_peer(p) + with pytest.raises(IndexError): + peer_heap.remove_peer(p) + verify(peer_heap, 0) + verify_peer_not_in_heap(peer_heap, p) + + +def test_remove_from_empty_heap(): + heap = PeerHeap() + with pytest.raises(IndexError): + heap.remove_peer(mock_peer()) + + +def test_remove_mismatch(peer_heap, peers): + for peer in peers: + peer_heap.push_peer(peer) + verify(peer_heap, 0) + + # create a fake peer with duplicated index. + fake_peer = mock_peer() + fake_peer.index = 1 + with pytest.raises(AssertionError) as e: + peer_heap.remove_peer(fake_peer) + + assert e.value.message == 'peer is not in the heap' + + +@pytest.mark.heapfuzz +@pytest.mark.skipif(True, reason='stress test for the peer heap operations') +def test_heap_fuzz(peer_heap): + for _ in six.moves.range(random.randint(1, 100000)): + ops = random.randint(0, 2) + if ops == 0: # push + peer_heap.push_peer(mock_peer()) + elif ops == 1: # pop + peer_heap.pop_peer() + elif ops == 2: # update + if len(peer_heap) <= 0: + continue + p = peer_heap.peers[random.randint(0, len(peer_heap) - 1)] + p.rank = random.randint(0, sys.maxint) + peer_heap.update_peer(p) + + if peer_heap.size(): + assert smallest(peer_heap.peers) == peer_heap.peek_peer().rank + + verify(peer_heap, 0) + + +def smallest(ps): + m = sys.maxint + for peer in ps: + if peer.rank < m: + m = peer.rank + + return m diff --git a/tests/test_peer_strategy.py b/tests/test_peer_strategy.py new file mode 100644 index 00000000..b81f8d33 --- /dev/null +++ b/tests/test_peer_strategy.py @@ -0,0 +1,78 @@ +# Copyright (c) 2015 Uber Technologies, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +from __future__ import absolute_import + +import sys + +import pytest +from tchannel import TChannel +from tchannel.peer_strategy import PreferIncomingCalculator +from tchannel.tornado.connection import TornadoConnection +from tchannel.tornado.connection import INCOMING +from tchannel.tornado.peer import Peer + + +@pytest.mark.gen_test +def test_get_rank_no_connection(): + server = TChannel('server') + server.listen() + peer = Peer(TChannel('test'), '10.10.101.21:230') + calculator = PreferIncomingCalculator() + assert sys.maxint == calculator.get_rank(peer) + + +@pytest.mark.gen_test +def test_get_rank_with_outgoing(): + server = TChannel('server') + server.listen() + connection = yield TornadoConnection.outgoing(server.hostport) + + peer = Peer(TChannel('test'), '10.10.101.21:230') + calculator = PreferIncomingCalculator() + peer.register_outgoing_conn(connection) + assert PreferIncomingCalculator.TIERS[1] == calculator.get_rank(peer) + + +@pytest.mark.gen_test +def test_get_rank_with_imcoming(): + server = TChannel('server') + server.listen() + connection = yield TornadoConnection.outgoing(server.hostport) + connection.direction = INCOMING + peer = Peer(TChannel('test'), '10.10.101.21:230') + calculator = PreferIncomingCalculator() + peer.register_incoming_conn(connection) + assert sys.maxint != calculator.get_rank(peer) + + +@pytest.mark.gen_test +def test_get_rank_ephemeral(): + server = TChannel('server') + server.listen() + connection = yield TornadoConnection.outgoing(server.hostport) + connection.direction = INCOMING + peer = Peer(TChannel('test'), '10.10.101.21:230') + peer.register_incoming_conn(connection) + + peer.host = '0.0.0.0' + peer.port = 0 + calculator = PreferIncomingCalculator() + assert sys.maxint == calculator.get_rank(peer) diff --git a/tests/tornado/test_connection.py b/tests/tornado/test_connection.py index f53aa533..1e3363c8 100644 --- a/tests/tornado/test_connection.py +++ b/tests/tornado/test_connection.py @@ -32,6 +32,7 @@ from tchannel.errors import TimeoutError, ReadError from tchannel.tornado import connection from tchannel.tornado.message_factory import MessageFactory +from tchannel.tornado.peer import Peer def dummy_headers(): @@ -196,6 +197,55 @@ def build_raw_message(self, context, args, is_completed=True): assert server_peer.total_outbound_pendings == 0 +@pytest.mark.gen_test +def test_client_connection_change_callback(): + server = TChannel('server') + server.listen() + + @server.raw.register + def hello(request): + return 'hi' + + client = TChannel('client') + count = [0] + + def test_cb(peer): + count[0] += 1 + + client._dep_tchannel.peers.get( + server.hostport)._on_conn_change_cb = test_cb + yield client.raw( + hostport=server.hostport, + body='work', + endpoint='hello', + service='server' + ) + + # 1: connection built, 1: sending request, 1: finish sending request + assert count[0] == 3 + + +@pytest.mark.gen_test +def test_both_connection_change_callback(): + client = TChannel('client') + + with mock.patch.object(Peer, '_on_conn_change') as mock_conn_change: + server = TChannel('server') + server.listen() + + @server.raw.register + def hello(request): + return 'hi' + + yield client.raw( + hostport=server.hostport, + body='work', + endpoint='hello', + service='server' + ) + assert mock_conn_change.call_count == 6 + + @pytest.mark.gen_test def test_writer_write_error(): server, client = socket.socketpair() diff --git a/tests/tornado/test_peer.py b/tests/tornado/test_peer.py index ff6c6c28..828bb69a 100644 --- a/tests/tornado/test_peer.py +++ b/tests/tornado/test_peer.py @@ -25,12 +25,16 @@ import mock import pytest + from tornado import gen from tchannel import TChannel from tchannel.errors import NoAvailablePeerError - from tchannel.tornado import peer as tpeer +from tchannel.tornado import Request +from tchannel.tornado import Response +from tchannel.tornado.connection import TornadoConnection +from tchannel.tornado.peer import Peer from tchannel.tornado.stream import InMemStream from tchannel.tornado.stream import read_full @@ -208,5 +212,121 @@ def test_peer_incoming_connections_are_preferred(request): assert (yield peer.connect()) is outgoing - peer.register_incoming(incoming) + peer.register_incoming_conn(incoming) assert (yield peer.connect()) is incoming + + +@pytest.fixture +def peer(): + return Peer( + tchannel=TChannel('peer'), + hostport='127.0.0.1:21300', + ) + + +def test_on_conn_change(peer, connection): + c = [0] + + def conn_change_db(peer): + c[0] += 1 + + peer.set_on_conn_change_callback(conn_change_db) + peer.register_incoming_conn(connection) + assert c[0] == 1 + + peer.register_outgoing_conn(connection) + assert c[0] == 2 + + +@pytest.mark.gen_test +def test_outbound_pending_change(): + server = TChannel('server') + server.listen() + connection = yield TornadoConnection.outgoing(server.hostport) + c = [0] + + def outbound_pending_change_callback(): + c[0] += 1 + + connection.set_outbound_pending_change_callback( + outbound_pending_change_callback + ) + + connection.add_outbound_pending_req(Request()) + assert c[0] == 1 + connection.add_outbound_pending_res(Response()) + assert c[0] == 2 + connection.remove_outbound_pending_req(Request()) + assert c[0] == 3 + connection.remove_outbound_pending_res(Response()) + assert c[0] == 4 + + +@pytest.mark.gen_test +def test_outbound_pending_change_propagate(peer): + server = TChannel('server') + server.listen() + connection = yield TornadoConnection.outgoing(server.hostport) + + peer.register_incoming_conn(connection) + b = [0] + + def conn_change_db(peer): + b[0] += 1 + + peer.set_on_conn_change_callback(conn_change_db) + + connection.add_outbound_pending_req(Request()) + assert b[0] == 1 + connection.add_outbound_pending_res(Response()) + assert b[0] == 2 + connection.remove_outbound_pending_req(Request()) + assert b[0] == 3 + connection.remove_outbound_pending_res(Response()) + assert b[0] == 4 + + +@pytest.fixture +def hostports(): + return ['127.0.0.1:' + str(i) for i in range(100)] + + +def test_choose(hostports): + tchannel = TChannel('test') + peer_group = tchannel._dep_tchannel.peers + for hp in hostports: + peer_group.get(hp) + + n = len(hostports) + for _ in hostports: + peer_group.choose() + assert peer_group.peer_heap.size() == n + + +def test_choose_with_blacklist(hostports): + tchannel = TChannel('test') + peer_group = tchannel._dep_tchannel.peers + for hp in hostports: + peer_group.get(hp) + + n = len(hostports) + blacklist = set() + for _ in hostports: + peer = peer_group.choose(blacklist=blacklist) + assert peer not in blacklist + blacklist.add(peer.hostport) + assert peer_group.peer_heap.size() == n + + +def test_choose_with_target_hostport(hostports): + tchannel = TChannel('test') + peer_group = tchannel._dep_tchannel.peers + for hp in hostports: + peer_group.get(hp) + + n = len(hostports) + 1 + target = '1.0.0.1:9000' + for _ in hostports: + peer = peer_group.choose(hostport=target) + assert target == peer.hostport + assert peer_group.peer_heap.size() == n From 6bcee32518fb62026a252c8ab7a88309fafa2c8e Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Thu, 17 Mar 2016 15:35:40 -0700 Subject: [PATCH 59/67] fix peer selection benchmark --- benchmarks/test_peer_selection.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/benchmarks/test_peer_selection.py b/benchmarks/test_peer_selection.py index 4671f811..6f61225d 100644 --- a/benchmarks/test_peer_selection.py +++ b/benchmarks/test_peer_selection.py @@ -25,6 +25,7 @@ import mock import random +from tchannel.tornado.connection import INCOMING from tchannel.tornado.peer import ( Peer as _Peer, PeerGroup as _PeerGroup, @@ -39,6 +40,8 @@ class FakeConnection(object): def __init__(self, hostport): self.hostport = hostport self.closed = False + self.direction = INCOMING + self.total_outbound_pendings = 0 @classmethod def outgoing(cls, hostport, process_name=None, serve_hostport=None, @@ -57,6 +60,9 @@ def result(self): def set_close_callback(self, cb): pass + def set_outbound_pending_change_callback(self, cb): + pass + class Peer(_Peer): connection_class = FakeConnection @@ -96,7 +102,7 @@ def test_choose(benchmark): peer = group.peers[random.randint(0, NUM_PEERS-1)] if peer.hostport in connected_peers: continue - peer.register_incoming(FakeConnection(peer.hostport)) + peer.register_incoming_conn(FakeConnection(peer.hostport)) connected_peers.add(peer.hostport) benchmark(group.choose) From 30a30d8c6b72afde1cd83fbb41656f96a8504ae5 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Mon, 28 Mar 2016 11:53:41 -0700 Subject: [PATCH 60/67] Update CHANGELOG --- CHANGES.rst | 2 ++ tchannel/tornado/peer.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index bf9a969c..c9567db5 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,6 +4,8 @@ Changes by Version 0.21.11 (unreleased) -------------------- +- Peer selection is now constant time instead of linear time. This should + significantly reduce CPU load per request. - Fixed a bug where certain errors while reading requests would propagate as TimeoutErrors. - Attempting to register endpoints against a synchronous TChannel now logs an diff --git a/tchannel/tornado/peer.py b/tchannel/tornado/peer.py index 5d87e96b..ac03b729 100644 --- a/tchannel/tornado/peer.py +++ b/tchannel/tornado/peer.py @@ -24,7 +24,6 @@ import sys import logging -import sys from collections import deque from itertools import takewhile, dropwhile From 143e4a72772a62c45688779b651f5e1ac56d9ffa Mon Sep 17 00:00:00 2001 From: Bryce Lampe Date: Tue, 29 Mar 2016 15:03:04 -0700 Subject: [PATCH 61/67] Reduce advertise interval to 3 minutes --- CHANGES.rst | 1 + tchannel/tornado/hyperbahn.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index bf9a969c..51160c1e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,6 +8,7 @@ Changes by Version TimeoutErrors. - Attempting to register endpoints against a synchronous TChannel now logs an INFO level message. +- Reduced default advertisement interval to 3 minutes. 0.21.10 (2016-03-17) diff --git a/tchannel/tornado/hyperbahn.py b/tchannel/tornado/hyperbahn.py index bb1edd97..3e694396 100644 --- a/tchannel/tornado/hyperbahn.py +++ b/tchannel/tornado/hyperbahn.py @@ -34,7 +34,7 @@ EXPO_BASE = 1.4 # try this first MAX_DELAY = 10 # sec MAX_ATTEMPT = 7 # pow(1.4, 8) > 10 -DELAY = 60 * 1000 # ms delay time for successful advertise +DELAY = 3 * 60 * 1000 # ms delay time for successful advertise FIRST_ADVERTISE_TIME = 30 # sec log = logging.getLogger('tchannel') From 566cd8d042b90c090b8410a3c18426105677ab4c Mon Sep 17 00:00:00 2001 From: Junchao Wu Date: Tue, 29 Mar 2016 16:15:38 -0700 Subject: [PATCH 62/67] Shuffle peers when adding to peer heap to avoid bias Currently, there's a bias on the first request causing it to select earlier peers. This changes add to shuffle peers around rather than having order dependent upon when it was added. --- tchannel/peer_heap.py | 19 +++++++++++++++++++ tchannel/tornado/peer.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/tchannel/peer_heap.py b/tchannel/peer_heap.py index 34152c7d..e324d6dd 100644 --- a/tchannel/peer_heap.py +++ b/tchannel/peer_heap.py @@ -115,6 +115,13 @@ def push_peer(self, peer): peer.order = self.order + random.randint(0, self.size()) heap.push(self, peer) + def add_and_shuffle(self, peer): + """Push a new peer into the heap and shuffle the heap""" + self.push_peer(peer) + + r = random.randint(0, self.size() - 1) + self.swap_order(peer.index, r) + def peek_peer(self): """Return the top peer of the heap :return @@ -155,3 +162,15 @@ def smallest_peer(self, predicate): ) except NoMatchError: return None + + def swap_order(self, index, r): + if index == r: + return + + p1 = self.peers[index] + p2 = self.peers[r] + + (p1.order, p2.order) = (p2.order, p1.order) + + heap.fix(self, p1.index) + heap.fix(self, p2.index) diff --git a/tchannel/tornado/peer.py b/tchannel/tornado/peer.py index ac03b729..99859d8b 100644 --- a/tchannel/tornado/peer.py +++ b/tchannel/tornado/peer.py @@ -668,7 +668,7 @@ def add(self, peer): ) peer.rank = self.rank_calculator.get_rank(peer) self._peers[peer.hostport] = peer - self.peer_heap.push_peer(peer) + self.peer_heap.add_and_shuffle(peer) def _update_heap(self, peer): """Recalculate the peer's rank and update itself in the peer heap.""" From 159b0c54b95e11ce67ee7e3cce69c37369333a57 Mon Sep 17 00:00:00 2001 From: Junchao Wu Date: Tue, 29 Mar 2016 17:01:10 -0700 Subject: [PATCH 63/67] Fix naming issues --- benchmarks/test_concurrent_requests.py | 12 +++++------- tchannel/peer_heap.py | 8 ++++---- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/benchmarks/test_concurrent_requests.py b/benchmarks/test_concurrent_requests.py index 09898702..e5950554 100644 --- a/benchmarks/test_concurrent_requests.py +++ b/benchmarks/test_concurrent_requests.py @@ -30,7 +30,7 @@ client = None -def setUpServer(num): +def setup_server(num): servers = [] for i in xrange(num): server = TChannel('server' + str(i)) @@ -40,12 +40,11 @@ def hello(request): return 'hello' server.listen() servers.append(server) - return servers @gen.coroutine -def setUpClient(servers): +def setup_client(servers): router = [] for i in xrange(1000): router.append('1.1.1.1:'+str(i)) @@ -54,7 +53,6 @@ def setUpClient(servers): @client.raw.register def hello(request): return 'hello' - client.listen() for i in xrange(100): @@ -71,7 +69,7 @@ def hello(request): @gen.coroutine def peer_test(): - global servers, client + global client fs = [] for _ in xrange(100): fs.append(client.raw( @@ -90,8 +88,8 @@ def stress_test(): @gen.coroutine def setup(): global servers, client - servers = setUpServer(100) - client = yield setUpClient(servers) + servers = setup_server(100) + client = yield setup_client(servers) def test_peer_heap(benchmark): diff --git a/tchannel/peer_heap.py b/tchannel/peer_heap.py index e324d6dd..5793b17d 100644 --- a/tchannel/peer_heap.py +++ b/tchannel/peer_heap.py @@ -163,12 +163,12 @@ def smallest_peer(self, predicate): except NoMatchError: return None - def swap_order(self, index, r): - if index == r: + def swap_order(self, index1, index2): + if index1 == index2: return - p1 = self.peers[index] - p2 = self.peers[r] + p1 = self.peers[index1] + p2 = self.peers[index2] (p1.order, p2.order) = (p2.order, p1.order) From 1143edf5bde723c66dbceb259b82a29fa87cb3b7 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Wed, 30 Mar 2016 10:17:19 -0700 Subject: [PATCH 64/67] connection: simply pending outbound count tracking We don't need to maintain references to the requests and responses inside a dictionary. We can simply track the count inside a single integer. --- tchannel/tornado/connection.py | 45 +++++++++------------------------- tests/tornado/test_peer.py | 18 ++++++-------- 2 files changed, 20 insertions(+), 43 deletions(-) diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py index 9383d13a..4da018e5 100644 --- a/tchannel/tornado/connection.py +++ b/tchannel/tornado/connection.py @@ -124,11 +124,8 @@ def __init__(self, connection, tchannel=None, direction=None): # Map from message ID to futures for responses of outgoing calls. self._outbound_pending_call = {} - # Map from message ID to outgoing request that is being sent. - self._outbound_pending_req = {} - - # Map from message ID to outgoing response that is being sent. - self._outbound_pending_res = {} + # Total number of pending outbound requests and responses. + self.total_outbound_pendings = 0 # Collection of request IDs known to have timed out. self._request_tombstones = Cemetery() @@ -148,13 +145,6 @@ def __init__(self, connection, tchannel=None, direction=None): connection.set_close_callback(self._on_close) - @property - def total_outbound_pendings(self): - """Returns the total number of pending outbound requests and - responses.""" - return (len(self._outbound_pending_res) + - len(self._outbound_pending_req)) - def set_outbound_pending_change_callback(self, cb): """Specify a function to be called when outbound pending request or response list changed. @@ -494,26 +484,15 @@ def ping(self): def pong(self): return self.writer.put(messages.PingResponseMessage()) - def _outbound_pending_change(self): - """Called when there is a change in the outbound pending list.""" + def add_pending_outbound(self): + self.total_outbound_pendings += 1 if self._outbound_pending_change_cb: self._outbound_pending_change_cb() - def add_outbound_pending_res(self, res): - self._outbound_pending_res[res.id] = res - self._outbound_pending_change() - - def add_outbound_pending_req(self, req): - self._outbound_pending_req[req.id] = req - self._outbound_pending_change() - - def remove_outbound_pending_res(self, res): - self._outbound_pending_res.pop(res.id, None) - self._outbound_pending_change() - - def remove_outbound_pending_req(self, req): - self._outbound_pending_req.pop(req.id, None) - self._outbound_pending_change() + def remove_pending_outbound(self): + self.total_outbound_pendings -= 1 + if self._outbound_pending_change_cb: + self._outbound_pending_change_cb() class StreamConnection(TornadoConnection): @@ -587,7 +566,7 @@ def _stream(self, context, message_factory): @tornado.gen.coroutine def post_response(self, response): try: - self.add_outbound_pending_res(response) + self.add_pending_outbound() # TODO: before_send_response yield self._stream(response, self.response_message_factory) @@ -597,7 +576,7 @@ def post_response(self, response): response, ) finally: - self.remove_outbound_pending_res(response) + self.remove_pending_outbound() response.close_argstreams(force=True) def stream_request(self, request): @@ -630,9 +609,9 @@ def send_request(self, request): future = tornado.gen.Future() self._outbound_pending_call[request.id] = future - self.add_outbound_pending_req(request) + self.add_pending_outbound() self.stream_request(request).add_done_callback( - lambda f: self.remove_outbound_pending_req(request) + lambda f: self.remove_pending_outbound() ) if request.ttl: diff --git a/tests/tornado/test_peer.py b/tests/tornado/test_peer.py index 828bb69a..c13f5501 100644 --- a/tests/tornado/test_peer.py +++ b/tests/tornado/test_peer.py @@ -31,8 +31,6 @@ from tchannel import TChannel from tchannel.errors import NoAvailablePeerError from tchannel.tornado import peer as tpeer -from tchannel.tornado import Request -from tchannel.tornado import Response from tchannel.tornado.connection import TornadoConnection from tchannel.tornado.peer import Peer from tchannel.tornado.stream import InMemStream @@ -252,13 +250,13 @@ def outbound_pending_change_callback(): outbound_pending_change_callback ) - connection.add_outbound_pending_req(Request()) + connection.add_pending_outbound() assert c[0] == 1 - connection.add_outbound_pending_res(Response()) + connection.add_pending_outbound() assert c[0] == 2 - connection.remove_outbound_pending_req(Request()) + connection.remove_pending_outbound() assert c[0] == 3 - connection.remove_outbound_pending_res(Response()) + connection.remove_pending_outbound() assert c[0] == 4 @@ -276,13 +274,13 @@ def conn_change_db(peer): peer.set_on_conn_change_callback(conn_change_db) - connection.add_outbound_pending_req(Request()) + connection.add_pending_outbound() assert b[0] == 1 - connection.add_outbound_pending_res(Response()) + connection.add_pending_outbound() assert b[0] == 2 - connection.remove_outbound_pending_req(Request()) + connection.remove_pending_outbound() assert b[0] == 3 - connection.remove_outbound_pending_res(Response()) + connection.remove_pending_outbound() assert b[0] == 4 From 02f17928ff42027b9ab36a33b29a3d7825840277 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Wed, 30 Mar 2016 10:37:36 -0700 Subject: [PATCH 65/67] peerheap: make rank selection slighly cheaper We shouldn't be allocating lists from deques each time we need to recalculate the rank. --- tchannel/peer_strategy.py | 2 +- tchannel/tornado/peer.py | 42 +++++++++++++++++---------------------- 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/tchannel/peer_strategy.py b/tchannel/peer_strategy.py index 5e6042e7..23ea4545 100644 --- a/tchannel/peer_strategy.py +++ b/tchannel/peer_strategy.py @@ -54,7 +54,7 @@ def get_rank(self, peer): if peer.is_ephemeral or not peer.connections: return self.TIERS[0] - if not peer.incoming_connections: + if not peer.has_incoming_connections: return self.TIERS[1] + peer.total_outbound_pendings return self.TIERS[2] + peer.total_outbound_pendings diff --git a/tchannel/tornado/peer.py b/tchannel/tornado/peer.py index 99859d8b..9b768f99 100644 --- a/tchannel/tornado/peer.py +++ b/tchannel/tornado/peer.py @@ -68,8 +68,8 @@ class Peer(object): 'order', 'chosen_count', 'on_conn_change', + 'connections', - '_connections', '_connecting', '_on_conn_change_cb', ) @@ -88,7 +88,8 @@ def __init__(self, tchannel, hostport, rank=None, on_conn_change=None): Host-port this Peer is for. :param rank: The rank of a peer will affect the chance that the peer gets - selected when the client sends outbound requests. + selected when the client sends outbound requests. Lower rank is + better. :param on_conn_change: A callback method takes Peer object as input and is called whenever there are connection changes in the peer. @@ -102,7 +103,7 @@ def __init__(self, tchannel, hostport, rank=None, on_conn_change=None): #: Collection of all connections for this Peer. Incoming connections #: are added to the left side of the deque and outgoing connections to #: the right side. - self._connections = deque() + self.connections = deque() # This contains a future to the TornadoConnection if we're already in # the process of making an outgoing connection to the peer. This @@ -139,10 +140,10 @@ def connect(self): A future containing a connection to this host. """ # Prefer incoming connections over outgoing connections. - if self._connections: + if self.connections: # First value is an incoming connection future = gen.Future() - future.set_result(self._connections[0]) + future.set_result(self.connections[0]) return future if self._connecting: @@ -172,16 +173,20 @@ def on_connect(_): def _set_on_close_cb(self, conn): def on_close(): - self._connections.remove(conn) + self.connections.remove(conn) self._on_conn_change() conn.set_close_callback(on_close) + @property + def has_incoming_connections(self): + return self.connections and self.connections[0].direction == INCOMING + def register_outgoing_conn(self, conn): """Add outgoing connection into the heap.""" assert conn, "conn is required" conn.set_outbound_pending_change_callback(self._on_conn_change) - self._connections.append(conn) + self.connections.append(conn) self._set_on_close_cb(conn) self._on_conn_change() @@ -189,7 +194,7 @@ def register_incoming_conn(self, conn): """Add incoming connection into the heap.""" assert conn, "conn is required" conn.set_outbound_pending_change_callback(self._on_conn_change) - self._connections.appendleft(conn) + self.connections.appendleft(conn) self._set_on_close_cb(conn) self._on_conn_change() @@ -203,20 +208,13 @@ def hostport(self): """The host-port this Peer is for.""" return "%s:%d" % (self.host, self.port) - @property - def connections(self): - """Returns an iterator over all connections for this peer. - - Incoming connections are listed first.""" - return list(self._connections) - @property def outgoing_connections(self): """Returns a list of all outgoing connections for this peer.""" # Outgoing connections are on the right return list( - dropwhile(lambda c: c.direction != OUTGOING, self._connections) + dropwhile(lambda c: c.direction != OUTGOING, self.connections) ) @property @@ -225,17 +223,13 @@ def incoming_connections(self): # Incoming connections are on the left. return list( - takewhile(lambda c: c.direction == INCOMING, self._connections) + takewhile(lambda c: c.direction == INCOMING, self.connections) ) @property def total_outbound_pendings(self): """Return the total number of out pending req/res among connections""" - num = 0 - for con in self.connections: - num += con.total_outbound_pendings - - return num + return sum(c.total_outbound_pendings for c in self.connections) @property def is_ephemeral(self): @@ -246,10 +240,10 @@ def is_ephemeral(self): def connected(self): """Return True if this Peer is connected.""" - return len(self._connections) > 0 + return len(self.connections) > 0 def close(self): - for connection in list(self._connections): + for connection in self.connections: # closing the connection will mutate the deque so create a copy connection.close() From 9255fec01d68086405267d78d238f1271aa41f81 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Wed, 30 Mar 2016 11:13:01 -0700 Subject: [PATCH 66/67] Fix deque mutation during iteration --- tchannel/tornado/peer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tchannel/tornado/peer.py b/tchannel/tornado/peer.py index 9b768f99..ced1d6e7 100644 --- a/tchannel/tornado/peer.py +++ b/tchannel/tornado/peer.py @@ -243,7 +243,7 @@ def connected(self): return len(self.connections) > 0 def close(self): - for connection in self.connections: + for connection in list(self.connections): # closing the connection will mutate the deque so create a copy connection.close() From c14a1e2255a67236c40b20db9ac0c4ce395b5d6d Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Wed, 30 Mar 2016 11:17:37 -0700 Subject: [PATCH 67/67] Don't use globals in test_concurrent_requests. --- benchmarks/test_concurrent_requests.py | 44 ++++++++++++-------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/benchmarks/test_concurrent_requests.py b/benchmarks/test_concurrent_requests.py index e5950554..e9296da3 100644 --- a/benchmarks/test_concurrent_requests.py +++ b/benchmarks/test_concurrent_requests.py @@ -22,41 +22,43 @@ absolute_import, unicode_literals, print_function, division ) -from tornado import gen, ioloop -from tchannel import TChannel - +from tornado import gen +from tornado.ioloop import IOLoop -servers = None -client = None +from tchannel import TChannel -def setup_server(num): +def setup_servers(num): servers = [] + for i in xrange(num): server = TChannel('server' + str(i)) @server.raw.register def hello(request): return 'hello' + server.listen() servers.append(server) + return servers @gen.coroutine def setup_client(servers): - router = [] + known_peers = [] for i in xrange(1000): - router.append('1.1.1.1:'+str(i)) - client = TChannel('client', known_peers=router) + known_peers.append('1.1.1.1:'+str(i)) + client = TChannel('client', known_peers=known_peers) + # Add a bunch of unconnected peers @client.raw.register def hello(request): return 'hello' client.listen() - for i in xrange(100): - server = servers[i] + # Open incoming connection from the server to the client. + for server in servers: yield server.raw( service='server', endpoint='hello', @@ -68,8 +70,7 @@ def hello(request): @gen.coroutine -def peer_test(): - global client +def peer_test(client): fs = [] for _ in xrange(100): fs.append(client.raw( @@ -81,17 +82,12 @@ def peer_test(): yield fs -def stress_test(): - ioloop.IOLoop.current().run_sync(peer_test) - - -@gen.coroutine -def setup(): - global servers, client - servers = setup_server(100) - client = yield setup_client(servers) +def stress_test(client): + IOLoop.current().run_sync(lambda: peer_test(client)) def test_peer_heap(benchmark): - ioloop.IOLoop.current().run_sync(setup) - benchmark(stress_test) + servers = setup_servers(100) + client = IOLoop.current().run_sync(lambda: setup_client(servers)) + + benchmark(stress_test, client)