From 7358b7d5a0d5eddd804ac99177a24b4e6fd68c20 Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Mon, 29 Feb 2016 19:38:42 -0800
Subject: [PATCH 01/67] Serialize writes

---
 tchannel/tornado/connection.py | 20 +++++++++++--
 tchannel/tornado/util.py       | 51 ----------------------------------
 2 files changed, 17 insertions(+), 54 deletions(-)

diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index 234d7da6..b0781497 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -53,7 +53,6 @@
 from ..messages.types import Types
 from .message_factory import build_raw_error_message
 from .message_factory import MessageFactory
-from .util import chain
 from .tombstone import Cemetery
 
 log = logging.getLogger('tchannel')
@@ -138,6 +137,9 @@ def __init__(self, connection, tchannel=None, direction=None):
         self.tchannel = tchannel
         self._close_cb = None
 
+        self._write_queue = tornado.queues.Queue()
+        self._drain_write_queue()
+
         connection.set_close_callback(self._on_close)
 
     def set_close_callback(self, cb):
@@ -337,7 +339,12 @@ def write(self, message):
 
         fragments = message_factory.fragment(message)
 
-        return chain(fragments, self._write)
+        for fragment in fragments:
+            future = self._write(fragment)
+
+        # We're done writing the message once our last future
+        # resolves.
+        return future
 
     def _write(self, message):
         """Writes the given message up the wire.
@@ -358,8 +365,15 @@ def _write(self, message):
             payload=payload
         )
         body = frame.frame_rw.write(f, BytesIO()).getvalue()
+        return self._write_queue.put(body)
 
-        return self.connection.write(body)
+    @tornado.gen.coroutine
+    def _drain_write_queue(self):
+        """Ensures ordering of our writes."""
+
+        while not self.closed:
+            body = yield self._write_queue.get()
+            yield self.connection.write(body)
 
     def close(self):
         if not self.closed:
diff --git a/tchannel/tornado/util.py b/tchannel/tornado/util.py
index 3c2197e1..a2953a69 100644
--- a/tchannel/tornado/util.py
+++ b/tchannel/tornado/util.py
@@ -39,54 +39,3 @@ def get_arg(context, index):
         raise tornado.gen.Return(arg)
     else:
         raise TChannelError()
-
-
-def chain(iterable, func):
-    """Apply an asynchronous function to a list of items in order.
-
-    Where previously you would write
-
-    .. code-block:: python
-
-        @gen.coroutine
-        def bar():
-            for thing in things:
-                yield async_operation(thing)
-
-    You can instead write:
-
-    .. code-block:: python
-
-        future = chain(things, async_operation)
-
-    This is most useful in coroutine that are called frequently, as this is
-    significantly faster.
-
-    Returns a future that resolves when all of the given futures have
-    completed.
-
-    If any future raises an exception, the remainder of the chain will not be
-    processed, and the exception is propagated to the returned future.
-    """
-
-    all_done_future = tornado.concurrent.Future()
-
-    generator = iter(iterable)
-
-    def handle(future):
-        if future.exception():
-            all_done_future.set_exc_info(future.exc_info())
-        else:
-            go()
-
-    def go():
-        try:
-            arg = generator.next()
-        except StopIteration:
-            all_done_future.set_result(None)
-        else:
-            tornado.ioloop.IOLoop.current().add_future(func(arg), handle)
-
-    go()
-
-    return all_done_future

From 7300b2ff77e8119e78a556ef88ed26f4c1ea69ad Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Thu, 3 Mar 2016 15:39:36 -0800
Subject: [PATCH 02/67] Add a failing test

---
 examples/sync/fanout/client.py | 59 ++++++++++++++++++++++++++++++++++
 examples/sync/fanout/server.py | 44 +++++++++++++++++++++++++
 tests/test_examples.py         |  1 +
 3 files changed, 104 insertions(+)
 create mode 100644 examples/sync/fanout/client.py
 create mode 100644 examples/sync/fanout/server.py

diff --git a/examples/sync/fanout/client.py b/examples/sync/fanout/client.py
new file mode 100644
index 00000000..4b441825
--- /dev/null
+++ b/examples/sync/fanout/client.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2015 Uber Technologies, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+import json
+
+from tchannel import thrift
+from tchannel.sync import TChannel
+
+tchannel = TChannel('thrift-client')
+service = thrift.load(
+    path='tests/data/idls/ThriftTest.thrift',
+    service='thrift-server',
+    hostport='localhost:54498',
+)
+
+
+def make_requests():
+
+    # Fan-out
+    futures = [tchannel.thrift(
+        request=service.ThriftTest.testString(thing="req"),
+        headers={
+            'req': 'header',
+        },
+    ) for _ in xrange(20)]
+
+    # Fan-in
+    for future in futures:
+        response = future.result()
+
+    return response
+
+
+resp = make_requests()
+
+assert resp.headers == {
+    'resp': 'header',
+}
+assert resp.body == 'resp' * 100000
+
+print resp.body[:4]
+print json.dumps(resp.headers)
diff --git a/examples/sync/fanout/server.py b/examples/sync/fanout/server.py
new file mode 100644
index 00000000..aa5c83ea
--- /dev/null
+++ b/examples/sync/fanout/server.py
@@ -0,0 +1,44 @@
+# Copyright (c) 2015 Uber Technologies, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+from __future__ import absolute_import
+
+from tornado import gen, ioloop
+from tchannel import TChannel, Response, thrift
+
+tchannel = TChannel('thrift-server', hostport='localhost:54498')
+service = thrift.load('tests/data/idls/ThriftTest.thrift')
+
+
+@tchannel.thrift.register(service.ThriftTest)
+@gen.coroutine
+def testString(request):
+
+    assert request.headers == {'req': 'header'}
+    assert request.body.thing == 'req'
+
+    return Response('resp' * 100000, headers={'resp': 'header'})
+
+
+tchannel.listen()
+
+print tchannel.hostport
+
+ioloop.IOLoop.current().start()
diff --git a/tests/test_examples.py b/tests/test_examples.py
index b9cfe359..01a77630 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -58,6 +58,7 @@ def popen(path, wait_for_listen=False):
         ('thrift', 'simple/thrift/'),
         ('bench', 'benchmark/thrift/'),
         ('guide', 'guide/keyvalue/keyvalue/'),
+        ('thrift', 'sync/fanout/'),
     )
 )
 def test_example(scheme, path):

From bcc7239b7243f2fc7bf0eb522955af21def9e562 Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Fri, 4 Mar 2016 21:31:54 -0800
Subject: [PATCH 03/67] Add a workaround for handshake coordination

---
 tchannel/tornado/connection.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index b0781497..e1d3038a 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -138,7 +138,7 @@ def __init__(self, connection, tchannel=None, direction=None):
         self._close_cb = None
 
         self._write_queue = tornado.queues.Queue()
-        self._drain_write_queue()
+        self._draining = False
 
         connection.set_close_callback(self._on_close)
 
@@ -243,6 +243,7 @@ def _loop(self):
         #
         # Must be started only after the handshake has been performed.
         self._loop_running = True
+
         while not self.closed:
             message = yield self._recv()
 
@@ -365,15 +366,24 @@ def _write(self, message):
             payload=payload
         )
         body = frame.frame_rw.write(f, BytesIO()).getvalue()
-        return self._write_queue.put(body)
+        done_writing_future = tornado.gen.Future()
+
+        self._write_queue.put((body, done_writing_future))
+
+        if not self._draining:
+            self._drain_write_queue()
+
+        return done_writing_future
 
     @tornado.gen.coroutine
     def _drain_write_queue(self):
         """Ensures ordering of our writes."""
+        self._draining = True
 
         while not self.closed:
-            body = yield self._write_queue.get()
-            yield self.connection.write(body)
+            body, done = yield self._write_queue.get()
+            result = yield self.connection.write(body)
+            done.set_result(result)
 
     def close(self):
         if not self.closed:
@@ -389,7 +399,7 @@ def initiate_handshake(self, headers):
             A future that resolves (with a value of None) when the handshake
             is complete.
         """
-        self._write(messages.InitRequestMessage(
+        yield self._write(messages.InitRequestMessage(
             version=PROTOCOL_VERSION,
             headers=headers
         ))
@@ -423,7 +433,7 @@ def expect_handshake(self, headers):
             )
         self._extract_handshake_headers(init_req)
 
-        self._write(
+        yield self._write(
             messages.InitResponseMessage(
                 PROTOCOL_VERSION, headers, init_req.id),
         )

From 9cbcb57cd04bd45d258142f4eada58d2ade17a8f Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Sat, 5 Mar 2016 00:23:18 -0800
Subject: [PATCH 04/67] Break out reader/writer classes, fix tests

---
 tchannel/tornado/connection.py           | 272 ++++++++++++++---------
 tchannel/tornado/peer.py                 |   4 +-
 tests/conftest.py                        |   2 +-
 tests/integration/test_error_handling.py |   2 +-
 tests/tornado/conftest.py                |   2 +-
 tests/tornado/test_connection.py         |  41 ++--
 6 files changed, 192 insertions(+), 131 deletions(-)

diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index e1d3038a..692d93c9 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -111,9 +111,6 @@ def __init__(self, connection, tchannel=None, direction=None):
         self.remote_process_name = None
         self.requested_version = PROTOCOL_VERSION
 
-        # Tracks message IDs for this connection.
-        self._id_sequence = 0
-
         # We need to use two separate message factories to avoid message ID
         # collision while assembling fragmented messages.
         self.request_message_factory = MessageFactory(self.remote_host,
@@ -137,8 +134,8 @@ def __init__(self, connection, tchannel=None, direction=None):
         self.tchannel = tchannel
         self._close_cb = None
 
-        self._write_queue = tornado.queues.Queue()
-        self._draining = False
+        self.reader = Reader(self.connection)
+        self.writer = Writer(self.connection)
 
         connection.set_close_callback(self._on_close)
 
@@ -154,10 +151,6 @@ def set_close_callback(self, cb):
         )
         self._close_cb = stack_context.wrap(cb)
 
-    def next_message_id(self):
-        self._id_sequence = (self._id_sequence + 1) % MAX_MESSAGE_ID
-        return self._id_sequence
-
     def _on_close(self):
         self.closed = True
         self._request_tombstones.clear()
@@ -188,53 +181,7 @@ def await(self):
         if self._loop_running:
             return self._messages.get()
         else:
-            return self._recv()
-
-    def _recv(self):
-        """Receive the next message off the wire.
-
-        :returns:
-            A Future that produces a Context object containing the next
-            message off the wire.
-        """
-
-        # This is the message_future we'll return for any inbound messages.
-        message_future = tornado.gen.Future()
-        io_loop = IOLoop.current()
-
-        def on_body(read_body_future, size):
-            if read_body_future.exception():
-                return on_error(read_body_future)
-
-            body = read_body_future.result()
-            f = frame.frame_rw.read(BytesIO(body), size=size)
-            message_rw = messages.RW[f.header.message_type]
-            message = message_rw.read(BytesIO(f.payload))
-            message.id = f.header.message_id
-            message_future.set_result(message)
-
-        def on_read_size(read_size_future):
-            if read_size_future.exception():
-                return on_error(read_size_future)
-
-            size_bytes = read_size_future.result()
-            size = frame.frame_rw.size_rw.read(BytesIO(size_bytes))
-            read_body_future = self.connection.read_bytes(size - size_width)
-
-            io_loop.add_future(
-                read_body_future,
-                lambda future: on_body(future, size)
-            )
-
-            return read_body_future
-
-        def on_error(future):
-            log.info("Failed to read data: %s", future.exception())
-
-        size_width = frame.frame_rw.size_rw.width()
-        read_bytes_future = self.connection.read_bytes(size_width)
-        io_loop.add_future(read_bytes_future, on_read_size)
-        return message_future
+            return self.reader.get()
 
     @tornado.gen.coroutine
     def _loop(self):
@@ -245,7 +192,7 @@ def _loop(self):
         self._loop_running = True
 
         while not self.closed:
-            message = yield self._recv()
+            message = yield self.reader.get()
 
             # TODO: There should probably be a try-catch on the yield.
             if message.message_type in self.CALL_REQ_TYPES:
@@ -313,7 +260,7 @@ def send(self, message):
             "Message '%s' can't use send" % repr(message)
         )
 
-        message.id = message.id or self.next_message_id()
+        message.id = message.id or self.writer.next_message_id()
         assert message.id not in self._outstanding, (
             "Message ID '%d' already being used" % message.id
         )
@@ -331,7 +278,7 @@ def write(self, message):
         :param message:
             Message to write.
         """
-        message.id = message.id or self.next_message_id()
+        message.id = message.id or self.writer.next_message_id()
 
         if message.message_type in self.CALL_REQ_TYPES:
             message_factory = self.request_message_factory
@@ -341,50 +288,12 @@ def write(self, message):
         fragments = message_factory.fragment(message)
 
         for fragment in fragments:
-            future = self._write(fragment)
+            future = self.writer.put(fragment)
 
         # We're done writing the message once our last future
         # resolves.
         return future
 
-    def _write(self, message):
-        """Writes the given message up the wire.
-
-        The message must be small enough to fit in a single frame.
-        """
-        message.id = message.id or self.next_message_id()
-
-        payload = messages.RW[message.message_type].write(
-            message, BytesIO()
-        ).getvalue()
-
-        f = frame.Frame(
-            header=frame.FrameHeader(
-                message_type=message.message_type,
-                message_id=message.id,
-            ),
-            payload=payload
-        )
-        body = frame.frame_rw.write(f, BytesIO()).getvalue()
-        done_writing_future = tornado.gen.Future()
-
-        self._write_queue.put((body, done_writing_future))
-
-        if not self._draining:
-            self._drain_write_queue()
-
-        return done_writing_future
-
-    @tornado.gen.coroutine
-    def _drain_write_queue(self):
-        """Ensures ordering of our writes."""
-        self._draining = True
-
-        while not self.closed:
-            body, done = yield self._write_queue.get()
-            result = yield self.connection.write(body)
-            done.set_result(result)
-
     def close(self):
         if not self.closed:
             self.connection.close()
@@ -399,11 +308,11 @@ def initiate_handshake(self, headers):
             A future that resolves (with a value of None) when the handshake
             is complete.
         """
-        yield self._write(messages.InitRequestMessage(
+        self.writer.put(messages.InitRequestMessage(
             version=PROTOCOL_VERSION,
             headers=headers
         ))
-        init_res = yield self._recv()
+        init_res = yield self.reader.get()
         if init_res.message_type != Types.INIT_RES:
             raise errors.InvalidMessageError(
                 "Expected handshake response, got %s" % repr(init_res)
@@ -426,14 +335,14 @@ def expect_handshake(self, headers):
             A future that resolves (with a value of None) when the handshake
             is complete.
         """
-        init_req = yield self._recv()
+        init_req = yield self.reader.get()
         if init_req.message_type != Types.INIT_REQ:
             raise errors.InvalidMessageError(
                 "You need to shake my hand first. Got %s" % repr(init_req)
             )
         self._extract_handshake_headers(init_req)
 
-        yield self._write(
+        self.writer.put(
             messages.InitResponseMessage(
                 PROTOCOL_VERSION, headers, init_req.id),
         )
@@ -544,7 +453,7 @@ def send_error(self, error):
         """
 
         error_message = build_raw_error_message(error)
-        write_future = self._write(error_message)
+        write_future = self.writer.put(error_message)
         write_future.add_done_callback(
             lambda f: self.tchannel.event_emitter.fire(
                 EventType.after_send_error,
@@ -554,10 +463,10 @@ def send_error(self, error):
         return write_future
 
     def ping(self):
-        return self._write(messages.PingRequestMessage())
+        return self.writer.put(messages.PingRequestMessage())
 
     def pong(self):
-        return self._write(messages.PingResponseMessage())
+        return self.writer.put(messages.PingResponseMessage())
 
 
 class StreamConnection(TornadoConnection):
@@ -725,3 +634,156 @@ def _request_timed_out(self, req_id, req_ttl, future):
         # while.
         future.set_exception(errors.TimeoutError())
         self._request_tombstones.add(req_id, req_ttl)
+
+
+class Reader(object):
+
+    def __init__(self, io_stream):
+        self.queue = tornado.queues.Queue()
+        self.filling = False
+        self.io_stream = io_stream
+
+    def fill(self):
+        self.filling = True
+
+        io_loop = IOLoop.current()
+
+        def keep_reading(f):
+            if f.exception():
+                return log(f.exception())
+            # connect these two in the case when put blocks
+            self.queue.put(f.result())
+            io_loop.spawn_callback(self.fill)
+
+        io_loop.add_future(self._dequeue(), keep_reading)
+
+    def get(self):
+        """Receive the next message off the wire.
+
+        :returns:
+            A Future that produces a Context object containing the next
+            message off the wire.
+        """
+        if self.filling is False:
+            self.fill()
+
+        return self.queue.get()
+
+    def _dequeue(self):
+        # This is the message_future we'll return for any inbound messages.
+        message_future = tornado.gen.Future()
+        io_loop = IOLoop.current()
+
+        def on_body(read_body_future, size):
+            if read_body_future.exception():
+                return on_error(read_body_future)
+
+            body = read_body_future.result()
+            f = frame.frame_rw.read(BytesIO(body), size=size)
+            message_rw = messages.RW[f.header.message_type]
+            message = message_rw.read(BytesIO(f.payload))
+            message.id = f.header.message_id
+            message_future.set_result(message)
+
+        def on_read_size(read_size_future):
+            if read_size_future.exception():
+                return on_error(read_size_future)
+
+            size_bytes = read_size_future.result()
+            size = frame.frame_rw.size_rw.read(BytesIO(size_bytes))
+            read_body_future = self.io_stream.read_bytes(size - size_width)
+
+            io_loop.add_future(
+                read_body_future,
+                lambda future: on_body(future, size)
+            )
+
+            return read_body_future
+
+        def on_error(future):
+            log.info("Failed to read data: %s", future.exception())
+
+        size_width = frame.frame_rw.size_rw.width()
+        read_bytes_future = self.io_stream.read_bytes(size_width)
+        io_loop.add_future(read_bytes_future, on_read_size)
+
+        return message_future
+
+
+class Writer(object):
+
+    def __init__(self, io_stream):
+        self.queue = tornado.queues.Queue()
+        self.draining = False
+        self.io_stream = io_stream
+        # Tracks message IDs for this connection.
+        self._id_sequence = 0
+
+    def drain(self):
+        self.draining = True
+
+        io_loop = IOLoop.current()
+
+        def on_write(f, done):
+            if f.exception():
+                log.error(f.exception())
+                done.set_exception(f.exception())
+            else:
+                done.set_result(f.result())
+
+            io_loop.spawn_callback(next_write)
+
+        def on_message(f):
+            if f.exception():
+                io_loop.spawn_callback(next_write)
+                log.error(f.exception())
+                return
+            message, done = f.result()
+            io_loop.add_future(
+                self.io_stream.write(message),
+                lambda f: on_write(f, done),
+            )
+
+        def next_write():
+            if self.io_stream.closed():
+                return
+
+            io_loop.add_future(self.queue.get(), on_message)
+
+        io_loop.spawn_callback(next_write)
+
+    def put(self, message):
+        """Enqueues the given message for writing to the wire.
+
+        The message must be small enough to fit in a single frame.
+        """
+        if self.draining is False:
+            self.drain()
+
+        return self._enqueue(message)
+
+    def next_message_id(self):
+        self._id_sequence = (self._id_sequence + 1) % MAX_MESSAGE_ID
+        return self._id_sequence
+
+    def _enqueue(self, message):
+        message.id = message.id or self.next_message_id()
+
+        payload = messages.RW[message.message_type].write(
+            message, BytesIO()
+        ).getvalue()
+
+        f = frame.Frame(
+            header=frame.FrameHeader(
+                message_type=message.message_type,
+                message_id=message.id,
+            ),
+            payload=payload
+        )
+        body = frame.frame_rw.write(f, BytesIO()).getvalue()
+
+        done_writing_future = tornado.gen.Future()
+
+        self.queue.put((body, done_writing_future))
+
+        return done_writing_future
diff --git a/tchannel/tornado/peer.py b/tchannel/tornado/peer.py
index 61aad961..71323957 100644
--- a/tchannel/tornado/peer.py
+++ b/tchannel/tornado/peer.py
@@ -357,7 +357,7 @@ def send(
         request = Request(
             service=self.service,
             argstreams=[InMemStream(endpoint), arg2, arg3],
-            id=connection.next_message_id(),
+            id=connection.writer.next_message_id(),
             headers=headers,
             endpoint=endpoint,
             ttl=ttl,
@@ -475,7 +475,7 @@ def prepare_next_request(self, request, blacklist):
 
         connection = yield peer.connect()
         # roll back request
-        request.rewind(connection.next_message_id())
+        request.rewind(connection.writer.next_message_id())
 
         raise gen.Return((peer, connection))
 
diff --git a/tests/conftest.py b/tests/conftest.py
index d5313ef6..193dcf64 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -47,7 +47,7 @@ def connection():
 
 
 @pytest.yield_fixture
-def mock_server():
+def mock_server(io_loop):
     with MockServer() as server:
         yield server
 
diff --git a/tests/integration/test_error_handling.py b/tests/integration/test_error_handling.py
index e7723213..879520b5 100644
--- a/tests/integration/test_error_handling.py
+++ b/tests/integration/test_error_handling.py
@@ -114,7 +114,7 @@ def test_invalid_message_during_streaming(mock_server):
     # bypass the default checksum calculation
     # set a wrong checksum
     callreqcontinue.checksum = (ChecksumType.crc32c, 1)
-    yield connection._write(callreqcontinue)
+    yield connection.writer.put(callreqcontinue)
 
     with pytest.raises(FatalProtocolError) as e:
         resp = yield resp_future
diff --git a/tests/tornado/conftest.py b/tests/tornado/conftest.py
index cebd4551..b6a80005 100644
--- a/tests/tornado/conftest.py
+++ b/tests/tornado/conftest.py
@@ -29,7 +29,7 @@
 
 
 @pytest.yield_fixture
-def tornado_pair():
+def tornado_pair(io_loop):
     server, client = socket.socketpair()
 
     server_stream = tornado.iostream.IOStream(server)
diff --git a/tests/tornado/test_connection.py b/tests/tornado/test_connection.py
index edf501a9..5fe9adaf 100644
--- a/tests/tornado/test_connection.py
+++ b/tests/tornado/test_connection.py
@@ -23,7 +23,6 @@
 import mock
 import pytest
 import tornado.ioloop
-import tornado.testing
 from tornado import gen
 
 from tchannel import TChannel
@@ -39,33 +38,33 @@ def dummy_headers():
     }
 
 
-class ConnectionTestCase(tornado.testing.AsyncTestCase):
-    @pytest.fixture(autouse=True)
-    def make_server_client(self, tornado_pair):
-        self.server, self.client = tornado_pair
+@pytest.mark.gen_test
+def test_handshake(tornado_pair):
+    """Verify we handshake in an async manner."""
+    server, client = tornado_pair
+    headers = dummy_headers()
+
+    print 'IOLOOP from test is', tornado.ioloop.IOLoop.current()
 
-    @tornado.testing.gen_test
-    def test_handshake(self):
-        """Verify we handshake in an async manner."""
-        headers = dummy_headers()
+    client.initiate_handshake(headers=headers)
+    yield server.expect_handshake(headers=headers)
 
-        self.client.initiate_handshake(headers=headers)
-        yield self.server.expect_handshake(headers=headers)
+    assert client.requested_version == server.requested_version
 
-        assert self.client.requested_version == self.server.requested_version
 
-    @tornado.testing.gen_test
-    def test_pings(self):
-        """Verify calls are sent to handler properly."""
-        self.client.ping()
+@pytest.mark.gen_test
+def test_pings(tornado_pair):
+    """Verify calls are sent to handler properly."""
+    server, client = tornado_pair
+    client.ping()
 
-        ping = yield self.server.await()
-        assert ping.message_type == Types.PING_REQ
+    ping = yield server.await()
+    assert ping.message_type == Types.PING_REQ
 
-        self.server.pong()
+    server.pong()
 
-        pong = yield self.client.await()
-        assert pong.message_type == Types.PING_RES
+    pong = yield client.await()
+    assert pong.message_type == Types.PING_RES
 
 
 @pytest.mark.gen_test

From 67e249105a7b988f129a7603d53b9016c9f906f8 Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Sat, 5 Mar 2016 16:49:22 -0800
Subject: [PATCH 05/67] Catch an error condition with queue.put

---
 tchannel/tornado/connection.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index 692d93c9..135f5724 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -290,8 +290,7 @@ def write(self, message):
         for fragment in fragments:
             future = self.writer.put(fragment)
 
-        # We're done writing the message once our last future
-        # resolves.
+        # We're done writing the message once our last future resolves.
         return future
 
     def close(self):
@@ -784,6 +783,12 @@ def _enqueue(self, message):
 
         done_writing_future = tornado.gen.Future()
 
-        self.queue.put((body, done_writing_future))
+        def on_queue_error(f):
+            if f.exception():
+                done_writing_future.set_exception(f.exception())
+
+        self.queue.put(
+            (body, done_writing_future)
+        ).add_done_callback(on_queue_error)
 
         return done_writing_future

From 17f906d0aed73c64e837bf44c7c388866dc16fc3 Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Sun, 6 Mar 2016 09:42:50 -0800
Subject: [PATCH 06/67] Add some backpressure

---
 tchannel/tornado/connection.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index 135f5724..f3e7bf6c 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -637,8 +637,8 @@ def _request_timed_out(self, req_id, req_ttl, future):
 
 class Reader(object):
 
-    def __init__(self, io_stream):
-        self.queue = tornado.queues.Queue()
+    def __init__(self, io_stream, capacity=64):
+        self.queue = tornado.queues.Queue(capacity)
         self.filling = False
         self.io_stream = io_stream
 
@@ -711,8 +711,8 @@ def on_error(future):
 
 class Writer(object):
 
-    def __init__(self, io_stream):
-        self.queue = tornado.queues.Queue()
+    def __init__(self, io_stream, capacity=64):
+        self.queue = tornado.queues.Queue(capacity)
         self.draining = False
         self.io_stream = io_stream
         # Tracks message IDs for this connection.

From ea023d630c63181ec60b97879d84fd07ed7f300c Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Sun, 6 Mar 2016 10:30:11 -0800
Subject: [PATCH 07/67] Add a roundtrip benchmark

---
 .travis.yml                  |  1 +
 Makefile                     |  4 +++
 benchmarks/test_roundtrip.py | 57 ++++++++++++++++++++++++++++++++++++
 tox.ini                      |  3 ++
 4 files changed, 65 insertions(+)
 create mode 100644 benchmarks/test_roundtrip.py

diff --git a/.travis.yml b/.travis.yml
index 3ce97ba2..0328e5d4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -19,6 +19,7 @@ env:
     - TOX_ENV=cover
     - TOX_ENV=flake8
     - TOX_ENV=docs
+    - TOX_ENV=benchmark
 
 install:
   - make install
diff --git a/Makefile b/Makefile
index e1257765..7565c692 100644
--- a/Makefile
+++ b/Makefile
@@ -48,6 +48,10 @@ test: clean
 test_ci: clean
 	tox -e $(TOX_ENV) -- tests
 
+.PHONY: benchmark
+benchmark:
+	py.test benchmarks --benchmark-autosave --benchmark-save-data --benchmark-warmup --benchmark-disable-gc --benchmark-histogram
+
 .PHONY: testhtml
 testhtml: clean
 	$(pytest) $(html_report) && open htmlcov/index.html
diff --git a/benchmarks/test_roundtrip.py b/benchmarks/test_roundtrip.py
new file mode 100644
index 00000000..c1eb7d87
--- /dev/null
+++ b/benchmarks/test_roundtrip.py
@@ -0,0 +1,57 @@
+# Copyright (c) 2015 Uber Technologies, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+from tornado import ioloop, gen
+
+from tchannel import TChannel, thrift
+
+
+def test_roundtrip(benchmark):
+    loop = ioloop.IOLoop.current()
+
+    server = TChannel('benchmark-server')
+    server.listen()
+
+    clients = [TChannel('benchmark-client') for _ in range(10)]
+
+    service = thrift.load(
+        path='examples/guide/keyvalue/service.thrift',
+        service='benchmark-server',
+    )
+    service.hostport = server.hostport
+
+    @server.thrift.register(service.KeyValue)
+    def getValue(request):
+        return 'bar'
+
+    def roundtrip():
+        @gen.coroutine
+        def doit():
+            yield [
+                client.thrift(service.KeyValue.getValue("foo"))
+                for client in clients
+            ]
+
+        return loop.run_sync(doit)
+
+    # Establish initial connection
+    roundtrip()
+
+    benchmark(roundtrip)
diff --git a/tox.ini b/tox.ini
index 76649d9c..83941bf6 100644
--- a/tox.ini
+++ b/tox.ini
@@ -23,6 +23,9 @@ basepython = python
 commands =
   py.test --cov tchannel --cov-report=xml --cov-report=term-missing {posargs}
 
+[testenv:benchmark]
+basepython = python
+commands = make benchmark
 
 [testenv:docs]
 basepython = python

From 3097de22ca2e22b6b28ca008258c94a35a7bf49d Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Sun, 6 Mar 2016 21:05:51 -0800
Subject: [PATCH 08/67] Add fail_to decorator to help with
 future-callback-style Tornado code.

---
 tchannel/_future.py  | 78 ++++++++++++++++++++++++++++++++++++++++++++
 tests/test_future.py | 72 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 150 insertions(+)
 create mode 100644 tchannel/_future.py
 create mode 100644 tests/test_future.py

diff --git a/tchannel/_future.py b/tchannel/_future.py
new file mode 100644
index 00000000..cf11d96c
--- /dev/null
+++ b/tchannel/_future.py
@@ -0,0 +1,78 @@
+# Copyright (c) 2015 Uber Technologies, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+from __future__ import (
+    absolute_import, unicode_literals, division, print_function
+)
+
+import sys
+from functools import wraps
+
+from tornado.gen import is_future
+
+
+def fail_to(future):
+    """A decorator for function callbacks to catch uncaught non-async
+    exceptions and forward them to the given future.
+
+    The primary use for this is to catch exceptions in async callbacks and
+    propagate them to futures. For example, consider,
+
+    .. code-block:: python
+
+        answer = Future()
+
+        def on_done(future):
+            foo = bar()
+            answer.set_result(foo)
+
+        some_async_operation().add_done_callback(on_done)
+
+    If ``bar()`` fails, ``answer`` will never get filled with an exception or
+    a result. Now if we change ``on_done`` to,
+
+    .. code-block:: python
+
+        @fail_to(answer)
+        def on_done(future):
+            foo = bar()
+            answer.set_result(foo)
+
+    Uncaught exceptions in ``on_done`` will be caught and propagated to
+    ``answer``. Note that ``on_done`` will return None if an exception was
+    caught.
+
+    :param answer:
+        Future to which the result will be written.
+    """
+    assert is_future(future), 'you forgot to pass a future'
+
+    def decorator(f):
+
+        @wraps(f)
+        def new_f(*args, **kwargs):
+            try:
+                return f(*args, **kwargs)
+            except Exception:
+                future.set_exc_info(sys.exc_info())
+
+        return new_f
+
+    return decorator
diff --git a/tests/test_future.py b/tests/test_future.py
new file mode 100644
index 00000000..01dfd8bd
--- /dev/null
+++ b/tests/test_future.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2015 Uber Technologies, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+from __future__ import (
+    absolute_import, unicode_literals, division, print_function
+)
+
+from tornado import gen
+
+import pytest
+
+from tchannel._future import fail_to
+
+
+def test_fail_to_no_failure():
+    answer = gen.Future()
+
+    @fail_to(answer)
+    def f():
+        return 42
+
+    assert f() == 42
+    assert answer.running()
+
+
+@pytest.mark.gen_test
+def test_fail_to_failure():
+    answer = gen.Future()
+
+    @fail_to(answer)
+    def f():
+        raise GreatSadness
+
+    assert f() is None
+    with pytest.raises(GreatSadness):
+        yield answer
+
+
+@pytest.mark.gen_test
+@pytest.mark.gen_test
+def test_fail_to_failure_in_coroutine():
+    answer = gen.Future()
+
+    @fail_to(answer)
+    @gen.coroutine
+    def f():
+        raise GreatSadness
+
+    with pytest.raises(GreatSadness):
+        yield f()
+    assert answer.running()
+
+
+class GreatSadness(Exception):
+    pass

From 4eaa2922092893ec87a9983ce911956437b12792 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Sun, 6 Mar 2016 21:17:34 -0800
Subject: [PATCH 09/67] Move message reading logic to separate function.

---
 tchannel/tornado/connection.py | 98 +++++++++++++++++++---------------
 1 file changed, 56 insertions(+), 42 deletions(-)

diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index f3e7bf6c..24923bea 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -24,7 +24,7 @@
 import os
 import socket
 import sys
-
+from functools import partial
 
 import tornado.gen
 import tornado.iostream
@@ -37,6 +37,7 @@
 from .. import errors
 from .. import frame
 from .. import messages
+from .._future import fail_to
 from ..errors import NetworkError
 from ..errors import TChannelError
 from ..event import EventType
@@ -654,7 +655,7 @@ def keep_reading(f):
             self.queue.put(f.result())
             io_loop.spawn_callback(self.fill)
 
-        io_loop.add_future(self._dequeue(), keep_reading)
+        io_loop.add_future(read_message(self.io_stream), keep_reading)
 
     def get(self):
         """Receive the next message off the wire.
@@ -668,46 +669,6 @@ def get(self):
 
         return self.queue.get()
 
-    def _dequeue(self):
-        # This is the message_future we'll return for any inbound messages.
-        message_future = tornado.gen.Future()
-        io_loop = IOLoop.current()
-
-        def on_body(read_body_future, size):
-            if read_body_future.exception():
-                return on_error(read_body_future)
-
-            body = read_body_future.result()
-            f = frame.frame_rw.read(BytesIO(body), size=size)
-            message_rw = messages.RW[f.header.message_type]
-            message = message_rw.read(BytesIO(f.payload))
-            message.id = f.header.message_id
-            message_future.set_result(message)
-
-        def on_read_size(read_size_future):
-            if read_size_future.exception():
-                return on_error(read_size_future)
-
-            size_bytes = read_size_future.result()
-            size = frame.frame_rw.size_rw.read(BytesIO(size_bytes))
-            read_body_future = self.io_stream.read_bytes(size - size_width)
-
-            io_loop.add_future(
-                read_body_future,
-                lambda future: on_body(future, size)
-            )
-
-            return read_body_future
-
-        def on_error(future):
-            log.info("Failed to read data: %s", future.exception())
-
-        size_width = frame.frame_rw.size_rw.width()
-        read_bytes_future = self.io_stream.read_bytes(size_width)
-        io_loop.add_future(read_bytes_future, on_read_size)
-
-        return message_future
-
 
 class Writer(object):
 
@@ -792,3 +753,56 @@ def on_queue_error(f):
         ).add_done_callback(on_queue_error)
 
         return done_writing_future
+
+##############################################################################
+
+
+FRAME_SIZE_WIDTH = frame.frame_rw.size_rw.width()
+
+
+def read_message(stream):
+    """Reads a message from the given IOStream.
+
+    :param IOStream stream:
+        IOStream to read from.
+    """
+    answer = tornado.gen.Future()
+    io_loop = IOLoop.current()
+
+    def on_error(future):
+        log.info('Failed to read data: %s', future.exception())
+        return answer.set_exc_info(future.exc_info())
+
+    @fail_to(answer)
+    def on_body(size, future):
+        if future.exception():
+            return on_error(future)
+
+        body = future.result()
+        f = frame.frame_rw.read(BytesIO(body), size=size)
+        message_type = f.header.message_type
+        message_rw = messages.RW.get(message_type)
+        if not message_rw:
+            exc = errors.FatalProtocolError(
+                'Unknown message type %s', str(message_type)
+            )
+            return answer.set_exception(exc)
+
+        message = message_rw.read(BytesIO(f.payload))
+        message.id = f.header.message_id
+        answer.set_result(message)
+
+    @fail_to(answer)
+    def on_read_size(future):
+        if future.exception():
+            return answer.set_exc_info(future.exc_info())
+
+        size_bytes = future.result()
+        size = frame.frame_rw.size_rw.read(BytesIO(size_bytes))
+        io_loop.add_future(
+            stream.read_bytes(size - FRAME_SIZE_WIDTH),
+            partial(on_body, size),
+        )
+
+    io_loop.add_future(stream.read_bytes(FRAME_SIZE_WIDTH), on_read_size)
+    return answer

From 5622497de087dbf626cf2b22af0dada9aa00e256 Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Mon, 7 Mar 2016 17:00:57 -0800
Subject: [PATCH 10/67] Use exc_info in a couple spots

---
 tchannel/tornado/connection.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index 24923bea..b035c42f 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -601,9 +601,9 @@ def adapt_result(self, f, request, response_future):
             return
 
         if f.exception():
-            protocol_exception = f.exception()
+            cls, protocol_exception, tb = f.exc_info()
             protocol_exception.tracing = request.tracing
-            response_future.set_exception(protocol_exception)
+            response_future.set_exc_info((cls, protocol_exception, tb))
 
         else:
             response = f.result()
@@ -650,6 +650,7 @@ def fill(self):
 
         def keep_reading(f):
             if f.exception():
+                self.filling = False
                 return log(f.exception())
             # connect these two in the case when put blocks
             self.queue.put(f.result())
@@ -687,7 +688,7 @@ def drain(self):
         def on_write(f, done):
             if f.exception():
                 log.error(f.exception())
-                done.set_exception(f.exception())
+                done.set_exc_info(f.exc_info())
             else:
                 done.set_result(f.result())
 
@@ -746,7 +747,7 @@ def _enqueue(self, message):
 
         def on_queue_error(f):
             if f.exception():
-                done_writing_future.set_exception(f.exception())
+                done_writing_future.set_exc_info(f.exc_info())
 
         self.queue.put(
             (body, done_writing_future)

From 0a280effd864dbfc41afb3e480cf45bb67a3e41a Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Mon, 7 Mar 2016 17:03:03 -0800
Subject: [PATCH 11/67] Changelog

---
 CHANGES.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGES.rst b/CHANGES.rst
index f1c3e5d5..fa7cbffb 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -6,6 +6,8 @@ Changes by Version
 
 - Tornado 4.2 was listed as a requirement but this was corrected to be 4.3
   which introduced the locks module.
+- Fixed in issue where clients could incorrectly time out when reading large
+  response bodies.
 
 
 0.21.4 (2016-02-15)

From 66cba92babe715222fc53221d4ea3e55b7f1f1b9 Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Mon, 7 Mar 2016 17:45:56 -0800
Subject: [PATCH 12/67] Generate 10 requests per client

---
 benchmarks/test_roundtrip.py | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/benchmarks/test_roundtrip.py b/benchmarks/test_roundtrip.py
index c1eb7d87..73c60b66 100644
--- a/benchmarks/test_roundtrip.py
+++ b/benchmarks/test_roundtrip.py
@@ -23,6 +23,12 @@
 from tchannel import TChannel, thrift
 
 
+service = thrift.load(
+    path='examples/guide/keyvalue/service.thrift',
+    service='benchmark-server',
+)
+
+
 def test_roundtrip(benchmark):
     loop = ioloop.IOLoop.current()
 
@@ -31,12 +37,6 @@ def test_roundtrip(benchmark):
 
     clients = [TChannel('benchmark-client') for _ in range(10)]
 
-    service = thrift.load(
-        path='examples/guide/keyvalue/service.thrift',
-        service='benchmark-server',
-    )
-    service.hostport = server.hostport
-
     @server.thrift.register(service.KeyValue)
     def getValue(request):
         return 'bar'
@@ -44,10 +44,17 @@ def getValue(request):
     def roundtrip():
         @gen.coroutine
         def doit():
-            yield [
-                client.thrift(service.KeyValue.getValue("foo"))
-                for client in clients
-            ]
+            futures = []
+            # 10 clients send 10 requests concurrently
+            for client in clients:
+                for _ in range(10):
+                    futures.append(
+                        client.thrift(
+                            service.KeyValue.getValue("foo"),
+                            hostport=server.hostport,
+                        )
+                    )
+            yield futures
 
         return loop.run_sync(doit)
 

From 713c91e43d2af6d18e199992bb9dac84f1050a6a Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Tue, 8 Mar 2016 13:06:26 -0800
Subject: [PATCH 13/67] abg feedback

---
 tchannel/tornado/connection.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index b035c42f..4e6bf992 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -638,7 +638,9 @@ def _request_timed_out(self, req_id, req_ttl, future):
 
 class Reader(object):
 
-    def __init__(self, io_stream, capacity=64):
+    def __init__(self, io_stream, capacity=None):
+        capacity = capacity or 64
+
         self.queue = tornado.queues.Queue(capacity)
         self.filling = False
         self.io_stream = io_stream
@@ -653,8 +655,9 @@ def keep_reading(f):
                 self.filling = False
                 return log(f.exception())
             # connect these two in the case when put blocks
-            self.queue.put(f.result())
-            io_loop.spawn_callback(self.fill)
+            self.queue.put(f.result()).add_done_callback(
+                lambda f: io_loop.spawn_callback(self.fill),
+            )
 
         io_loop.add_future(read_message(self.io_stream), keep_reading)
 
@@ -662,10 +665,9 @@ def get(self):
         """Receive the next message off the wire.
 
         :returns:
-            A Future that produces a Context object containing the next
-            message off the wire.
+            A Future that resolves to the next message off the wire.
         """
-        if self.filling is False:
+        if not self.filling:
             self.fill()
 
         return self.queue.get()
@@ -673,7 +675,9 @@ def get(self):
 
 class Writer(object):
 
-    def __init__(self, io_stream, capacity=64):
+    def __init__(self, io_stream, capacity=None):
+        capacity = capacity or 64
+
         self.queue = tornado.queues.Queue(capacity)
         self.draining = False
         self.io_stream = io_stream
@@ -748,6 +752,8 @@ def _enqueue(self, message):
         def on_queue_error(f):
             if f.exception():
                 done_writing_future.set_exc_info(f.exc_info())
+            else:
+                done_writing_future.set_result(None)
 
         self.queue.put(
             (body, done_writing_future)

From debf2848b5afa1ccd4d5f6aa7972a66b86797ecb Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Tue, 8 Mar 2016 13:16:49 -0800
Subject: [PATCH 14/67] More changelog

---
 CHANGES.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index fa7cbffb..d03d5fca 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -7,7 +7,8 @@ Changes by Version
 - Tornado 4.2 was listed as a requirement but this was corrected to be 4.3
   which introduced the locks module.
 - Fixed in issue where clients could incorrectly time out when reading large
-  response bodies.
+  response bodies. This was due to response fragments being dropped due to
+  out-of-order writes; writes are now serialized on a per-connection basis.
 
 
 0.21.4 (2016-02-15)

From c79bd974d36568063376066ce12a70f8c6198542 Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Tue, 8 Mar 2016 13:18:45 -0800
Subject: [PATCH 15/67] Remove a print statement

---
 tests/tornado/test_connection.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/tornado/test_connection.py b/tests/tornado/test_connection.py
index 5fe9adaf..bd5fb7dc 100644
--- a/tests/tornado/test_connection.py
+++ b/tests/tornado/test_connection.py
@@ -44,8 +44,6 @@ def test_handshake(tornado_pair):
     server, client = tornado_pair
     headers = dummy_headers()
 
-    print 'IOLOOP from test is', tornado.ioloop.IOLoop.current()
-
     client.initiate_handshake(headers=headers)
     yield server.expect_handshake(headers=headers)
 

From 40413752a8d885b0a1b4886c1b13f4cb6443e651 Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Tue, 8 Mar 2016 13:42:57 -0800
Subject: [PATCH 16/67] Remove some dead code

---
 tchannel/tornado/connection.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index 4e6bf992..21a4210b 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -752,8 +752,6 @@ def _enqueue(self, message):
         def on_queue_error(f):
             if f.exception():
                 done_writing_future.set_exc_info(f.exc_info())
-            else:
-                done_writing_future.set_result(None)
 
         self.queue.put(
             (body, done_writing_future)

From ee9b8ce0571e9e296b90bb2f347d056c95db7b28 Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Tue, 8 Mar 2016 15:55:57 -0800
Subject: [PATCH 17/67] Preparing release 0.21.5

---
 CHANGES.rst          | 2 +-
 tchannel/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index d03d5fca..10cab5ac 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,7 +1,7 @@
 Changes by Version
 ==================
 
-0.21.5 (unreleased)
+0.21.5 (2016-03-08)
 -------------------
 
 - Tornado 4.2 was listed as a requirement but this was corrected to be 4.3
diff --git a/tchannel/__init__.py b/tchannel/__init__.py
index a14967a9..66d58160 100644
--- a/tchannel/__init__.py
+++ b/tchannel/__init__.py
@@ -22,7 +22,7 @@
     absolute_import, division, print_function, unicode_literals
 )
 
-__version__ = '0.21.5.dev0'
+__version__ = '0.21.5'
 # Update setup.py when changing this. zest.releaser doesn't support updating
 # both of them yet.
 

From 8d1c78afcceb8f1f66192ae8f9cc4bc50cdc397e Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Tue, 8 Mar 2016 15:57:08 -0800
Subject: [PATCH 18/67] Back to development: 0.21.6

---
 CHANGES.rst          | 6 ++++++
 tchannel/__init__.py | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 10cab5ac..f4224722 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,6 +1,12 @@
 Changes by Version
 ==================
 
+0.21.6 (unreleased)
+-------------------
+
+- Nothing changed yet.
+
+
 0.21.5 (2016-03-08)
 -------------------
 
diff --git a/tchannel/__init__.py b/tchannel/__init__.py
index 66d58160..ebd8ac86 100644
--- a/tchannel/__init__.py
+++ b/tchannel/__init__.py
@@ -22,7 +22,7 @@
     absolute_import, division, print_function, unicode_literals
 )
 
-__version__ = '0.21.5'
+__version__ = '0.21.6.dev0'
 # Update setup.py when changing this. zest.releaser doesn't support updating
 # both of them yet.
 

From c756c145fac740a67b846d370b8fc4fd62af33e5 Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Tue, 8 Mar 2016 16:08:21 -0800
Subject: [PATCH 19/67] Fix a logging error on failed reads

---
 CHANGES.rst                    | 2 +-
 tchannel/tornado/connection.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index f4224722..29ee6f7b 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,7 +4,7 @@ Changes by Version
 0.21.6 (unreleased)
 -------------------
 
-- Nothing changed yet.
+- Fixed a logging error on failed reads.
 
 
 0.21.5 (2016-03-08)
diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index 21a4210b..5823cc01 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -653,7 +653,7 @@ def fill(self):
         def keep_reading(f):
             if f.exception():
                 self.filling = False
-                return log(f.exception())
+                return log.exception(f.exception())
             # connect these two in the case when put blocks
             self.queue.put(f.result()).add_done_callback(
                 lambda f: io_loop.spawn_callback(self.fill),

From 5bf6a78c72e40d261d991f19d09a32c06175bcf6 Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Tue, 8 Mar 2016 16:20:21 -0800
Subject: [PATCH 20/67] Preparing release 0.21.6

---
 CHANGES.rst          | 2 +-
 tchannel/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 29ee6f7b..0008edf4 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,7 +1,7 @@
 Changes by Version
 ==================
 
-0.21.6 (unreleased)
+0.21.6 (2016-03-08)
 -------------------
 
 - Fixed a logging error on failed reads.
diff --git a/tchannel/__init__.py b/tchannel/__init__.py
index ebd8ac86..600511e5 100644
--- a/tchannel/__init__.py
+++ b/tchannel/__init__.py
@@ -22,7 +22,7 @@
     absolute_import, division, print_function, unicode_literals
 )
 
-__version__ = '0.21.6.dev0'
+__version__ = '0.21.6'
 # Update setup.py when changing this. zest.releaser doesn't support updating
 # both of them yet.
 

From e5aaed332890037de9d655f55e7c6a9991dbab8d Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Tue, 8 Mar 2016 16:22:12 -0800
Subject: [PATCH 21/67] Back to development: 0.21.7

---
 CHANGES.rst          | 6 ++++++
 tchannel/__init__.py | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 0008edf4..b408d1fb 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,6 +1,12 @@
 Changes by Version
 ==================
 
+0.21.7 (unreleased)
+-------------------
+
+- Nothing changed yet.
+
+
 0.21.6 (2016-03-08)
 -------------------
 
diff --git a/tchannel/__init__.py b/tchannel/__init__.py
index 600511e5..8b429f03 100644
--- a/tchannel/__init__.py
+++ b/tchannel/__init__.py
@@ -22,7 +22,7 @@
     absolute_import, division, print_function, unicode_literals
 )
 
-__version__ = '0.21.6'
+__version__ = '0.21.7.dev0'
 # Update setup.py when changing this. zest.releaser doesn't support updating
 # both of them yet.
 

From cae8d2632590984cc1f453dbbdd47cec8f809665 Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Tue, 8 Mar 2016 17:03:00 -0800
Subject: [PATCH 22/67] Fix an unhelpful error message

---
 CHANGES.rst                    | 2 +-
 tchannel/tornado/connection.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index b408d1fb..54254221 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,7 +4,7 @@ Changes by Version
 0.21.7 (unreleased)
 -------------------
 
-- Nothing changed yet.
+- Fixed an unhelpful stack trace on failed reads.
 
 
 0.21.6 (2016-03-08)
diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index 5823cc01..d1e59ef7 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -653,7 +653,7 @@ def fill(self):
         def keep_reading(f):
             if f.exception():
                 self.filling = False
-                return log.exception(f.exception())
+                return log.error("read error", exc_info=f.exc_info())
             # connect these two in the case when put blocks
             self.queue.put(f.result()).add_done_callback(
                 lambda f: io_loop.spawn_callback(self.fill),

From 7e244302f4e0cae8058d6ff5dc1b3541b1f4f8bb Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Tue, 8 Mar 2016 17:11:01 -0800
Subject: [PATCH 23/67] Preparing release 0.21.7

---
 CHANGES.rst          | 2 +-
 tchannel/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 54254221..0746e954 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,7 +1,7 @@
 Changes by Version
 ==================
 
-0.21.7 (unreleased)
+0.21.7 (2016-03-08)
 -------------------
 
 - Fixed an unhelpful stack trace on failed reads.
diff --git a/tchannel/__init__.py b/tchannel/__init__.py
index 8b429f03..c1647fae 100644
--- a/tchannel/__init__.py
+++ b/tchannel/__init__.py
@@ -22,7 +22,7 @@
     absolute_import, division, print_function, unicode_literals
 )
 
-__version__ = '0.21.7.dev0'
+__version__ = '0.21.7'
 # Update setup.py when changing this. zest.releaser doesn't support updating
 # both of them yet.
 

From 42531e59523404280a7c67d246016183c271678c Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Tue, 8 Mar 2016 17:13:19 -0800
Subject: [PATCH 24/67] Back to development: 0.21.8

---
 CHANGES.rst          | 6 ++++++
 tchannel/__init__.py | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 0746e954..a849a3e7 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,6 +1,12 @@
 Changes by Version
 ==================
 
+0.21.8 (unreleased)
+-------------------
+
+- Nothing changed yet.
+
+
 0.21.7 (2016-03-08)
 -------------------
 
diff --git a/tchannel/__init__.py b/tchannel/__init__.py
index c1647fae..8694aeab 100644
--- a/tchannel/__init__.py
+++ b/tchannel/__init__.py
@@ -22,7 +22,7 @@
     absolute_import, division, print_function, unicode_literals
 )
 
-__version__ = '0.21.7'
+__version__ = '0.21.8.dev0'
 # Update setup.py when changing this. zest.releaser doesn't support updating
 # both of them yet.
 

From e32bef6a5a9d8aab03fc7f208217a9d41394d189 Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Tue, 8 Mar 2016 18:09:32 -0800
Subject: [PATCH 25/67] Reduce read errors to info

---
 tchannel/tornado/connection.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index d1e59ef7..b30fee3e 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -653,7 +653,8 @@ def fill(self):
         def keep_reading(f):
             if f.exception():
                 self.filling = False
-                return log.error("read error", exc_info=f.exc_info())
+                # This is usually StreamClosed due to a client disconnecting.
+                return log.info("read error", exc_info=f.exc_info())
             # connect these two in the case when put blocks
             self.queue.put(f.result()).add_done_callback(
                 lambda f: io_loop.spawn_callback(self.fill),

From b96e492e66424145656183963cfaa3ec9930b0fa Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Tue, 8 Mar 2016 18:18:14 -0800
Subject: [PATCH 26/67] Changes

---
 CHANGES.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index a849a3e7..a887374d 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,7 +4,7 @@ Changes by Version
 0.21.8 (unreleased)
 -------------------
 
-- Nothing changed yet.
+- Reduce read errors due to clients disconnecting to INFO from ERROR.
 
 
 0.21.7 (2016-03-08)

From aadd3eba5b423cc86f299e7088e996a4767dd7d8 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Wed, 9 Mar 2016 16:37:03 -0800
Subject: [PATCH 27/67] Lower read errors to INFO on StreamClosedError only.

---
 tchannel/tornado/connection.py   |  5 ++++-
 tests/tornado/test_connection.py | 33 ++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index b30fee3e..40239fae 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -654,7 +654,10 @@ def keep_reading(f):
             if f.exception():
                 self.filling = False
                 # This is usually StreamClosed due to a client disconnecting.
-                return log.info("read error", exc_info=f.exc_info())
+                if isinstance(f.exception(), StreamClosedError):
+                    return log.info("read error", exc_info=f.exc_info())
+                else:
+                    return log.error("read error", exc_info=f.exc_info())
             # connect these two in the case when put blocks
             self.queue.put(f.result()).add_done_callback(
                 lambda f: io_loop.spawn_callback(self.fill),
diff --git a/tests/tornado/test_connection.py b/tests/tornado/test_connection.py
index bd5fb7dc..26860c5f 100644
--- a/tests/tornado/test_connection.py
+++ b/tests/tornado/test_connection.py
@@ -24,6 +24,7 @@
 import pytest
 import tornado.ioloop
 from tornado import gen
+from datetime import timedelta
 
 from tchannel import TChannel
 from tchannel.errors import TimeoutError
@@ -113,3 +114,35 @@ def hello(request):
         yield gen.sleep(0.03)
 
     assert mock_warn.call_count == 0
+
+
+@pytest.mark.gen_test
+def test_stream_closed_error_on_read(tornado_pair):
+    # Test that we don't log an error for StreamClosedErrors while reading.
+    server, client = tornado_pair
+    future = server.await()
+    client.close()
+
+    with mock.patch.object(connection, 'log') as mock_log:  # :(
+        with pytest.raises(gen.TimeoutError):
+            yield gen.with_timeout(timedelta(milliseconds=100), future)
+
+    assert mock_log.error.call_count == 0
+    assert mock_log.info.call_count == 1
+
+
+@pytest.mark.gen_test
+def test_other_error_on_read(tornado_pair):
+    # Test that we do log errors for non-StreamClosedError failures while
+    # reading.
+    server, client = tornado_pair
+
+    future = server.await()
+    yield client.connection.write(b'\x00\x02\x00\x00')  # bad payload
+
+    with mock.patch.object(connection, 'log') as mock_log:  # :(
+        with pytest.raises(gen.TimeoutError):
+            yield gen.with_timeout(timedelta(milliseconds=100), future)
+
+    assert mock_log.error.call_count == 1
+    assert mock_log.info.call_count == 0

From bbfc6cfaed58cce2b33b00ddee8a85871219c2a9 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Thu, 10 Mar 2016 10:22:33 -0800
Subject: [PATCH 28/67] Preparing release 0.21.8

---
 CHANGES.rst          | 2 +-
 tchannel/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index a887374d..ec92f7df 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,7 +1,7 @@
 Changes by Version
 ==================
 
-0.21.8 (unreleased)
+0.21.8 (2016-03-10)
 -------------------
 
 - Reduce read errors due to clients disconnecting to INFO from ERROR.
diff --git a/tchannel/__init__.py b/tchannel/__init__.py
index 8694aeab..acb50087 100644
--- a/tchannel/__init__.py
+++ b/tchannel/__init__.py
@@ -22,7 +22,7 @@
     absolute_import, division, print_function, unicode_literals
 )
 
-__version__ = '0.21.8.dev0'
+__version__ = '0.21.8'
 # Update setup.py when changing this. zest.releaser doesn't support updating
 # both of them yet.
 

From 50249f4b9e34eb1ede89079b83c780cb9a76c835 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Thu, 10 Mar 2016 10:23:44 -0800
Subject: [PATCH 29/67] Back to development: 0.21.9

---
 CHANGES.rst          | 6 ++++++
 tchannel/__init__.py | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index ec92f7df..0a4ca54e 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,6 +1,12 @@
 Changes by Version
 ==================
 
+0.21.9 (unreleased)
+-------------------
+
+- Nothing changed yet.
+
+
 0.21.8 (2016-03-10)
 -------------------
 
diff --git a/tchannel/__init__.py b/tchannel/__init__.py
index acb50087..9cb34aee 100644
--- a/tchannel/__init__.py
+++ b/tchannel/__init__.py
@@ -22,7 +22,7 @@
     absolute_import, division, print_function, unicode_literals
 )
 
-__version__ = '0.21.8'
+__version__ = '0.21.9.dev0'
 # Update setup.py when changing this. zest.releaser doesn't support updating
 # both of them yet.
 

From 07e5f15c79c5d9851793c4fe3d684063b7a4e770 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Fri, 11 Mar 2016 10:50:42 -0800
Subject: [PATCH 30/67] connection: log stack traces on write failures

---
 tchannel/tornado/connection.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index 40239fae..32d8074d 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -695,7 +695,7 @@ def drain(self):
 
         def on_write(f, done):
             if f.exception():
-                log.error(f.exception())
+                log.error("write failed", exc_info=f.exc_info())
                 done.set_exc_info(f.exc_info())
             else:
                 done.set_result(f.result())
@@ -705,7 +705,7 @@ def on_write(f, done):
         def on_message(f):
             if f.exception():
                 io_loop.spawn_callback(next_write)
-                log.error(f.exception())
+                log.error("queue get failed", exc_info=f.exc_info())
                 return
             message, done = f.result()
             io_loop.add_future(

From 09510f8858f358fe722dd467563a8a31f9bca585 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Fri, 11 Mar 2016 10:51:02 -0800
Subject: [PATCH 31/67] fix Response class used by advertise test

---
 tests/test_tchannel.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/test_tchannel.py b/tests/test_tchannel.py
index be332e30..967ef0f1 100644
--- a/tests/test_tchannel.py
+++ b/tests/test_tchannel.py
@@ -325,6 +325,7 @@ def test_event_hook_register():
 
 @pytest.mark.gen_test
 def test_advertise_should_take_a_router_file():
+    from tchannel.tornado.response import Response as TornadoResponse
 
     host_path = os.path.join(
         os.path.dirname(os.path.realpath(__file__)),
@@ -343,7 +344,7 @@ def test_advertise_should_take_a_router_file():
     ) as mock_advertise:
         f = gen.Future()
         mock_advertise.return_value = f
-        f.set_result(Response())
+        f.set_result(TornadoResponse())
         tchannel.advertise(router_file=host_path)
 
         mock_advertise.assert_called_once_with(ANY, routers=routers,

From d8c5c2accdc2661f6666e5ac3b84f3d36c6c3ff3 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Fri, 11 Mar 2016 11:06:00 -0800
Subject: [PATCH 32/67] fix hypothesis warnings while running rw tests

---
 tests/test_rw.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/tests/test_rw.py b/tests/test_rw.py
index 024bd2f0..22f4100a 100644
--- a/tests/test_rw.py
+++ b/tests/test_rw.py
@@ -30,13 +30,13 @@
 from doubles import expect
 from hypothesis import assume
 from hypothesis import given
-from hypothesis import specifiers
+from hypothesis import strategies as st
 
 from tchannel import rw
 from tchannel.errors import ReadError
 from tchannel.io import BytesIO
 
-number_width = specifiers.sampled_from([1, 2, 4, 8])
+number_width = st.sampled_from((1, 2, 4, 8))
 
 
 def bio(bs):
@@ -47,19 +47,19 @@ def roundtrip(value, v_rw):
     return v_rw.read(bio(v_rw.write(value, BytesIO()).getvalue()))
 
 
-@given(int, number_width)
+@given(st.integers(), number_width)
 def test_number_roundtrip(num, width):
     num = num % (2 ** width - 1)
     assert roundtrip(num, rw.number(width)) == num
 
 
-@given(unicode, number_width)
+@given(st.text(), number_width)
 def test_len_prefixed_string_roundtrip(s, len_width):
     assume(len(s.encode('utf-8')) <= 2 ** len_width - 1)
     assert roundtrip(s, rw.len_prefixed_string(rw.number(len_width))) == s
 
 
-@given(str, number_width)
+@given(st.binary(), number_width)
 def test_len_prefixed_string_binary_roundtrip(s, len_width):
     assume(len(s) <= 2 ** len_width - 1)
     assert roundtrip(
@@ -67,12 +67,10 @@ def test_len_prefixed_string_binary_roundtrip(s, len_width):
     ) == s
 
 
-@given(str)
+@given(st.binary())
 def test_none_r(bs):
     stream = bio(bs)
     assert rw.none().read(stream) is None
-    stream.read() == 'a b c'
-
     assert rw.none().width() == 0
 
 

From e09e9153ec7986a8cb01ed3a1d30a0fcd340c3b5 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Fri, 11 Mar 2016 11:39:21 -0800
Subject: [PATCH 33/67] Remove usages of BaseException.message.

That's been deprecated since 2.6
---
 tchannel/event.py                        | 5 +++--
 tchannel/rw.py                           | 8 ++------
 tchannel/testing/vcr/patch.py            | 3 +--
 tchannel/testing/vcr/server.py           | 4 ++--
 tchannel/tornado/connection.py           | 5 +++--
 tchannel/tornado/dispatch.py             | 2 +-
 tests/integration/test_error_handling.py | 5 ++---
 7 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/tchannel/event.py b/tchannel/event.py
index 92864a64..51d9b34e 100644
--- a/tchannel/event.py
+++ b/tchannel/event.py
@@ -20,6 +20,7 @@
 
 from __future__ import absolute_import
 
+import sys
 import collections
 import functools
 import logging
@@ -144,8 +145,8 @@ def fire(self, event, *args, **kwargs):
         for hook in self.hooks[event]:
             try:
                 hook(*args, **kwargs)
-            except Exception as e:
-                log.error(e.message)
+            except Exception:
+                log.error("error calling hook", exc_info=sys.exc_info())
 
 
 class EventRegistrar(object):
diff --git a/tchannel/rw.py b/tchannel/rw.py
index efda9762..cb773b0e 100644
--- a/tchannel/rw.py
+++ b/tchannel/rw.py
@@ -496,9 +496,7 @@ def read(self, stream):
                 if name != skip:
                     result[name] = value
             except ReadError as e:
-                raise ReadError(
-                    "Failed to read %s: %s" % (name, e.message)
-                )
+                raise ReadError("Failed to read %s: %s" % (name, e))
         return result
 
     def write(self, obj, stream):
@@ -538,9 +536,7 @@ def read(self, stream):
                 if attr != skip:
                     kwargs[attr] = value
         except ReadError as e:
-            raise ReadError(
-                "Failed to read %s: %s" % (self._cls, e.message)
-            )
+            raise ReadError("Failed to read %s: %s" % (self._cls, e))
 
         return self._cls(**kwargs)
 
diff --git a/tchannel/testing/vcr/patch.py b/tchannel/testing/vcr/patch.py
index 3ddce727..9eebd7c2 100644
--- a/tchannel/testing/vcr/patch.py
+++ b/tchannel/testing/vcr/patch.py
@@ -111,8 +111,7 @@ def send(self, arg1, arg2, arg3,
             raise TChannelError.from_code(
                 e.code,
                 description=(
-                    "The remote service threw a protocol error: %s" %
-                    e.message
+                    "The remote service threw a protocol error: %s" % (e,)
                 )
             )
 
diff --git a/tchannel/testing/vcr/server.py b/tchannel/testing/vcr/server.py
index ddf16579..ee5cf430 100644
--- a/tchannel/testing/vcr/server.py
+++ b/tchannel/testing/vcr/server.py
@@ -53,7 +53,7 @@ def new_f(*args, **kwargs):
                     # TODO maybe use traceback.format_exc to also send a
                     # traceback?
                     raise e
-                raise proxy.VCRServiceError(e.message)
+                raise proxy.VCRServiceError(str(e))
             else:
                 raise gen.Return(result)
 
@@ -150,7 +150,7 @@ def send(self, request):
         except TChannelError as e:
             raise proxy.RemoteServiceError(
                 code=e.code,
-                message=e.message,
+                message=str(e),
             )
         response_headers = yield response.get_header()
         response_body = yield response.get_body()
diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index 32d8074d..abc548c7 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -532,9 +532,10 @@ def _stream(self, context, message_factory):
             yield self.write(message)
             context.state = StreamState.completed
         # Stop streamming immediately if exception occurs on the handler side
-        except TChannelError as e:
+        except TChannelError:
             # raise by tchannel intentionally
-            log.info("Stop Outgoing Streams because of error: %s", e.message)
+            log.info("Stopped outgoing streams because of an error",
+                     exc_info=sys.exc_info())
 
     @tornado.gen.coroutine
     def post_response(self, response):
diff --git a/tchannel/tornado/dispatch.py b/tchannel/tornado/dispatch.py
index 8232a740..486437f7 100644
--- a/tchannel/tornado/dispatch.py
+++ b/tchannel/tornado/dispatch.py
@@ -225,7 +225,7 @@ def handle_call(self, request, connection):
             connection.send_error(e)
         except Exception as e:
             error = UnexpectedError(
-                description="Unexpected Error: '%s'" % e.message,
+                description="Unexpected Error: '%s'" % e,
                 id=request.id,
                 tracing=request.tracing,
             )
diff --git a/tests/integration/test_error_handling.py b/tests/integration/test_error_handling.py
index 879520b5..edb9392a 100644
--- a/tests/integration/test_error_handling.py
+++ b/tests/integration/test_error_handling.py
@@ -121,7 +121,7 @@ def test_invalid_message_during_streaming(mock_server):
         yield resp.get_header()
         yield resp.get_body()
 
-    assert e.value.message == u"Checksum does not match!"
+    assert 'Checksum does not match' in str(e)
 
 
 @pytest.mark.gen_test
@@ -143,5 +143,4 @@ def test_continue_message_error(mock_server):
     with pytest.raises(FatalProtocolError) as e:
         yield connection.send(callreqcontinue)
 
-    assert (e.value.message ==
-            u"missing call message after receiving continue message")
+    assert 'missing call message after receiving continue message' in str(e)

From 3627f6a2a225a98cd3baa01ea39d88786db9bbfe Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Fri, 11 Mar 2016 11:27:28 -0800
Subject: [PATCH 34/67] connection.Writer: re-raise StreamClosedError

The future returned by Writer.put() will never resolve if the IOStream was
already closed by the time put was called. This is because IOStream.write
raises a StreamClosedError synchronously rather than inside a future if it's
already closed at the time write() is called.
---
 tchannel/tornado/connection.py   | 14 ++++++++++----
 tests/tornado/test_connection.py | 24 +++++++++++++++++++++---
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index abc548c7..0b2cf7dc 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -708,11 +708,17 @@ def on_message(f):
                 io_loop.spawn_callback(next_write)
                 log.error("queue get failed", exc_info=f.exc_info())
                 return
+
             message, done = f.result()
-            io_loop.add_future(
-                self.io_stream.write(message),
-                lambda f: on_write(f, done),
-            )
+            try:
+                # write() may raise if the stream was closed while we were
+                # waiting for an entry in the queue.
+                write_future = self.io_stream.write(message)
+            except Exception:
+                io_loop.spawn_callback(next_write)
+                done.set_exc_info(sys.exc_info())
+            else:
+                io_loop.add_future(write_future, lambda f: on_write(f, done))
 
         def next_write():
             if self.io_stream.closed():
diff --git a/tests/tornado/test_connection.py b/tests/tornado/test_connection.py
index 26860c5f..06825d2e 100644
--- a/tests/tornado/test_connection.py
+++ b/tests/tornado/test_connection.py
@@ -22,14 +22,16 @@
 
 import mock
 import pytest
+import socket
 import tornado.ioloop
 from tornado import gen
 from datetime import timedelta
+from tornado.iostream import IOStream, StreamClosedError
 
 from tchannel import TChannel
+from tchannel import messages
 from tchannel.errors import TimeoutError
 from tchannel.tornado import connection
-from tchannel.messages import Types
 
 
 def dummy_headers():
@@ -58,12 +60,12 @@ def test_pings(tornado_pair):
     client.ping()
 
     ping = yield server.await()
-    assert ping.message_type == Types.PING_REQ
+    assert ping.message_type == messages.Types.PING_REQ
 
     server.pong()
 
     pong = yield client.await()
-    assert pong.message_type == Types.PING_RES
+    assert pong.message_type == messages.Types.PING_RES
 
 
 @pytest.mark.gen_test
@@ -146,3 +148,19 @@ def test_other_error_on_read(tornado_pair):
 
     assert mock_log.error.call_count == 1
     assert mock_log.info.call_count == 0
+
+
+@pytest.mark.gen_test
+def test_writer_write_error():
+    server, client = socket.socketpair()
+    reader = connection.Reader(IOStream(server))
+    writer = connection.Writer(IOStream(client))
+
+    # one successful message first
+    yield writer.put(messages.PingRequestMessage())
+    ping = yield reader.get()
+    assert isinstance(ping, messages.PingRequestMessage)
+
+    writer.io_stream.close()
+    with pytest.raises(StreamClosedError):
+        yield writer.put(messages.PingResponseMessage())

From 63c46022589b3b634c18e715e745d57ade4fb671 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Fri, 11 Mar 2016 13:20:20 -0800
Subject: [PATCH 35/67] Update CHANGELOG

---
 CHANGES.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 0a4ca54e..2a51ffc6 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,7 +4,8 @@ Changes by Version
 0.21.9 (unreleased)
 -------------------
 
-- Nothing changed yet.
+- Fixed a bug that caused silent failures when a write attempt was made to a
+  closed connection.
 
 
 0.21.8 (2016-03-10)

From 42c6f1884e1838a5e39e941baf28378c8e960296 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Fri, 11 Mar 2016 11:50:35 -0800
Subject: [PATCH 36/67] Ensure Connection.stream_request always consumes its
 exceptions

---
 tchannel/tornado/connection.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index 0b2cf7dc..b4bf195b 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -555,13 +555,12 @@ def stream_request(self, request):
         """send the given request and response is not required"""
         request.close_argstreams()
 
-        stream_future = self._stream(request, self.request_message_factory)
-
-        IOLoop.current().add_future(
-            stream_future,
-            lambda f: request.close_argstreams(force=True)
-        )
+        def on_done(future):
+            future.exception()  # < to avoid "unconsumed exception" logs
+            request.close_argstreams(force=True)
 
+        stream_future = self._stream(request, self.request_message_factory)
+        stream_future.add_done_callback(on_done)
         return stream_future
 
     def send_request(self, request):

From 4f098192543f8384c5d0e42a9dc1d850366a4dca Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Fri, 11 Mar 2016 14:11:14 -0800
Subject: [PATCH 37/67] Fix unconsumed StreamClosedError logs

The tests were logging unconsumed StreamClosedErrors a lot. The IOLoop was
getting stopped before it could tell the future to call the callback, so the
future exception was never getting consumed.

I think this will solve the same problem for uses of `IOLoop.run_sync` that
time out as well.
---
 CHANGES.rst                    | 1 +
 tchannel/tornado/connection.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 2a51ffc6..1b1ec97c 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -6,6 +6,7 @@ Changes by Version
 
 - Fixed a bug that caused silent failures when a write attempt was made to a
   closed connection.
+- Reduce ``StreamClosedError`` log noisiness for certain scenarios.
 
 
 0.21.8 (2016-03-10)
diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index b4bf195b..70764f1a 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -663,7 +663,7 @@ def keep_reading(f):
                 lambda f: io_loop.spawn_callback(self.fill),
             )
 
-        io_loop.add_future(read_message(self.io_stream), keep_reading)
+        read_message(self.io_stream).add_done_callback(keep_reading)
 
     def get(self):
         """Receive the next message off the wire.

From 41c22c721425544ab9c76e0e27edc4ec306bc486 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Fri, 11 Mar 2016 17:00:19 -0800
Subject: [PATCH 38/67] Make TChannel.advertise() idempotent and thread safe.

Resloves #319.
---
 CHANGES.rst            |  1 +
 tchannel/tchannel.py   | 55 +++++++++++++++++++++-------
 tests/test_tchannel.py | 81 +++++++++++++++++++++++++++++++++++++-----
 3 files changed, 117 insertions(+), 20 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 1b1ec97c..91d93700 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -7,6 +7,7 @@ Changes by Version
 - Fixed a bug that caused silent failures when a write attempt was made to a
   closed connection.
 - Reduce ``StreamClosedError`` log noisiness for certain scenarios.
+- Make ``TChannel.advertise`` idempotent and thread-safe.
 
 
 0.21.8 (2016-03-10)
diff --git a/tchannel/tchannel.py b/tchannel/tchannel.py
index b060d846..f8e2110f 100644
--- a/tchannel/tchannel.py
+++ b/tchannel/tchannel.py
@@ -121,6 +121,11 @@ def __init__(self, name, hostport=None, process_name=None,
         # register default health endpoint
         self.thrift.register(Meta)(health)
 
+        # advertise_response is the Future containing the response of calling
+        # advertise().
+        self._advertise_response = None
+        self._advertise_lock = Lock()
+
     def is_listening(self):
         return self._dep_tchannel.is_listening()
 
@@ -260,7 +265,6 @@ def decorator(fn):
         else:
             return decorator(handler)
 
-    @gen.coroutine
     def advertise(self, routers=None, name=None, timeout=None,
                   router_file=None):
         """Advertise with Hyperbahn.
@@ -274,6 +278,9 @@ def advertise(self, routers=None, name=None, timeout=None,
         minute). Hyperbahn will eject us from the network if it doesn't get a
         re-advertise from us after 5 minutes.
 
+        This function may be called multiple times if it fails. If it
+        succeeds, all consecutive calls are ignored.
+
         :param list routers:
             A seed list of known Hyperbahn addresses to attempt contact with.
             Entries should be of the form ``"host:port"``.
@@ -315,14 +322,38 @@ def advertise(self, routers=None, name=None, timeout=None,
                 log.exception('Failed to read seed routers list.')
                 raise
 
-        dep_result = yield self._dep_tchannel.advertise(
-            routers=routers,
-            name=name,
-            timeout=timeout
-        )
-
-        body = yield dep_result.get_body()
-        headers = yield dep_result.get_header()
-        response = Response(json.loads(body), headers or {})
-
-        raise gen.Return(response)
+        @gen.coroutine
+        def _advertise():
+            result = yield self._dep_tchannel.advertise(
+                routers=routers,
+                name=name,
+                timeout=timeout,
+            )
+            body = yield result.get_body()
+            headers = yield result.get_header()
+            response = Response(json.loads(body), headers or {})
+            raise gen.Return(response)
+
+        def _on_advertise(future):
+            if not future.exception():
+                return
+
+            # If the request failed, clear the response so that we can try
+            # again.
+            with self._advertise_lock:
+                # `is` comparison to ensure we're not deleting another Future.
+                if self._advertise_response is future:
+                    self._advertise_response = None
+
+        with self._advertise_lock:
+            if self._advertise_response is not None:
+                return self._advertise_response
+            future = self._advertise_response = _advertise()
+
+        # We call add_done_callback here rather than when we call _advertise()
+        # because if the future has already resolved by the time we call
+        # add_done_callback, the callback will immediately be executed. The
+        # callback will try to acquire the advertise_lock which we already
+        # hold and end up in a deadlock.
+        future.add_done_callback(_on_advertise)
+        return future
diff --git a/tests/test_tchannel.py b/tests/test_tchannel.py
index 967ef0f1..a480656c 100644
--- a/tests/test_tchannel.py
+++ b/tests/test_tchannel.py
@@ -33,6 +33,7 @@
 import pytest
 from tornado import gen
 
+from tchannel.tornado.stream import InMemStream
 from tchannel import TChannel, Request, Response, schemes, errors, thrift
 from tchannel.errors import AlreadyListeningError, TimeoutError
 from tchannel.event import EventHook
@@ -323,17 +324,18 @@ def test_event_hook_register():
         mock_register.called
 
 
+@pytest.fixture
+def router_file():
+    return os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                        'data/hosts.json')
+
+
 @pytest.mark.gen_test
-def test_advertise_should_take_a_router_file():
+def test_advertise_should_take_a_router_file(router_file):
     from tchannel.tornado.response import Response as TornadoResponse
 
-    host_path = os.path.join(
-        os.path.dirname(os.path.realpath(__file__)),
-        'data/hosts.json',
-    )
-
     tchannel = TChannel(name='client')
-    with open(host_path, 'r') as json_data:
+    with open(router_file, 'r') as json_data:
         routers = json.load(json_data)
 
     with (
@@ -345,7 +347,7 @@ def test_advertise_should_take_a_router_file():
         f = gen.Future()
         mock_advertise.return_value = f
         f.set_result(TornadoResponse())
-        tchannel.advertise(router_file=host_path)
+        tchannel.advertise(router_file=router_file)
 
         mock_advertise.assert_called_once_with(ANY, routers=routers,
                                                name=ANY, timeout=ANY)
@@ -362,6 +364,69 @@ def test_advertise_should_raise_on_invalid_router_file():
         yield tchannel.advertise(routers='lala', router_file='?~~lala')
 
 
+@pytest.mark.gen_test
+def test_advertise_is_idempotent(router_file):
+    from tchannel.tornado.response import Response as TornadoResponse
+
+    def new_advertise(*args, **kwargs):
+        f = gen.Future()
+        f.set_result(TornadoResponse(
+            argstreams=[closed_stream(b'{}') for i in range(3)],
+        ))
+        return f
+
+    tchannel = TChannel(name='client')
+    with patch(
+        'tchannel.tornado.TChannel.advertise', autospec=True
+    ) as mock_advertise:
+        mock_advertise.side_effect = new_advertise
+
+        yield tchannel.advertise(router_file=router_file)
+        yield tchannel.advertise(router_file=router_file)
+        yield tchannel.advertise(router_file=router_file)
+
+        assert mock_advertise.call_count == 1
+
+
+@pytest.mark.gen_test
+def test_advertise_is_retryable(router_file):
+    from tchannel.tornado.response import Response as TornadoResponse
+
+    def new_advertise(*args, **kwargs):
+        f = gen.Future()
+        f.set_result(TornadoResponse(
+            argstreams=[closed_stream(b'{}') for i in range(3)],
+        ))
+        return f
+
+    tchannel = TChannel(name='client')
+    with patch(
+        'tchannel.tornado.TChannel.advertise', autospec=True
+    ) as mock_advertise:
+        f = gen.Future()
+        f.set_exception(Exception('great sadness'))
+        mock_advertise.return_value = f
+
+        with pytest.raises(Exception) as e:
+            yield tchannel.advertise(router_file=router_file)
+        assert 'great sadness' in str(e)
+        assert mock_advertise.call_count == 1
+
+        mock_advertise.side_effect = new_advertise
+        yield tchannel.advertise(router_file=router_file)
+        yield tchannel.advertise(router_file=router_file)
+        yield tchannel.advertise(router_file=router_file)
+        yield tchannel.advertise(router_file=router_file)
+
+        assert mock_advertise.call_count == 2
+
+
+def closed_stream(body):
+    stream = InMemStream(body)
+    stream.close()
+    return stream
+
+
 def test_listen_different_ports():
     server = TChannel(name='test_server')
     server.listen()

From d93c68016108c8bd924264beaf35ceef5dba37cb Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Mon, 14 Mar 2016 15:33:56 -0700
Subject: [PATCH 39/67] Preparing release 0.21.9

---
 CHANGES.rst          | 2 +-
 tchannel/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 91d93700..09c67e71 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,7 +1,7 @@
 Changes by Version
 ==================
 
-0.21.9 (unreleased)
+0.21.9 (2016-03-14)
 -------------------
 
 - Fixed a bug that caused silent failures when a write attempt was made to a
diff --git a/tchannel/__init__.py b/tchannel/__init__.py
index 9cb34aee..4806ab7c 100644
--- a/tchannel/__init__.py
+++ b/tchannel/__init__.py
@@ -22,7 +22,7 @@
     absolute_import, division, print_function, unicode_literals
 )
 
-__version__ = '0.21.9.dev0'
+__version__ = '0.21.9'
 # Update setup.py when changing this. zest.releaser doesn't support updating
 # both of them yet.
 

From a9dce498fcbe3ebfa1fb826e1f8f452b9c43d0c6 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Mon, 14 Mar 2016 15:34:21 -0700
Subject: [PATCH 40/67] Back to development: 0.21.10

---
 CHANGES.rst          | 6 ++++++
 tchannel/__init__.py | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 09c67e71..0df7391d 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,6 +1,12 @@
 Changes by Version
 ==================
 
+0.21.10 (unreleased)
+--------------------
+
+- Nothing changed yet.
+
+
 0.21.9 (2016-03-14)
 -------------------
 
diff --git a/tchannel/__init__.py b/tchannel/__init__.py
index 4806ab7c..0eedec9e 100644
--- a/tchannel/__init__.py
+++ b/tchannel/__init__.py
@@ -22,7 +22,7 @@
     absolute_import, division, print_function, unicode_literals
 )
 
-__version__ = '0.21.9'
+__version__ = '0.21.10.dev0'
 # Update setup.py when changing this. zest.releaser doesn't support updating
 # both of them yet.
 

From 94200202129d3a2f1591645b287e007cc08a218e Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Fri, 11 Mar 2016 11:51:54 -0800
Subject: [PATCH 41/67] connection: read_message should propagate
 StreamClosedError

If the IOStream has already been closed by the time read_message is called, it
will fail with a synchronous StreamClosedError which read_message needs to
catch.
---
 tchannel/tornado/connection.py   | 11 ++++++++---
 tests/tornado/test_connection.py | 20 ++++++++++++++++++++
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index 70764f1a..993bd412 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -24,7 +24,6 @@
 import os
 import socket
 import sys
-from functools import partial
 
 import tornado.gen
 import tornado.iostream
@@ -816,8 +815,14 @@ def on_read_size(future):
         size = frame.frame_rw.size_rw.read(BytesIO(size_bytes))
         io_loop.add_future(
             stream.read_bytes(size - FRAME_SIZE_WIDTH),
-            partial(on_body, size),
+            lambda f: on_body(size, f)
         )
 
-    io_loop.add_future(stream.read_bytes(FRAME_SIZE_WIDTH), on_read_size)
+    try:
+        # read_bytes may fail if the stream has already been closed
+        read_size_future = stream.read_bytes(FRAME_SIZE_WIDTH)
+    except Exception:
+        answer.set_exc_info(sys.exc_info())
+    else:
+        read_size_future.add_done_callback(on_read_size)
     return answer
diff --git a/tests/tornado/test_connection.py b/tests/tornado/test_connection.py
index 06825d2e..6ecc32f6 100644
--- a/tests/tornado/test_connection.py
+++ b/tests/tornado/test_connection.py
@@ -164,3 +164,23 @@ def test_writer_write_error():
     writer.io_stream.close()
     with pytest.raises(StreamClosedError):
         yield writer.put(messages.PingResponseMessage())
+
+
+@pytest.mark.gen_test
+def test_reader_read_error():
+    server, client = socket.socketpair()
+    reader = connection.Reader(IOStream(server))
+    writer = connection.Writer(IOStream(client))
+
+    yield writer.put(messages.PingRequestMessage())
+    ping = yield reader.get()
+    assert isinstance(ping, messages.PingRequestMessage)
+
+    reader.io_stream.close()
+    future = reader.get()
+    with pytest.raises(gen.TimeoutError):
+        yield gen.with_timeout(timedelta(milliseconds=100), future)
+
+    # TODO(abg): Need to fix Reader.get to actually raise StreamClosedError
+    # rather than freezing forever. Then this can be changed to assert that a
+    # StreamClosedError is raised.

From 5ff6eaf27b8ccd6e90da8feecad1b37b4b5b5cc9 Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Tue, 15 Mar 2016 18:33:23 -0700
Subject: [PATCH 42/67] Include a cn annotation for all spans

---
 CHANGES.rst                     | 2 +-
 tchannel/zipkin/zipkin_trace.py | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 0df7391d..aa51a13b 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,7 +4,7 @@ Changes by Version
 0.21.10 (unreleased)
 --------------------
 
-- Nothing changed yet.
+- Zipkin traces now include a server-side 'cn' annotation to identify callers.
 
 
 0.21.9 (2016-03-14)
diff --git a/tchannel/zipkin/zipkin_trace.py b/tchannel/zipkin/zipkin_trace.py
index 59d4db3b..c5d60ba4 100644
--- a/tchannel/zipkin/zipkin_trace.py
+++ b/tchannel/zipkin/zipkin_trace.py
@@ -82,8 +82,13 @@ def before_receive_request(self, request):
         if not request.tracing.traceflags:
             return
 
-        ann = annotation.server_recv()
-        request.tracing.annotations.append(ann)
+        request.tracing.annotations.append(annotation.server_recv())
+
+        caller_name = request.headers.get('cn')
+        if caller_name:
+            request.tracing.annotations.append(
+                annotation.string('cn', caller_name),
+            )
 
     def after_send_response(self, response):
         if not response.tracing.traceflags:

From b2e6d1543dff69fa81edea917743a0cc6e2ffb47 Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Wed, 16 Mar 2016 08:41:00 -0700
Subject: [PATCH 43/67] reduce unconsumed messages to info

---
 CHANGES.rst                    | 3 +++
 tchannel/tornado/connection.py | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index aa51a13b..7a8cdc15 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -5,6 +5,9 @@ Changes by Version
 --------------------
 
 - Zipkin traces now include a server-side 'cn' annotation to identify callers.
+- Reduced "unconsumed message" warnings to INFO. These are typically generated
+  when Hyperbahn garbage collects your process due to a timed-out
+  advertisement.
 
 
 0.21.9 (2016-03-14)
diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index 993bd412..594c9cd2 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -241,7 +241,7 @@ def _loop(self):
                 # Recently timed out. Safe to ignore.
                 continue
 
-            log.warn('Unconsumed message %s', message)
+            log.info('Unconsumed message %s', message)
 
     # Basically, the only difference between send and write is that send
     # sets up a Future to get the response. That's ideal for peers making

From 51cca951173b2900ffcefdc318cbe8ddecb06472 Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Thu, 17 Mar 2016 13:14:56 -0700
Subject: [PATCH 44/67] catch StreamClosedError on handshake

---
 CHANGES.rst                    |  2 ++
 tchannel/tornado/connection.py | 22 ++++++++++++----------
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 7a8cdc15..f7b182cb 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -8,6 +8,8 @@ Changes by Version
 - Reduced "unconsumed message" warnings to INFO. These are typically generated
   when Hyperbahn garbage collects your process due to a timed-out
   advertisement.
+- Handshake timeouts were incorrectly being surfaced as StreamClosedError but
+  are now raised as NetworkError.
 
 
 0.21.9 (2016-03-14)
diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index 594c9cd2..45354c25 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -399,22 +399,24 @@ def outgoing(cls, hostport, process_name=None, serve_hostport=None,
         log.debug("Connecting to %s", hostport)
         try:
             yield stream.connect((host, int(port)))
+
+            connection = cls(stream, tchannel, direction=OUTGOING)
+
+            log.debug("Performing handshake with %s", hostport)
+
+            yield connection.initiate_handshake(headers={
+                'host_port': serve_hostport,
+                'process_name': process_name,
+                'tchannel_language': TCHANNEL_LANGUAGE,
+                'tchannel_language_version': TCHANNEL_LANGUAGE_VERSION,
+                'tchannel_version': TCHANNEL_VERSION,
+            })
         except (StreamClosedError, socket.error) as e:
             log.warn("Couldn't connect to %s", hostport)
             raise NetworkError(
                 "Couldn't connect to %s" % hostport, e
             )
 
-        connection = cls(stream, tchannel, direction=OUTGOING)
-        log.debug("Performing handshake with %s", hostport)
-        yield connection.initiate_handshake(headers={
-            'host_port': serve_hostport,
-            'process_name': process_name,
-            'tchannel_language': TCHANNEL_LANGUAGE,
-            'tchannel_language_version': TCHANNEL_LANGUAGE_VERSION,
-            'tchannel_version': TCHANNEL_VERSION,
-        })
-
         if handler:
             connection.serve(handler)
 

From 1d3b5f807d1f9f6ae89727d97f15baf24cf98761 Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Thu, 17 Mar 2016 13:19:03 -0700
Subject: [PATCH 45/67] reduce default sample rate

---
 CHANGES.rst                     | 1 +
 tchannel/zipkin/zipkin_trace.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index f7b182cb..83817f77 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -10,6 +10,7 @@ Changes by Version
   advertisement.
 - Handshake timeouts were incorrectly being surfaced as StreamClosedError but
   are now raised as NetworkError.
+- Reduced default tracing sample rate from 100% to 1%.
 
 
 0.21.9 (2016-03-14)
diff --git a/tchannel/zipkin/zipkin_trace.py b/tchannel/zipkin/zipkin_trace.py
index c5d60ba4..ecd96c69 100644
--- a/tchannel/zipkin/zipkin_trace.py
+++ b/tchannel/zipkin/zipkin_trace.py
@@ -29,7 +29,7 @@
 class ZipkinTraceHook(EventHook):
     """generate zipkin-style span for tracing"""
 
-    DEFAULT_RATE = 1.0
+    DEFAULT_RATE = 0.01
 
     def __init__(self, tchannel=None, dst=None, sample_rate=None):
         """Log zipkin style trace.

From 08a7e6686266548f0b2e79bc20987ca9d11a1dbc Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Thu, 17 Mar 2016 14:45:37 -0700
Subject: [PATCH 46/67] Log all response write errors except StreamClosedErorrs

This fixes yet another "Future exception was never retrieved" issue.
---
 tchannel/tornado/dispatch.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tchannel/tornado/dispatch.py b/tchannel/tornado/dispatch.py
index 486437f7..b578f48f 100644
--- a/tchannel/tornado/dispatch.py
+++ b/tchannel/tornado/dispatch.py
@@ -26,6 +26,7 @@
 import tornado
 import tornado.gen
 from tornado import gen
+from tornado.iostream import StreamClosedError
 
 from tchannel.request import Request
 from tchannel.request import TransportHeaders
@@ -170,7 +171,18 @@ def handle_call(self, request, connection):
             serializer=handler.resp_serializer,
         )
 
-        connection.post_response(response)
+        def _on_post_response(future):
+            if not future.exception():
+                return
+
+            # Failed to write response because client disappeared. Nothing to
+            # do.
+            if isinstance(future.exception(), StreamClosedError):
+                return
+
+            log.error('failed to write response', exc_info=future.exc_info())
+
+        connection.post_response(response).add_done_callback(_on_post_response)
 
         try:
             # New impl - the handler takes a request and returns a response

From 7943b7e01123fce071ac6e97bb9ecf2125b517ed Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Thu, 17 Mar 2016 15:54:31 -0700
Subject: [PATCH 47/67] Preparing release 0.21.10

---
 CHANGES.rst          | 2 +-
 tchannel/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 83817f77..a1044158 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,7 +1,7 @@
 Changes by Version
 ==================
 
-0.21.10 (unreleased)
+0.21.10 (2016-03-17)
 --------------------
 
 - Zipkin traces now include a server-side 'cn' annotation to identify callers.
diff --git a/tchannel/__init__.py b/tchannel/__init__.py
index 0eedec9e..094892b7 100644
--- a/tchannel/__init__.py
+++ b/tchannel/__init__.py
@@ -22,7 +22,7 @@
     absolute_import, division, print_function, unicode_literals
 )
 
-__version__ = '0.21.10.dev0'
+__version__ = '0.21.10'
 # Update setup.py when changing this. zest.releaser doesn't support updating
 # both of them yet.
 

From 9d68797cbc380a298e6703b858d1b24ddd269c4f Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Thu, 17 Mar 2016 15:55:00 -0700
Subject: [PATCH 48/67] Back to development: 0.21.11

---
 CHANGES.rst          | 6 ++++++
 tchannel/__init__.py | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index a1044158..b15c426e 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,6 +1,12 @@
 Changes by Version
 ==================
 
+0.21.11 (unreleased)
+--------------------
+
+- Nothing changed yet.
+
+
 0.21.10 (2016-03-17)
 --------------------
 
diff --git a/tchannel/__init__.py b/tchannel/__init__.py
index 094892b7..31cffaa2 100644
--- a/tchannel/__init__.py
+++ b/tchannel/__init__.py
@@ -22,7 +22,7 @@
     absolute_import, division, print_function, unicode_literals
 )
 
-__version__ = '0.21.10'
+__version__ = '0.21.11.dev0'
 # Update setup.py when changing this. zest.releaser doesn't support updating
 # both of them yet.
 

From dbbe2c1f8b4c4b55d14ee5aeb72c80e57998e6e1 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Mon, 14 Mar 2016 19:30:14 -0700
Subject: [PATCH 49/67] Swap out tornado Queue for custom implementation.

---
 CHANGES.rst                      |   3 +-
 tchannel/_queue.py               | 195 +++++++++++++++++++++
 tchannel/tornado/connection.py   |  13 +-
 tests/test_queue.py              | 282 +++++++++++++++++++++++++++++++
 tests/tornado/test_connection.py |  19 +--
 5 files changed, 494 insertions(+), 18 deletions(-)
 create mode 100644 tchannel/_queue.py
 create mode 100644 tests/test_queue.py

diff --git a/CHANGES.rst b/CHANGES.rst
index b15c426e..20a188ef 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,7 +4,8 @@ Changes by Version
 0.21.11 (unreleased)
 --------------------
 
-- Nothing changed yet.
+- Fixed a bug where certain errors while reading requests would propagate as
+  TimeoutErrors.
 
 
 0.21.10 (2016-03-17)
diff --git a/tchannel/_queue.py b/tchannel/_queue.py
new file mode 100644
index 00000000..7806f519
--- /dev/null
+++ b/tchannel/_queue.py
@@ -0,0 +1,195 @@
+# Copyright (c) 2015 Uber Technologies, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+from __future__ import (
+    absolute_import, unicode_literals, division, print_function
+)
+
+from collections import deque
+
+from tornado.gen import TimeoutError
+from tornado.ioloop import IOLoop
+from tornado.queues import QueueEmpty
+from tornado.concurrent import Future
+
+__all__ = ['Queue', 'QueueEmpty', 'TimeoutError']
+
+
+class Queue(object):
+    """A specialized version of Tornado's Queue class.
+
+    This class is an almost drop-in replacement for Tornado's Queue class. It
+    behaves similar to Tornado's Queue except it provides a ``terminate()``
+    function to fail all outstanding operations.
+
+    :param int maxsize:
+        If specified, this is the buffer size for the queue. Once the capacity
+        is reached, we will start applying backpressure on putters. If
+        unspecified or None, the queue is unbuffered.
+    """
+
+    __slots__ = ('_getters', '_putters', '_surplus', 'maxsize')
+
+    # How this works:
+    #
+    # Reads:
+    # - Check if we have a value sitting in surplus. If yes, use that.
+    #   Otherwise,
+    # - Check if we have a putter waiting to provide a value. If yes, use
+    #   that. Otherwise,
+    # - Store the future in getters for later.
+    #
+    # Writes:
+    # - Check if we have a future waiting for a value in getters. If yes, use
+    #   that. Othrewise,
+    # - Check if we have room in surplus. If yes, use that. Otherwise,
+    # - Store the value and future in putters for later.
+    #
+    # Invariants:
+    # - Either getters is empty or both, surplus and putters are empty.
+    # - If putters is non-empty, surplus is maxsize (which is more than 0).
+
+    def __init__(self, maxsize=None):
+        if maxsize is None:
+            maxsize = 0
+        self.maxsize = maxsize
+
+        # collection of futures waiting for values
+        self._getters = deque()
+
+        # collection of (value, future) pairs waiting to put values.
+        self._putters = deque()
+
+        # collection of values that have not yet been consumed
+        self._surplus = deque()
+
+    def terminate(self, exc):
+        """Terminate all outstanding get requests with the given exception.
+
+        :param exc:
+            An exception or an exc_info triple.
+        """
+        if isinstance(exc, tuple):
+            fail = (lambda f: f.set_exc_info(exc))
+        else:
+            fail = (lambda f: f.set_exception(exc))
+
+        while self._putters:
+            _, future = self._putters.popleft()
+            if future.running():
+                fail(future)
+
+        while self._getters:
+            future = self._getters.popleft()
+            if future.running():
+                fail(future)
+
+    def __receive_put(self):
+        """Receive a value from a waiting putter."""
+        while self._putters:
+            value, future = self._putters.popleft()
+            if future.running():
+                self._surplus.append(value)
+                future.set_result(None)
+                return
+
+    def get(self, timeout=None):
+        """Get the next item from the queue.
+
+        Returns a future that resolves to the next item.
+
+        :param timeout:
+            If set, the future will resolve to a TimeoutError if a value is
+            not received within the given time. The value for ``timeout`` may
+            be anything accepted by ``IOLoop.add_timeout`` (a ``timedelta`` or
+            an **absolute** time relative to ``IOLoop.time``).
+        """
+        self.__receive_put()
+
+        answer = Future()
+        if self._surplus:
+            answer.set_result(self._surplus.popleft())
+            return answer
+
+        # Wait for a value
+        if timeout is not None:
+            _add_timeout(timeout, answer)
+        self._getters.append(answer)
+        return answer
+
+    def get_nowait(self):
+        """Returns a value from the queue without waiting.
+
+        Raises ``QueueEmpty`` if no values are available right now.
+        """
+        self.__receive_put()
+
+        if self._surplus:
+            return self._surplus.popleft()
+        raise QueueEmpty()
+
+    def put(self, value, timeout=None):
+        """Puts an item into the queue.
+
+        Returns a future that resolves to None once the value has been
+        accepted by the queue.
+
+        The value is accepted immediately if there is room in the queue or
+        maxsize was not specified.
+
+        :param timeout:
+            If set, the future will resolve to a TimeoutError if a value is
+            not accepted within the given time. The value for ``timeout`` may
+            be anything accepted by ``IOLoop.add_timeout`` (a ``timedelta`` or
+            an **absolute** time relative to ``IOLoop.time``).
+        """
+
+        answer = Future()
+
+        # If there's a getter waiting, send it the result.
+        while self._getters:
+            future = self._getters.popleft()
+            if future.running():
+                future.set_result(value)
+                answer.set_result(None)
+                return answer
+
+        # We have room. Put the value into surplus.
+        if self.maxsize < 1 or len(self._surplus) < self.maxsize:
+            self._surplus.append(value)
+            answer.set_result(None)
+            return answer
+
+        # Wait until there is room.
+        if timeout is not None:
+            _add_timeout(timeout, answer)
+        self._putters.append((value, answer))
+        return answer
+
+
+def _add_timeout(timeout, future):
+    io_loop = IOLoop.current()
+
+    def on_timeout():
+        if future.running():
+            future.set_exception(TimeoutError("timed out"))
+
+    t = io_loop.add_timeout(timeout, on_timeout)
+    future.add_done_callback(lambda _: io_loop.remove_timeout(t))
diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index 45354c25..dbdcc904 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -27,7 +27,6 @@
 
 import tornado.gen
 import tornado.iostream
-import tornado.queues as queues
 
 from tornado import stack_context
 from tornado.ioloop import IOLoop
@@ -36,6 +35,7 @@
 from .. import errors
 from .. import frame
 from .. import messages
+from .. import _queue as queues
 from .._future import fail_to
 from ..errors import NetworkError
 from ..errors import TChannelError
@@ -192,7 +192,10 @@ def _loop(self):
         self._loop_running = True
 
         while not self.closed:
-            message = yield self.reader.get()
+            try:
+                message = yield self.reader.get()
+            except StreamClosedError:
+                break
 
             # TODO: There should probably be a try-catch on the yield.
             if message.message_type in self.CALL_REQ_TYPES:
@@ -642,7 +645,7 @@ class Reader(object):
     def __init__(self, io_stream, capacity=None):
         capacity = capacity or 64
 
-        self.queue = tornado.queues.Queue(capacity)
+        self.queue = queues.Queue(capacity)
         self.filling = False
         self.io_stream = io_stream
 
@@ -654,7 +657,7 @@ def fill(self):
         def keep_reading(f):
             if f.exception():
                 self.filling = False
-                # This is usually StreamClosed due to a client disconnecting.
+                self.queue.terminate(f.exc_info())
                 if isinstance(f.exception(), StreamClosedError):
                     return log.info("read error", exc_info=f.exc_info())
                 else:
@@ -683,7 +686,7 @@ class Writer(object):
     def __init__(self, io_stream, capacity=None):
         capacity = capacity or 64
 
-        self.queue = tornado.queues.Queue(capacity)
+        self.queue = queues.Queue(capacity)
         self.draining = False
         self.io_stream = io_stream
         # Tracks message IDs for this connection.
diff --git a/tests/test_queue.py b/tests/test_queue.py
new file mode 100644
index 00000000..b81cae90
--- /dev/null
+++ b/tests/test_queue.py
@@ -0,0 +1,282 @@
+# Copyright (c) 2015 Uber Technologies, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+from __future__ import (
+    absolute_import, unicode_literals, division, print_function
+)
+
+import sys
+import pytest
+from datetime import timedelta
+from six.moves import range
+from tornado import gen
+
+from tchannel._queue import Queue, QueueEmpty, TimeoutError
+
+
+def assert_invariant(q):
+    assert not (q._getters and (q._surplus or q._putters))
+    assert (not q._putters or len(q._surplus) == q.maxsize)
+
+
+@pytest.fixture
+def items():
+    return list(range(100))
+
+
+@pytest.mark.gen_test
+def test_unbounded_put_then_get(items):
+    queue = Queue()
+    assert_invariant(queue)
+
+    for item in items:
+        yield queue.put(item)
+        assert_invariant(queue)
+
+    got_futures = []
+    for i in range(len(items)):
+        got_futures.append(queue.get())
+        assert_invariant(queue)
+
+    got = yield got_futures
+    assert got == items
+
+    with pytest.raises(TimeoutError):
+        yield queue.get(timedelta(seconds=0.1))
+
+
+@pytest.mark.gen_test
+def test_bounded_put_then_get(items):
+    queue = Queue(5)
+    assert_invariant(queue)
+
+    for item in items[:5]:
+        yield queue.put(item)
+        assert_invariant(queue)
+
+    put_futures = []
+    for item in items[5:]:
+        put_futures.append(queue.put(item))
+
+    got = []
+    for i in range(len(items)):
+        item = yield queue.get()
+        assert_invariant(queue)
+        got.append(item)
+
+    assert got == items
+    yield put_futures
+
+
+@pytest.mark.gen_test
+def test_bounded_put_timeout_then_get(items):
+    queue = Queue(1)
+    assert_invariant(queue)
+
+    yield queue.put(items[0])
+
+    put_futures = []
+    for item in range(len(items)):
+        put_futures.append(queue.put(item, timedelta(seconds=0.001)))
+
+    yield gen.sleep(0.01)
+
+    get_futures = []
+    for item in range(len(items)):
+        get_futures.append(queue.get())
+
+    for future in put_futures:
+        with pytest.raises(TimeoutError):
+            yield future
+
+    for item in items[1:]:
+        yield queue.put(item)
+
+    got = yield get_futures
+    assert got == items
+
+
+@pytest.mark.gen_test
+def test_unbounded_put_then_get_nowait(items):
+    queue = Queue()
+
+    for item in items:
+        yield queue.put(item)
+        assert_invariant(queue)
+
+    got = []
+    for i in range(len(items)):
+        got.append(queue.get_nowait())
+        assert_invariant(queue)
+
+    assert got == items
+
+    with pytest.raises(QueueEmpty):
+        queue.get_nowait()
+
+
+@pytest.mark.gen_test
+def test_unbounded_get_then_put(items):
+    queue = Queue()
+
+    got_futures = []
+    for i in range(len(items)):
+        got_futures.append(queue.get())
+        assert_invariant(queue)
+
+    for item in items:
+        yield queue.put(item)
+        assert_invariant(queue)
+
+    got = yield got_futures
+    assert got == items
+
+
+@pytest.mark.gen_test
+def test_unbounded_expired_gets_then_get_first():
+    queue = Queue()
+
+    expired_gets = []
+    for i in range(99):
+        expired_gets.append(queue.get(timedelta(seconds=0.001)))
+        assert_invariant(queue)
+
+    yield gen.sleep(0.01)  # 99 * 0.001 = 0.099
+
+    # get the value before putting it
+    real_get = queue.get()
+    yield queue.put(42)
+    assert 42 == (yield real_get)
+    assert_invariant(queue)
+
+    for future in expired_gets:
+        with pytest.raises(TimeoutError):
+            yield future
+
+
+@pytest.mark.gen_test
+def test_unbounded_expired_gets_then_put_first():
+    queue = Queue()
+
+    expired_gets = []
+    for i in range(99):
+        expired_gets.append(queue.get(timedelta(seconds=0.001)))
+        assert_invariant(queue)
+
+    yield gen.sleep(0.01)
+
+    # put the value first, get afterwards
+    yield queue.put(42)
+    assert 42 == (yield queue.get())
+    assert_invariant(queue)
+
+    for future in expired_gets:
+        with pytest.raises(TimeoutError):
+            yield future
+
+
+@pytest.mark.gen_test
+def test_terminate_get_with_exception():
+
+    class GreatSadness(Exception):
+        pass
+
+    queue = Queue()
+    expired_gets = []
+    terminated_gets = []
+
+    flag = True
+    for i in range(99):
+        if flag:
+            expired_gets.append(queue.get(timedelta(seconds=0.001)))
+        else:
+            terminated_gets.append(queue.get(timedelta(seconds=1)))
+        flag = not flag
+
+    yield gen.sleep(0.01)
+
+    queue.terminate(GreatSadness())
+
+    for future in expired_gets:
+        with pytest.raises(TimeoutError):
+            yield future
+
+    for future in terminated_gets:
+        with pytest.raises(GreatSadness):
+            yield future
+
+
+@pytest.mark.gen_test
+def test_terminate_put_with_exception():
+
+    class GreatSadness(Exception):
+        pass
+
+    queue = Queue(10)
+    expired_puts = []
+    terminated_puts = []
+
+    for i in range(10):
+        yield queue.put(i)
+
+    flag = True
+    for i in range(99):
+        if flag:
+            expired_puts.append(queue.put(i, timedelta(seconds=0.001)))
+        else:
+            terminated_puts.append(queue.put(i, timedelta(seconds=1)))
+        flag = not flag
+
+    yield gen.sleep(0.01)
+
+    try:
+        raise GreatSadness()
+    except GreatSadness:
+        queue.terminate(sys.exc_info())
+    else:
+        assert False, "expected failure"
+
+    for future in expired_puts:
+        with pytest.raises(TimeoutError):
+            yield future
+
+    for future in terminated_puts:
+        with pytest.raises(GreatSadness):
+            yield future
+
+
+@pytest.mark.gen_test
+def test_get_timeout_too_late():
+    queue = Queue()
+    future = queue.get(timedelta(seconds=0.01))
+    yield queue.put(42)
+    yield gen.sleep(0.01)
+    assert 42 == (yield future)
+
+
+@pytest.mark.gen_test
+def test_put_timeout_too_late():
+    queue = Queue(1)
+    yield queue.put(1)
+    future = queue.put(2, timedelta(seconds=0.01))
+    assert (yield queue.get()) == 1
+    assert (yield queue.get()) == 2
+    yield gen.sleep(0.01)
+    yield future
diff --git a/tests/tornado/test_connection.py b/tests/tornado/test_connection.py
index 6ecc32f6..c1cc69c8 100644
--- a/tests/tornado/test_connection.py
+++ b/tests/tornado/test_connection.py
@@ -25,12 +25,11 @@
 import socket
 import tornado.ioloop
 from tornado import gen
-from datetime import timedelta
 from tornado.iostream import IOStream, StreamClosedError
 
 from tchannel import TChannel
 from tchannel import messages
-from tchannel.errors import TimeoutError
+from tchannel.errors import TimeoutError, ReadError
 from tchannel.tornado import connection
 
 
@@ -126,8 +125,8 @@ def test_stream_closed_error_on_read(tornado_pair):
     client.close()
 
     with mock.patch.object(connection, 'log') as mock_log:  # :(
-        with pytest.raises(gen.TimeoutError):
-            yield gen.with_timeout(timedelta(milliseconds=100), future)
+        with pytest.raises(StreamClosedError):
+            yield future
 
     assert mock_log.error.call_count == 0
     assert mock_log.info.call_count == 1
@@ -143,8 +142,8 @@ def test_other_error_on_read(tornado_pair):
     yield client.connection.write(b'\x00\x02\x00\x00')  # bad payload
 
     with mock.patch.object(connection, 'log') as mock_log:  # :(
-        with pytest.raises(gen.TimeoutError):
-            yield gen.with_timeout(timedelta(milliseconds=100), future)
+        with pytest.raises(ReadError):
+            yield future
 
     assert mock_log.error.call_count == 1
     assert mock_log.info.call_count == 0
@@ -178,9 +177,5 @@ def test_reader_read_error():
 
     reader.io_stream.close()
     future = reader.get()
-    with pytest.raises(gen.TimeoutError):
-        yield gen.with_timeout(timedelta(milliseconds=100), future)
-
-    # TODO(abg): Need to fix Reader.get to actually raise StreamClosedError
-    # rather than freezing forever. Then this can be changed to assert that a
-    # StreamClosedError is raised.
+    with pytest.raises(StreamClosedError):
+        yield future

From 393d3074e8a6f15de88df1241e26349db218cf6d Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Wed, 16 Mar 2016 14:10:33 -0700
Subject: [PATCH 50/67] connection/reader: Remove need for terminate()

---
 tchannel/tornado/connection.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index dbdcc904..867458b0 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -655,15 +655,17 @@ def fill(self):
         io_loop = IOLoop.current()
 
         def keep_reading(f):
+            put_future = self.queue.put(f)
+
             if f.exception():
                 self.filling = False
-                self.queue.terminate(f.exc_info())
                 if isinstance(f.exception(), StreamClosedError):
                     return log.info("read error", exc_info=f.exc_info())
                 else:
                     return log.error("read error", exc_info=f.exc_info())
+
             # connect these two in the case when put blocks
-            self.queue.put(f.result()).add_done_callback(
+            put_future.add_done_callback(
                 lambda f: io_loop.spawn_callback(self.fill),
             )
 
@@ -678,7 +680,20 @@ def get(self):
         if not self.filling:
             self.fill()
 
-        return self.queue.get()
+        answer = tornado.gen.Future()
+
+        def _on_result(future):
+            if future.exception():
+                return answer.set_exc_info(future.exc_info())
+            answer.set_result(future.result())
+
+        def _on_item(future):
+            if future.exception():
+                return answer.set_exc_info(future.exc_info())
+            future.result().add_done_callback(_on_result)
+
+        self.queue.get().add_done_callback(_on_item)
+        return answer
 
 
 class Writer(object):

From 923b9d89fcdf1262917c81f37d815ea605e62e09 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Wed, 16 Mar 2016 11:39:41 -0700
Subject: [PATCH 51/67] queue: much simpler queue implementation.

This implementation does not support terminate() or backpressure, but it's a
much simpler implementation and easier to make thread-safe.
---
 tchannel/_queue.py             | 240 ++++++++++++++---------------
 tchannel/tornado/connection.py |   4 +-
 tests/test_queue.py            | 270 ++++++++++++---------------------
 3 files changed, 213 insertions(+), 301 deletions(-)

diff --git a/tchannel/_queue.py b/tchannel/_queue.py
index 7806f519..cbf7101f 100644
--- a/tchannel/_queue.py
+++ b/tchannel/_queue.py
@@ -22,116 +22,105 @@
     absolute_import, unicode_literals, division, print_function
 )
 
-from collections import deque
+import threading
+from collections import namedtuple
 
-from tornado.gen import TimeoutError
 from tornado.ioloop import IOLoop
 from tornado.queues import QueueEmpty
 from tornado.concurrent import Future
 
-__all__ = ['Queue', 'QueueEmpty', 'TimeoutError']
+__all__ = ['Queue', 'QueueEmpty']
 
 
-class Queue(object):
-    """A specialized version of Tornado's Queue class.
+Node = namedtuple('Node', 'value next')
 
-    This class is an almost drop-in replacement for Tornado's Queue class. It
-    behaves similar to Tornado's Queue except it provides a ``terminate()``
-    function to fail all outstanding operations.
 
-    :param int maxsize:
-        If specified, this is the buffer size for the queue. Once the capacity
-        is reached, we will start applying backpressure on putters. If
-        unspecified or None, the queue is unbuffered.
-    """
+class Queue(object):
+    """An unbounded, thread-safe asynchronous queue."""
 
-    __slots__ = ('_getters', '_putters', '_surplus', 'maxsize')
+    __slots__ = ('_get', '_put', '_lock')
 
     # How this works:
     #
-    # Reads:
-    # - Check if we have a value sitting in surplus. If yes, use that.
-    #   Otherwise,
-    # - Check if we have a putter waiting to provide a value. If yes, use
-    #   that. Otherwise,
-    # - Store the future in getters for later.
+    # _get and _put are futures maintaining pointers to a linked list of
+    # futures. The linked list is implemented as Node objects holding the
+    # value and the next future.
+    #
+    #     Node
+    #   +---+---+   +---+---+  E: Empty future
+    #   | 1 | F-|-->| 2 | E |  F: Filled future
+    #   +---+---+   +---+---+
+    #         ^           ^
+    #   +---+ |     +---+ |
+    #   | F-|-+     | F-|-+
+    #   +---+       +---+
+    #    _get        _put
+    #
+    # When there's a put, we fill the current empty future with a Node
+    # containing the value and a pointer to the next, newly created empty
+    # future.
+    #
+    #   +---+---+   +---+---+   +---+---+
+    #   | 1 | F-|-->| 2 | F-|-->| 3 | E |
+    #   +---+---+   +---+---+   +---+---+
+    #         ^                       ^
+    #   +---+ |                 +---+ |
+    #   | F-|-+                 | F-|-+
+    #   +---+                   +---+
+    #    _get                    _put
     #
-    # Writes:
-    # - Check if we have a future waiting for a value in getters. If yes, use
-    #   that. Othrewise,
-    # - Check if we have room in surplus. If yes, use that. Otherwise,
-    # - Store the value and future in putters for later.
+    # When there's a get, we read the value from the current Node, and move
+    # _get to the next future.
     #
-    # Invariants:
-    # - Either getters is empty or both, surplus and putters are empty.
-    # - If putters is non-empty, surplus is maxsize (which is more than 0).
+    #   +---+---+   +---+---+
+    #   | 2 | F-|-->| 3 | E |
+    #   +---+---+   +---+---+
+    #         ^           ^
+    #   +---+ |     +---+ |
+    #   | F-|-+     | F-|-+
+    #   +---+       +---+
+    #    _get        _put
+
+    def __init__(self):
+        self._lock = threading.Lock()
+
+        # Space for the next Node.
+        hole = Future()
+
+        # Pointer to the Future that will contain the next Node.
+        self._get = Future()
+        self._get.set_result(hole)
+
+        # Pointer to the next empty Future that should be filled with a Node.
+        self._put = Future()
+        self._put.set_result(hole)
+
+    def put(self, value):
+        """Puts an item into the queue.
 
-    def __init__(self, maxsize=None):
-        if maxsize is None:
-            maxsize = 0
-        self.maxsize = maxsize
+        Returns a Future that resolves to None once the value has been
+        accepted by the queue.
+        """
+        io_loop = IOLoop.current()
+        new_hole = Future()
 
-        # collection of futures waiting for values
-        self._getters = deque()
+        new_put = Future()
+        new_put.set_result(new_hole)
 
-        # collection of (value, future) pairs waiting to put values.
-        self._putters = deque()
+        with self._lock:
+            self._put, put = new_put, self._put
 
-        # collection of values that have not yet been consumed
-        self._surplus = deque()
+        answer = Future()
 
-    def terminate(self, exc):
-        """Terminate all outstanding get requests with the given exception.
+        def _on_put(future):
+            if future.exception():  # pragma: no cover (never happens)
+                return answer.set_exc_info(future.exc_info())
 
-        :param exc:
-            An exception or an exc_info triple.
-        """
-        if isinstance(exc, tuple):
-            fail = (lambda f: f.set_exc_info(exc))
-        else:
-            fail = (lambda f: f.set_exception(exc))
-
-        while self._putters:
-            _, future = self._putters.popleft()
-            if future.running():
-                fail(future)
-
-        while self._getters:
-            future = self._getters.popleft()
-            if future.running():
-                fail(future)
-
-    def __receive_put(self):
-        """Receive a value from a waiting putter."""
-        while self._putters:
-            value, future = self._putters.popleft()
-            if future.running():
-                self._surplus.append(value)
-                future.set_result(None)
-                return
-
-    def get(self, timeout=None):
-        """Get the next item from the queue.
-
-        Returns a future that resolves to the next item.
-
-        :param timeout:
-            If set, the future will resolve to a TimeoutError if a value is
-            not received within the given time. The value for ``timeout`` may
-            be anything accepted by ``IOLoop.add_timeout`` (a ``timedelta`` or
-            an **absolute** time relative to ``IOLoop.time``).
-        """
-        self.__receive_put()
+            old_hole = put.result()
+            old_hole.set_result(Node(value, new_hole))
+            answer.set_result(None)
 
-        answer = Future()
-        if self._surplus:
-            answer.set_result(self._surplus.popleft())
-            return answer
-
-        # Wait for a value
-        if timeout is not None:
-            _add_timeout(timeout, answer)
-        self._getters.append(answer)
+        io_loop.add_future(put, _on_put)
         return answer
 
     def get_nowait(self):
@@ -139,57 +128,50 @@ def get_nowait(self):
 
         Raises ``QueueEmpty`` if no values are available right now.
         """
-        self.__receive_put()
+        new_get = Future()
 
-        if self._surplus:
-            return self._surplus.popleft()
-        raise QueueEmpty()
+        with self._lock:
+            if not self._get.done():
+                raise QueueEmpty
+            get, self._get = self._get, new_get
 
-    def put(self, value, timeout=None):
-        """Puts an item into the queue.
+        hole = get.result()
+        if not hole.done():
+            # Restore the unfinished hole.
+            new_get.set_result(hole)
+            raise QueueEmpty
 
-        Returns a future that resolves to None once the value has been
-        accepted by the queue.
+        value, new_hole = hole.result()
+        new_get.set_result(new_hole)
+        return value
 
-        The value is accepted immediately if there is room in the queue or
-        maxsize was not specified.
+    def get(self):
+        """Gets the next item from the queue.
 
-        :param timeout:
-            If set, the future will resolve to a TimeoutError if a value is
-            not accepted within the given time. The value for ``timeout`` may
-            be anything accepted by ``IOLoop.add_timeout`` (a ``timedelta`` or
-            an **absolute** time relative to ``IOLoop.time``).
+        Returns a Future that resolves to the next item once it is available.
         """
+        io_loop = IOLoop.current()
+        new_get = Future()
+
+        with self._lock:
+            get, self._get = self._get, new_get
 
         answer = Future()
 
-        # If there's a getter waiting, send it the result.
-        while self._getters:
-            future = self._getters.popleft()
-            if future.running():
-                future.set_result(value)
-                answer.set_result(None)
-                return answer
-
-        # We have room. Put the value into surplus.
-        if self.maxsize < 1 or len(self._surplus) < self.maxsize:
-            self._surplus.append(value)
-            answer.set_result(None)
-            return answer
+        def _on_node(future):
+            if future.exception():  # pragma: no cover (never happens)
+                return answer.set_exc_info(future.exc_info())
 
-        # Wait until there is room.
-        if timeout is not None:
-            _add_timeout(timeout, answer)
-        self._putters.append((value, answer))
-        return answer
+            value, new_hole = future.result()
+            new_get.set_result(new_hole)
+            answer.set_result(value)
 
+        def _on_get(future):
+            if future.exception():  # pragma: no cover (never happens)
+                return answer.set_exc_info(future.exc_info())
 
-def _add_timeout(timeout, future):
-    io_loop = IOLoop.current()
+            hole = future.result()
+            io_loop.add_future(hole, _on_node)
 
-    def on_timeout():
-        if future.running():
-            future.set_exception(TimeoutError("timed out"))
-
-    t = io_loop.add_timeout(timeout, on_timeout)
-    future.add_done_callback(lambda _: io_loop.remove_timeout(t))
+        io_loop.add_future(get, _on_get)
+        return answer
diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index 867458b0..1ef9bda3 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -645,7 +645,7 @@ class Reader(object):
     def __init__(self, io_stream, capacity=None):
         capacity = capacity or 64
 
-        self.queue = queues.Queue(capacity)
+        self.queue = queues.Queue()
         self.filling = False
         self.io_stream = io_stream
 
@@ -701,7 +701,7 @@ class Writer(object):
     def __init__(self, io_stream, capacity=None):
         capacity = capacity or 64
 
-        self.queue = queues.Queue(capacity)
+        self.queue = queues.Queue()
         self.draining = False
         self.io_stream = io_stream
         # Tracks message IDs for this connection.
diff --git a/tests/test_queue.py b/tests/test_queue.py
index b81cae90..ed0d156b 100644
--- a/tests/test_queue.py
+++ b/tests/test_queue.py
@@ -22,18 +22,13 @@
     absolute_import, unicode_literals, division, print_function
 )
 
-import sys
 import pytest
-from datetime import timedelta
-from six.moves import range
+import threading
+from collections import deque, defaultdict
 from tornado import gen
+from tornado.ioloop import IOLoop
 
-from tchannel._queue import Queue, QueueEmpty, TimeoutError
-
-
-def assert_invariant(q):
-    assert not (q._getters and (q._surplus or q._putters))
-    assert (not q._putters or len(q._surplus) == q.maxsize)
+from tchannel._queue import Queue, QueueEmpty
 
 
 @pytest.fixture
@@ -42,241 +37,176 @@ def items():
 
 
 @pytest.mark.gen_test
-def test_unbounded_put_then_get(items):
+def test_put_then_get(items):
     queue = Queue()
-    assert_invariant(queue)
 
     for item in items:
         yield queue.put(item)
-        assert_invariant(queue)
 
     got_futures = []
     for i in range(len(items)):
         got_futures.append(queue.get())
-        assert_invariant(queue)
 
     got = yield got_futures
     assert got == items
 
-    with pytest.raises(TimeoutError):
-        yield queue.get(timedelta(seconds=0.1))
-
-
-@pytest.mark.gen_test
-def test_bounded_put_then_get(items):
-    queue = Queue(5)
-    assert_invariant(queue)
-
-    for item in items[:5]:
-        yield queue.put(item)
-        assert_invariant(queue)
-
-    put_futures = []
-    for item in items[5:]:
-        put_futures.append(queue.put(item))
-
-    got = []
-    for i in range(len(items)):
-        item = yield queue.get()
-        assert_invariant(queue)
-        got.append(item)
-
-    assert got == items
-    yield put_futures
-
 
 @pytest.mark.gen_test
-def test_bounded_put_timeout_then_get(items):
-    queue = Queue(1)
-    assert_invariant(queue)
-
-    yield queue.put(items[0])
-
-    put_futures = []
-    for item in range(len(items)):
-        put_futures.append(queue.put(item, timedelta(seconds=0.001)))
-
-    yield gen.sleep(0.01)
-
-    get_futures = []
-    for item in range(len(items)):
-        get_futures.append(queue.get())
-
-    for future in put_futures:
-        with pytest.raises(TimeoutError):
-            yield future
-
-    for item in items[1:]:
-        yield queue.put(item)
-
-    got = yield get_futures
-    assert got == items
-
-
-@pytest.mark.gen_test
-def test_unbounded_put_then_get_nowait(items):
+def test_put_then_get_nowait(items):
     queue = Queue()
 
     for item in items:
         yield queue.put(item)
-        assert_invariant(queue)
 
     got = []
     for i in range(len(items)):
         got.append(queue.get_nowait())
-        assert_invariant(queue)
 
     assert got == items
 
     with pytest.raises(QueueEmpty):
         queue.get_nowait()
 
+    future = queue.get()
+    yield queue.put(42)
+    assert 42 == (yield future)
+
 
 @pytest.mark.gen_test
-def test_unbounded_get_then_put(items):
+def test_get_then_put(items):
     queue = Queue()
 
     got_futures = []
     for i in range(len(items)):
         got_futures.append(queue.get())
-        assert_invariant(queue)
 
     for item in items:
         yield queue.put(item)
-        assert_invariant(queue)
 
     got = yield got_futures
     assert got == items
 
 
 @pytest.mark.gen_test
-def test_unbounded_expired_gets_then_get_first():
-    queue = Queue()
+@pytest.mark.concurrency_test
+def test_concurrent_producers_single_consumer():
+    num_threads = 1000
+    num_items = 10
+    q = Queue()
 
-    expired_gets = []
-    for i in range(99):
-        expired_gets.append(queue.get(timedelta(seconds=0.001)))
-        assert_invariant(queue)
+    def producer(i):
 
-    yield gen.sleep(0.01)  # 99 * 0.001 = 0.099
+        @gen.coroutine
+        def run():
+            for j in range(num_items):
+                yield q.put((i, j))
 
-    # get the value before putting it
-    real_get = queue.get()
-    yield queue.put(42)
-    assert 42 == (yield real_get)
-    assert_invariant(queue)
+        IOLoop(make_current=False).run_sync(run)
 
-    for future in expired_gets:
-        with pytest.raises(TimeoutError):
-            yield future
+    producers = [
+        threading.Thread(target=producer, args=(i,))
+        for i in range(num_threads)
+    ]
 
+    for p in producers:
+        p.start()
 
-@pytest.mark.gen_test
-def test_unbounded_expired_gets_then_put_first():
-    queue = Queue()
+    items = defaultdict(lambda: [])
+    for x in range(num_items * num_threads):
+        i, j = yield q.get()
+        items[i].append(j)
 
-    expired_gets = []
-    for i in range(99):
-        expired_gets.append(queue.get(timedelta(seconds=0.001)))
-        assert_invariant(queue)
+    for v in items.values():
+        assert v == list(range(num_items))
 
-    yield gen.sleep(0.01)
+    for p in producers:
+        p.join()
 
-    # put the value first, get afterwards
-    yield queue.put(42)
-    assert 42 == (yield queue.get())
-    assert_invariant(queue)
 
-    for future in expired_gets:
-        with pytest.raises(TimeoutError):
-            yield future
+@pytest.mark.gen_test
+@pytest.mark.concurrency_test
+def test_concurrent_consumers_single_producer():
+    num_threads = 1000
+    num_items = 10
 
+    q = Queue()
+    items = deque()
 
-@pytest.mark.gen_test
-def test_terminate_get_with_exception():
+    def consumer():
 
-    class GreatSadness(Exception):
-        pass
+        @gen.coroutine
+        def run():
+            for i in range(num_items):
+                x = yield q.get()
+                items.append(x)
 
-    queue = Queue()
-    expired_gets = []
-    terminated_gets = []
+        IOLoop(make_current=False).run_sync(run)
 
-    flag = True
-    for i in range(99):
-        if flag:
-            expired_gets.append(queue.get(timedelta(seconds=0.001)))
-        else:
-            terminated_gets.append(queue.get(timedelta(seconds=1)))
-        flag = not flag
+    consumers = [
+        threading.Thread(target=consumer) for i in range(num_threads)
+    ]
 
-    yield gen.sleep(0.01)
+    for c in consumers:
+        c.start()
 
-    queue.terminate(GreatSadness())
+    for x in range(num_items * num_threads):
+        yield q.put(x)
 
-    for future in expired_gets:
-        with pytest.raises(TimeoutError):
-            yield future
+    for c in consumers:
+        c.join()
 
-    for future in terminated_gets:
-        with pytest.raises(GreatSadness):
-            yield future
+    assert len(items) == num_items * num_threads
+    assert set(items) == set(range(num_items * num_threads))
+    # Order for concurrent consumers cannot be guaranteed because we don't
+    # know in which order consumers will wake up. The only guarantee we have
+    # is that none of the items were lost.
 
 
 @pytest.mark.gen_test
-def test_terminate_put_with_exception():
-
-    class GreatSadness(Exception):
-        pass
+@pytest.mark.concurrency_test
+def test_concurrent_producers_and_consumers():
+    num_threads = 1000
+    num_items = 10
 
-    queue = Queue(10)
-    expired_puts = []
-    terminated_puts = []
+    q = Queue()
+    items = defaultdict(lambda: [])
 
-    for i in range(10):
-        yield queue.put(i)
+    def producer(i):
 
-    flag = True
-    for i in range(99):
-        if flag:
-            expired_puts.append(queue.put(i, timedelta(seconds=0.001)))
-        else:
-            terminated_puts.append(queue.put(i, timedelta(seconds=1)))
-        flag = not flag
+        @gen.coroutine
+        def run():
+            for j in range(num_items):
+                yield q.put((i, j))
 
-    yield gen.sleep(0.01)
+        IOLoop(make_current=False).run_sync(run)
 
-    try:
-        raise GreatSadness()
-    except GreatSadness:
-        queue.terminate(sys.exc_info())
-    else:
-        assert False, "expected failure"
+    def consumer():
 
-    for future in expired_puts:
-        with pytest.raises(TimeoutError):
-            yield future
+        @gen.coroutine
+        def run():
+            for x in range(num_items):
+                i, j = yield q.get()
+                items[i].append(j)
 
-    for future in terminated_puts:
-        with pytest.raises(GreatSadness):
-            yield future
+        IOLoop(make_current=False).run_sync(run)
 
+    num_producers = int(num_threads / 2)
+    threads = [
+        threading.Thread(target=producer, args=(i,))
+        for i in range(num_producers)
+    ] + [
+        threading.Thread(target=consumer)
+        for i in range(num_threads - num_producers)
+    ]
 
-@pytest.mark.gen_test
-def test_get_timeout_too_late():
-    queue = Queue()
-    future = queue.get(timedelta(seconds=0.01))
-    yield queue.put(42)
-    yield gen.sleep(0.01)
-    assert 42 == (yield future)
+    for t in threads:
+        t.start()
 
+    for t in threads:
+        t.join()
 
-@pytest.mark.gen_test
-def test_put_timeout_too_late():
-    queue = Queue(1)
-    yield queue.put(1)
-    future = queue.put(2, timedelta(seconds=0.01))
-    assert (yield queue.get()) == 1
-    assert (yield queue.get()) == 2
-    yield gen.sleep(0.01)
-    yield future
+    for i in range(num_producers):
+        assert len(items[i]) == num_items
+        assert set(items[i]) == set(range(num_items))
+        # As with the previous concurrent consumers case, we cannot guarantee
+        # ordering, only that the items are all present.

From 3f0ea8a4c68d3feab352a05e867328b9a4bf33e3 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Thu, 17 Mar 2016 15:18:27 -0700
Subject: [PATCH 52/67] Lower number of threads for queue concurrency test

This allows it to run in less than a second
---
 tests/test_queue.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_queue.py b/tests/test_queue.py
index ed0d156b..76503ec6 100644
--- a/tests/test_queue.py
+++ b/tests/test_queue.py
@@ -90,7 +90,7 @@ def test_get_then_put(items):
 @pytest.mark.gen_test
 @pytest.mark.concurrency_test
 def test_concurrent_producers_single_consumer():
-    num_threads = 1000
+    num_threads = 100
     num_items = 10
     q = Queue()
 
@@ -126,7 +126,7 @@ def run():
 @pytest.mark.gen_test
 @pytest.mark.concurrency_test
 def test_concurrent_consumers_single_producer():
-    num_threads = 1000
+    num_threads = 100
     num_items = 10
 
     q = Queue()
@@ -165,7 +165,7 @@ def run():
 @pytest.mark.gen_test
 @pytest.mark.concurrency_test
 def test_concurrent_producers_and_consumers():
-    num_threads = 1000
+    num_threads = 100
     num_items = 10
 
     q = Queue()

From 218d167448bd958f8fe138683363bfb3e0608a62 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Fri, 18 Mar 2016 13:01:53 -0700
Subject: [PATCH 53/67] Lower "cannot register on sync TChannel" to info.

---
 CHANGES.rst             | 3 ++-
 tchannel/sync/client.py | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index b15c426e..218bc1d3 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,7 +4,8 @@ Changes by Version
 0.21.11 (unreleased)
 --------------------
 
-- Nothing changed yet.
+- Attempting to register endpoints against a synchronous TChannel now logs an
+  INFO level message.
 
 
 0.21.10 (2016-03-17)
diff --git a/tchannel/sync/client.py b/tchannel/sync/client.py
index c65b6ad3..b4117a86 100644
--- a/tchannel/sync/client.py
+++ b/tchannel/sync/client.py
@@ -141,7 +141,7 @@ def register(self, *args, **kwargs):
 
 
 def _register(*args, **kwargs):
-    log.warning("Registration not yet supported for sync tchannel.")
+    log.info("Registration not yet supported for sync tchannel.")
 
     def decorator(fn):
         return fn

From baa3dec0272ea17d92203c27e9e52017b8b69a2d Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Wed, 23 Mar 2016 15:15:07 -0700
Subject: [PATCH 54/67] Calls should reraise with the origianl traceback when
 retries run out

---
 tchannel/tornado/peer.py | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/tchannel/tornado/peer.py b/tchannel/tornado/peer.py
index 71323957..c6a6c1be 100644
--- a/tchannel/tornado/peer.py
+++ b/tchannel/tornado/peer.py
@@ -22,6 +22,7 @@
     absolute_import, division, print_function, unicode_literals
 )
 
+import sys
 import logging
 import random
 from collections import deque
@@ -431,19 +432,23 @@ def send_with_retry(self, request, peer, retry_limit, connection):
             try:
                 response = yield self._send(connection, request)
                 raise gen.Return(response)
-            except TChannelError as error:
-                blacklist.add(peer.hostport)
-                (peer, connection) = yield self._prepare_for_retry(
-                    request=request,
-                    connection=connection,
-                    protocol_error=error,
-                    blacklist=blacklist,
-                    num_of_attempt=num_of_attempt,
-                    max_retry_limit=retry_limit,
-                )
-
-                if not connection:
-                    raise error
+            except TChannelError:
+                (typ, error, tb) = sys.exc_info()
+                try:
+                    blacklist.add(peer.hostport)
+                    (peer, connection) = yield self._prepare_for_retry(
+                        request=request,
+                        connection=connection,
+                        protocol_error=error,
+                        blacklist=blacklist,
+                        num_of_attempt=num_of_attempt,
+                        max_retry_limit=retry_limit,
+                    )
+
+                    if not connection:
+                        raise typ, error, tb
+                finally:
+                    del tb  # for GC
 
     @gen.coroutine
     def _prepare_for_retry(

From c0f62f5aec94f04b64e717a8b2c69b71674e2f3c Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Thu, 24 Mar 2016 08:22:28 -0700
Subject: [PATCH 55/67] improve retry_limit docs

---
 tchannel/schemes/json.py   | 11 +++++++++--
 tchannel/schemes/raw.py    | 11 +++++++++--
 tchannel/schemes/thrift.py | 11 +++++++++--
 3 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/tchannel/schemes/json.py b/tchannel/schemes/json.py
index 51d7bad0..f957b64d 100644
--- a/tchannel/schemes/json.py
+++ b/tchannel/schemes/json.py
@@ -82,8 +82,15 @@ def __call__(
         :param string retry_on:
             What events to retry on - valid values can be found in
             ``tchannel.retry``.
-        :param string retry_limit:
-            How many times to retry before
+        :param int retry_limit:
+            How many attempts should be made (in addition to the initial
+            attempt) to re-send this request when retryable error conditions
+            (specified by ``retry_on``) are encountered.
+
+            Defaults to ``tchannel.retry.DEFAULT_RETRY_LIMIT`` (4).
+
+            Note that the maximum possible time elapsed for a request is thus
+            ``(retry_limit + 1) * timeout``.
         :param string hostport:
             A 'host:port' value to use when making a request directly to a
             TChannel service, bypassing Hyperbahn.
diff --git a/tchannel/schemes/raw.py b/tchannel/schemes/raw.py
index ffdb12b0..f49113af 100644
--- a/tchannel/schemes/raw.py
+++ b/tchannel/schemes/raw.py
@@ -80,8 +80,15 @@ def __call__(
         :param string retry_on:
             What events to retry on - valid values can be found in
             ``tchannel.retry``.
-        :param string retry_limit:
-            How many times to retry before
+        :param int retry_limit:
+            How many attempts should be made (in addition to the initial
+            attempt) to re-send this request when retryable error conditions
+            (specified by ``retry_on``) are encountered.
+
+            Defaults to ``tchannel.retry.DEFAULT_RETRY_LIMIT`` (4).
+
+            Note that the maximum possible time elapsed for a request is thus
+            ``(retry_limit + 1) * timeout``.
         :param string hostport:
             A 'host:port' value to use when making a request directly to a
             TChannel service, bypassing Hyperbahn.
diff --git a/tchannel/schemes/thrift.py b/tchannel/schemes/thrift.py
index c1fbdf32..5b11df0b 100644
--- a/tchannel/schemes/thrift.py
+++ b/tchannel/schemes/thrift.py
@@ -102,8 +102,15 @@ def __call__(
         :param string retry_on:
             What events to retry on - valid values can be found in
             ``tchannel.retry``.
-        :param string retry_limit:
-            How many times to retry before
+        :param int retry_limit:
+            How many attempts should be made (in addition to the initial
+            attempt) to re-send this request when retryable error conditions
+            (specified by ``retry_on``) are encountered.
+
+            Defaults to ``tchannel.retry.DEFAULT_RETRY_LIMIT`` (4).
+
+            Note that the maximum possible time elapsed for a request is thus
+            ``(retry_limit + 1) * timeout``.
         :param routing_delegate:
             Name of a service to which the request router should forward the
             request instead of the service specified in the call req.

From 00f4a0692cba04096bce9e659a9fb663d9e4bd90 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Wed, 23 Mar 2016 15:15:07 -0700
Subject: [PATCH 56/67] Calls should reraise with the origianl traceback when
 retries run out

---
 tchannel/tornado/peer.py | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/tchannel/tornado/peer.py b/tchannel/tornado/peer.py
index 71323957..c6a6c1be 100644
--- a/tchannel/tornado/peer.py
+++ b/tchannel/tornado/peer.py
@@ -22,6 +22,7 @@
     absolute_import, division, print_function, unicode_literals
 )
 
+import sys
 import logging
 import random
 from collections import deque
@@ -431,19 +432,23 @@ def send_with_retry(self, request, peer, retry_limit, connection):
             try:
                 response = yield self._send(connection, request)
                 raise gen.Return(response)
-            except TChannelError as error:
-                blacklist.add(peer.hostport)
-                (peer, connection) = yield self._prepare_for_retry(
-                    request=request,
-                    connection=connection,
-                    protocol_error=error,
-                    blacklist=blacklist,
-                    num_of_attempt=num_of_attempt,
-                    max_retry_limit=retry_limit,
-                )
-
-                if not connection:
-                    raise error
+            except TChannelError:
+                (typ, error, tb) = sys.exc_info()
+                try:
+                    blacklist.add(peer.hostport)
+                    (peer, connection) = yield self._prepare_for_retry(
+                        request=request,
+                        connection=connection,
+                        protocol_error=error,
+                        blacklist=blacklist,
+                        num_of_attempt=num_of_attempt,
+                        max_retry_limit=retry_limit,
+                    )
+
+                    if not connection:
+                        raise typ, error, tb
+                finally:
+                    del tb  # for GC
 
     @gen.coroutine
     def _prepare_for_retry(

From dadf4fb33bf38c3152fd654208689ce8fedf4b64 Mon Sep 17 00:00:00 2001
From: junchaowu <junchaowu1989@gmail.com>
Date: Thu, 3 Dec 2015 15:56:37 -0800
Subject: [PATCH 57/67] Cherry-pick #320

Original message:

Merge pull request #320 from uber/pending

Add list to track outbound pending req/res
---
 tchannel/tornado/connection.py   | 44 +++++++++++++++++++++---------
 tchannel/tornado/peer.py         |  9 ++++++
 tests/tornado/test_connection.py | 47 ++++++++++++++++++++++++++++++++
 3 files changed, 87 insertions(+), 13 deletions(-)

diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index 1ef9bda3..7854e281 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -122,7 +122,13 @@ def __init__(self, connection, tchannel=None, direction=None):
         self._messages = queues.Queue()
 
         # Map from message ID to futures for responses of outgoing calls.
-        self._outstanding = {}
+        self._outbound_pending_call = {}
+
+        # Map from message ID to outgoing request that is being sent.
+        self._outbound_pending_req = {}
+
+        # Map from message ID to outgoing response that is being sent.
+        self._outbound_pending_res = {}
 
         # Collection of request IDs known to have timed out.
         self._request_tombstones = Cemetery()
@@ -139,6 +145,13 @@ def __init__(self, connection, tchannel=None, direction=None):
 
         connection.set_close_callback(self._on_close)
 
+    @property
+    def total_outbound_pendings(self):
+        """Returns the total number of pending outbound requests and
+        responses."""
+        return (len(self._outbound_pending_res) +
+                len(self._outbound_pending_req))
+
     def set_close_callback(self, cb):
         """Specify a function to be called when this connection is closed.
 
@@ -155,13 +168,13 @@ def _on_close(self):
         self.closed = True
         self._request_tombstones.clear()
 
-        for message_id, future in self._outstanding.iteritems():
+        for message_id, future in self._outbound_pending_call.iteritems():
             future.set_exception(
                 NetworkError(
                     "canceling outstanding request %d" % message_id
                 )
             )
-        self._outstanding = {}
+        self._outbound_pending_call = {}
 
         try:
             while True:
@@ -202,10 +215,10 @@ def _loop(self):
                 self._messages.put(message)
                 continue
 
-            elif message.id in self._outstanding:
+            elif message.id in self._outbound_pending_call:
                 # set exception if receive error message
                 if message.message_type == Types.ERROR:
-                    future = self._outstanding.pop(message.id)
+                    future = self._outbound_pending_call.pop(message.id)
                     if future.running():
                         error = TChannelError.from_code(
                             message.code,
@@ -232,9 +245,9 @@ def _loop(self):
                 if (message.message_type in self.CALL_RES_TYPES and
                         message.flags == FlagsType.fragment):
                     # still streaming, keep it for record
-                    future = self._outstanding.get(message.id)
+                    future = self._outbound_pending_call.get(message.id)
                 else:
-                    future = self._outstanding.pop(message.id)
+                    future = self._outbound_pending_call.pop(message.id)
 
                 if response and future.running():
                     future.set_result(response)
@@ -264,12 +277,12 @@ def send(self, message):
         )
 
         message.id = message.id or self.writer.next_message_id()
-        assert message.id not in self._outstanding, (
+        assert message.id not in self._outbound_pending_call, (
             "Message ID '%d' already being used" % message.id
         )
 
         future = tornado.gen.Future()
-        self._outstanding[message.id] = future
+        self._outbound_pending_call[message.id] = future
         self.write(message)
         return future
 
@@ -544,6 +557,7 @@ def _stream(self, context, message_factory):
     @tornado.gen.coroutine
     def post_response(self, response):
         try:
+            self._outbound_pending_res[response.id] = response
             # TODO: before_send_response
             yield self._stream(response, self.response_message_factory)
 
@@ -553,6 +567,7 @@ def post_response(self, response):
                 response,
             )
         finally:
+            self._outbound_pending_res.pop(response.id, None)
             response.close_argstreams(force=True)
 
     def stream_request(self, request):
@@ -579,13 +594,16 @@ def send_request(self, request):
         """
         assert self._loop_running, "Perform a handshake first."
 
-        assert request.id not in self._outstanding, (
+        assert request.id not in self._outbound_pending_call, (
             "Message ID '%d' already being used" % request.id
         )
 
         future = tornado.gen.Future()
-        self._outstanding[request.id] = future
-        self.stream_request(request)
+        self._outbound_pending_call[request.id] = future
+        self._outbound_pending_req[request.id] = request
+        self.stream_request(request).add_done_callback(
+            lambda f: self._outbound_pending_req.pop(request.id, None)
+        )
 
         if request.ttl:
             self._add_timeout(request, future)
@@ -616,7 +634,7 @@ def adapt_result(self, f, request, response_future):
 
     def remove_outstanding_request(self, request):
         """Remove request from pending request list"""
-        self._outstanding.pop(request.id, None)
+        self._outbound_pending_call.pop(request.id, None)
 
     def _add_timeout(self, request, future):
         """Adds a timeout for the given request to the given future."""
diff --git a/tchannel/tornado/peer.py b/tchannel/tornado/peer.py
index c6a6c1be..9dc70ac8 100644
--- a/tchannel/tornado/peer.py
+++ b/tchannel/tornado/peer.py
@@ -177,6 +177,15 @@ def incoming_connections(self):
             takewhile(lambda c: c.direction == INCOMING, self._connections)
         )
 
+    @property
+    def total_outbound_pendings(self):
+        """Return the total number of out pending req/res among connections"""
+        num = 0
+        for con in self.connections:
+            num += con.total_outbound_pendings
+
+        return num
+
     @property
     def is_ephemeral(self):
         """Whether this Peer is ephemeral."""
diff --git a/tests/tornado/test_connection.py b/tests/tornado/test_connection.py
index c1cc69c8..f53aa533 100644
--- a/tests/tornado/test_connection.py
+++ b/tests/tornado/test_connection.py
@@ -31,6 +31,7 @@
 from tchannel import messages
 from tchannel.errors import TimeoutError, ReadError
 from tchannel.tornado import connection
+from tchannel.tornado.message_factory import MessageFactory
 
 
 def dummy_headers():
@@ -149,6 +150,52 @@ def test_other_error_on_read(tornado_pair):
     assert mock_log.info.call_count == 0
 
 
+@pytest.mark.gen_test
+def test_pending_outgoing():
+    server = TChannel('server')
+    server.listen()
+
+    @server.raw.register
+    def hello(request):
+        assert server._dep_tchannel.peers.peers[0].total_outbound_pendings == 1
+        return 'hi'
+
+    client = TChannel('client')
+    yield client.raw(
+        hostport=server.hostport,
+        body='work',
+        endpoint='hello',
+        service='server'
+    )
+
+    client_peer = client._dep_tchannel.peers.peers[0]
+    server_peer = server._dep_tchannel.peers.peers[0]
+    assert client_peer.total_outbound_pendings == 0
+    assert server_peer.total_outbound_pendings == 0
+
+    class FakeMessageFactory(MessageFactory):
+        def build_raw_message(self, context, args, is_completed=True):
+            assert client_peer.total_outbound_pendings == 1
+            return super(FakeMessageFactory, self).build_raw_message(
+                context, args, is_completed,
+            )
+
+    client_conn = client_peer.connections[0]
+    client_conn.request_message_factory = FakeMessageFactory(
+        client_conn.remote_host,
+        client_conn.remote_host_port,
+    )
+    yield client.raw(
+        hostport=server.hostport,
+        body='work',
+        endpoint='hello',
+        service='server'
+    )
+
+    assert client_peer.total_outbound_pendings == 0
+    assert server_peer.total_outbound_pendings == 0
+
+
 @pytest.mark.gen_test
 def test_writer_write_error():
     server, client = socket.socketpair()

From ad1a60acc50fd953aa74504363a8c9f45e6954cb Mon Sep 17 00:00:00 2001
From: junchaowu <junchaowu1989@gmail.com>
Date: Thu, 21 Jan 2016 15:15:25 -0800
Subject: [PATCH 58/67] Cherry-pick heap-based peer selection (#355).

Original message:

Merge pull request #355 from uber/heap

Heap selection branch merged into mash develop branch.
---
 benchmarks/test_concurrent_requests.py |  99 +++++++++++++
 tchannel/container/__init__.py         |   0
 tchannel/container/heap.py             | 179 +++++++++++++++++++++++
 tchannel/peer_heap.py                  | 157 ++++++++++++++++++++
 tchannel/peer_strategy.py              |  60 ++++++++
 tchannel/tornado/connection.py         |  38 ++++-
 tchannel/tornado/dispatch.py           |  13 +-
 tchannel/tornado/peer.py               | 110 ++++++++++----
 tchannel/tornado/tchannel.py           |   4 +-
 tests/conftest.py                      |   6 +
 tests/container/test_heap.py           | 187 ++++++++++++++++++++++++
 tests/test_event.py                    |   2 +-
 tests/test_peer_heap.py                | 192 +++++++++++++++++++++++++
 tests/test_peer_strategy.py            |  78 ++++++++++
 tests/tornado/test_connection.py       |  50 +++++++
 tests/tornado/test_peer.py             | 124 +++++++++++++++-
 16 files changed, 1262 insertions(+), 37 deletions(-)
 create mode 100644 benchmarks/test_concurrent_requests.py
 create mode 100644 tchannel/container/__init__.py
 create mode 100644 tchannel/container/heap.py
 create mode 100644 tchannel/peer_heap.py
 create mode 100644 tchannel/peer_strategy.py
 create mode 100644 tests/container/test_heap.py
 create mode 100644 tests/test_peer_heap.py
 create mode 100644 tests/test_peer_strategy.py

diff --git a/benchmarks/test_concurrent_requests.py b/benchmarks/test_concurrent_requests.py
new file mode 100644
index 00000000..09898702
--- /dev/null
+++ b/benchmarks/test_concurrent_requests.py
@@ -0,0 +1,99 @@
+# Copyright (c) 2015 Uber Technologies, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+from __future__ import (
+    absolute_import, unicode_literals, print_function, division
+)
+
+from tornado import gen, ioloop
+from tchannel import TChannel
+
+
+servers = None
+client = None
+
+
+def setUpServer(num):
+    servers = []
+    for i in xrange(num):
+        server = TChannel('server' + str(i))
+
+        @server.raw.register
+        def hello(request):
+            return 'hello'
+        server.listen()
+        servers.append(server)
+     
+    return servers
+
+
+@gen.coroutine
+def setUpClient(servers):
+    router = []
+    for i in xrange(1000):
+        router.append('1.1.1.1:'+str(i))
+    client = TChannel('client', known_peers=router)
+
+    @client.raw.register
+    def hello(request):
+        return 'hello'
+        
+    client.listen()
+
+    for i in xrange(100):
+        server = servers[i]
+        yield server.raw(
+            service='server',
+            endpoint='hello',
+            body='hi',
+            hostport=client.hostport,
+        )
+
+    raise gen.Return(client)
+
+
+@gen.coroutine
+def peer_test():
+    global servers, client
+    fs = []
+    for _ in xrange(100):
+        fs.append(client.raw(
+            service='server',
+            endpoint='hello',
+            body='hi',
+            timeout=1000
+        ))
+    yield fs
+
+
+def stress_test():
+    ioloop.IOLoop.current().run_sync(peer_test)
+
+
+@gen.coroutine
+def setup():
+    global servers, client
+    servers = setUpServer(100)
+    client = yield setUpClient(servers)
+
+
+def test_peer_heap(benchmark):
+    ioloop.IOLoop.current().run_sync(setup)
+    benchmark(stress_test)
diff --git a/tchannel/container/__init__.py b/tchannel/container/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tchannel/container/heap.py b/tchannel/container/heap.py
new file mode 100644
index 00000000..5eeb96ac
--- /dev/null
+++ b/tchannel/container/heap.py
@@ -0,0 +1,179 @@
+# Copyright (c) 2015 Uber Technologies, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+from __future__ import absolute_import
+from __future__ import division
+
+import math
+import six
+from collections import deque
+
+
+class NoMatchError(Exception):
+    pass
+
+
+class HeapOperation(object):
+    """HeapOperation defines the interface on how to manipulate the heap.
+    Any extension of heap class should implement all the heap operations in
+    order to have complete heap functions.
+    """
+
+    def lt(self, i, j):
+        """Compare the items in position i and j of the heap.
+
+        :param i: the first item's position of the heap list
+        :param j: the second item's position of the heap list
+        :return true if items[i] < items[j], otherwise false.
+        """
+        raise NotImplementedError()
+
+    def push(self, x):
+        """Push a new item into heap"""
+        raise NotImplementedError()
+
+    def pop(self):
+        """Pop an item from the heap"""
+        raise NotImplementedError()
+
+    def peek(self, i):
+        """Peek at the item at the given position without removing it from the
+        heap.
+
+        :param i:
+            0-indexed position of the iteam in the heap
+        """
+        raise NotImplementedError()
+
+    def swap(self, i, j):
+        """swap items between position i and j of the heap"""
+        raise NotImplementedError()
+
+    def size(self):
+        """Return length of the heap."""
+        raise NotImplementedError()
+
+
+def init(h):
+    """Initialize existing object into the heap."""
+    # heapify
+    n = h.size()
+    for i in six.moves.range(int(math.floor(n/2)) - 1, -1, -1):
+        down(h, i, n)
+
+
+def push(h, x):
+    """Push a new value into heap."""
+    h.push(x)
+    up(h, h.size()-1)
+
+
+def pop(h):
+    """Pop the heap value from the heap."""
+    n = h.size() - 1
+    h.swap(0, n)
+    down(h, 0, n)
+    return h.pop()
+
+
+def remove(h, i):
+    """Remove the item at position i of the heap."""
+    n = h.size() - 1
+    if n != i:
+        h.swap(i, n)
+        down(h, i, n)
+        up(h, i)
+
+    return h.pop()
+
+
+def fix(h, i):
+    """Rearrange the heap after the item at position i got updated."""
+    down(h, i, h.size())
+    up(h, i)
+
+
+def up(h, child):
+    while child > 0:
+        parent = int(math.floor((child - 1) / 2))
+        if not h.lt(child, parent):
+            break
+
+        h.swap(parent, child)
+        child = parent
+
+
+def down(h, parent, n):
+    while True:
+        child1 = 2 * parent + 1
+        if child1 >= n or child1 < 0:
+            break
+
+        min_child = child1
+        child2 = child1 + 1
+        if child2 < n and not h.lt(child1, child2):
+            min_child = child2
+
+        if not h.lt(min_child, parent):
+            break
+
+        h.swap(parent, min_child)
+        parent = min_child
+
+
+def smallest(heap, predicate):
+    """Finds the index of the smallest item in the heap that matches the given
+    predicate.
+
+    :param heap:
+        Heap on which this search is being performed.
+    :param predicate:
+        Function that accepts an item from the heap and returns true or false.
+    :returns:
+        Index of the first item for which ``predicate`` returned true.
+    :raises NoMatchError:
+        If no matching items were found.
+    """
+    n = heap.size()
+
+    # items contains indexes of items yet to be checked.
+    items = deque([0])
+    while items:
+        current = items.popleft()
+        if current >= n:
+            continue
+
+        if predicate(heap.peek(current)):
+            return current
+
+        child1 = 2 * current + 1
+        child2 = child1 + 1
+
+        if child1 < n and child2 < n and heap.lt(child2, child1):
+            # make sure we check the smaller child first.
+            child1, child2 = child2, child1
+
+        if child1 < n:
+            items.append(child1)
+
+        if child2 < n:
+            items.append(child2)
+
+    raise NoMatchError()
diff --git a/tchannel/peer_heap.py b/tchannel/peer_heap.py
new file mode 100644
index 00000000..34152c7d
--- /dev/null
+++ b/tchannel/peer_heap.py
@@ -0,0 +1,157 @@
+# Copyright (c) 2015 Uber Technologies, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+from __future__ import absolute_import
+
+import random
+
+from .container import heap
+from .container.heap import HeapOperation
+from .container.heap import NoMatchError
+from .container.heap import smallest
+
+
+class PeerHeap(HeapOperation):
+    """PeerHeap maintains a min-heap of peers based on their ranks.
+
+    Peer in the heap will be arranged based on the peer's rank and peer's
+    order. Order is equal to peer heap's current order number plus random value
+    within the current peer size. It solves two problems when peers are in
+    same rank:
+
+    Dead peers: If the order is completely random, then an unlucky peer
+    with a very bad assigned order may never get selected.
+
+    Determinism: If the insertion order is used as-is, then all TChannel
+    instances would follow the same selection pattern, causing load
+    imbalance. For example if they get the same static list of peers, they
+    will all pick the first one, then the second one, and so on, cycling
+    between which host gets overloaded with requests.
+
+    All in all, it will keep certain level randomization but at the same
+    time make the peer rank not deterministic among different tchannel
+    instances.
+
+    """
+
+    __slots__ = ('peers',)
+
+    def __init__(self):
+        self.peers = []
+        self.order = 0
+
+    def size(self):
+        return len(self.peers)
+
+    def lt(self, i, j):
+        """Compare the priority of two peers.
+
+        Primary comparator will be the rank of each peer. If the ``rank`` is
+        same then compare the ``order``. The ``order`` attribute of the peer
+        tracks the heap push order of the peer. This help solve the imbalance
+        problem caused by randomization when deal with same rank situation.
+
+        :param i: ith peer
+        :param j: jth peer
+        :return: True or False
+        """
+        if self.peers[i].rank == self.peers[j].rank:
+            return self.peers[i].order < self.peers[j].order
+
+        return self.peers[i].rank < self.peers[j].rank
+
+    def peek(self, i):
+        return self.peers[i]
+
+    def push(self, x):
+        x.index = len(self.peers)
+        self.peers.append(x)
+
+    def pop(self):
+        item = self.peers.pop()
+        return item
+
+    def swap(self, i, j):
+        self.peers[i], self.peers[j] = self.peers[j], self.peers[i]
+        self.peers[i].index = i
+        self.peers[j].index = j
+
+    def update_peer(self, peer):
+        """Update the peer's position in the heap after peer's rank changed"""
+        heap.fix(self, peer.index)
+
+    def pop_peer(self):
+        """Pop the top peer from the heap
+
+        :return
+            return the top peer and remove it from the heap if heap is not
+            empty.Otherwise return None.
+        """
+        if not self.peers:
+            return None
+        return heap.pop(self)
+
+    def push_peer(self, peer):
+        """Push a new peer into the heap"""
+
+        self.order += 1
+        peer.order = self.order + random.randint(0, self.size())
+        heap.push(self, peer)
+
+    def peek_peer(self):
+        """Return the top peer of the heap
+        :return
+            return the top peer if heap is not empty. Otherwise return None.
+        """
+
+        if not self.peers:
+            return None
+
+        return self.peers[0]
+
+    def remove_peer(self, peer):
+        """Remove the peer from the heap.
+
+        Return: removed peer if peer exists. If peer's index is out of range,
+        raise IndexError.
+        """
+        if peer.index < 0 or peer.index >= self.size():
+            raise IndexError('Peer index is out of range')
+
+        assert peer is self.peers[peer.index], "peer is not in the heap"
+
+        return heap.remove(self, peer.index)
+
+    def smallest_peer(self, predicate):
+        """Return the smallest peer in the heap that matches the given
+        predicate.
+
+        :param predicate:
+            Function that accepts an item from the heap and returns
+            true or false.
+        :returns:
+            The first peer for which ``predicate`` returned true.
+        """
+        try:
+            return self.peek(
+                smallest(self, predicate),
+            )
+        except NoMatchError:
+            return None
diff --git a/tchannel/peer_strategy.py b/tchannel/peer_strategy.py
new file mode 100644
index 00000000..5e6042e7
--- /dev/null
+++ b/tchannel/peer_strategy.py
@@ -0,0 +1,60 @@
+# Copyright (c) 2015 Uber Technologies, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+from __future__ import absolute_import
+
+import sys
+
+
+class RankCalculator(object):
+    """RankCalculator calculates the rank of a peer."""
+
+    def get_rank(self, peer):
+        raise NotImplementedError()
+
+
+class PreferIncomingCalculator(RankCalculator):
+
+    # TIERS lists three ranges for three different kinds of peers.
+    # 0: ephemeral peers or unconnected peers
+    # 1: peers with only outgoing connections
+    # 2: peers with incoming connections
+    TIERS = [sys.maxint, sys.maxint / 2, 0]
+
+    def get_rank(self, peer):
+        """Calculate the peer rank based on connections.
+
+        If the peer has no incoming connections, it will have largest rank.
+        In our peer selection strategy, the largest number has least priority
+        in the heap.
+
+        If the peer has incoming connections, we will return number of outbound
+        pending requests and responses.
+
+        :param peer: instance of `tchannel.tornado.peer.Peer`
+        :return: rank of the peer
+        """
+        if peer.is_ephemeral or not peer.connections:
+            return self.TIERS[0]
+
+        if not peer.incoming_connections:
+            return self.TIERS[1] + peer.total_outbound_pendings
+
+        return self.TIERS[2] + peer.total_outbound_pendings
diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index 7854e281..9383d13a 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -139,6 +139,9 @@ def __init__(self, connection, tchannel=None, direction=None):
 
         self.tchannel = tchannel
         self._close_cb = None
+        # callback that will be called when there is a change in the outbound
+        # pending request/response lists.
+        self._outbound_pending_change_cb = None
 
         self.reader = Reader(self.connection)
         self.writer = Writer(self.connection)
@@ -152,6 +155,12 @@ def total_outbound_pendings(self):
         return (len(self._outbound_pending_res) +
                 len(self._outbound_pending_req))
 
+    def set_outbound_pending_change_callback(self, cb):
+        """Specify a function to be called when outbound pending request or
+        response list changed.
+        """
+        self._outbound_pending_change_cb = cb
+
     def set_close_callback(self, cb):
         """Specify a function to be called when this connection is closed.
 
@@ -485,6 +494,27 @@ def ping(self):
     def pong(self):
         return self.writer.put(messages.PingResponseMessage())
 
+    def _outbound_pending_change(self):
+        """Called when there is a change in the outbound pending list."""
+        if self._outbound_pending_change_cb:
+            self._outbound_pending_change_cb()
+
+    def add_outbound_pending_res(self, res):
+        self._outbound_pending_res[res.id] = res
+        self._outbound_pending_change()
+
+    def add_outbound_pending_req(self, req):
+        self._outbound_pending_req[req.id] = req
+        self._outbound_pending_change()
+
+    def remove_outbound_pending_res(self, res):
+        self._outbound_pending_res.pop(res.id, None)
+        self._outbound_pending_change()
+
+    def remove_outbound_pending_req(self, req):
+        self._outbound_pending_req.pop(req.id, None)
+        self._outbound_pending_change()
+
 
 class StreamConnection(TornadoConnection):
     """Streaming request/response into protocol messages and sent by tornado
@@ -557,7 +587,7 @@ def _stream(self, context, message_factory):
     @tornado.gen.coroutine
     def post_response(self, response):
         try:
-            self._outbound_pending_res[response.id] = response
+            self.add_outbound_pending_res(response)
             # TODO: before_send_response
             yield self._stream(response, self.response_message_factory)
 
@@ -567,7 +597,7 @@ def post_response(self, response):
                 response,
             )
         finally:
-            self._outbound_pending_res.pop(response.id, None)
+            self.remove_outbound_pending_res(response)
             response.close_argstreams(force=True)
 
     def stream_request(self, request):
@@ -600,9 +630,9 @@ def send_request(self, request):
 
         future = tornado.gen.Future()
         self._outbound_pending_call[request.id] = future
-        self._outbound_pending_req[request.id] = request
+        self.add_outbound_pending_req(request)
         self.stream_request(request).add_done_callback(
-            lambda f: self._outbound_pending_req.pop(request.id, None)
+            lambda f: self.remove_outbound_pending_req(request)
         )
 
         if request.ttl:
diff --git a/tchannel/tornado/dispatch.py b/tchannel/tornado/dispatch.py
index b578f48f..dc0fb3a3 100644
--- a/tchannel/tornado/dispatch.py
+++ b/tchannel/tornado/dispatch.py
@@ -138,10 +138,7 @@ def handle_call(self, request, connection):
         request.tracing.name = request.endpoint
         tchannel.event_emitter.fire(EventType.before_receive_request, request)
 
-        handler = self.handlers.get(request.endpoint)
-
-        if handler is None:
-            handler = self.handlers[self.FALLBACK]
+        handler = self.get_endpoint(request.endpoint)
 
         requested_as = request.headers.get('as', None)
         expected_as = handler.req_serializer.name
@@ -250,6 +247,14 @@ def _on_post_response(future):
 
         raise gen.Return(response)
 
+    def get_endpoint(self, name):
+        handler = self.handlers.get(name)
+
+        if handler is None:
+            handler = self.handlers[self.FALLBACK]
+
+        return handler
+
     def register(
             self,
             rule,
diff --git a/tchannel/tornado/peer.py b/tchannel/tornado/peer.py
index 9dc70ac8..5d87e96b 100644
--- a/tchannel/tornado/peer.py
+++ b/tchannel/tornado/peer.py
@@ -24,9 +24,11 @@
 
 import sys
 import logging
-import random
+import sys
+
 from collections import deque
 from itertools import takewhile, dropwhile
+
 from tornado import gen
 from tornado.iostream import StreamClosedError
 
@@ -40,6 +42,8 @@
 from ..errors import NetworkError
 from ..event import EventType
 from ..glossary import DEFAULT_TIMEOUT
+from ..peer_heap import PeerHeap
+from ..peer_strategy import PreferIncomingCalculator
 from ..zipkin.annotation import Endpoint
 from ..zipkin.trace import Trace
 from .connection import StreamConnection
@@ -60,8 +64,15 @@ class Peer(object):
         'host',
         'port',
 
+        'rank',
+        'index',
+        'order',
+        'chosen_count',
+        'on_conn_change',
+
         '_connections',
         '_connecting',
+        '_on_conn_change_cb',
     )
 
     # Class used to create new outgoing connections.
@@ -69,13 +80,19 @@ class Peer(object):
     # It must support a .outgoing method.
     connection_class = StreamConnection
 
-    def __init__(self, tchannel, hostport):
+    def __init__(self, tchannel, hostport, rank=None, on_conn_change=None):
         """Initialize a Peer
 
         :param tchannel:
             TChannel through which requests will be made.
         :param hostport:
             Host-port this Peer is for.
+        :param rank:
+            The rank of a peer will affect the chance that the peer gets
+            selected when the client sends outbound requests.
+        :param on_conn_change:
+            A callback method takes Peer object as input and is called whenever
+            there are connection changes in the peer.
         """
         assert hostport, "hostport is required"
 
@@ -93,6 +110,25 @@ def __init__(self, tchannel, hostport):
         # helps avoid making multiple outgoing connections.
         self._connecting = None
 
+        # rank is used to measure the performance of the peer.
+        # It will be used in the peer heap.
+        if rank is not None:
+            self.rank = rank
+        else:
+            self.rank = sys.maxint
+        # index records the position of the peer in the peer heap
+        self.index = -1
+        # order maintains the push order of the peer in the heap.
+        self.order = 0
+        # for debug purpose, count the number of times the peer gets selected.
+        self.chosen_count = 0
+
+        # callback is called when there is a change in connections.
+        self._on_conn_change_cb = on_conn_change
+
+    def set_on_conn_change_callback(self, cb):
+        self._on_conn_change_cb = cb
+
     def connect(self):
         """Get a connection to this peer.
 
@@ -128,8 +164,7 @@ def on_connect(_):
                 # We don't actually need to handle the exception. That's on
                 # the caller.
                 connection = conn_future.result()
-                self._connections.append(connection)
-                self._set_on_close_cb(connection)
+                self.register_outgoing_conn(connection)
             self._connecting = None
 
         conn_future.add_done_callback(on_connect)
@@ -139,13 +174,30 @@ def _set_on_close_cb(self, conn):
 
         def on_close():
             self._connections.remove(conn)
+            self._on_conn_change()
 
         conn.set_close_callback(on_close)
 
-    def register_incoming(self, conn):
+    def register_outgoing_conn(self, conn):
+        """Add outgoing connection into the heap."""
+        assert conn, "conn is required"
+        conn.set_outbound_pending_change_callback(self._on_conn_change)
+        self._connections.append(conn)
+        self._set_on_close_cb(conn)
+        self._on_conn_change()
+
+    def register_incoming_conn(self, conn):
+        """Add incoming connection into the heap."""
         assert conn, "conn is required"
+        conn.set_outbound_pending_change_callback(self._on_conn_change)
         self._connections.appendleft(conn)
         self._set_on_close_cb(conn)
+        self._on_conn_change()
+
+    def _on_conn_change(self):
+        """Function will be called any time there is connection changes."""
+        if self._on_conn_change_cb:
+            self._on_conn_change_cb(self)
 
     @property
     def hostport(self):
@@ -519,6 +571,8 @@ class PeerGroup(object):
 
     __slots__ = (
         'tchannel',
+        'peer_heap',
+        'rank_calculator',
         '_peers',
         '_resetting',
         '_reset_condition',
@@ -539,10 +593,8 @@ def __init__(self, tchannel):
         # to block on the same reset.
         self._resetting = False
 
-        # We'll create a Condition here later. We want to avoid it right now
-        # because it has a side-effect of scheduling some dummy work on the
-        # ioloop, which prevents us from forking (if you're into that).
-        self._reset_condition = None
+        self.peer_heap = PeerHeap()
+        self.rank_calculator = PreferIncomingCalculator()
 
     def __str__(self):
         return "<PeerGroup peers=%s>" % str(self._peers)
@@ -572,7 +624,8 @@ def get(self, hostport):
         """
         assert hostport, "hostport is required"
         if hostport not in self._peers:
-            self._peers[hostport] = self.peer_class(self.tchannel, hostport)
+            self.add(hostport)
+
         return self._peers[hostport]
 
     def lookup(self, hostport):
@@ -591,7 +644,10 @@ def remove(self, hostport):
         :returns: The removed Peer
         """
         assert hostport, "hostport is required"
-        return self._peers.pop(hostport, None)
+        peer = self._peers.pop(hostport, None)
+        if peer:
+            self.peer_heap.remove_peer(peer)
+        return peer
 
     def add(self, peer):
         """Add an existing Peer to this group.
@@ -602,13 +658,27 @@ def add(self, peer):
 
         if isinstance(peer, basestring):
             # Assume strings are host-ports
-            peer = self.peer_class(self.tchannel, peer)
+            peer = self.peer_class(
+                tchannel=self.tchannel,
+                hostport=peer,
+                on_conn_change=self._update_heap,
+            )
 
         assert peer.hostport not in self._peers, (
             "%s already has a peer" % peer.hostport
         )
-
+        peer.rank = self.rank_calculator.get_rank(peer)
         self._peers[peer.hostport] = peer
+        self.peer_heap.push_peer(peer)
+
+    def _update_heap(self, peer):
+        """Recalculate the peer's rank and update itself in the peer heap."""
+        rank = self.rank_calculator.get_rank(peer)
+        if rank == peer.rank:
+            return
+
+        peer.rank = rank
+        self.peer_heap.update_peer(peer)
 
     @property
     def hosts(self):
@@ -659,14 +729,6 @@ def choose(self, hostport=None, blacklist=None):
         if hostport:
             return self.get(hostport)
 
-        hosts = self._peers.viewkeys() - blacklist
-        if not hosts:
-            return None
-
-        peers = list(self._connected_peers(hosts))
-        if peers:
-            return peers[random.randint(0, len(peers)-1)]
-        else:
-            hosts = list(hosts)
-            host = hosts[random.randint(0, len(hosts)-1)]
-            return self.get(host)
+        return self.peer_heap.smallest_peer(
+            (lambda p: p.hostport not in blacklist),
+        )
diff --git a/tchannel/tornado/tchannel.py b/tchannel/tornado/tchannel.py
index ddbf57f2..64e4b790 100644
--- a/tchannel/tornado/tchannel.py
+++ b/tchannel/tornado/tchannel.py
@@ -238,7 +238,7 @@ def request(self,
             Defaults to 'c'.
         """
         # TODO disallow certain parameters or don't propagate them backwards.
-        # For example, blacklist and score threshold aren't really
+        # For example, blacklist and rank threshold aren't really
         # user-configurable right now.
         return self.peers.request(hostport=hostport,
                                   service=service,
@@ -454,7 +454,7 @@ def handle_stream(self, stream, address):
         self.tchannel.peers.get(
             "%s:%s" % (conn.remote_host,
                        conn.remote_host_port)
-        ).register_incoming(conn)
+        ).register_incoming_conn(conn)
 
         yield conn.serve(handler=self._handle)
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 193dcf64..1bc985d4 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -39,6 +39,12 @@ def write(self, payload, callback=None):
     def getvalue(self):
         return self.buff
 
+    def set_outbound_pending_change_callback(self, cb):
+        pass
+
+    def set_close_callback(self, cb):
+        pass
+
 
 @pytest.fixture
 def connection():
diff --git a/tests/container/test_heap.py b/tests/container/test_heap.py
new file mode 100644
index 00000000..e56cb3ae
--- /dev/null
+++ b/tests/container/test_heap.py
@@ -0,0 +1,187 @@
+# Copyright (c) 2015 Uber Technologies, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+from __future__ import absolute_import
+import random
+import sys
+import math
+import pytest
+import six
+from hypothesis import given
+from hypothesis import strategies as st
+
+from tchannel.container import heap
+from tchannel.container.heap import HeapOperation, NoMatchError
+
+
+class IntHeap(HeapOperation):
+    def __init__(self):
+        self.values = []
+
+    def size(self):
+        return len(self.values)
+
+    def peek(self, i):
+        return self.values[i]
+
+    def lt(self, i, j):
+        return self.values[i] < self.values[j]
+
+    def push(self, x):
+        self.values.append(x)
+
+    def pop(self):
+        return self.values.pop()
+
+    def swap(self, i, j):
+        self.values[i], self.values[j] = self.values[j], self.values[i]
+
+
+@pytest.fixture
+def int_heap():
+    return IntHeap()
+
+
+@pytest.fixture(params=['unique', 'duplicate', 'reverse'])
+def values(request):
+    n = 1000
+    v = []
+
+    if request.param == "unique":
+        for i in six.moves.range(n):
+            v.append(i)
+        random.shuffle(v)
+    elif request.param == "duplicate":
+        for _ in six.moves.range(n):
+            v.append(random.randint(0, math.floor(n/20)))
+    elif request.param == "reverse":
+        for i in six.moves.range(n, -1, -1):
+            v.append(i)
+
+    return v
+
+
+def verify(ph, parent):
+    child1 = 2 * parent + 1
+    child2 = 2 * parent + 2
+    if child2 < ph.size():
+        assert not ph.lt(child1, parent)
+        verify(ph, child1)
+
+    if child2 < ph.size():
+        assert not ph.lt(child2, parent)
+        verify(ph, child2)
+
+
+def test_init(int_heap, values):
+    int_heap.values = values
+    heap.init(int_heap)
+    verify(int_heap, 0)
+
+
+def test_push(int_heap, values):
+    for value in values:
+        heap.push(int_heap, value)
+        verify(int_heap, 0)
+
+    assert int_heap.size() == len(values)
+
+
+def test_pop(int_heap, values):
+    for value in values:
+        heap.push(int_heap, value)
+        verify(int_heap, 0)
+
+    for i in sorted(values):
+        assert i == heap.pop(int_heap)
+        verify(int_heap, 0)
+
+    assert int_heap.size() == 0
+
+
+def test_remove(int_heap, values):
+    for value in values:
+        heap.push(int_heap, value)
+        verify(int_heap, 0)
+
+    # random remove item from the heap
+    n = len(values)
+    for i in six.moves.range(n - 1, -1, -1):
+        heap.remove(int_heap, random.randint(0, i))
+        verify(int_heap, 0)
+
+
+def test_smallest_basic(int_heap, values):
+    for value in values:
+        heap.push(int_heap, value)
+        verify(int_heap, 0)
+
+    assert heap.smallest(int_heap, (lambda _: True)) == 0
+
+    with pytest.raises(NoMatchError):
+        heap.smallest(int_heap, (lambda _: False))
+
+
+def test_smallest_empty(int_heap):
+    with pytest.raises(NoMatchError):
+        heap.smallest(int_heap, (lambda _: True))
+
+
+def test_smallest_unordered_children(int_heap):
+    int_heap.values = [1, 4, 2]
+    verify(int_heap, 0)
+
+    assert heap.smallest(int_heap, (lambda x: x % 2 == 0)) == 2
+
+
+@given(st.lists(st.integers(), min_size=1))
+def test_smallest_random(values):
+    int_heap = IntHeap()
+    for v in values:
+        heap.push(int_heap, v)
+
+    target = random.choice(int_heap.values)
+    valid = [i for (i, v) in enumerate(int_heap.values) if v == target]
+    assert heap.smallest(int_heap, (lambda x: x == target)) in valid
+
+
+@pytest.mark.heapfuzz
+@pytest.mark.skipif(True, reason='stress test for the value heap operations')
+def test_heap_fuzz(int_heap):
+    for _ in six.moves.range(random.randint(1, 100000)):
+        ops = random.randint(0, 1)
+        if ops == 0:  # push
+            heap.push(int_heap, random.randint(0, sys.maxint))
+        elif ops == 1:  # pop
+            if int_heap.size():
+                heap.pop(int_heap)
+
+        if int_heap.size():
+            assert smallest(int_heap.values) == int_heap.values[0]
+
+        verify(int_heap, 0)
+
+
+def smallest(vs):
+    m = sys.maxint
+    for value in vs:
+        if value < m:
+            m = value
+    return m
diff --git a/tests/test_event.py b/tests/test_event.py
index 6d84ec1f..e68d27e2 100644
--- a/tests/test_event.py
+++ b/tests/test_event.py
@@ -80,7 +80,7 @@ def test_after_send_error_event_called():
                 service='test',
                 arg1='endpoint',
                 hostport=tchannel.hostport,
-                timeout=0.02,
+                timeout=0.3,
             )
         mock_fire.assert_any_call(
             mock.ANY, EventType.after_send_error, mock.ANY,
diff --git a/tests/test_peer_heap.py b/tests/test_peer_heap.py
new file mode 100644
index 00000000..9d337850
--- /dev/null
+++ b/tests/test_peer_heap.py
@@ -0,0 +1,192 @@
+# Copyright (c) 2015 Uber Technologies, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+from __future__ import absolute_import
+import sys
+import math
+
+import mock
+import random
+import pytest
+import six
+from tchannel.peer_heap import PeerHeap
+
+
+@pytest.fixture
+def peer_heap():
+    return PeerHeap()
+
+
+@pytest.fixture(params=['unique', 'duplicate', 'reverse'])
+def peers(request):
+    n = 100
+    v = []
+
+    if request.param == "unique":
+        for i in six.moves.range(n):
+            v.append(mock_peer(i))
+        random.shuffle(v)
+    elif request.param == "duplicate":
+        for _ in six.moves.range(n):
+            v.append(mock_peer(random.randint(0, math.floor(n/20))))
+    elif request.param == "reverse":
+        for i in six.moves.range(n, -1, -1):
+            v.append(mock_peer(i))
+
+    return v
+
+
+def mock_peer(rank=None):
+    peer = mock.MagicMock()
+    peer.index = -1
+    peer.rank = rank if rank is not None else random.randint(0, sys.maxint)
+    return peer
+
+
+def verify(ph, parent):
+    """Assert the peer is still well-maintained."""
+    child1 = 2*parent + 1
+    child2 = 2*parent + 2
+    if child2 < ph.size():
+        assert not ph.lt(child1, parent)
+        verify(ph, child1)
+
+    if child2 < ph.size():
+        assert not ph.lt(child2, parent)
+        verify(ph, child2)
+
+
+def test_push(peer_heap, peers):
+    for peer in peers:
+        peer_heap.push_peer(peer)
+        verify(peer_heap, 0)
+
+    assert peer_heap.size() == len(peers)
+
+
+def test_pop(peer_heap, peers):
+    for peer in peers:
+        peer_heap.push_peer(peer)
+        verify(peer_heap, 0)
+
+    n = len(peers)
+    for _ in six.moves.range(n):
+        peer_heap.pop_peer()
+        verify(peer_heap, 0)
+
+    assert peer_heap.size() == 0
+
+
+def test_update(peer_heap, peers):
+    for peer in peers:
+        peer_heap.push_peer(peer)
+        verify(peer_heap, 0)
+
+    p = peer_heap.peers[len(peers) - 1]
+    p.rank = -1
+    peer_heap.update_peer(p)
+    verify(peer_heap, 0)
+    assert peer_heap.peek_peer().rank == -1
+    assert peer_heap.peek_peer() == p
+
+
+def test_remove(peer_heap, peers):
+    for peer in peers:
+        peer_heap.push_peer(peer)
+        verify(peer_heap, 0)
+
+    n = len(peers)
+    for _ in six.moves.range(n):
+        p = peer_heap.peers[random.randint(0, peer_heap.size() - 1)]
+        assert p is peer_heap.remove_peer(p)
+        verify(peer_heap, 0)
+        verify_peer_not_in_heap(peer_heap, p)
+
+
+def verify_peer_not_in_heap(peer_heap, p):
+    for peer in peer_heap.peers:
+        assert peer is not p
+
+
+def test_remove_duplicate(peer_heap, peers):
+    for peer in peers:
+        peer_heap.push_peer(peer)
+        verify(peer_heap, 0)
+
+    n = random.randint(0, len(peers))
+
+    for _ in six.moves.range(n):
+        p = peer_heap.peers[random.randint(0, peer_heap.size() - 1)]
+        assert p is peer_heap.remove_peer(p)
+        with pytest.raises(IndexError):
+            peer_heap.remove_peer(p)
+        verify(peer_heap, 0)
+        verify_peer_not_in_heap(peer_heap, p)
+
+
+def test_remove_from_empty_heap():
+    heap = PeerHeap()
+    with pytest.raises(IndexError):
+        heap.remove_peer(mock_peer())
+
+
+def test_remove_mismatch(peer_heap, peers):
+    for peer in peers:
+        peer_heap.push_peer(peer)
+        verify(peer_heap, 0)
+
+    # create a fake peer with duplicated index.
+    fake_peer = mock_peer()
+    fake_peer.index = 1
+    with pytest.raises(AssertionError) as e:
+        peer_heap.remove_peer(fake_peer)
+
+    assert e.value.message == 'peer is not in the heap'
+
+
+@pytest.mark.heapfuzz
+@pytest.mark.skipif(True, reason='stress test for the peer heap operations')
+def test_heap_fuzz(peer_heap):
+    for _ in six.moves.range(random.randint(1, 100000)):
+        ops = random.randint(0, 2)
+        if ops == 0:    # push
+            peer_heap.push_peer(mock_peer())
+        elif ops == 1:  # pop
+            peer_heap.pop_peer()
+        elif ops == 2:  # update
+            if len(peer_heap) <= 0:
+                continue
+            p = peer_heap.peers[random.randint(0, len(peer_heap) - 1)]
+            p.rank = random.randint(0, sys.maxint)
+            peer_heap.update_peer(p)
+
+        if peer_heap.size():
+            assert smallest(peer_heap.peers) == peer_heap.peek_peer().rank
+
+        verify(peer_heap, 0)
+
+
+def smallest(ps):
+    m = sys.maxint
+    for peer in ps:
+        if peer.rank < m:
+            m = peer.rank
+
+    return m
diff --git a/tests/test_peer_strategy.py b/tests/test_peer_strategy.py
new file mode 100644
index 00000000..b81f8d33
--- /dev/null
+++ b/tests/test_peer_strategy.py
@@ -0,0 +1,78 @@
+# Copyright (c) 2015 Uber Technologies, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+from __future__ import absolute_import
+
+import sys
+
+import pytest
+from tchannel import TChannel
+from tchannel.peer_strategy import PreferIncomingCalculator
+from tchannel.tornado.connection import TornadoConnection
+from tchannel.tornado.connection import INCOMING
+from tchannel.tornado.peer import Peer
+
+
+@pytest.mark.gen_test
+def test_get_rank_no_connection():
+    server = TChannel('server')
+    server.listen()
+    peer = Peer(TChannel('test'), '10.10.101.21:230')
+    calculator = PreferIncomingCalculator()
+    assert sys.maxint == calculator.get_rank(peer)
+
+
+@pytest.mark.gen_test
+def test_get_rank_with_outgoing():
+    server = TChannel('server')
+    server.listen()
+    connection = yield TornadoConnection.outgoing(server.hostport)
+
+    peer = Peer(TChannel('test'), '10.10.101.21:230')
+    calculator = PreferIncomingCalculator()
+    peer.register_outgoing_conn(connection)
+    assert PreferIncomingCalculator.TIERS[1] == calculator.get_rank(peer)
+
+
+@pytest.mark.gen_test
+def test_get_rank_with_imcoming():
+    server = TChannel('server')
+    server.listen()
+    connection = yield TornadoConnection.outgoing(server.hostport)
+    connection.direction = INCOMING
+    peer = Peer(TChannel('test'), '10.10.101.21:230')
+    calculator = PreferIncomingCalculator()
+    peer.register_incoming_conn(connection)
+    assert sys.maxint != calculator.get_rank(peer)
+
+
+@pytest.mark.gen_test
+def test_get_rank_ephemeral():
+    server = TChannel('server')
+    server.listen()
+    connection = yield TornadoConnection.outgoing(server.hostport)
+    connection.direction = INCOMING
+    peer = Peer(TChannel('test'), '10.10.101.21:230')
+    peer.register_incoming_conn(connection)
+
+    peer.host = '0.0.0.0'
+    peer.port = 0
+    calculator = PreferIncomingCalculator()
+    assert sys.maxint == calculator.get_rank(peer)
diff --git a/tests/tornado/test_connection.py b/tests/tornado/test_connection.py
index f53aa533..1e3363c8 100644
--- a/tests/tornado/test_connection.py
+++ b/tests/tornado/test_connection.py
@@ -32,6 +32,7 @@
 from tchannel.errors import TimeoutError, ReadError
 from tchannel.tornado import connection
 from tchannel.tornado.message_factory import MessageFactory
+from tchannel.tornado.peer import Peer
 
 
 def dummy_headers():
@@ -196,6 +197,55 @@ def build_raw_message(self, context, args, is_completed=True):
     assert server_peer.total_outbound_pendings == 0
 
 
+@pytest.mark.gen_test
+def test_client_connection_change_callback():
+    server = TChannel('server')
+    server.listen()
+
+    @server.raw.register
+    def hello(request):
+        return 'hi'
+
+    client = TChannel('client')
+    count = [0]
+
+    def test_cb(peer):
+        count[0] += 1
+
+    client._dep_tchannel.peers.get(
+        server.hostport)._on_conn_change_cb = test_cb
+    yield client.raw(
+        hostport=server.hostport,
+        body='work',
+        endpoint='hello',
+        service='server'
+    )
+
+    # 1: connection built, 1: sending request, 1: finish sending request
+    assert count[0] == 3
+
+
+@pytest.mark.gen_test
+def test_both_connection_change_callback():
+    client = TChannel('client')
+
+    with mock.patch.object(Peer, '_on_conn_change') as mock_conn_change:
+        server = TChannel('server')
+        server.listen()
+
+        @server.raw.register
+        def hello(request):
+            return 'hi'
+
+        yield client.raw(
+            hostport=server.hostport,
+            body='work',
+            endpoint='hello',
+            service='server'
+        )
+        assert mock_conn_change.call_count == 6
+
+
 @pytest.mark.gen_test
 def test_writer_write_error():
     server, client = socket.socketpair()
diff --git a/tests/tornado/test_peer.py b/tests/tornado/test_peer.py
index ff6c6c28..828bb69a 100644
--- a/tests/tornado/test_peer.py
+++ b/tests/tornado/test_peer.py
@@ -25,12 +25,16 @@
 
 import mock
 import pytest
+
 from tornado import gen
 
 from tchannel import TChannel
 from tchannel.errors import NoAvailablePeerError
-
 from tchannel.tornado import peer as tpeer
+from tchannel.tornado import Request
+from tchannel.tornado import Response
+from tchannel.tornado.connection import TornadoConnection
+from tchannel.tornado.peer import Peer
 from tchannel.tornado.stream import InMemStream
 from tchannel.tornado.stream import read_full
 
@@ -208,5 +212,121 @@ def test_peer_incoming_connections_are_preferred(request):
 
     assert (yield peer.connect()) is outgoing
 
-    peer.register_incoming(incoming)
+    peer.register_incoming_conn(incoming)
     assert (yield peer.connect()) is incoming
+
+
+@pytest.fixture
+def peer():
+    return Peer(
+        tchannel=TChannel('peer'),
+        hostport='127.0.0.1:21300',
+    )
+
+
+def test_on_conn_change(peer, connection):
+    c = [0]
+
+    def conn_change_db(peer):
+        c[0] += 1
+
+    peer.set_on_conn_change_callback(conn_change_db)
+    peer.register_incoming_conn(connection)
+    assert c[0] == 1
+
+    peer.register_outgoing_conn(connection)
+    assert c[0] == 2
+
+
+@pytest.mark.gen_test
+def test_outbound_pending_change():
+    server = TChannel('server')
+    server.listen()
+    connection = yield TornadoConnection.outgoing(server.hostport)
+    c = [0]
+
+    def outbound_pending_change_callback():
+        c[0] += 1
+
+    connection.set_outbound_pending_change_callback(
+        outbound_pending_change_callback
+    )
+
+    connection.add_outbound_pending_req(Request())
+    assert c[0] == 1
+    connection.add_outbound_pending_res(Response())
+    assert c[0] == 2
+    connection.remove_outbound_pending_req(Request())
+    assert c[0] == 3
+    connection.remove_outbound_pending_res(Response())
+    assert c[0] == 4
+
+
+@pytest.mark.gen_test
+def test_outbound_pending_change_propagate(peer):
+    server = TChannel('server')
+    server.listen()
+    connection = yield TornadoConnection.outgoing(server.hostport)
+
+    peer.register_incoming_conn(connection)
+    b = [0]
+
+    def conn_change_db(peer):
+        b[0] += 1
+
+    peer.set_on_conn_change_callback(conn_change_db)
+
+    connection.add_outbound_pending_req(Request())
+    assert b[0] == 1
+    connection.add_outbound_pending_res(Response())
+    assert b[0] == 2
+    connection.remove_outbound_pending_req(Request())
+    assert b[0] == 3
+    connection.remove_outbound_pending_res(Response())
+    assert b[0] == 4
+
+
+@pytest.fixture
+def hostports():
+    return ['127.0.0.1:' + str(i) for i in range(100)]
+
+
+def test_choose(hostports):
+    tchannel = TChannel('test')
+    peer_group = tchannel._dep_tchannel.peers
+    for hp in hostports:
+        peer_group.get(hp)
+
+    n = len(hostports)
+    for _ in hostports:
+        peer_group.choose()
+        assert peer_group.peer_heap.size() == n
+
+
+def test_choose_with_blacklist(hostports):
+    tchannel = TChannel('test')
+    peer_group = tchannel._dep_tchannel.peers
+    for hp in hostports:
+        peer_group.get(hp)
+
+    n = len(hostports)
+    blacklist = set()
+    for _ in hostports:
+        peer = peer_group.choose(blacklist=blacklist)
+        assert peer not in blacklist
+        blacklist.add(peer.hostport)
+        assert peer_group.peer_heap.size() == n
+
+
+def test_choose_with_target_hostport(hostports):
+    tchannel = TChannel('test')
+    peer_group = tchannel._dep_tchannel.peers
+    for hp in hostports:
+        peer_group.get(hp)
+
+    n = len(hostports) + 1
+    target = '1.0.0.1:9000'
+    for _ in hostports:
+        peer = peer_group.choose(hostport=target)
+        assert target == peer.hostport
+        assert peer_group.peer_heap.size() == n

From 6bcee32518fb62026a252c8ab7a88309fafa2c8e Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Thu, 17 Mar 2016 15:35:40 -0700
Subject: [PATCH 59/67] fix peer selection benchmark

---
 benchmarks/test_peer_selection.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/benchmarks/test_peer_selection.py b/benchmarks/test_peer_selection.py
index 4671f811..6f61225d 100644
--- a/benchmarks/test_peer_selection.py
+++ b/benchmarks/test_peer_selection.py
@@ -25,6 +25,7 @@
 import mock
 import random
 
+from tchannel.tornado.connection import INCOMING
 from tchannel.tornado.peer import (
     Peer as _Peer,
     PeerGroup as _PeerGroup,
@@ -39,6 +40,8 @@ class FakeConnection(object):
     def __init__(self, hostport):
         self.hostport = hostport
         self.closed = False
+        self.direction = INCOMING
+        self.total_outbound_pendings = 0
 
     @classmethod
     def outgoing(cls, hostport, process_name=None, serve_hostport=None,
@@ -57,6 +60,9 @@ def result(self):
     def set_close_callback(self, cb):
         pass
 
+    def set_outbound_pending_change_callback(self, cb):
+        pass
+
 
 class Peer(_Peer):
     connection_class = FakeConnection
@@ -96,7 +102,7 @@ def test_choose(benchmark):
         peer = group.peers[random.randint(0, NUM_PEERS-1)]
         if peer.hostport in connected_peers:
             continue
-        peer.register_incoming(FakeConnection(peer.hostport))
+        peer.register_incoming_conn(FakeConnection(peer.hostport))
         connected_peers.add(peer.hostport)
 
     benchmark(group.choose)

From 30a30d8c6b72afde1cd83fbb41656f96a8504ae5 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Mon, 28 Mar 2016 11:53:41 -0700
Subject: [PATCH 60/67] Update CHANGELOG

---
 CHANGES.rst              | 2 ++
 tchannel/tornado/peer.py | 1 -
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index bf9a969c..c9567db5 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,6 +4,8 @@ Changes by Version
 0.21.11 (unreleased)
 --------------------
 
+- Peer selection is now constant time instead of linear time. This should
+  significantly reduce CPU load per request.
 - Fixed a bug where certain errors while reading requests would propagate as
   TimeoutErrors.
 - Attempting to register endpoints against a synchronous TChannel now logs an
diff --git a/tchannel/tornado/peer.py b/tchannel/tornado/peer.py
index 5d87e96b..ac03b729 100644
--- a/tchannel/tornado/peer.py
+++ b/tchannel/tornado/peer.py
@@ -24,7 +24,6 @@
 
 import sys
 import logging
-import sys
 
 from collections import deque
 from itertools import takewhile, dropwhile

From 143e4a72772a62c45688779b651f5e1ac56d9ffa Mon Sep 17 00:00:00 2001
From: Bryce Lampe <blampe@uber.com>
Date: Tue, 29 Mar 2016 15:03:04 -0700
Subject: [PATCH 61/67] Reduce advertise interval to 3 minutes

---
 CHANGES.rst                   | 1 +
 tchannel/tornado/hyperbahn.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index bf9a969c..51160c1e 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -8,6 +8,7 @@ Changes by Version
   TimeoutErrors.
 - Attempting to register endpoints against a synchronous TChannel now logs an
   INFO level message.
+- Reduced default advertisement interval to 3 minutes.
 
 
 0.21.10 (2016-03-17)
diff --git a/tchannel/tornado/hyperbahn.py b/tchannel/tornado/hyperbahn.py
index bb1edd97..3e694396 100644
--- a/tchannel/tornado/hyperbahn.py
+++ b/tchannel/tornado/hyperbahn.py
@@ -34,7 +34,7 @@
 EXPO_BASE = 1.4  # try this first
 MAX_DELAY = 10  # sec
 MAX_ATTEMPT = 7  # pow(1.4, 8) > 10
-DELAY = 60 * 1000  # ms delay time for successful advertise
+DELAY = 3 * 60 * 1000  # ms delay time for successful advertise
 FIRST_ADVERTISE_TIME = 30  # sec
 
 log = logging.getLogger('tchannel')

From 566cd8d042b90c090b8410a3c18426105677ab4c Mon Sep 17 00:00:00 2001
From: Junchao Wu <junchaow@uber.com>
Date: Tue, 29 Mar 2016 16:15:38 -0700
Subject: [PATCH 62/67] Shuffle peers when adding to peer heap to avoid bias

Currently, there's a bias on the first request causing it to select
earlier peers. This changes add to shuffle peers around rather than
having order dependent upon when it was added.
---
 tchannel/peer_heap.py    | 19 +++++++++++++++++++
 tchannel/tornado/peer.py |  2 +-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/tchannel/peer_heap.py b/tchannel/peer_heap.py
index 34152c7d..e324d6dd 100644
--- a/tchannel/peer_heap.py
+++ b/tchannel/peer_heap.py
@@ -115,6 +115,13 @@ def push_peer(self, peer):
         peer.order = self.order + random.randint(0, self.size())
         heap.push(self, peer)
 
+    def add_and_shuffle(self, peer):
+        """Push a new peer into the heap and shuffle the heap"""
+        self.push_peer(peer)
+
+        r = random.randint(0, self.size() - 1)
+        self.swap_order(peer.index, r)
+
     def peek_peer(self):
         """Return the top peer of the heap
         :return
@@ -155,3 +162,15 @@ def smallest_peer(self, predicate):
             )
         except NoMatchError:
             return None
+
+    def swap_order(self, index, r):
+        if index == r:
+            return
+
+        p1 = self.peers[index]
+        p2 = self.peers[r]
+
+        (p1.order, p2.order) = (p2.order, p1.order)
+
+        heap.fix(self, p1.index)
+        heap.fix(self, p2.index)
diff --git a/tchannel/tornado/peer.py b/tchannel/tornado/peer.py
index ac03b729..99859d8b 100644
--- a/tchannel/tornado/peer.py
+++ b/tchannel/tornado/peer.py
@@ -668,7 +668,7 @@ def add(self, peer):
         )
         peer.rank = self.rank_calculator.get_rank(peer)
         self._peers[peer.hostport] = peer
-        self.peer_heap.push_peer(peer)
+        self.peer_heap.add_and_shuffle(peer)
 
     def _update_heap(self, peer):
         """Recalculate the peer's rank and update itself in the peer heap."""

From 159b0c54b95e11ce67ee7e3cce69c37369333a57 Mon Sep 17 00:00:00 2001
From: Junchao Wu <junchaow@uber.com>
Date: Tue, 29 Mar 2016 17:01:10 -0700
Subject: [PATCH 63/67] Fix naming issues

---
 benchmarks/test_concurrent_requests.py | 12 +++++-------
 tchannel/peer_heap.py                  |  8 ++++----
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/benchmarks/test_concurrent_requests.py b/benchmarks/test_concurrent_requests.py
index 09898702..e5950554 100644
--- a/benchmarks/test_concurrent_requests.py
+++ b/benchmarks/test_concurrent_requests.py
@@ -30,7 +30,7 @@
 client = None
 
 
-def setUpServer(num):
+def setup_server(num):
     servers = []
     for i in xrange(num):
         server = TChannel('server' + str(i))
@@ -40,12 +40,11 @@ def hello(request):
             return 'hello'
         server.listen()
         servers.append(server)
-     
     return servers
 
 
 @gen.coroutine
-def setUpClient(servers):
+def setup_client(servers):
     router = []
     for i in xrange(1000):
         router.append('1.1.1.1:'+str(i))
@@ -54,7 +53,6 @@ def setUpClient(servers):
     @client.raw.register
     def hello(request):
         return 'hello'
-        
     client.listen()
 
     for i in xrange(100):
@@ -71,7 +69,7 @@ def hello(request):
 
 @gen.coroutine
 def peer_test():
-    global servers, client
+    global client
     fs = []
     for _ in xrange(100):
         fs.append(client.raw(
@@ -90,8 +88,8 @@ def stress_test():
 @gen.coroutine
 def setup():
     global servers, client
-    servers = setUpServer(100)
-    client = yield setUpClient(servers)
+    servers = setup_server(100)
+    client = yield setup_client(servers)
 
 
 def test_peer_heap(benchmark):
diff --git a/tchannel/peer_heap.py b/tchannel/peer_heap.py
index e324d6dd..5793b17d 100644
--- a/tchannel/peer_heap.py
+++ b/tchannel/peer_heap.py
@@ -163,12 +163,12 @@ def smallest_peer(self, predicate):
         except NoMatchError:
             return None
 
-    def swap_order(self, index, r):
-        if index == r:
+    def swap_order(self, index1, index2):
+        if index1 == index2:
             return
 
-        p1 = self.peers[index]
-        p2 = self.peers[r]
+        p1 = self.peers[index1]
+        p2 = self.peers[index2]
 
         (p1.order, p2.order) = (p2.order, p1.order)
 

From 1143edf5bde723c66dbceb259b82a29fa87cb3b7 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Wed, 30 Mar 2016 10:17:19 -0700
Subject: [PATCH 64/67] connection: simply pending outbound count tracking

We don't need to maintain references to the requests and responses inside a
dictionary. We can simply track the count inside a single integer.
---
 tchannel/tornado/connection.py | 45 +++++++++-------------------------
 tests/tornado/test_peer.py     | 18 ++++++--------
 2 files changed, 20 insertions(+), 43 deletions(-)

diff --git a/tchannel/tornado/connection.py b/tchannel/tornado/connection.py
index 9383d13a..4da018e5 100644
--- a/tchannel/tornado/connection.py
+++ b/tchannel/tornado/connection.py
@@ -124,11 +124,8 @@ def __init__(self, connection, tchannel=None, direction=None):
         # Map from message ID to futures for responses of outgoing calls.
         self._outbound_pending_call = {}
 
-        # Map from message ID to outgoing request that is being sent.
-        self._outbound_pending_req = {}
-
-        # Map from message ID to outgoing response that is being sent.
-        self._outbound_pending_res = {}
+        # Total number of pending outbound requests and responses.
+        self.total_outbound_pendings = 0
 
         # Collection of request IDs known to have timed out.
         self._request_tombstones = Cemetery()
@@ -148,13 +145,6 @@ def __init__(self, connection, tchannel=None, direction=None):
 
         connection.set_close_callback(self._on_close)
 
-    @property
-    def total_outbound_pendings(self):
-        """Returns the total number of pending outbound requests and
-        responses."""
-        return (len(self._outbound_pending_res) +
-                len(self._outbound_pending_req))
-
     def set_outbound_pending_change_callback(self, cb):
         """Specify a function to be called when outbound pending request or
         response list changed.
@@ -494,26 +484,15 @@ def ping(self):
     def pong(self):
         return self.writer.put(messages.PingResponseMessage())
 
-    def _outbound_pending_change(self):
-        """Called when there is a change in the outbound pending list."""
+    def add_pending_outbound(self):
+        self.total_outbound_pendings += 1
         if self._outbound_pending_change_cb:
             self._outbound_pending_change_cb()
 
-    def add_outbound_pending_res(self, res):
-        self._outbound_pending_res[res.id] = res
-        self._outbound_pending_change()
-
-    def add_outbound_pending_req(self, req):
-        self._outbound_pending_req[req.id] = req
-        self._outbound_pending_change()
-
-    def remove_outbound_pending_res(self, res):
-        self._outbound_pending_res.pop(res.id, None)
-        self._outbound_pending_change()
-
-    def remove_outbound_pending_req(self, req):
-        self._outbound_pending_req.pop(req.id, None)
-        self._outbound_pending_change()
+    def remove_pending_outbound(self):
+        self.total_outbound_pendings -= 1
+        if self._outbound_pending_change_cb:
+            self._outbound_pending_change_cb()
 
 
 class StreamConnection(TornadoConnection):
@@ -587,7 +566,7 @@ def _stream(self, context, message_factory):
     @tornado.gen.coroutine
     def post_response(self, response):
         try:
-            self.add_outbound_pending_res(response)
+            self.add_pending_outbound()
             # TODO: before_send_response
             yield self._stream(response, self.response_message_factory)
 
@@ -597,7 +576,7 @@ def post_response(self, response):
                 response,
             )
         finally:
-            self.remove_outbound_pending_res(response)
+            self.remove_pending_outbound()
             response.close_argstreams(force=True)
 
     def stream_request(self, request):
@@ -630,9 +609,9 @@ def send_request(self, request):
 
         future = tornado.gen.Future()
         self._outbound_pending_call[request.id] = future
-        self.add_outbound_pending_req(request)
+        self.add_pending_outbound()
         self.stream_request(request).add_done_callback(
-            lambda f: self.remove_outbound_pending_req(request)
+            lambda f: self.remove_pending_outbound()
         )
 
         if request.ttl:
diff --git a/tests/tornado/test_peer.py b/tests/tornado/test_peer.py
index 828bb69a..c13f5501 100644
--- a/tests/tornado/test_peer.py
+++ b/tests/tornado/test_peer.py
@@ -31,8 +31,6 @@
 from tchannel import TChannel
 from tchannel.errors import NoAvailablePeerError
 from tchannel.tornado import peer as tpeer
-from tchannel.tornado import Request
-from tchannel.tornado import Response
 from tchannel.tornado.connection import TornadoConnection
 from tchannel.tornado.peer import Peer
 from tchannel.tornado.stream import InMemStream
@@ -252,13 +250,13 @@ def outbound_pending_change_callback():
         outbound_pending_change_callback
     )
 
-    connection.add_outbound_pending_req(Request())
+    connection.add_pending_outbound()
     assert c[0] == 1
-    connection.add_outbound_pending_res(Response())
+    connection.add_pending_outbound()
     assert c[0] == 2
-    connection.remove_outbound_pending_req(Request())
+    connection.remove_pending_outbound()
     assert c[0] == 3
-    connection.remove_outbound_pending_res(Response())
+    connection.remove_pending_outbound()
     assert c[0] == 4
 
 
@@ -276,13 +274,13 @@ def conn_change_db(peer):
 
     peer.set_on_conn_change_callback(conn_change_db)
 
-    connection.add_outbound_pending_req(Request())
+    connection.add_pending_outbound()
     assert b[0] == 1
-    connection.add_outbound_pending_res(Response())
+    connection.add_pending_outbound()
     assert b[0] == 2
-    connection.remove_outbound_pending_req(Request())
+    connection.remove_pending_outbound()
     assert b[0] == 3
-    connection.remove_outbound_pending_res(Response())
+    connection.remove_pending_outbound()
     assert b[0] == 4
 
 

From 02f17928ff42027b9ab36a33b29a3d7825840277 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Wed, 30 Mar 2016 10:37:36 -0700
Subject: [PATCH 65/67] peerheap: make rank selection slighly cheaper

We shouldn't be allocating lists from deques each time we need to recalculate
the rank.
---
 tchannel/peer_strategy.py |  2 +-
 tchannel/tornado/peer.py  | 42 +++++++++++++++++----------------------
 2 files changed, 19 insertions(+), 25 deletions(-)

diff --git a/tchannel/peer_strategy.py b/tchannel/peer_strategy.py
index 5e6042e7..23ea4545 100644
--- a/tchannel/peer_strategy.py
+++ b/tchannel/peer_strategy.py
@@ -54,7 +54,7 @@ def get_rank(self, peer):
         if peer.is_ephemeral or not peer.connections:
             return self.TIERS[0]
 
-        if not peer.incoming_connections:
+        if not peer.has_incoming_connections:
             return self.TIERS[1] + peer.total_outbound_pendings
 
         return self.TIERS[2] + peer.total_outbound_pendings
diff --git a/tchannel/tornado/peer.py b/tchannel/tornado/peer.py
index 99859d8b..9b768f99 100644
--- a/tchannel/tornado/peer.py
+++ b/tchannel/tornado/peer.py
@@ -68,8 +68,8 @@ class Peer(object):
         'order',
         'chosen_count',
         'on_conn_change',
+        'connections',
 
-        '_connections',
         '_connecting',
         '_on_conn_change_cb',
     )
@@ -88,7 +88,8 @@ def __init__(self, tchannel, hostport, rank=None, on_conn_change=None):
             Host-port this Peer is for.
         :param rank:
             The rank of a peer will affect the chance that the peer gets
-            selected when the client sends outbound requests.
+            selected when the client sends outbound requests. Lower rank is
+            better.
         :param on_conn_change:
             A callback method takes Peer object as input and is called whenever
             there are connection changes in the peer.
@@ -102,7 +103,7 @@ def __init__(self, tchannel, hostport, rank=None, on_conn_change=None):
         #: Collection of all connections for this Peer. Incoming connections
         #: are added to the left side of the deque and outgoing connections to
         #: the right side.
-        self._connections = deque()
+        self.connections = deque()
 
         # This contains a future to the TornadoConnection if we're already in
         # the process of making an outgoing connection to the peer. This
@@ -139,10 +140,10 @@ def connect(self):
             A future containing a connection to this host.
         """
         # Prefer incoming connections over outgoing connections.
-        if self._connections:
+        if self.connections:
             # First value is an incoming connection
             future = gen.Future()
-            future.set_result(self._connections[0])
+            future.set_result(self.connections[0])
             return future
 
         if self._connecting:
@@ -172,16 +173,20 @@ def on_connect(_):
     def _set_on_close_cb(self, conn):
 
         def on_close():
-            self._connections.remove(conn)
+            self.connections.remove(conn)
             self._on_conn_change()
 
         conn.set_close_callback(on_close)
 
+    @property
+    def has_incoming_connections(self):
+        return self.connections and self.connections[0].direction == INCOMING
+
     def register_outgoing_conn(self, conn):
         """Add outgoing connection into the heap."""
         assert conn, "conn is required"
         conn.set_outbound_pending_change_callback(self._on_conn_change)
-        self._connections.append(conn)
+        self.connections.append(conn)
         self._set_on_close_cb(conn)
         self._on_conn_change()
 
@@ -189,7 +194,7 @@ def register_incoming_conn(self, conn):
         """Add incoming connection into the heap."""
         assert conn, "conn is required"
         conn.set_outbound_pending_change_callback(self._on_conn_change)
-        self._connections.appendleft(conn)
+        self.connections.appendleft(conn)
         self._set_on_close_cb(conn)
         self._on_conn_change()
 
@@ -203,20 +208,13 @@ def hostport(self):
         """The host-port this Peer is for."""
         return "%s:%d" % (self.host, self.port)
 
-    @property
-    def connections(self):
-        """Returns an iterator over all connections for this peer.
-
-        Incoming connections are listed first."""
-        return list(self._connections)
-
     @property
     def outgoing_connections(self):
         """Returns a list of all outgoing connections for this peer."""
 
         # Outgoing connections are on the right
         return list(
-            dropwhile(lambda c: c.direction != OUTGOING, self._connections)
+            dropwhile(lambda c: c.direction != OUTGOING, self.connections)
         )
 
     @property
@@ -225,17 +223,13 @@ def incoming_connections(self):
 
         # Incoming connections are on the left.
         return list(
-            takewhile(lambda c: c.direction == INCOMING, self._connections)
+            takewhile(lambda c: c.direction == INCOMING, self.connections)
         )
 
     @property
     def total_outbound_pendings(self):
         """Return the total number of out pending req/res among connections"""
-        num = 0
-        for con in self.connections:
-            num += con.total_outbound_pendings
-
-        return num
+        return sum(c.total_outbound_pendings for c in self.connections)
 
     @property
     def is_ephemeral(self):
@@ -246,10 +240,10 @@ def is_ephemeral(self):
     def connected(self):
         """Return True if this Peer is connected."""
 
-        return len(self._connections) > 0
+        return len(self.connections) > 0
 
     def close(self):
-        for connection in list(self._connections):
+        for connection in self.connections:
             # closing the connection will mutate the deque so create a copy
             connection.close()
 

From 9255fec01d68086405267d78d238f1271aa41f81 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Wed, 30 Mar 2016 11:13:01 -0700
Subject: [PATCH 66/67] Fix deque mutation during iteration

---
 tchannel/tornado/peer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tchannel/tornado/peer.py b/tchannel/tornado/peer.py
index 9b768f99..ced1d6e7 100644
--- a/tchannel/tornado/peer.py
+++ b/tchannel/tornado/peer.py
@@ -243,7 +243,7 @@ def connected(self):
         return len(self.connections) > 0
 
     def close(self):
-        for connection in self.connections:
+        for connection in list(self.connections):
             # closing the connection will mutate the deque so create a copy
             connection.close()
 

From c14a1e2255a67236c40b20db9ac0c4ce395b5d6d Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Wed, 30 Mar 2016 11:17:37 -0700
Subject: [PATCH 67/67] Don't use globals in test_concurrent_requests.

---
 benchmarks/test_concurrent_requests.py | 44 ++++++++++++--------------
 1 file changed, 20 insertions(+), 24 deletions(-)

diff --git a/benchmarks/test_concurrent_requests.py b/benchmarks/test_concurrent_requests.py
index e5950554..e9296da3 100644
--- a/benchmarks/test_concurrent_requests.py
+++ b/benchmarks/test_concurrent_requests.py
@@ -22,41 +22,43 @@
     absolute_import, unicode_literals, print_function, division
 )
 
-from tornado import gen, ioloop
-from tchannel import TChannel
-
+from tornado import gen
+from tornado.ioloop import IOLoop
 
-servers = None
-client = None
+from tchannel import TChannel
 
 
-def setup_server(num):
+def setup_servers(num):
     servers = []
+
     for i in xrange(num):
         server = TChannel('server' + str(i))
 
         @server.raw.register
         def hello(request):
             return 'hello'
+
         server.listen()
         servers.append(server)
+
     return servers
 
 
 @gen.coroutine
 def setup_client(servers):
-    router = []
+    known_peers = []
     for i in xrange(1000):
-        router.append('1.1.1.1:'+str(i))
-    client = TChannel('client', known_peers=router)
+        known_peers.append('1.1.1.1:'+str(i))
+    client = TChannel('client', known_peers=known_peers)
+    # Add a bunch of unconnected peers
 
     @client.raw.register
     def hello(request):
         return 'hello'
     client.listen()
 
-    for i in xrange(100):
-        server = servers[i]
+    # Open incoming connection from the server to the client.
+    for server in servers:
         yield server.raw(
             service='server',
             endpoint='hello',
@@ -68,8 +70,7 @@ def hello(request):
 
 
 @gen.coroutine
-def peer_test():
-    global client
+def peer_test(client):
     fs = []
     for _ in xrange(100):
         fs.append(client.raw(
@@ -81,17 +82,12 @@ def peer_test():
     yield fs
 
 
-def stress_test():
-    ioloop.IOLoop.current().run_sync(peer_test)
-
-
-@gen.coroutine
-def setup():
-    global servers, client
-    servers = setup_server(100)
-    client = yield setup_client(servers)
+def stress_test(client):
+    IOLoop.current().run_sync(lambda: peer_test(client))
 
 
 def test_peer_heap(benchmark):
-    ioloop.IOLoop.current().run_sync(setup)
-    benchmark(stress_test)
+    servers = setup_servers(100)
+    client = IOLoop.current().run_sync(lambda: setup_client(servers))
+
+    benchmark(stress_test, client)