In [1]:
import pyarrow as pa
import pyarrow.parquet as pq
import pyarrow.flight as flight
import numpy as np
import pandas as pd
import time
import threading

In [2]:
class DemoServer(flight.FlightServerBase):
    """Flight server that keeps uploaded tables in an in-memory dict.

    Tables are stored by ``do_put`` under the descriptor's command and
    served back by ``do_get`` using the ticket as the lookup key. Two
    actions are exposed: 'list-tables' and 'drop-table'.
    """

    def __init__(self):
        # NOTE(review): the base-class initializer is not invoked here; the
        # notebook calls ``server.init(location)`` separately. Confirm this
        # matches the installed pyarrow version.
        self._cache = {}

    def list_actions(self, context):
        """Describe the actions this server understands."""
        return [flight.ActionType('list-tables', 'List stored tables'),
                flight.ActionType('drop-table', 'Drop a stored table')]

    # -----------------------------------------------------------------
    # Implement actions

    def do_action(self, context, action):
        """Dispatch ``action`` to its handler and return the handler's results.

        Raises:
            NotImplementedError: if ``action.type`` is not a known action.
        """
        handlers = {
            'list-tables': self._list_tables,
            'drop-table': self._drop_table
        }
        handler = handlers.get(action.type)
        if handler is None:
            raise NotImplementedError(
                'Unknown action type: {!r}'.format(action.type))
        # Reuse the handler already looked up instead of indexing again.
        return handler(action)

    def _drop_table(self, action):
        """Remove the table named by the action body from the cache.

        Raises:
            KeyError: if no table is stored under that name.
        """
        body = action.body
        # The cache is keyed by the raw bytes supplied in do_put/do_get, so a
        # buffer-like body must be converted to bytes before the lookup.
        key = body.to_pybytes() if hasattr(body, 'to_pybytes') else bytes(body)
        del self._cache[key]
        # Return an (empty) result iterator, like the other action handler.
        return iter([])

    def _list_tables(self, action):
        """Return one ``flight.Result`` per cached key, in sorted order."""
        return iter([flight.Result(cache_key)
                     for cache_key in sorted(self._cache.keys())])

    # -----------------------------------------------------------------
    # Implement puts

    def do_put(self, context, descriptor, reader, writer):
        """Read the whole uploaded stream and store it under the command."""
        self._cache[descriptor.command] = reader.read_all()

    # -----------------------------------------------------------------
    # Implement gets

    def do_get(self, context, ticket):
        """Stream back the table stored under ``ticket.ticket``.

        Raises:
            KeyError: if no table is stored under that key.
        """
        table = self._cache[ticket.ticket]
        return flight.RecordBatchStream(table)

In [3]:
import contextlib
import socket
def find_free_port():
    """Return a TCP port number that the OS reports as free right now.

    A temporary socket is bound to port 0 so the OS chooses the port. The
    socket is closed before returning, so the port is not reserved for the
    caller.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(('', 0))
        # NOTE(review): SO_REUSEADDR is set after bind(); confirm this
        # ordering is intended.
        probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        return probe.getsockname()[1]
    finally:
        probe.close()

def wait_for_available(client, timeout=5.0, poll_interval=0.025):
    """Block until ``client`` can reach its server.

    Repeatedly calls ``client.list_flights()``. A call that succeeds, or that
    fails with anything other than a 'Connect Failed' error, is taken to mean
    the server is reachable.

    Args:
        client: object exposing ``list_flights()``.
        timeout: seconds to keep retrying connection failures.
        poll_interval: seconds to sleep between attempts.

    Raises:
        Exception: the last 'Connect Failed' error once ``timeout`` elapses.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while True:
        try:
            list(client.list_flights())
        except Exception as e:
            if 'Connect Failed' not in str(e):
                # Deliberate: any other error still means the server answered.
                return
            if time.monotonic() >= deadline:
                raise
            time.sleep(poll_interval)
        else:
            return

In [4]:
# Let the OS pick an unused port and keep it in `port`, so the variable
# always matches the port embedded in `location`.
port = find_free_port()
location = flight.Location.for_grpc_tcp("localhost", port)
location

<Location b'grpc+tcp://localhost:36245'>

In [5]:
# Create the demo server and initialise it at the chosen location.
server = DemoServer()
server.init(location)

# Run the serving loop on a background thread; daemon=True means the thread
# does not block interpreter exit.
thread = threading.Thread(target=server.run, daemon=True)
thread.start()

# Connect a client and wait until the server responds.
client = flight.FlightClient.connect(location)
wait_for_available(client)

In [6]:
# Ask the server which actions it advertises.
client.list_actions()

[ActionType(type='list-tables', description='List stored tables'),
 ActionType(type='drop-table', description='Drop a stored table')]

In [7]:
def list_tables(client):
    """Return the table names reported by the server as a list of str.

    Sends a 'list-tables' action with an empty body and decodes the body of
    each result as UTF-8.
    """
    request = flight.Action('list-tables', b'')
    names = []
    for result in client.do_action(request):
        names.append(result.body.to_pybytes().decode('utf8'))
    return names

# TODO: add a drop_table helper that sends the 'drop-table' action.

# Show what the server currently holds before anything is uploaded.
list_tables(client)

[]

In [8]:
def cache_table_in_server(name, table):
    """Upload ``table`` to the server under ``name``.

    Uses the module-level ``client``. The name is UTF-8 encoded and sent as
    the descriptor command.

    Args:
        name: str key to store the table under.
        table: table to upload; its schema opens the put stream.
    """
    desc = flight.FlightDescriptor.for_command(name.encode('utf8'))
    # The metadata reader returned alongside the writer is not needed here.
    put_writer, _ = client.do_put(desc, table.schema)
    try:
        put_writer.write(table)
    finally:
        # Always close the stream, even if the write fails part-way.
        put_writer.close()
    
    
def get_table(name):
    """Fetch the table stored under ``name`` and return it fully read.

    Uses the module-level ``client``; the name is UTF-8 encoded into the
    ticket.
    """
    ticket = flight.Ticket(name.encode('utf8'))
    return client.do_get(ticket).read_all()

In [9]:
# Build a small single-column table ('f0') and upload it as 'table1'.
table = pa.table([pa.array([1,2,3,4,5])], names=['f0'])
cache_table_in_server('table1', table)

In [10]:
# List the stored tables again after the first upload.
list_tables(client)

['table1']

In [11]:
# Upload the same small table under three more names.
for extra_name in ('table2', 'table3', 'table4'):
    cache_table_in_server(extra_name, table)

In [12]:
# List the stored tables after the additional uploads.
list_tables(client)

['table1', 'table2', 'table3', 'table4']

In [13]:
# Fetch 'table1' back from the server and display it.
get_table('table1')

pyarrow.Table
f0: int64

In [14]:
# Load the Parquet file (path is relative to the working directory).
fec_table = pq.read_table('fec-2012.parquet')

In [15]:
# Concatenate ten copies of the table to get a larger payload.
# NOTE: this reassigns fec_table from itself, so re-running the cell
# multiplies the table by ten again; re-run the load cell first.
fec_table = pa.concat_tables([fec_table] * 10)

In [21]:
%%time
# Time the upload of the enlarged table to the server.
cache_table_in_server('fec_table', fec_table)

CPU times: user 471 ms, sys: 1.67 s, total: 2.14 s
Wall time: 1.38 s


In [22]:
# List the stored tables after uploading 'fec_table'.
list_tables(client)

['fec_table', 'table1', 'table2', 'table3', 'table4']

In [24]:
%%time 

# Time fetching 'fec_table' back from the server.
fec_table_received = get_table('fec_table')

CPU times: user 517 ms, sys: 1.69 s, total: 2.21 s
Wall time: 1.72 s
