In [None]:
import pyarrow as pa
import pyarrow.parquet as pq
import pyarrow.flight as flight
import numpy as np
import pandas as pd
import time
import threading

In [None]:
class DemoServer(flight.FlightServerBase):
    """Flight server that keeps uploaded tables in an in-memory dict.

    Tables are stored by ``do_put`` under the bytes of the descriptor's
    command, served back by ``do_get`` using the ticket bytes as the key,
    and managed through two custom actions: ``list-tables`` and
    ``drop-table``.
    """

    def __init__(self):
        # Maps table key (bytes taken from the put descriptor's command)
        # to the table read from the upload stream.
        # NOTE(review): the base-class initializer is not invoked here; this
        # notebook binds the server later via ``server.init(location)``.
        # Confirm this is still appropriate if the pyarrow version changes.
        self._cache = {}

    def list_actions(self, context):
        """Return the custom action types handled by ``do_action``."""
        return [flight.ActionType('list-tables', 'List stored tables'),
                flight.ActionType('drop-table', 'Drop a stored table')]

    # -----------------------------------------------------------------
    # Implement actions

    def do_action(self, context, action):
        """Dispatch ``action`` to its handler and return an iterator of results.

        Raises:
            NotImplementedError: if ``action.type`` is not a known action.
        """
        handlers = {
            'list-tables': self._list_tables,
            'drop-table': self._drop_table
        }
        handler = handlers.get(action.type)
        if handler is None:
            raise NotImplementedError(
                'Unknown action type: {!r}'.format(action.type))
        return handler(action)

    @staticmethod
    def _as_key(body):
        """Convert an action body into the ``bytes`` form used for cache keys."""
        # Bodies that expose to_pybytes() (as the result bodies consumed by
        # the client in this notebook do) are converted through it; anything
        # else is passed to bytes().
        to_pybytes = getattr(body, 'to_pybytes', None)
        return to_pybytes() if to_pybytes is not None else bytes(body)

    def _drop_table(self, action):
        """Remove the table named by the action body.

        Raises:
            KeyError: if no table is stored under that name.
        """
        # Cache keys are bytes, so the body must be converted before lookup;
        # indexing with the raw body object never matches a stored key.
        del self._cache[self._as_key(action.body)]
        # Return an (empty) iterator so every handler yields the same kind
        # of value from do_action.
        return iter([])

    def _list_tables(self, action):
        """Return one ``flight.Result`` per stored key, in sorted key order."""
        return iter([flight.Result(cache_key)
                     for cache_key in sorted(self._cache.keys())])

    # -----------------------------------------------------------------
    # Implement puts

    def do_put(self, context, descriptor, reader, writer):
        """Read the whole uploaded stream and store it under ``descriptor.command``.

        An existing entry with the same key is replaced.
        """
        self._cache[descriptor.command] = reader.read_all()

    # -----------------------------------------------------------------
    # Implement gets

    def do_get(self, context, ticket):
        """Stream back the table stored under ``ticket.ticket``.

        Raises:
            KeyError: if no table is stored under that key.
        """
        table = self._cache[ticket.ticket]
        return flight.RecordBatchStream(table)

In [None]:
import contextlib
import socket
def find_free_port():
    """Return a TCP port number that was free at the time of the call.

    A temporary socket is bound to port 0 so the OS picks an unused port;
    the socket is closed again before returning. Another process could
    claim the port between this call and the moment it is actually used.

    Returns:
        int: the port number chosen by the OS.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with contextlib.closing(sock):
        # SO_REUSEADDR only influences binding, so it has to be set before
        # bind(); setting it afterwards has no effect on this socket's bind.
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sock.bind(('', 0))
        return sock.getsockname()[1]

def wait_for_available(client, timeout=5.0, poll_interval=0.025):
    """Block until ``client`` can reach its server, or the timeout expires.

    The server is probed by calling ``client.list_flights()``. Only errors
    whose message contains ``'Connect Failed'`` count as "not up yet"; any
    other exception is ignored on purpose, because it means the server
    answered the request (even if only with an error).

    Args:
        client: object exposing ``list_flights()``.
        timeout: maximum number of seconds to keep retrying.
        poll_interval: seconds to sleep between connection attempts.

    Raises:
        Exception: the last ``'Connect Failed'`` error, once ``timeout``
            seconds have passed without a successful connection.
    """
    # A monotonic clock keeps the deadline unaffected by wall-clock changes.
    deadline = time.monotonic() + timeout
    while True:
        try:
            list(client.list_flights())
        except Exception as e:
            if 'Connect Failed' not in str(e):
                # The server responded with some other error: it is reachable.
                return
            if time.monotonic() >= deadline:
                raise
            time.sleep(poll_interval)
        else:
            return

In [None]:
# Use an OS-assigned free port and keep it in `port`, so the variable always
# matches the port that `location` actually points at.
port = find_free_port()
location = flight.Location.for_grpc_tcp("localhost", port)
location

In [None]:
# Create the demo server and bind it to the chosen location.
server = DemoServer()
server.init(location)

# Run the server on a daemon thread so this cell returns and the thread does
# not keep the process alive on shutdown.
thread = threading.Thread(daemon=True, target=server.run)
thread.start()

# Connect a client and wait until the server accepts connections.
client = flight.FlightClient.connect(location)
wait_for_available(client)

In [None]:
# Ask the server which custom actions it advertises.
client.list_actions()

In [None]:
def list_tables(client):
    """Return the names of the tables stored on the server.

    Sends the 'list-tables' action with an empty body and decodes the body
    of every result as UTF-8.

    Args:
        client: Flight client used to send the action.

    Returns:
        list of str, in the order the server returned them.
    """
    request = flight.Action('list-tables', b'')
    names = []
    for result in client.do_action(request):
        names.append(result.body.to_pybytes().decode('utf8'))
    return names

# TODO: add a drop_table(client, name) helper that sends the 'drop-table' action.

# Show the names of the tables currently stored on the server.
list_tables(client)

In [None]:
def cache_table_in_server(name, table):
    """Upload ``table`` to the server under ``name``.

    Uses the module-level ``client``. The name is UTF-8 encoded and sent as
    the command of the put descriptor.

    Args:
        name: str name to store the table under.
        table: table to upload; its ``schema`` is used to open the stream.
    """
    desc = flight.FlightDescriptor.for_command(name.encode('utf8'))
    # The metadata reader returned alongside the writer is not needed here.
    put_writer, _ = client.do_put(desc, table.schema)
    try:
        put_writer.write(table)
    finally:
        # Always close the upload stream, even when the write fails.
        put_writer.close()
    
    
def get_table(name):
    """Download the table stored under ``name`` and return it in full.

    Uses the module-level ``client``; the UTF-8 encoded name is sent as the
    ticket.
    """
    ticket = flight.Ticket(name.encode('utf8'))
    stream = client.do_get(ticket)
    return stream.read_all()

In [None]:
# Build a small one-column table (column 'f0' holding 1..5) and upload it
# to the server under the name 'table1'.
table = pa.table([pa.array([1,2,3,4,5])], names=['f0'])
cache_table_in_server('table1', table)

In [None]:
# List the stored tables again; 'table1' should now be included.
list_tables(client)

In [None]:
# Store the same small table under three more names.
for table_name in ('table2', 'table3', 'table4'):
    cache_table_in_server(table_name, table)

In [None]:
# List the stored tables; the server returns the names in sorted order.
list_tables(client)

In [None]:
# Download 'table1' back from the server and display it.
get_table('table1')

In [None]:
import os
import pandas as pd

# Location of the FEC contributions CSV. Set the FEC_CSV_PATH environment
# variable to point at a local copy; the default is the original author's
# path and will not exist on other machines.
FEC_CSV_PATH = os.environ.get(
    'FEC_CSV_PATH',
    '/home/wesm/code/pydata-book/datasets/fec/P00000001-ALL.csv')
fec = pd.read_csv(FEC_CSV_PATH)
fec.head()
def coerce_int(x):
    """Convert ``x`` to ``int``, returning -1 when it cannot be converted.

    Only conversion failures are mapped to -1: ``TypeError`` (e.g. ``None``),
    ``ValueError`` (e.g. non-numeric text or NaN) and ``OverflowError``
    (infinity). Anything else, such as ``KeyboardInterrupt``, propagates
    instead of being silently swallowed.
    """
    try:
        return int(x)
    except (TypeError, ValueError, OverflowError):
        return -1

# Replace the 'contbr_zip' column with int64 values: every entry is passed
# through coerce_int, so entries that cannot be parsed become -1.
# NOTE(review): presumably done so the column has one uniform type before
# the Arrow conversion — confirm against the raw data.
fec['contbr_zip'] = fec['contbr_zip'].map(coerce_int).astype(np.int64)

In [17]:
# Convert the pandas DataFrame into an Arrow table.
fec_table = pa.table(fec)

In [18]:
# Build a 10x larger table for the transfer timings below. It is derived from
# `fec` rather than from the previous `fec_table`, so re-running this cell
# does not multiply the size again.
fec_table = pa.concat_tables([pa.table(fec)] * 10)

In [19]:
%%time
# Upload the large table under the name 'fec_table'; the cell is timed.
cache_table_in_server('fec_table', fec_table)

CPU times: user 425 ms, sys: 1.13 s, total: 1.56 s
Wall time: 1.16 s


In [20]:
# List the stored tables; 'fec_table' should now appear with the others.
list_tables(client)

['fec_table', 'table1', 'table2', 'table3', 'table4']

In [21]:
%%time 

# Download the large table back from the server; the cell is timed.
fec_table_received = get_table('fec_table')

CPU times: user 404 ms, sys: 995 ms, total: 1.4 s
Wall time: 1.1 s
