initial commit

zzzeek · Sep 24, 2012 · c49e406 · c49e406
commit c49e406
Show file tree

Hide file tree

Showing 11 changed files with 581 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,10 @@
+*.pyc
+*.swp
+*.orig
+build
+tmp
+dist
+.venv
+test*.py
+akiban.egg-info/
+.coverage
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1,9 @@
+recursive-include docs *.html *.css *.txt *.js *.jpg *.png *.py Makefile *.rst *.sty
+recursive-include tests *.py *.dat
+recursive-include akiban *.py *.dat
+
+include README* LICENSE CHANGES* test.cfg
+
+prune docs/build/output
+
+
diff --git a/README.rst b/README.rst
@@ -0,0 +1,109 @@
+Akiban for Python provides a DBAPI compatibility layer for
+`Akiban Server <http://www.akiban.com/>`_.
+
+Akiban Server is a new database engine that is similar in many ways to
+well known engines like Postgresql and MySQL.   However, it introduces
+some new twists on SQL, including the ability to render "nested" result
+sets using plain SQL.
+
+Akiban Server uses a database protocol that is compatible with
+Postgresql.   Any `DBAPI <http://www.python.org/dev/peps/pep-0249/>`_
+written for Postgresql can also work with Akiban
+Server directly.  What Akiban for Python provides is a wrapper around
+these DBAPIs so that Akiban's "nested" result system can be used
+transparently, meaning any result row can contain columns which themselves
+contain "sub-cursors".
+
+So far, Akiban for Python implements one extension module for
+the `psycopg2 <http://pypi.python.org/pypi/psycopg2/>`_ DBAPI for Postgresql.
+Psycopg2 is the most widely used DBAPI for Postgresql, is extremely
+performant and stable and supports Python 3.
+
+Usage of Akiban for Python is extremely simple.   When using psycopg2,
+the plugin is enabled as a **connection factory** for psycopg2::
+
+  >>> from akiban.psycopg2 import Connection
+  >>> import psycopg2
+
+  >>> connection = psycopg2.connect(host="localhost", port=15432,
+  ...                  connection_factory=Connection)
+
+The connection above is in every way an ordinary psycopg2 connection object.
+Its special behavior becomes apparent when using Akiban's **nested result set**
+capability::
+
+  >>> cursor = connection.cursor()
+  >>> cursor.execute("""
+  ...       select customers.customer_id, customers.name,
+  ...            (select orders.order_id, orders.order_info,
+  ...                 (select items.item_id, items.price, items.quantity
+  ...                 from items
+  ...                 where items.order_id = orders.order_id and
+  ...                 orders.customer_id = customers.customer_id) as items
+  ...             from orders
+  ...             where orders.customer_id = customers.customer_id) as orders
+  ...       from customers
+  ...     """)
+
+Above, we've selected from a table ``customers``, including a nested
+result set for ``orders``.  Within ``orders``, we have another
+nested result set for ``items``. Inspecting ``cursor.description``, we
+see the three outermost columns; each is represented normally except for
+``orders``, which has the special typecode ``NESTED_CURSOR``::
+
+  >>> cursor.description
+  [(u'customer_id', <psycopg2._psycopg.type 'INTEGER' at 0x10060a368>, None, None, None, None, None), (u'name', <psycopg2._psycopg.type 'STRING' at 0x10060a4c8>, None, None, None, None, None), (u'orders', <object object at 0x1002af0c0>, None, None, None, None, None)]
+
+If we fetch the first row, it looks mostly normal except for one column that contains a "nested cursor"::
+
+  >>> row = cursor.fetchone()
+  >>> row
+  (1, 'David McFarlane', <akiban.api.NestedCursor object at 0x10068e050>)
+
+Looking at the ``orders`` column, we can see that the value is itself a cursor, with its own ``.description``::
+
+  >>> subcursor = row[2]
+  >>> subcursor.description
+  [(u'order_id', <psycopg2._psycopg.type 'INTEGER' at 0x10060a368>, None, None, None, None, None), (u'order_info', <psycopg2._psycopg.type 'STRING' at 0x10060a4c8>, None, None, None, None, None), (u'items', <object object at 0x1002af0c0>, None, None, None, None, None)]
+
+Fetching a row from this cursor, we see it has its own nested data::
+
+  >>> subrow = subcursor.fetchone()
+  >>> subrow
+  (101, 'apple related', <akiban.api.NestedCursor object at 0x10068e0d0>)
+
+Continuing the process, we can see that the ``items`` column of this row contains another nested cursor::
+
+  >>> subsubcursor = subrow[2]
+  >>> subsubcursor.description
+  [(u'item_id', <psycopg2._psycopg.type 'INTEGER' at 0x10060a368>, None, None, None, None, None), (u'price', <psycopg2._psycopg.type 'DECIMAL' at 0x10060a418>, None, None, None, None, None), (u'quantity', <psycopg2._psycopg.type 'INTEGER' at 0x10060a368>, None, None, None, None, None)]
+
+We can also access all levels of ".description" in one step from the
+lead result, using the extension ".akiban_description".  This is
+basically the same structure as that of ``cursor.description``, except
+it produces 8-tuples, instead of 7-tuples.  The eighth member of the
+tuple contains the sub-description, if any::
+
+  >>> cursor.akiban_description
+  [(u'customer_id', <psycopg2._psycopg.type 'INTEGER' at 0x10068a3c0>, None, None, None, None, None, None), (u'name', <psycopg2._psycopg.type 'STRING' at 0x10068a520>, None, None, None, None, None, None), (u'orders', <object object at 0x1002af0c0>, None, None, None, None, None, [(u'order_id', <psycopg2._psycopg.type 'INTEGER' at 0x10068a3c0>, None, None, None, None, None, None), (u'order_info', <psycopg2._psycopg.type 'STRING' at 0x10068a520>, None, None, None, None, None, None), (u'items', <object object at 0x1002af0c0>, None, None, None, None, None, [(u'item_id', <psycopg2._psycopg.type 'INTEGER' at 0x10068a3c0>, None, None, None, None, None, None), (u'price', <psycopg2._psycopg.type 'DECIMAL' at 0x10068a470>, None, None, None, None, None, None), (u'quantity', <psycopg2._psycopg.type 'INTEGER' at 0x10068a3c0>, None, None, None, None, None, None)])])]
+
+All those descriptions are nice, but how do we just get all those rows
+back?   We need to recursively descend through the nested cursors.
+The code below illustrates one way to do this::
+
+  from akiban import NESTED_CURSOR
+
+  def printrows(cursor, indent=""):
+      # Print the plain columns of every row on one line, then recurse
+      # into each nested cursor found in that row with a deeper indent.
+      for row in cursor.fetchall():
+          nested = []
+          out = ""
+          for field, col in zip(cursor.description, row):
+              # field[1] is the typecode slot of the description tuple.
+              if field[1] == NESTED_CURSOR:
+                  # Defer nested cursors until this row's line is printed.
+                  nested.append((field[0], col, indent))
+              else:
+                  out += " " + str(col)
+          print indent + out
+          # Each stored indent equals the current one, so rebinding
+          # ``indent`` here does not change it.
+          for key, values, indent in nested:
+              printrows(values, "%s    %s: " % (indent, key))
+
+
diff --git a/akiban/__init__.py b/akiban/__init__.py
@@ -0,0 +1,3 @@
+# Package version string.
+__version__ = '0.9'
+
+# Re-export the typecode sentinel so ``from akiban import NESTED_CURSOR`` works.
+from .api import NESTED_CURSOR
diff --git a/akiban/api.py b/akiban/api.py
@@ -0,0 +1,35 @@
+import collections
+
+# Unique sentinel object; the README describes it as the typecode that
+# ``cursor.description`` reports for columns holding a nested cursor.
+NESTED_CURSOR = object()
+
+
+class NestedCursor(object):
+
+    def __init__(self, ctx, arraysize, fields, description_factory):
+        self.ctx = ctx
+        self._fields = fields
+        self._description_factory = description_factory
+        self._rows = collections.deque()
+        self.arraysize = arraysize
+
+    @property
+    def description(self):
+        return self._description_factory(self._fields)
+
+    def fetchone(self):
+        if self._rows:
+            return self._rows.popleft()
+        else:
+            return None
+
+    def fetchall(self):
+        r = list(self._rows)
+        self._rows.clear()
+        return r
+
+    def fetchmany(self, size=None):
+        if size is None:
+            size = self.arraysize
+        l = list(self._rows)
+        r, self._rows = l[0:size], collections.deque(l[size:])
+        return r
diff --git a/akiban/impl.py b/akiban/impl.py
@@ -0,0 +1,83 @@
+import json
+from .api import NestedCursor
+
+# Shared decoder instance reused for every JSON row document.
+json_decoder = json.JSONDecoder()
+
+# ``type_oid`` value that _format_fields assigns to records carrying nested
+# columns, and that _format_row checks to decide on building a NestedCursor.
+# NOTE(review): whether 5001 is dictated by the server is not visible here.
+_NESTED_OID = 5001
+
+class AkibanResultContext(object):
+
+    def gen_description(self, fields):  # pragma: no cover
+        raise NotImplementedError()
+
+    def typecast(self, value, oid):  # pragma: no cover
+        raise NotImplementedError()
+
+    def __init__(self, cursor, firstrow):
+        self.cursor = cursor
+        self.fields = _fields_from_row(firstrow)
+
+    @property
+    def arraysize(self):
+        return self.cursor.arraysize
+
+def _fields_from_row(row):
+    document = json_decoder.decode(row[0])
+    return _format_fields(document)
+
+def _filter_row(row, ctx):
+    if row is None:
+        return None
+    document = json_decoder.decode(row[0])
+    return _format_row(document, ctx.fields, ctx)
+
+def _create_rowset(document, fields, ctx):
+    return [
+        _format_row(row, fields, ctx)
+        for row in document
+    ]
+
+def _format_row(document, fields, ctx):
+    row = []
+    for field in fields:
+        if field['type_oid'] == _NESTED_OID:
+            value = NestedCursor(
+                        ctx,
+                        ctx.arraysize,
+                        field['akiban.fields'],
+                        ctx.gen_description
+            )
+            value._rows.extend(
+                _create_rowset(
+                    document[field['name']],
+                    field['akiban.fields'],
+                    ctx
+                )
+            )
+
+        else:
+            value = ctx.typecast(
+                            document[field['name']],
+                            field['type_oid']
+                        )
+        row.append(value)
+    return tuple(row)
+
+def _format_fields(document):
+    ret = []
+    for attrnum, rec in enumerate(document):
+        newrec = {
+            'table_oid': None,
+            'name': rec['name'],
+            'column_attrnum': attrnum,
+            'format': None,
+            'type_modifier': -1,
+            'type_size': -1
+        }
+        if 'columns' in rec:
+            newrec['type_oid'] = _NESTED_OID
+            newrec['akiban.fields'] = _format_fields(rec['columns'])
+        else:
+            newrec['type_oid'] = rec['oid']
+        ret.append(newrec)
+    return ret