Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 2581 -- support for postgresql json type #50

Merged
merged 6 commits into from Dec 17, 2013
@@ -14,6 +14,7 @@
DATE, BYTEA, BOOLEAN, INTERVAL, ARRAY, ENUM, dialect, array, Any, All
from .constraints import ExcludeConstraint
from .hstore import HSTORE, hstore
from .pgjson import JSON
from .ranges import INT4RANGE, INT8RANGE, NUMRANGE, DATERANGE, TSRANGE, \
TSTZRANGE

@@ -23,5 +24,5 @@
'DOUBLE_PRECISION', 'TIMESTAMP', 'TIME', 'DATE', 'BYTEA', 'BOOLEAN',
'INTERVAL', 'ARRAY', 'ENUM', 'dialect', 'Any', 'All', 'array', 'HSTORE',
'hstore', 'INT4RANGE', 'INT8RANGE', 'NUMRANGE', 'DATERANGE',
'TSRANGE', 'TSTZRANGE'
'TSRANGE', 'TSTZRANGE', 'json', 'JSON'
)
@@ -1187,6 +1187,9 @@ def visit_BIGINT(self, type_):
def visit_HSTORE(self, type_):
    """Return the literal type name ``HSTORE``; ``type_`` is not inspected."""
    return "HSTORE"

def visit_JSON(self, type_):
    """Return the literal type name ``JSON``; ``type_`` is not inspected."""
    return "JSON"

def visit_INT4RANGE(self, type_):
    """Return the literal type name ``INT4RANGE``; ``type_`` is not inspected."""
    return "INT4RANGE"

@@ -0,0 +1,128 @@
# postgresql/pgjson.py
# Copyright (C) 2005-2013 the SQLAlchemy authors and contributors <see AUTHORS file>
#
# This module is part of SQLAlchemy and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php

import json

from .base import ARRAY, ischema_names
from ... import types as sqltypes
from ...sql import functions as sqlfunc
from ...sql.operators import custom_op
from ... import util

__all__ = ('JSON', 'json')


class JSON(sqltypes.TypeEngine):
"""Represent the Postgresql JSON type.
The :class:`.JSON` type stores arbitrary JSON format data, e.g.::
data_table = Table('data_table', metadata,
Column('id', Integer, primary_key=True),
Column('data', JSON)
)
with engine.connect() as conn:
conn.execute(
data_table.insert(),
data = {"key1": "value1", "key2": "value2"}
)
:class:`.JSON` provides several operations:
* Index operations::
data_table.c.data['some key']
* Index operations returning text (required for text comparison or casting)::
data_table.c.data.get_item_as_text('some key') == 'some value'
* Path index operations::
data_table.c.data.get_path("{key_1, key_2, ..., key_n}")
* Path index operations returning text (required for text comparison or casting)::
data_table.c.data.get_path_as_text("{key_1, key_2, ..., key_n}") == 'some value'
Please be aware that when used with the SQLAlchemy ORM, you will need to
replace the JSON object present on an attribute with a new object in order
for any changes to be properly persisted.
.. versionadded:: 0.9
"""

__visit_name__ = 'JSON'

def __init__(self, json_serializer=None, json_deserializer=None):
    """Configure the callables used to convert values to and from JSON.

    :param json_serializer: callable stored as ``self.json_serializer``;
     when omitted (or otherwise falsy) ``json.dumps`` is used.
    :param json_deserializer: callable stored as
     ``self.json_deserializer``; when omitted (or otherwise falsy)
     ``json.loads`` is used.
    """
    self.json_serializer = json_serializer or json.dumps
    self.json_deserializer = json_deserializer or json.loads

class comparator_factory(sqltypes.Concatenable.Comparator):
"""Define comparison operations for :class:`.JSON`."""

def __getitem__(self, other):
    """Text expression. Build ``<expr> -> other`` to get the value at a
    given key."""
    # The result is deliberately treated as text so that it can be cast,
    # compared against strings, and so on.
    #
    # The cost of that choice is that JSON structures cannot be
    # dereferenced more than one level deep this way; comparator support
    # for multi-level dereference is lacking in any case.
    arrow = self.expr.op('->', precedence=5)
    return arrow(other)

def get_item_as_text(self, other):
    """Text expression. Build ``<expr> ->> other`` to get the value at
    the given key as text.

    Use this form when the returned value needs to be cast to another
    type.
    """
    as_text = self.expr.op('->>', precedence=5)
    return as_text(other)

def get_path(self, other):

This comment has been minimized.

Copy link
@zzzeek

zzzeek Dec 17, 2013

Owner

I'm thinking of streamlining the operators here. Looking at http://www.postgresql.org/docs/9.3/static/functions-json.html, it seems like our operators are getitem and getarray, and both have the option to be "text".

so how about this:

json_col['some_element']    # get the element
json_col.astext['some_element']    # get the element as text
json_col[('a', 'b', 2)]  # get by path
json_col.astext[('a', 'b', 2)] # get by path as text

It also seems like these ops are only in PG 9.3... I'll add some qualifiers to the tests, and I also want to add some round trips for these. Going to install 9.3 now...

This comment has been minimized.

Copy link
@nathan-rice

nathan-rice Dec 17, 2013

Author Contributor

That is a much nicer interface, for sure.

What all are you planning on coding? I'm happy to code up all the stuff you mentioned, I just don't want to duplicate work.

This comment has been minimized.

Copy link
@zzzeek

zzzeek Dec 17, 2013

Owner

I think you're done! I'm just going to try to test this stuff out and familiarize myself with it, make that change, and add a few more tests. You've laid out all the groundwork here, so thanks!

"""Text expression. Get the value at a given path. Paths are of
the form {key_1, key_2, ..., key_n}."""
return self.expr.op('#>', precedence=5)(other)

def get_path_as_text(self, other):
    """Text expression. Build ``<expr> #>> other`` to get the value at a
    given path, as text.

    Paths are of the form {key_1, key_2, ..., key_n}. Use this form when
    the returned value needs to be cast to another type.
    """
    path_as_text = self.expr.op('#>>', precedence=5)
    return path_as_text(other)

def _adapt_expression(self, op, other_comparator):
    """Choose the operator and result type for an expression on this type.

    A ``custom_op`` whose operator string is ``->`` is returned unchanged
    and paired with ``sqltypes.Text``; every other operator is delegated
    to ``sqltypes.Concatenable.Comparator._adapt_expression``.
    """
    is_arrow_op = isinstance(op, custom_op) and op.opstring == '->'
    if is_arrow_op:
        return op, sqltypes.Text
    base_comparator = sqltypes.Concatenable.Comparator
    return base_comparator._adapt_expression(self, op, other_comparator)

def bind_processor(self, dialect):
    """Return a callable that serializes a bound value to JSON.

    The returned callable passes the value through
    ``self.json_serializer``. When ``util.py2k`` is true, the serialized
    string is additionally encoded with ``dialect.encoding``.
    """
    if not util.py2k:
        def process(value):
            return self.json_serializer(value)
        return process

    # Read the encoding once, when the processor is built.
    encoding = dialect.encoding

    def process(value):
        serialized = self.json_serializer(value)
        return serialized.encode(encoding)
    return process

def result_processor(self, dialect, coltype):
    """Return a callable that deserializes a fetched JSON value.

    The returned callable passes the value through
    ``self.json_deserializer``. When ``util.py2k`` is true, the value is
    first decoded with ``dialect.encoding``.

    A value of ``None`` is returned unchanged instead of being handed to
    the deserializer: ``None.decode(...)`` raises ``AttributeError`` and
    ``json.loads(None)`` raises ``TypeError``.

    :param dialect: dialect in use; only ``dialect.encoding`` is read,
     and only when ``util.py2k`` is true.
    :param coltype: not used by this implementation.
    """
    if util.py2k:
        # Read the encoding once, when the processor is built.
        encoding = dialect.encoding

        def process(value):
            if value is None:
                return None
            return self.json_deserializer(value.decode(encoding))
    else:
        def process(value):
            if value is None:
                return None
            return self.json_deserializer(value)
    return process

This comment has been minimized.

Copy link
@zzzeek

This comment has been minimized.

Copy link
@nathan-rice

nathan-rice Dec 9, 2013

Author Contributor

Thanks for this note. It turns out that psycopg2's json support is pretty recent (up to date ubuntu packages don't have it), and it is on by default, so this code will actually error tests if psycopg2 is up to date. I'll see what I can do about this.

This comment has been minimized.

Copy link
@nathan-rice

nathan-rice Dec 9, 2013

Author Contributor

There is a problem having a use_native_json kwarg. Specifically, in order to not use native json with psycopg2 >= 2.5, you have to monkey-patch the module. I could check on dialect init to see if the module has been monkey-patched and revert if necessary, but I think it would be better to just pass on letting the user turn off json processing.

This comment has been minimized.

Copy link
@zzzeek

zzzeek Dec 10, 2013

Owner

That's not a problem; we don't need a "use_native_json" kwarg then - just use psycopg2's facility when it is detected (though we have to figure out a decent way to test the non-psycopg2 version, considering that the tests usually run just on psycopg2...)

This comment has been minimized.

Copy link
@zzzeek

zzzeek Dec 10, 2013

Owner

OK, we can maybe test it if we detect psycopg2 using their Json object with a null-dumper, but don't worry about that part; that's kind of intricate and I'll figure out how that might work. If we can just get tests to pass for now, that's enough for the PR.



ischema_names['json'] = JSON
@@ -179,6 +179,7 @@
ENUM, ARRAY, _DECIMAL_TYPES, _FLOAT_TYPES,\
_INT_TYPES
from .hstore import HSTORE
from .pgjson import JSON


logger = logging.getLogger('sqlalchemy.dialects.postgresql')
@@ -243,6 +244,17 @@ def result_processor(self, dialect, coltype):
else:
return super(_PGHStore, self).result_processor(dialect, coltype)


class _PGJSON(JSON):
    # No bind processor is defined here: the method of serializing involves
    # registering specific types to auto-serialize, and the adapter is just
    # a thin wrapper over json.dumps.
    def result_processor(self, dialect, coltype):
        # Without native JSON support, fall back to the deserializing
        # processor provided by the base JSON type.
        if not dialect._has_native_json:
            return super(_PGJSON, self).result_processor(dialect, coltype)
        # With native support no processor is returned; presumably the
        # driver already yields deserialized values -- confirm against
        # the psycopg2 documentation.
        return None

# When we're handed literal SQL, ensure it's a SELECT-query. Since
# 8.3, combining cursors and "FOR UPDATE" has been fine.
SERVER_SIDE_CURSOR_RE = re.compile(
@@ -327,6 +339,7 @@ class PGDialect_psycopg2(PGDialect):
psycopg2_version = (0, 0)

_has_native_hstore = False
_has_native_json = False

colspecs = util.update_copy(
PGDialect.colspecs,
@@ -336,6 +349,7 @@ class PGDialect_psycopg2(PGDialect):
sqltypes.Enum: _PGEnum, # needs force_unicode
ARRAY: _PGArray, # needs force_unicode
HSTORE: _PGHStore,
JSON: _PGJSON
}
)

@@ -363,6 +377,7 @@ def initialize(self, connection):
self._has_native_hstore = self.use_native_hstore and \
self._hstore_oids(connection.connection) \
is not None
self._has_native_json = self.psycopg2_version >= (2, 5)

This comment has been minimized.

Copy link
@zzzeek

zzzeek Dec 17, 2013

Owner

nailed it! nice.


@classmethod
def dbapi(cls):
Oops, something went wrong.
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.