Issue 2581 -- support for postgresql json type #50

Merged
6 commits merged on Dec 17, 2013
@@ -14,6 +14,7 @@
    DATE, BYTEA, BOOLEAN, INTERVAL, ARRAY, ENUM, dialect, array, Any, All
from .constraints import ExcludeConstraint
from .hstore import HSTORE, hstore
from .pgjson import JSON
from .ranges import INT4RANGE, INT8RANGE, NUMRANGE, DATERANGE, TSRANGE, \
    TSTZRANGE

@@ -23,5 +24,5 @@
    'DOUBLE_PRECISION', 'TIMESTAMP', 'TIME', 'DATE', 'BYTEA', 'BOOLEAN',
    'INTERVAL', 'ARRAY', 'ENUM', 'dialect', 'Any', 'All', 'array', 'HSTORE',
    'hstore', 'INT4RANGE', 'INT8RANGE', 'NUMRANGE', 'DATERANGE',
    'TSRANGE', 'TSTZRANGE'
    'TSRANGE', 'TSTZRANGE', 'json', 'JSON'
)
@@ -1187,6 +1187,9 @@ def visit_BIGINT(self, type_):
    def visit_HSTORE(self, type_):
        return "HSTORE"

    def visit_JSON(self, type_):
        return "JSON"

    def visit_INT4RANGE(self, type_):
        return "INT4RANGE"

@@ -0,0 +1,109 @@
# postgresql/json.py
# Copyright (C) 2005-2013 the SQLAlchemy authors and contributors <see AUTHORS file>
#
# This module is part of SQLAlchemy and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php

import json

from .base import ARRAY, ischema_names
from ... import types as sqltypes
from ...sql import functions as sqlfunc
from ...sql.operators import custom_op
from ... import util

__all__ = ('JSON', 'json')


class JSON(sqltypes.TypeEngine):
    """Represent the Postgresql HSTORE type.

zzzeek (Owner) commented on Dec 9, 2013:

change this to JSON ;)

    The :class:`.JSON` type stores arbitrary JSON format data, e.g.::

        data_table = Table('data_table', metadata,
            Column('id', Integer, primary_key=True),
            Column('data', JSON)
        )

        with engine.connect() as conn:
            conn.execute(
                data_table.insert(),
                data = {"key1": "value1", "key2": "value2"}
            )

    :class:`.JSON` provides two operations:

    * Index operations::

        data_table.c.data['some key'] == 'some value'

    * Path Index operations::

        data_table.c.data.get_path('{key_1, key_2, ..., key_n}')

    Please be aware that when used with the SQL Alchemy ORM, you will need to

zzzeek (Owner) commented on Dec 9, 2013:

SQLAlchemy, seems like a misbehaving spellchecker

    replace the JSON object present on an attribute with a new object in order
    for any changes to be properly persisted.

    .. versionadded:: 0.9

    """

    __visit_name__ = 'JSON'

    def __init__(self, json_serializer=None, json_deserializer=None):
        if json_serializer:
            self.json_serializer = json_serializer
        else:
            self.json_serializer = json.dumps
        if json_deserializer:
            self.json_deserializer = json_deserializer
        else:
            self.json_deserializer = json.loads

    class comparator_factory(sqltypes.Concatenable.Comparator):
        """Define comparison operations for :class:`.JSON`."""

        def __getitem__(self, other):
            """Text expression. Get the value at a given key."""
            # I'm choosing to return text here so the result can be cast,
            # compared with strings, etc.
            #
            # The only downside to this is that you cannot dereference more
            # than one level deep in json structures, though comparator
            # support for multi-level dereference is lacking anyhow.
            return self.expr.op('->>', precedence=5)(other)

        def get_path(self, other):

zzzeek (Owner) commented on Dec 17, 2013:

im thinking of streamlining the operators here. Looking at http://www.postgresql.org/docs/9.3/static/functions-json.html it seems like our operators are: getitem, getarray, and then both have the option to be "text".

so how about this:

    json_col['some_element']    # get the element
    json_col.astext['some_element']    # get the element as text
    json_col[('a', 'b', 2)]  # get by path
    json_col.astext[('a', 'b', 2)] # get by path as text

seems like also these ops are only in pg 9.3....ill add some qualifiers to the tests and I also want to add some round trips for these. going to install 9.3 now...

nathan-rice (Author, Contributor) commented on Dec 17, 2013:

That is a much nicer interface, for sure.

What all are you planning on coding? I'm happy to code up all the stuff you mentioned, I just don't want to duplicate work.

zzzeek (Owner) commented on Dec 17, 2013:

i think you're done! im just going to try to test this stuff out and familiarize, make that change, add a few more tests. you've laid out all the groundwork here so thanks!

"""Text expression. Get the value at a given path. Paths are of
the form {key_1, key_2, ..., key_n}."""
return self.expr.op('#>>', precedence=5)(other)
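
To make the interface zzzeek sketches in the comments above a bit more concrete, here is a rough, non-authoritative sketch of how astext and tuple-based path access could be layered onto a comparator. The names _AsTextWrapper and _ProposedComparator, and the operator precedences, are assumptions for illustration only; nothing below is part of this pull request.

    # Sketch only (not part of this diff); names here are invented.
    from sqlalchemy import types as sqltypes

    class _AsTextWrapper(object):
        """Proxy returned by ``col.astext``: same lookups, but via the
        text-returning operators ``->>`` and ``#>>``."""

        def __init__(self, expr):
            self.expr = expr

        def __getitem__(self, key):
            if isinstance(key, tuple):
                # json_col.astext[('a', 'b', 2)]  ->  col #>> '{a, b, 2}'
                path = '{%s}' % ', '.join(str(k) for k in key)
                return self.expr.op('#>>', precedence=5)(path)
            # json_col.astext['some_element']  ->  col ->> 'some_element'
            return self.expr.op('->>', precedence=5)(key)

    class _ProposedComparator(sqltypes.Concatenable.Comparator):
        """What comparator_factory might look like under the proposal."""

        def __getitem__(self, key):
            if isinstance(key, tuple):
                # json_col[('a', 'b', 2)]  ->  col #> '{a, b, 2}'
                path = '{%s}' % ', '.join(str(k) for k in key)
                return self.expr.op('#>', precedence=5)(path)
            # json_col['some_element']  ->  col -> 'some_element'
            return self.expr.op('->', precedence=5)(key)

        @property
        def astext(self):
            return _AsTextWrapper(self.expr)

Whether the text form stays the default (as __getitem__ above currently does with '->>') or becomes opt-in through astext is exactly the design question under discussion.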

        def _adapt_expression(self, op, other_comparator):
            if isinstance(op, custom_op):
                if op.opstring == '->':
                    return op, sqltypes.Text
            return sqltypes.Concatenable.Comparator.\
                _adapt_expression(self, op, other_comparator)

    def bind_processor(self, dialect):
        if util.py2k:
            encoding = dialect.encoding
            def process(value):
                return self.json_serializer(value).encode(encoding)
        else:
            def process(value):
                return self.json_serializer(value)
        return process

    def result_processor(self, dialect, coltype):
        if util.py2k:
            encoding = dialect.encoding
            def process(value):
                return self.json_deserializer(value.decode(encoding))
        else:
            def process(value):
                return self.json_deserializer(value)
        return process

zzzeek (Owner) left a comment here that has been minimized (content not shown).
nathan-rice (Author, Contributor) commented on Dec 9, 2013:

Thanks for this note. It turns out that psycopg2's json support is pretty recent (up to date ubuntu packages don't have it), and it is on by default, so this code will actually error tests if psycopg2 is up to date. I'll see what I can do about this.

nathan-rice (Author, Contributor) commented on Dec 9, 2013:

There is a problem having a use_native_json kwarg. Specifically, in order to not use native json with psycopg2 >= 2.5, you have to monkey-patch the module. I could check on dialect init to see if the module has been monkey-patched and revert if necessary, but I think it would be better to just pass on letting the user turn off json processing.

zzzeek (Owner) commented on Dec 10, 2013:

that's not a problem, we don't need a "use_native_json" kwarg then - just use psycopg2's facility when it is detected (though we have to figure out a decent way to test the non-psycopg2 version considering that the tests usually run just on psycopg2...)

zzzeek (Owner) commented on Dec 10, 2013:

ok we maybe can test it if we detect psycopg2 using their Json object with a null-dumper, but don't worry about that part, that's kind of intricate and I'll figure out how that might work. if we can just get tests to pass for now that's enough for the PR.
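
For background on this exchange: psycopg2 2.5+ registers a JSON result typecaster by default, and psycopg2.extras exposes hooks for adjusting it. The sketch below shows those hooks in isolation; how (or whether) SQLAlchemy wires them into the dialect is still open at this point in the review, and the function name disable_native_json_results is purely illustrative.

    # Sketch only: the psycopg2 facilities referenced above (psycopg2 >= 2.5).
    import json
    import psycopg2.extras

    def disable_native_json_results(conn):
        # Re-register the default json typecaster with a pass-through loads,
        # so the driver hands back raw strings and the dialect's own
        # json_deserializer (result_processor above) does the decoding.
        psycopg2.extras.register_default_json(conn, loads=lambda value: value)

    # On the bind side, psycopg2's Json adapter serializes Python objects;
    # a "null" dumps lets an already-serialized string pass through untouched,
    # which is the trick zzzeek mentions for exercising the non-native path.
    param = psycopg2.extras.Json('{"k1": "r1v1"}', dumps=lambda value: value)

Either way, what matters for this PR is only that the tests pass both with and without the native caster in play.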



ischema_names['json'] = JSON
@@ -15,7 +15,8 @@
from sqlalchemy import exc, schema, types
from sqlalchemy.dialects.postgresql import base as postgresql
from sqlalchemy.dialects.postgresql import HSTORE, hstore, array, \
    INT4RANGE, INT8RANGE, NUMRANGE, DATERANGE, TSRANGE, TSTZRANGE
    INT4RANGE, INT8RANGE, NUMRANGE, DATERANGE, TSRANGE, TSTZRANGE, \
    JSON
import decimal
from sqlalchemy import util
from sqlalchemy.testing.util import round_decimal
@@ -1651,3 +1652,157 @@ def _data_str(self):

    def _data_obj(self):
        return self.extras.DateTimeTZRange(*self.tstzs())


class JSONTest(fixtures.TestBase):
    def _assert_sql(self, construct, expected):
        dialect = postgresql.dialect()
        compiled = str(construct.compile(dialect=dialect))
        compiled = re.sub(r'\s+', ' ', compiled)
        expected = re.sub(r'\s+', ' ', expected)
        eq_(compiled, expected)

    def setup(self):
        metadata = MetaData()
        self.test_table = Table('test_table', metadata,
            Column('id', Integer, primary_key=True),
            Column('test_column', JSON)
        )
        self.jsoncol = self.test_table.c.test_column

    def _test_where(self, whereclause, expected):
        stmt = select([self.test_table]).where(whereclause)
        self._assert_sql(
            stmt,
            "SELECT test_table.id, test_table.test_column FROM test_table "
            "WHERE %s" % expected
        )

    def _test_cols(self, colclause, expected, from_=True):
        stmt = select([colclause])
        self._assert_sql(
            stmt,
            (
                "SELECT %s" +
                (" FROM test_table" if from_ else "")
            ) % expected
        )

    def test_bind_serialize_default(self):
        from sqlalchemy.engine import default

        dialect = default.DefaultDialect()
        proc = self.test_table.c.test_column.type._cached_bind_processor(dialect)
        eq_(
            proc({"A": [1, 2, 3, True, False]}),
            '{"A": [1, 2, 3, true, false]}'
        )

    def test_result_deserialize_default(self):
        from sqlalchemy.engine import default

        dialect = default.DefaultDialect()
        proc = self.test_table.c.test_column.type._cached_result_processor(
            dialect, None)
        eq_(
            proc('{"A": [1, 2, 3, true, false]}'),
            {"A": [1, 2, 3, True, False]}
        )

    # This test is a bit misleading -- in real life you will need to cast
    # to do anything (a cast-based sketch follows this class).
    def test_where_getitem(self):
        self._test_where(
            self.jsoncol['bar'] == None,
            "(test_table.test_column ->> %(test_column_1)s) IS NULL"
        )

    def test_where_path(self):
        self._test_where(
            self.jsoncol.get_path('{"foo", 1}') == None,
            "(test_table.test_column #>> %(test_column_1)s) IS NULL"
        )

    def test_cols_get(self):
        self._test_cols(
            self.jsoncol['foo'],
            "test_table.test_column ->> %(test_column_1)s AS anon_1",
            True
        )
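
As the comment on test_where_getitem notes, '->>' yields text, so comparing against non-string values generally requires a cast. A hedged sketch reusing the fixture above; the key name 'count', the helper name, and the test itself are illustrative, not part of the PR.

    # Sketch only: cast the text result of '->>' before a numeric comparison.
    from sqlalchemy import Integer, cast, select

    def _example_cast_criterion(test_table):
        jsoncol = test_table.c.test_column
        # Compiles to roughly:
        #   CAST(test_table.test_column ->> %(test_column_1)s AS INTEGER) > %(param_1)s
        return select([test_table]).where(
            cast(jsoncol['count'], Integer) > 5
        )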


class JSONRoundTripTest(fixtures.TablesTest):

zzzeek (Owner) commented on Dec 17, 2013:

great job on this

    __only_on__ = 'postgresql'

    @classmethod
    def define_tables(cls, metadata):
        Table('data_table', metadata,
            Column('id', Integer, primary_key=True),
            Column('name', String(30), nullable=False),
            Column('data', JSON)
        )

    def _fixture_data(self, engine):
        data_table = self.tables.data_table
        engine.execute(
            data_table.insert(),
            {'name': 'r1', 'data': {"k1": "r1v1", "k2": "r1v2"}},
            {'name': 'r2', 'data': {"k1": "r2v1", "k2": "r2v2"}},
            {'name': 'r3', 'data': {"k1": "r3v1", "k2": "r3v2"}},
            {'name': 'r4', 'data': {"k1": "r4v1", "k2": "r4v2"}},
            {'name': 'r5', 'data': {"k1": "r5v1", "k2": "r5v2"}},
        )

    def _assert_data(self, compare):
        data = testing.db.execute(
            select([self.tables.data_table.c.data]).
            order_by(self.tables.data_table.c.name)
        ).fetchall()
        eq_([d for d, in data], compare)

    def _test_insert(self, engine):
        engine.execute(
            self.tables.data_table.insert(),
            {'name': 'r1', 'data': {"k1": "r1v1", "k2": "r1v2"}}
        )
        self._assert_data([{"k1": "r1v1", "k2": "r1v2"}])

    def _non_native_engine(self):
        if testing.against("postgresql+psycopg2"):
            engine = engines.testing_engine(options=dict(use_native_hstore=False))

zzzeek (Owner) commented on Dec 9, 2013:

yeah this would have to be "use_native_json=False" once that flag is implemented

        else:
            engine = testing.db
        engine.connect()
        return engine
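
Per the review note above, once a use_native_json dialect flag exists this helper would presumably pass it instead of (or alongside) use_native_hstore. A hypothetical sketch only; the flag name comes from the comment and is not implemented at this point in the PR.

    # Hypothetical -- use_native_json is only proposed at this stage.
    def _non_native_engine(self):
        if testing.against("postgresql+psycopg2"):
            engine = engines.testing_engine(
                options=dict(use_native_json=False))
        else:
            engine = testing.db
        engine.connect()
        return engine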

    def test_reflect(self):
        from sqlalchemy import inspect
        insp = inspect(testing.db)
        cols = insp.get_columns('data_table')
        assert isinstance(cols[2]['type'], JSON)

    @testing.only_on("postgresql+psycopg2")
    def test_insert_native(self):
        engine = testing.db
        self._test_insert(engine)

    def test_insert_python(self):
        engine = self._non_native_engine()
        self._test_insert(engine)

    @testing.only_on("postgresql+psycopg2")
    def test_criterion_native(self):
        engine = testing.db
        self._fixture_data(engine)
        self._test_criterion(engine)

    def test_criterion_python(self):
        engine = self._non_native_engine()
        self._fixture_data(engine)
        self._test_criterion(engine)

    def _test_criterion(self, engine):
        data_table = self.tables.data_table
        result = engine.execute(
            select([data_table.c.data]).where(data_table.c.data['k1'] == 'r3v1')
        ).first()
        eq_(result, ({'k1': 'r3v1', 'k2': 'r3v2'},))