Skip to content

Commit

Permalink
Allow overriding true/false/null values in ColumnTypes. Closes #206.
Browse files Browse the repository at this point in the history
  • Loading branch information
onyxfish committed Sep 3, 2015
1 parent 50515c0 commit 14f41d1
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 16 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
0.7.0
-----

* Values to coerce to true/false can now be overridden for BooleanType.
* Values to coerce to null can now be overridden for all ColumnType subclasses. (#206)
* Add key_type argument to TableSet and Table.group_by. (#205)
* Nested TableSets and multi-dimensional aggregates. (#204)
* TableSet.aggregate will now use key_name as the group column name. (#203)
Expand Down
53 changes: 37 additions & 16 deletions agate/column_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,26 +22,43 @@

from agate.exceptions import CastError

#: String values which will be automatically cast to :code:`None`.
NULL_VALUES = ('', 'na', 'n/a', 'none', 'null', '.')
#: Default values which will be automatically cast to :code:`None`.
DEFAULT_NULL_VALUES = ('', 'na', 'n/a', 'none', 'null', '.')

#: String values which will be automatically cast to :code:`True`.
TRUE_VALUES = ('yes', 'y', 'true', 't')
#: Default values which will be automatically cast to :code:`True`.
DEFAULT_TRUE_VALUES = ('yes', 'y', 'true', 't')

#: String values which will be automatically cast to :code:`False`.
FALSE_VALUES = ('no', 'n', 'false', 'f')
#: Default values which will be automatically cast to :code:`False`.
DEFAULT_FALSE_VALUES = ('no', 'n', 'false', 'f')

class ColumnType(object): #pragma: no cover
    """
    Base class from which column data types derive.

    :param null_values: A sequence of values that this type should cast to
        :code:`None` whenever one of them is encountered.
    """
    def __init__(self, null_values=DEFAULT_NULL_VALUES):
        # Stored on the instance so each type object carries its own set of
        # null markers rather than relying on a module-level constant.
        self.null_values = null_values

    def _create_column(self, table, index):
        # The base class provides no implementation; it unconditionally
        # raises so that a missing override is surfaced immediately.
        raise NotImplementedError

class BooleanType(ColumnType):
"""
Column type for :class:`BooleanColumn`.
:param true_values: A sequence of values which should be cast to
:code:`True` when encountered with this type.
:param false_values: A sequence of values which should be cast to
:code:`False` when encountered with this type.
"""
def __init__(self, true_values=DEFAULT_TRUE_VALUES, false_values=DEFAULT_FALSE_VALUES, null_values=DEFAULT_NULL_VALUES):
super(BooleanType, self).__init__(null_values=null_values)

self.true_values = true_values
self.false_values = false_values

def cast(self, d):
"""
Cast a single value to :class:`bool`.
Expand All @@ -57,13 +74,13 @@ def cast(self, d):

d_lower = d.lower()

if d_lower in NULL_VALUES:
if d_lower in self.null_values:
return None

if d_lower in TRUE_VALUES:
if d_lower in self.true_values:
return True

if d_lower in FALSE_VALUES:
if d_lower in self.false_values:
return False

raise CastError('Can not convert value %s to bool for BooleanColumn.' % d)
Expand All @@ -77,7 +94,9 @@ class DateType(ColumnType):
"""
Column type for :class:`DateColumn`.
"""
def __init__(self, date_format=None):
def __init__(self, date_format=None, null_values=DEFAULT_NULL_VALUES):
super(DateType, self).__init__(null_values=null_values)

self.date_format = date_format

def cast(self, d):
Expand All @@ -94,7 +113,7 @@ def cast(self, d):
if isinstance(d, six.string_types):
d = d.strip()

if d.lower() in NULL_VALUES:
if d.lower() in self.null_values:
return None

if self.date_format:
Expand All @@ -111,7 +130,9 @@ class DateTimeType(ColumnType):
"""
Column type for :class:`DateTimeColumn`.
"""
def __init__(self, datetime_format=None):
def __init__(self, datetime_format=None, null_values=DEFAULT_NULL_VALUES):
super(DateTimeType, self).__init__(null_values=null_values)

self.datetime_format = datetime_format

def cast(self, d):
Expand All @@ -128,7 +149,7 @@ def cast(self, d):
if isinstance(d, six.string_types):
d = d.strip()

if d.lower() in NULL_VALUES:
if d.lower() in self.null_values:
return None

if self.datetime_format:
Expand Down Expand Up @@ -158,7 +179,7 @@ def cast(self, d):
if isinstance(d, six.string_types):
d = d.strip()

if d.lower() in NULL_VALUES:
if d.lower() in self.null_values:
return None

seconds = pytimeparse.parse(d)
Expand Down Expand Up @@ -187,7 +208,7 @@ def cast(self, d):
if isinstance(d, six.string_types):
d = d.replace(',' ,'').strip()

if d.lower() in NULL_VALUES:
if d.lower() in self.null_values:
return None

if isinstance(d, float):
Expand Down Expand Up @@ -220,7 +241,7 @@ def cast(self, d):
if isinstance(d, six.string_types):
d = d.strip()

if d.lower() in NULL_VALUES:
if d.lower() in self.null_values:
return None

return six.text_type(d)
Expand Down
10 changes: 10 additions & 0 deletions tests/test_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,16 @@ def test_boolean_cast(self):
casted = tuple(BooleanType().cast(v) for v in values)
self.assertSequenceEqual(casted, (True, True, None, False, False, None))

def test_boolean_cast_custom_strings(self):
    # A BooleanType configured with caller-supplied true/false/null strings
    # must cast each of those strings to True, False and None respectively.
    custom_type = BooleanType(
        true_values=('a', 'b'),
        false_values=('d', 'e'),
        null_values=('c', 'f')
    )
    expected = (True, True, None, False, False, None)
    results = tuple(map(custom_type.cast, ('a', 'b', 'c', 'd', 'e', 'f')))
    self.assertSequenceEqual(results, expected)

def test_number(self):
self.assertIsInstance(NumberType()._create_column(None, 1), NumberColumn)

Expand Down

0 comments on commit 14f41d1

Please sign in to comment.