Skip to content

Commit

Permalink
Clearer/better str and repr for expressions (#182)
Browse files: browse the repository at this point in the history
* better str and repr for expressions
  • Loading branch information
maartenbreddels committed Mar 18, 2019
1 parent f425b29 commit 99f0154
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 11 deletions.
67 changes: 56 additions & 11 deletions packages/vaex-core/vaex/expression.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,28 @@
import base64
import cloudpickle as pickle
import functools
import operator
import six
import functools

from future.utils import with_metaclass
import numpy as np
import tabulate

from vaex.functions import expression_namespace, _scopes
from vaex.utils import _ensure_strings_from_expressions, _ensure_string_from_expression
from vaex.column import ColumnString
import numpy as np
import vaex.serialize
import base64
import cloudpickle as pickle
from . import expresso


try:
from StringIO import StringIO
except ImportError:
from io import BytesIO as StringIO

# TODO: repeated from dataframe.py
default_shape = 128
PRINT_MAX_COUNT = 10

_binary_ops = [
dict(code="+", name='add', op=operator.add),
Expand Down Expand Up @@ -287,18 +293,57 @@ def tolist(self):
return self.evaluate().tolist()

# NOTE(review): flattened diff hunk (indentation and +/- markers lost). The
# `name = ...` line appears to be the removed, pre-change line and the `return`
# line its replacement — confirm against the repository history.
def __repr__(self):
# pre-change: `name` is only consumed by the old single-line return statement.
name = self.__class__.__module__ + "." + self.__class__.__name__
# The representation is produced entirely by _repr_plain_().
return self._repr_plain_()

def _repr_plain_(self):
# Build the plain-text representation: an 'Expression = ...' line, a
# 'Length / dtype / (kind)' line, a dashed rule, and a table of (row index, value) pairs.
# NOTE(review): this span is a flattened diff hunk (indentation and +/- markers lost).
# Lines tagged "pre-change" below appear to be the removed side of the diff; the rest
# appears to be the post-change code — confirm against the repository history.
def _format_value(value):
# Shorten str/bytes longer than 40 characters/bytes: repr of the first 37 with its
# closing quote dropped, followed by '...'.
if isinstance(value, (str, bytes)):
if len(value) > 40:
value = repr(value[:37])[:-1] + '...'
# Masked constants are replaced by their str() form.
if isinstance(value, np.ma.core.MaskedConstant):
value = str(value)
return value

# Generator applying _format_value to each element; note the name shadows the builtin format().
def format(values):
for i in range(len(values)):
value = values[i]
yield _format_value(value)
# Passed to tabulate as the alignment for the two columns (row index, value).
colalign = ("right",) * 2
try:
N = len(self.ds)
# pre-change: fixed threshold of 10 and a comma-joined value string.
if N <= 10:
values = ", ".join(str(k) for k in self.evaluate(0, N))
# Short case: evaluate all N rows and tabulate them next to their row index.
if N <= PRINT_MAX_COUNT:
values = format(self.evaluate(0, N))
values = tabulate.tabulate([[i, k] for i, k in enumerate(values)], tablefmt='plain', colalign=colalign)
else:
# pre-change: first 5 and last 5 values joined into a single line.
values_head = ", ".join(str(k) for k in self.evaluate(0, 5))
values_tail = ", ".join(str(k) for k in self.evaluate(N - 5, N))
values = '{} ... (total {} values) ... {}'.format(values_head, N, values_tail)
# Long case: evaluate only the first and the last PRINT_MAX_COUNT//2 rows.
values_head = format(self.evaluate(0, PRINT_MAX_COUNT//2))
values_tail = format(self.evaluate(N - PRINT_MAX_COUNT//2, N))
# Pair each value with its row index: head rows first, then tail rows.
values_head = list(zip(range(PRINT_MAX_COUNT//2), values_head)) +\
list(zip(range(N - PRINT_MAX_COUNT//2, N), values_tail))
values = tabulate.tabulate([k for k in values_head], tablefmt='plain', colalign=colalign)
# Split the rendered table into lines so a centered '...' line can be inserted
# between the head rows and the tail rows.
values = values.split('\n')
width = max(map(len, values))
separator = '\n' + '...'.center(width, ' ') + '\n'
values = "\n".join(values[:PRINT_MAX_COUNT//2]) + separator + "\n".join(values[PRINT_MAX_COUNT//2:]) + '\n'
except Exception as e:
# A failure in the block above is reported inside the repr text instead of being raised.
values = 'Error evaluating: %r' % e
# pre-change: single-line return value built from `name` and id(self).
return "<%s(expressions=%r)> instance at 0x%x values=[%s] " % (name, self.expression, id(self), values)
# Cap the displayed expression at 60 characters (first 57 plus '...').
expression = self.expression
if len(expression) > 60:
expression = expression[:57] + '...'
info = 'Expression = ' + expression + '\n'
# dtype label: 'str' when self.dtype equals the builtin str type, otherwise str(dtype).
str_type = str
dtype = self.dtype
dtype = (str(dtype) if dtype != str_type else 'str')
# Label the expression by where its text is found: a key of self.ds.columns, a name
# returned by self.ds.get_column_names(hidden=True), or neither.
if self.expression in self.ds.columns:
state = "column"
elif self.expression in self.ds.get_column_names(hidden=True):
state = "virtual column"
else:
state = "expression"
# Summary line uses thousands separators for the length; the dashed rule is as wide
# as that line without its trailing newline.
line = 'Length: {:,} dtype: {} ({})\n'.format(len(self.ds), dtype, state)
info += line
info += '-' * (len(line)-1) + '\n'
info += values
return info

def count(self, binby=[], limits=None, shape=default_shape, selection=False, delay=False, edges=False, progress=None):
'''Shortcut for ds.count(expression, ...), see `Dataset.count`'''
Expand Down
4 changes: 4 additions & 0 deletions tests/repr_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,7 @@ def test_mask(ds_local):
code = ds._repr_mimebundle_()['text/plain']
assert "'--'" not in code
assert "--" in code

def test_repr_expression(ds_local):
    """Check that repr() of the `x` expression contains no 'Error' text."""
    rendered = repr(ds_local.x)
    assert 'Error' not in rendered

0 comments on commit 99f0154

Please sign in to comment.