Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make extending with new functions vaex more convenient #188

Merged
merged 2 commits into from Mar 19, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
25 changes: 23 additions & 2 deletions docs/source/api.rst
Expand Up @@ -55,7 +55,7 @@ vaex-core
---------

.. automodule:: vaex
:members: open, from_arrays, from_dict, from_items, from_arrow_table, from_csv, from_ascii, from_pandas, from_astropy_table, from_samp, open_many, server, example, app, delayed
:members: open, from_arrays, from_dict, from_items, from_arrow_table, from_csv, from_ascii, from_pandas, from_astropy_table, from_samp, open_many, register_function, server, example, app, delayed
:undoc-members:
:show-inheritance:

Expand All @@ -76,6 +76,27 @@ DataFrameLocal class
:special-members:


Expression class
~~~~~~~~~~~~~~~~

.. autoclass:: vaex.expression.Expression
:members:
:special-members:



String operations
~~~~~~~~~~~~~~~~~

.. autoclass:: vaex.expression.StringOperations
:members:
:special-members:

.. autoclass:: vaex.expression.StringOperationsPandas
:members:
:special-members:


vaex.stat module
~~~~~~~~~~~~~~~~

Expand Down Expand Up @@ -250,7 +271,7 @@ Boosted trees
.. autoclass:: vaex.ml.lightgbm.LightGBMClassifier
:members:

.. autoclass:: vaex.ml.xgboost.XGBModel
.. autoclass:: vaex.ml.xgboost.XGBoostModel
:members:

.. PyGBM support is in the incubator phase, which means support may disappear in future versions
Expand Down
4 changes: 2 additions & 2 deletions docs/source/conf.py
Expand Up @@ -69,9 +69,9 @@
if 1:
import vaex
# The short X.Y version.
version = vaex.version
version = vaex.__version__
# The full version, including alpha/beta/rc tags.
release = vaex.version
release = vaex.__version__
else:
print("failed finding vaex module, try finding version")
import sys
Expand Down
1 change: 1 addition & 0 deletions packages/vaex-core/vaex/__init__.py
Expand Up @@ -36,6 +36,7 @@
import glob
import vaex.dataframe
import vaex.dataset
from vaex.functions import register_function
from . import stat
# import vaex.file
# import vaex.export
Expand Down
64 changes: 32 additions & 32 deletions packages/vaex-core/vaex/dataframe.py
Expand Up @@ -31,7 +31,7 @@
import logging
import vaex.kld
from . import selections, tasks, scopes
from .functions import expression_namespace
from .expression import expression_namespace
from .delayed import delayed, delayed_args, delayed_list
from .column import Column, ColumnIndexed, ColumnSparse, ColumnString, ColumnConcatenatedLazy, str_type
import vaex.events
Expand Down Expand Up @@ -209,6 +209,37 @@ def __getattr__(self, name):
else:
return object.__getattribute__(self, name)

@property
def func(self):
    """Build a namespace object whose attributes create expressions from calls.

    For every entry of ``expression_namespace`` an attribute with the same
    name is set to a callable. Calling it formats its arguments into the
    string ``name(arg, ..., key=value, ...)`` and returns
    ``vaex.expression.Expression(self, <that string>)``. Entries of
    ``self.functions`` are attached afterwards, so they replace attributes
    with the same name.

    The namespace is rebuilt on every access of this property.
    """
    class Functions(object):
        pass

    def render(argument):
        # Expressions are embedded via str(); any other value via repr().
        return str(argument) if isinstance(argument, Expression) else repr(argument)

    def closure(function_name):
        # One builder per function name; the name is captured as an argument
        # so each builder keeps its own value.
        def wrap(*args, **kwargs):
            rendered = [render(argument) for argument in args]
            rendered.extend('{}={}'.format(key, render(item)) for key, item in kwargs.items())
            expression = "{}({})".format(function_name, ", ".join(rendered))
            return vaex.expression.Expression(self, expression)
        return wrap

    functions = Functions()
    for name, value in expression_namespace.items():
        builder = closure(name)
        try:
            # Copy metadata of the real function onto the builder.
            builder = functools.wraps(value)(builder)
        except AttributeError:
            # NOTE(review): presumably a Python 2 quirk where wraps() fails on
            # objects lacking an attribute it copies -- the original author
            # was unsure; the unwrapped builder is used in that case.
            pass
        setattr(functions, name, builder)
    for name, value in self.functions.items():
        setattr(functions, name, value)
    return functions

@_hidden
@vaex.utils.deprecated('use is_category')
def iscategory(self, column):
Expand Down Expand Up @@ -4452,37 +4483,6 @@ class Datas(object):
setattr(datas, name, array)
return datas

@property
def func(self):
    """Build a namespace object whose attributes create expressions from calls.

    For every entry of ``expression_namespace`` an attribute with the same
    name is set to a callable. Calling it formats its arguments into the
    string ``name(arg, ..., key=value, ...)`` and returns
    ``vaex.expression.Expression(self, <that string>)``. Entries of
    ``self.functions`` are attached afterwards, so they replace attributes
    with the same name.

    The namespace is rebuilt on every access of this property.
    """
    class Functions(object):
        pass

    def render(argument):
        # Expressions are embedded via str(); any other value via repr().
        return str(argument) if isinstance(argument, Expression) else repr(argument)

    def closure(function_name):
        # One builder per function name; the name is captured as an argument
        # so each builder keeps its own value.
        def wrap(*args, **kwargs):
            rendered = [render(argument) for argument in args]
            rendered.extend('{}={}'.format(key, render(item)) for key, item in kwargs.items())
            expression = "{}({})".format(function_name, ", ".join(rendered))
            return vaex.expression.Expression(self, expression)
        return wrap

    functions = Functions()
    for name, value in expression_namespace.items():
        builder = closure(name)
        try:
            # Copy metadata of the real function onto the builder.
            builder = functools.wraps(value)(builder)
        except AttributeError:
            # NOTE(review): presumably a Python 2 quirk where wraps() fails on
            # objects lacking an attribute it copies -- the original author
            # was unsure; the unwrapped builder is used in that case.
            pass
        setattr(functions, name, builder)
    for name, value in self.functions.items():
        setattr(functions, name, value)
    return functions

def copy(self, column_names=None, virtual=True):
df = DataFrameArrays()
df._length_unfiltered = self._length_unfiltered
Expand Down
63 changes: 14 additions & 49 deletions packages/vaex-core/vaex/expression.py
Expand Up @@ -8,7 +8,6 @@
import numpy as np
import tabulate

from vaex.functions import expression_namespace, _scopes
from vaex.utils import _ensure_strings_from_expressions, _ensure_string_from_expression
from vaex.column import ColumnString
import vaex.serialize
Expand All @@ -24,6 +23,10 @@
default_shape = 128
PRINT_MAX_COUNT = 10

# Module-level registry mapping names to objects; it starts out holding only NaN.
expression_namespace = {'nan': np.nan}


_binary_ops = [
dict(code="+", name='add', op=operator.add),
dict(code="in", name='contains', op=operator.contains),
Expand Down Expand Up @@ -100,33 +103,11 @@ def f(a):
return Expression(self.ds, expression=expression)
attrs['__%s__' % op['name']] = f
wrap(op)
for name, func_real in expression_namespace.items():
def wrap(name=name):
def f(*args, **kwargs):
self = args[0]

def to_expression(expression):
if isinstance(expression, str):
expression = repr(expression)
if isinstance(expression, Expression):
assert expression.ds == self.ds
expression = expression.expression
return expression
expressions = [to_expression(e) for e in args]
# print(name, expressions)
expression = '{0}({1})'.format(name, ", ".join(expressions))
return Expression(self.ds, expression=expression)
try:
f = functools.wraps(func_real)(f)
except AttributeError:
pass # numpy ufuncs don't have a __module__, which may choke wraps

attrs['%s' % name] = f
if name not in attrs:
wrap(name)
return type(future_class_name, future_class_parents, attrs)


class DateTime(object):
"""DateTime operations"""
def __init__(self, expression):
self.expression = expression

Expand All @@ -150,37 +131,21 @@ def hour(self):
def weekofyear(self):
    """Call ``dt_weekofyear`` from the dataframe's ``func`` namespace on the wrapped expression."""
    wrapped = self.expression
    return wrapped.ds.func.dt_weekofyear(wrapped)


class StringOperations(object):
    """String operations.

    Holder around a single expression; the constructor only stores the
    expression it is given.
    """

    def __init__(self, expression):
        # Kept as a public attribute.
        self.expression = expression

class PandasStringOperations(object):

class StringOperationsPandas(object):
    """String operations using Pandas Series.

    Holder around a single expression; the constructor only stores the
    expression it is given.
    """

    def __init__(self, expression):
        # Kept as a public attribute.
        self.expression = expression

# Give StringOperations one method per entry of _scopes['str'].
for name, function in _scopes['str'].items():
    full_name = 'str_' + name

    # The loop variables are bound as defaults so every wrapper keeps the
    # values of its own iteration rather than those of the last one.
    def closure(name=name, full_name=full_name, function=function):
        def wrapper(self, *args, **kwargs):
            # Look up the prefixed function on the dataframe's `func`
            # namespace and call it with the wrapped expression first.
            return getattr(self.expression.ds.func, full_name)(self.expression, *args, **kwargs)
        return wrapper

    setattr(StringOperations, name, closure())

# Give PandasStringOperations one method per entry of _scopes['str_pandas'].
for name, function in _scopes['str_pandas'].items():
    full_name = 'str_pandas_' + name

    # The loop variables are bound as defaults so every wrapper keeps the
    # values of its own iteration rather than those of the last one.
    def closure(name=name, full_name=full_name, function=function):
        def wrapper(self, *args, **kwargs):
            # Look up the prefixed function on the dataframe's `func`
            # namespace and call it with the wrapped expression first.
            return getattr(self.expression.ds.func, full_name)(self.expression, *args, **kwargs)
        return wrapper

    setattr(PandasStringOperations, name, closure())

class Expression(with_metaclass(Meta)):
"""Expression class"""
def __init__(self, ds, expression):
self.ds = ds
if isinstance(expression, Expression):
Expand All @@ -199,7 +164,7 @@ def str(self):
@property
def str_pandas(self):
"""Gives access to string operations (using Pandas Series)"""
return PandasStringOperations(self)
return StringOperationsPandas(self)

@property
def values(self):
Expand Down Expand Up @@ -475,7 +440,7 @@ def jit_numba(self, verbose=False):
import imp
import hashlib
names = []
funcs = set(vaex.dataset.expression_namespace.keys())
funcs = set(expression_namespace.keys())
# if it's a virtual column, we probably want to optimize that
# TODO: fully extract the virtual columns, i.e. depending ones?
expression = self.expression
Expand Down Expand Up @@ -504,7 +469,7 @@ def jit_pythran(self, verbose=False):
import hashlib
# self._import_all(module)
names = []
funcs = set(vaex.dataset.expression_namespace.keys())
funcs = set(expression_namespace.keys())
expression = self.expression
if expression in self.ds.virtual_columns:
expression = self.ds.virtual_columns[self.expression]
Expand All @@ -529,7 +494,7 @@ def f({0}):

module = imp.load_dynamic(module_name, module_path)
function_name = "f_" + m.hexdigest()
vaex.dataset.expression_namespace[function_name] = module.f
expression_namespace[function_name] = module.f

return Expression(self.ds, "{0}({1})".format(function_name, argstring))
finally:
Expand Down