Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Added Transformation Model and its routes #39

Merged
merged 10 commits into from
Nov 17, 2023
6 changes: 4 additions & 2 deletions sand/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from sand.commands.load import load_dataset
from sand.container import use_container
from sand.helpers.dependency_injection import use_auto_inject
from sand.models import Project, SemanticModel, Table, TableRow
from sand.models import Project, SemanticModel, Table, TableRow, Transformation
from sand.models import db as dbconn
from sand.models import init_db

Expand All @@ -21,7 +21,9 @@
def init(db):
    """Init database"""
    init_db(db)
    # Create every model table in a single call. ``Transformation`` is part of
    # the list so that the transformation routes have a table to work with.
    # safe=True makes the call tolerate tables that already exist, so running
    # init again on an existing database is fine.
    dbconn.create_tables(
        [Project, Table, TableRow, SemanticModel, Transformation], safe=True
    )
    # Seed a default project unless one is already present; the name is
    # compared case-insensitively ("Default", "default", ...).
    has_default_project = (
        Project.select().where(fn.Lower(Project.name) == "default").exists()
    )
    if not has_default_project:
        Project(name="Default", description="The default project").save()

Expand Down
4 changes: 2 additions & 2 deletions sand/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from sand.controllers.search import search_bp
from sand.controllers.settings import setting_bp
from sand.controllers.table import table_bp, table_row_bp
from sand.controllers.transform import transform_bp
from sand.controllers.transformation import transformation_bp
from sand.helpers.namespace import NamespaceService
from sand.models import EntityAR, SemanticModel
from sand.models.ontology import OntClassAR, OntPropertyAR
Expand All @@ -33,7 +33,7 @@ def get_flask_app(
table_row_bp,
setting_bp,
search_bp,
transform_bp,
transformation_bp,
generate_api(
SemanticModel,
deserializers={"data": sand_deser.deserialize_graph},
Expand Down
156 changes: 90 additions & 66 deletions sand/controllers/transform.py → sand/controllers/transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,26 @@

from sand.models.table import Link, Table, TableRow
from gena.deserializer import get_dataclass_deserializer
from gena import generate_api
from sand.models import Transformation

transform_bp = Blueprint("transform", "transform")
transformation_bp = generate_api(Transformation)


@dataclass
class Context:
    """Context dataclass to access the row of the cell that is being transformed.

    An instance is handed to the user-defined transform function as its second
    argument (``context``), next to the cell value(s) being transformed.
    """

    # Index of the table row the current cell belongs to.
    index: int
    # All cell values of that row, so a transform can look at sibling columns.
    row: List[Union[str, float]]


@dataclass
class TransformRequestPayload:
"""Request Payload dataclass to validate the request obtained from the API call"""

type: Literal["map", "filter", "split", "concatenate"]
tableId: int
punith300i marked this conversation as resolved.
Show resolved Hide resolved
mode: str
datapath: Union[str, List[str]]
code: str
Expand All @@ -47,10 +52,10 @@ class Tdata(TypedDict):
def filter_traceback_errors() -> str:
"""Filters traceback errors, removes sensitive information

Args:
Args:

Returns:
Error String without the sensitive information.
Returns:
Error String without the sensitive information.
"""
(exc, value, tb) = sys.exc_info()
tb = tb.tb_next
Expand All @@ -61,17 +66,20 @@ def filter_traceback_errors() -> str:
ItemIndex = int


def transform_map(transform_func: Callable[[Any, Context], Any], data: Iterable[Tuple[ItemIndex, Item, Context]],
tolerance: int) -> List[Tdata]:
def transform_map(
transform_func: Callable[[Any, Context], Any],
data: Iterable[Tuple[ItemIndex, Item, Context]],
tolerance: int,
) -> List[Tdata]:
"""Implements map transform, performs map operation over each cell, for a given column

Args:
transform_func: User defined python function defined by the user
data: iterable with Column data and context object
tolerance: contains the API request data
Args:
transform_func: User defined python function defined by the user
data: iterable with Column data and context object
tolerance: contains the API request data

Returns:
list of Tdata objects, data transformed after applying map transform
Returns:
list of Tdata objects, data transformed after applying map transform
"""
transformed_data = []
for path, value, context in data:
Expand All @@ -90,20 +98,23 @@ def transform_map(transform_func: Callable[[Any, Context], Any], data: Iterable[
return transformed_data


def transform_filter(transform_func: Callable[[Any, Context], Any], data: Iterable[Tuple[ItemIndex, Item, Context]],
tolerance: int) -> List[Tdata]:
def transform_filter(
transform_func: Callable[[Any, Context], Any],
data: Iterable[Tuple[ItemIndex, Item, Context]],
tolerance: int,
) -> List[Tdata]:
"""Implements filter transform, performs filter operation over each cell, for a given column

Args:
transform_func: User defined python function defined by the user
data: iterable with Column data and context object
tolerance: contains the API request data
Args:
transform_func: User defined python function defined by the user
data: iterable with Column data and context object
tolerance: contains the API request data

Returns:
list of Tdata objects, data transformed after applying filter transform
Returns:
list of Tdata objects, data transformed after applying filter transform

Raises:
BadRequest: An error occurred when the transform_func on execution, does not return a boolean
Raises:
BadRequest: An error occurred when the transform_func on execution, does not return a boolean
"""
transformed_data = []
for path, value, context in data:
Expand All @@ -124,20 +135,23 @@ def transform_filter(transform_func: Callable[[Any, Context], Any], data: Iterab
return transformed_data


def transform_split(transform_func: Callable[[Any, Context], Any], data: Iterable[Tuple[ItemIndex, Item, Context]],
tolerance: int) -> List[Tdata]:
def transform_split(
transform_func: Callable[[Any, Context], Any],
data: Iterable[Tuple[ItemIndex, Item, Context]],
tolerance: int,
) -> List[Tdata]:
"""Implements split transform, performs split operation over each cell, for a given column

Args:
transform_func: User defined python function defined by the user
data: iterable with Column data and context object
tolerance: contains the API request data
Args:
transform_func: User defined python function defined by the user
data: iterable with Column data and context object
tolerance: contains the API request data

Returns:
list of Tdata objects, data transformed after applying split transform
Returns:
list of Tdata objects, data transformed after applying split transform

Raises:
BadRequest: An error occurred when transform_func on execution, does not return a list
Raises:
BadRequest: An error occurred when transform_func on execution, does not return a list
"""
transformed_data = []
for path, value, context in data:
Expand All @@ -158,17 +172,20 @@ def transform_split(transform_func: Callable[[Any, Context], Any], data: Iterabl
return transformed_data


def transform_concatenate(transform_func: Callable[[Any, Context], Any],
data: Iterable[Tuple[ItemIndex, Item, Context]], tolerance: int) -> List:
def transform_concatenate(
transform_func: Callable[[Any, Context], Any],
data: Iterable[Tuple[ItemIndex, Item, Context]],
tolerance: int,
) -> List:
"""Implements concatenate transform, performs concatenate operation over each cell, for a given column

Args:
transform_func: User defined python function defined by the user
data: iterable with Column data and context object
tolerance: contains the API request data
Args:
transform_func: User defined python function defined by the user
data: iterable with Column data and context object
tolerance: contains the API request data

Returns:
list of Tdata objects, data transformed after applying concatenate transform
Returns:
list of Tdata objects, data transformed after applying concatenate transform
"""
transformed_data = []
for path, value, context in data:
Expand All @@ -190,30 +207,30 @@ def transform_concatenate(transform_func: Callable[[Any, Context], Any],
def custom_getitem_guard(obj: Any, index: int) -> Any:
    """Implements __getitem__ restrictedpython policy and wraps _getitem_ function

    This is the hook installed under the ``_getitem_`` name in the restricted
    globals, so subscript expressions in user code are routed through it.
    It applies no extra checks: it simply performs the subscript.

    Args:
        obj: object that has __getitem__ implementation in python
        index: index of the element that can be accessed from obj

    Returns:
        object element at position index
    """
    return obj[index]


def compile_function(code: str) -> Callable:
"""Executes code in string in a restricted mode using restrictedpython

Args:
code: object that has __getitem__ implementation in python
Args:
code: object that has __getitem__ implementation in python

Returns:
Callable function that wraps the code as a function body
Returns:
Callable function that wraps the code as a function body

Raises:
BadRequest: An error occurred when the code has compilation error
Raises:
BadRequest: An error occurred when the code has compilation error
"""
loc = {}
safe_globals.update({'_getitem_': custom_getitem_guard})
safe_globals.update({"_getitem_": custom_getitem_guard})
compiled_result = compile_restricted_function("value,context", code, "<function>")

if compiled_result.errors:
Expand All @@ -224,23 +241,26 @@ def compile_function(code: str) -> Callable:
return loc["<function>"]


@transform_bp.route(
f"/{transform_bp.name}/<table_id>/transformations", methods=["POST"]
)
def transform(table_id: int):
table = Table.get_by_id(table_id)
table_rows: List[TableRow] = list(
TableRow.select().where(TableRow.table == table).order_by(TableRow.index)
)

@transformation_bp.route(f"/{transformation_bp.name}/test", methods=["POST"])
def transform():
if isinstance(request.json["datapath"], str):
request.json["datapath"] = [request.json["datapath"]]

request_data = transform_request_deserializer(request.json)
table = Table.get_by_id(request_data.tableId)
table_rows: List[TableRow] = list(
TableRow.select().where(TableRow.table == table).order_by(TableRow.index)
)
transform_func = compile_function(request_data.code)
col_index_list = [table.columns.index(column) for column in request_data.datapath]
data = ((table_row.index, [table_row.row[col_index] for col_index in col_index_list],
Context(index=table_row.index, row=table_row.row)) for table_row in table_rows[:request_data.rows])
data = (
(
table_row.index,
[table_row.row[col_index] for col_index in col_index_list],
Context(index=table_row.index, row=table_row.row),
)
for table_row in table_rows[: request_data.rows]
)

transformed_data = None

Expand All @@ -256,7 +276,9 @@ def transform(table_id: int):
raise BadRequest(
"For transform type map the outputpath should be a single column"
)
transformed_data = transform_filter(transform_func, data, request_data.tolerance)
transformed_data = transform_filter(
transform_func, data, request_data.tolerance
)

elif request_data.type == "split":
if request_data.outputpath is None:
Expand All @@ -266,6 +288,8 @@ def transform(table_id: int):
transformed_data = transform_split(transform_func, data, request_data.tolerance)

elif request_data.type == "concatenate":
transformed_data = transform_concatenate(transform_func, data, request_data.tolerance)
transformed_data = transform_concatenate(
transform_func, data, request_data.tolerance
)

return jsonify(transformed_data)
3 changes: 2 additions & 1 deletion sand/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
from sand.models.project import Project
from sand.models.semantic_model import SemanticModel
from sand.models.table import Table, TableRow, Link, ContextPage
from sand.models.transformation import Transformation

all_tables = [Project, SemanticModel, Table, TableRow]
all_tables = [Project, SemanticModel, Table, TableRow, Transformation]
45 changes: 45 additions & 0 deletions sand/models/transformation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from __future__ import annotations
from typing import List, Union, Literal
from peewee import (
CharField,
ForeignKeyField,
TextField,
IntegerField,
BooleanField,
)
from playhouse.sqlite_ext import JSONField

from sand.models.base import BaseModel
from sand.models.table import Table


class Transformation(BaseModel):
    """A stored, user-defined transformation attached to a table.

    Each record keeps the user code together with the settings needed to run
    it (type, mode, input/output paths, error policy) and its position
    relative to the other transformations.
    """

    name = CharField()
    # Deleting the table deletes its transformations as well (CASCADE).
    table = ForeignKeyField(Table, backref="transformations", on_delete="CASCADE")
    mode = CharField()
    # Either a single column name or a list of column names, stored as JSON.
    datapath: Union[List[str], str] = JSONField()  # type: ignore
    outputpath: List[str] = JSONField()  # type: ignore
    type = CharField()
    code = TextField()
    on_error: Literal[
        "set_to_blank", "store_error", "keep_original", "abort"
    ] = CharField()  # type: ignore
    is_draft = BooleanField()
    order = IntegerField()
    # Optional self-reference; it is reset to NULL when the referenced
    # transformation is deleted.
    insert_after = ForeignKeyField("self", null=True, on_delete="SET NULL")

    def to_dict(self):
        """Return a plain-dict view of this record.

        Foreign keys are exported as raw ids (``table_id``,
        ``insert_after_id``) so the result holds only JSON-friendly values and
        reading them does not trigger an extra query for the related row.
        """
        return {
            "id": self.id,
            "name": self.name,
            "table": self.table_id,
            "type": self.type,
            "mode": self.mode,
            "datapath": self.datapath,
            "outputpath": self.outputpath,
            # Fixed: this key used to be filled with ``self.on_error``.
            "code": self.code,
            "on_error": self.on_error,
            "is_draft": self.is_draft,
            "order": self.order,
            # Fixed: export the id, not the related model instance.
            "insert_after": self.insert_after_id,
        }
15 changes: 15 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,21 @@ def example_db(client):
semantic_model_data = json.load(open(semantic_file_path))
client.post("/api/semanticmodel", json=semantic_model_data)

transformation_data = {
"name": "transformation 1",
"table": 1,
"type": "map",
"mode": "restrictedpython",
"datapath": "Name",
"outputpath": ["Random"],
"code": "return value",
"on_error": "abort",
"is_draft": True,
"order": 1,
"order_for": 1,
punith300i marked this conversation as resolved.
Show resolved Hide resolved
}
client.post("/api/transformation", json=transformation_data)

yield None
finally:
for table in all_tables:
Expand Down
Loading