Skip to content
This repository has been archived by the owner on Nov 8, 2021. It is now read-only.

Commit

Permalink
Bump dependencies and add developer mode (#26)
Browse files Browse the repository at this point in the history
* Bump deps

* Add developer mode to boost performance in production

* Linting

* Add developer mode to unit test

* Fix devmode and add new test

* Fix docs

* Rebump deps
  • Loading branch information
villebro committed Feb 8, 2020
1 parent 1cb3201 commit 1528e9c
Show file tree
Hide file tree
Showing 8 changed files with 88 additions and 49 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,11 @@ pip install sqltask[snowflake]

Please refer to the [documentation](https://sqltask.readthedocs.io/en/latest/)
on Read The Docs for further information.

## Developer instructions

By default, `sqltask` performs minimal validity checking of column values and
types to ensure maximum performance. However, in developer mode, `sqltask` performs
additional type checking and ensures that column values are populated in accordance
with schema specifications. This can be very helpful while developing new tasks.
To enable these checks, set the environment variable `SQLTASK_DEVELOPER_MODE=1`.
41 changes: 20 additions & 21 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,56 +5,55 @@
# pip-compile --output-file=requirements-dev.txt requirements-dev.in setup.py
#
alabaster==0.7.12 # via sphinx
babel==2.7.0 # via sphinx
babel==2.8.0 # via sphinx
certifi==2019.11.28 # via requests
chardet==3.0.4
click==7.0 # via pip-tools
codecov==2.0.15
coverage==4.5.4 # via codecov
docutils==0.15.2 # via sphinx
coverage==5.0.3 # via codecov
docutils==0.16 # via sphinx
entrypoints==0.3 # via flake8
filelock==3.0.12 # via tox
flake8==3.7.9
idna==2.8 # via requests
imagesize==1.1.0 # via sphinx
importlib-metadata==1.3.0 # via pluggy, tox
imagesize==1.2.0 # via sphinx
importlib-metadata==1.5.0 # via pluggy, tox
isort==4.3.21
jinja2==2.10.3 # via sphinx
jinja2==2.11.1 # via sphinx
markupsafe==1.1.1 # via jinja2
mccabe==0.6.1 # via flake8
more-itertools==8.0.2 # via zipp
mypy-extensions==0.4.3 # via mypy
mypy==0.750
mypy==0.761
nose==1.3.7
packaging==19.2 # via sphinx, tox
pip-tools==4.3.0
packaging==20.1 # via sphinx, tox
pip-tools==4.4.1
pluggy==0.13.1 # via tox
py==1.8.0 # via tox
py==1.8.1 # via tox
pycodestyle==2.5.0 # via flake8
pyflakes==2.1.1 # via flake8
pygments==2.5.2 # via sphinx
pyparsing==2.4.5 # via packaging
pyparsing==2.4.6 # via packaging
pytz==2019.3 # via babel
requests==2.22.0 # via codecov, sphinx
six==1.13.0 # via packaging, pip-tools, tox
six==1.14.0 # via packaging, pip-tools, tox
snowballstemmer==2.0.0 # via sphinx
sphinx-autodoc-typehints==1.10.3
sphinx-rtd-theme==0.4.3
sphinx==2.2.2
sphinx==2.3.1
sphinxcontrib-applehelp==1.0.1 # via sphinx
sphinxcontrib-devhelp==1.0.1 # via sphinx
sphinxcontrib-htmlhelp==1.0.2 # via sphinx
sphinxcontrib-jsmath==1.0.1 # via sphinx
sphinxcontrib-qthelp==1.0.2 # via sphinx
sphinxcontrib-serializinghtml==1.1.3 # via sphinx
sqlalchemy==1.3.11
sqlalchemy==1.3.13
toml==0.10.0 # via tox
tox==3.14.2
typed-ast==1.4.0 # via mypy
tox==3.14.3
typed-ast==1.4.1 # via mypy
typing-extensions==3.7.4.1 # via mypy
urllib3==1.25.7 # via requests
virtualenv==16.7.8 # via tox
zipp==0.6.0 # via importlib-metadata
urllib3==1.25.8 # via requests
virtualenv==16.7.9 # via tox
zipp==2.2.0 # via importlib-metadata

# The following packages are considered to be unsafe in a requirements file:
# setuptools==42.0.2 # via sphinx
# setuptools==45.1.0 # via sphinx
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
# pip-compile --output-file=requirements.txt setup.py
#
chardet==3.0.4
sqlalchemy==1.3.11
sqlalchemy==1.3.13
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

setup(
name="sqltask",
version="0.6.4.2",
version="0.6.4.3",
description="ETL tool based on SqlAlchemy for building robust ETL pipelies with "
"high emphasis on high data quality",
long_description=long_description,
Expand Down
17 changes: 9 additions & 8 deletions sqltask/base/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from sqltask.base import dq
from sqltask.base.row_source import BaseRowSource
from sqltask.utils.performance import is_developer_mode

if TYPE_CHECKING:
from sqltask.base.engine import EngineContext
Expand Down Expand Up @@ -324,7 +325,7 @@ def __init__(self, table_context: BaseTableContext):

def __setitem__(self, key, value):
# validate column value if table schema defined
if self.table_context.columns is not None:
if is_developer_mode() and self.table_context.columns is not None:
target_column = self.table_context.columns.get(key)
if target_column is None:
raise KeyError(f"Column not found in target schema: {key}")
Expand Down Expand Up @@ -387,13 +388,13 @@ def append(self) -> None:
should only be called once all cell values for the row have been fully populated,
as any changes.
"""

output_row = {}
for column in self.table_context.columns.values():
if column.name not in self:
raise Exception(f"No column `{column.name}` in output row for table "
f"`{self.table_context.name}`")
output_row[column.name] = self[column.name]
output_row: Dict[str, Any] = {}
if is_developer_mode():
for column in self.table_context.columns.values():
if column.name not in self:
raise Exception(f"No column `{column.name}` in output row for table "
f"`{self.table_context.name}`")
output_row.update(self)
self.table_context.output_rows.append(output_row)


Expand Down
3 changes: 3 additions & 0 deletions sqltask/engine_specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from sqltask.base.common import UrlParams
from sqltask.base.table import BaseTableContext
from sqltask.utils.engine_specs import get_escaped_string_value
from sqltask.utils.performance import is_developer_mode

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -255,6 +256,8 @@ def validate_column_value(cls, value: Any, column: Column) -> None:
:param value: value to insert into a column of a database table
:param column: The target column
"""
if not is_developer_mode():
return
global VALID_COLUMN_TYPES
name = column.name
valid_types = VALID_COLUMN_TYPES.get(type(column.type))
Expand Down
10 changes: 10 additions & 0 deletions sqltask/utils/performance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import os


def is_developer_mode() -> bool:
    """
    Check if developer mode is activated.

    Developer mode is controlled by the environment variable
    `SQLTASK_DEVELOPER_MODE`. It is considered active when the variable is set
    to anything other than an empty value or an explicit "off" value
    (`0`, `false`, `no`, `off`; case-insensitive). This way
    `SQLTASK_DEVELOPER_MODE=0` disables developer mode instead of silently
    enabling it.

    :return: True if developer mode is active, otherwise False
    """
    value = os.getenv("SQLTASK_DEVELOPER_MODE")
    if value is None:
        # variable not set at all: production (fast) mode
        return False
    return value.strip().lower() not in ("", "0", "false", "no", "off")
54 changes: 36 additions & 18 deletions tests/utils/test_engine_specs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
from datetime import date, datetime
from decimal import Decimal
from test.support import EnvironmentVarGuard
from unittest import TestCase

from sqlalchemy.schema import Column
Expand All @@ -19,26 +20,43 @@ def test_csv_export(self):
file_path = create_tmp_csv(table_context)
os.remove(f"{file_path}")

def test_validate_column_types(self):
def test_validate_column_types_devmode(self):
    """
    In developer mode, strict type checking of column values is enforced.

    Every valid value/column pair must make `validate_column_value` return
    None, and every invalid pair must raise ValueError.
    """
    with EnvironmentVarGuard() as env:
        # the guard restores the original environment when the block exits
        env["SQLTASK_DEVELOPER_MODE"] = "1"
        validate = BaseEngineSpec.validate_column_value
        str10_column = Column("str10_col", String(10), nullable=False)
        str_column = Column("str_col", String, nullable=False)
        int_column = Column("int_col", Integer())
        float_column = Column("float_col", Float(), nullable=False)
        # each column gets its own name so failures point at the right column
        date_column = Column("date_col", Date(), nullable=False)
        datetime_column = Column("datetime_col", DateTime(), nullable=False)
        # fixed timestamp with a time component keeps the test deterministic
        timestamp = datetime(2019, 12, 31, 23, 59, 59)

        valid_cases = [
            (date(2019, 12, 31), date_column),
            (date(2019, 12, 31), datetime_column),
            ("abc", str10_column),
            ("1234567890", str10_column),
            ("123456789012345", str_column),
            (Decimal("1234.567"), float_column),
            (1.1, float_column),
            (1, float_column),
            (1, int_column),
            (None, int_column),
        ]
        invalid_cases = [
            (timestamp, date_column),
            (None, str_column),
            ("12345678901", str10_column),
            (12345, str_column),
            (12345.5, int_column),
        ]

        # subTest reports every failing pair instead of stopping at the first
        for value, column in valid_cases:
            with self.subTest(value=value, column=column.name):
                self.assertIsNone(validate(value, column))
        for value, column in invalid_cases:
            with self.subTest(value=value, column=column.name):
                self.assertRaises(ValueError, validate, value, column)

def test_validate_column_types_non_devmode(self):
    """
    Outside developer mode, feeding incorrect types into columns should
    not raise exceptions.

    Both valid and invalid value/column pairs must make
    `validate_column_value` return None.
    """
    with EnvironmentVarGuard() as env:
        # make sure developer mode is off even if the variable is set in the
        # environment that runs the test suite; the guard restores it on exit
        env.unset("SQLTASK_DEVELOPER_MODE")
        validate = BaseEngineSpec.validate_column_value
        str10_column = Column("str10_col", String(10), nullable=False)
        str_column = Column("str_col", String, nullable=False)
        int_column = Column("int_col", Integer())
        float_column = Column("float_col", Float(), nullable=False)
        # each column gets its own name so failures point at the right column
        date_column = Column("date_col", Date(), nullable=False)
        datetime_column = Column("datetime_col", DateTime(), nullable=False)
        # fixed timestamp with a time component keeps the test deterministic
        timestamp = datetime(2019, 12, 31, 23, 59, 59)

        cases = [
            # pairs that are valid in any mode
            (date(2019, 12, 31), date_column),
            (date(2019, 12, 31), datetime_column),
            ("abc", str10_column),
            ("1234567890", str10_column),
            ("123456789012345", str_column),
            (Decimal("1234.567"), float_column),
            (1.1, float_column),
            (1, float_column),
            (1, int_column),
            (None, int_column),
            # pairs that would raise ValueError in developer mode
            (timestamp, date_column),
            (None, str_column),
            ("12345678901", str10_column),
            (12345, str_column),
            (12345.5, int_column),
        ]

        # subTest reports every failing pair instead of stopping at the first
        for value, column in cases:
            with self.subTest(value=value, column=column.name):
                self.assertIsNone(validate(value, column))

0 comments on commit 1528e9c

Please sign in to comment.