Skip to content

Commit

Permalink
Merge branch 'person/gageorgiev/log-module-leve' of github.com:vmware…
Browse files Browse the repository at this point in the history
…/versatile-data-kit into person/gageorgiev/log-module-leve
  • Loading branch information
gageorgiev committed Nov 29, 2023
2 parents a3fefec + 7f771ff commit f515fc9
Show file tree
Hide file tree
Showing 42 changed files with 1,136 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ ARG GID=1000
# Set the working directory
WORKDIR /job

# Validate base image is python based
RUN python -V
# Create necessary users and set home directory to /job
RUN groupadd -r -g $GID vdkgroup && useradd -u $UID -g $GID -r vdkuser && chown -R $UID:$GID /job
ENV HOME=/job
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.3.4
1.3.5
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ ARG GID=1000
# Set the working directory
WORKDIR /job

# Validate base image is python based
RUN python -V
# Create necessary users and set home directory to /job
RUN groupadd -r -g $GID group && useradd -u $UID -g $GID -r user && chown -R $UID:$GID /job
ENV HOME=/job
Expand Down
2 changes: 1 addition & 1 deletion projects/control-service/projects/job-builder/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.4.0
1.4.1
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
public class JobImageBuilder {
private static final Logger log = LoggerFactory.getLogger(JobImageBuilder.class);

private static final int BUILDER_TIMEOUT_SECONDS = 1800;
private static final String REGISTRY_TYPE_ECR = "ecr";
private static final String REGISTRY_TYPE_GENERIC = "generic";

Expand Down Expand Up @@ -81,6 +80,9 @@ public class JobImageBuilder {
@Value("${datajobs.deployment.builder.serviceAccountName}")
private String builderServiceAccountName;

@Value("${datajobs.deployment.builder.builderTimeoutSeconds:1800}")
private int builderTimeoutSeconds;

private final ControlKubernetesService controlKubernetesService;
private final DockerRegistryService dockerRegistryService;
private final DeploymentNotificationHelper notificationHelper;
Expand Down Expand Up @@ -228,7 +230,7 @@ public boolean buildImage(

var condition =
controlKubernetesService.watchJob(
builderJobName, BUILDER_TIMEOUT_SECONDS, s -> log.debug("Wait status: {}", s));
builderJobName, builderTimeoutSeconds, s -> log.debug("Wait status: {}", s));

log.debug("Finished watching builder job {}. Condition is: {}", builderJobName, condition);
String logs = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ project.ext {
'org.testcontainers:junit-jupiter' : 'org.testcontainers:junit-jupiter:1.19.1',
'org.mock-server:mockserver-netty' : 'org.mock-server:mockserver-netty:5.15.0', //5.11.2
'org.awaitility:awaitility' : 'org.awaitility:awaitility:4.2.0',
'org.apache.commons:commons-lang3' : 'org.apache.commons:commons-lang3:3.13.0',
'org.apache.commons:commons-lang3' : 'org.apache.commons:commons-lang3:3.14.0',
'org.apache.commons:commons-text' : 'org.apache.commons:commons-text:1.11.0',
'com.github.tomakehurst:wiremock' : 'com.github.tomakehurst:wiremock:2.27.2',
'com.graphql-java:graphql-java-extended-scalars' : 'com.graphql-java:graphql-java-extended-scalars:20.2',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
IngesterConfiguration,
)
from vdk.internal.builtin_plugins.ingestion.ingester_utils import AtomicCounter
from vdk.internal.builtin_plugins.ingestion.ingester_utils import DecimalJsonEncoder
from vdk.internal.builtin_plugins.ingestion.ingester_utils import IngesterJsonEncoder
from vdk.internal.core import errors
from vdk.internal.core.errors import ResolvableBy

Expand Down Expand Up @@ -679,7 +679,7 @@ def __verify_payload_format(self, payload_dict: dict):
# Check if payload dict is valid json
# TODO: optimize the check - we should not need to serialize the payload every time
try:
json.dumps(payload_dict, cls=DecimalJsonEncoder)
json.dumps(payload_dict, cls=IngesterJsonEncoder)
except (TypeError, OverflowError, Exception) as e:
errors.report_and_throw(
JsonSerializationIngestionException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,20 @@ def __repr__(self) -> str:
return str(self)


class DecimalJsonEncoder(JSONEncoder):
class IngesterJsonEncoder(JSONEncoder):
"""
This class is used to avoid an issue with the __verify_payload_format serialization check.
Normally, including data of type Decimal would cause that check to fail so we've amended
the default JsonEncoder object used to convert Decimal values to floats to avoid this issue.
Normally, including data of type Decimal and datetime would cause that check to fail so we've amended
the default JsonEncoder object used to convert Decimal and datetime values to floats to avoid this issue.
"""

def default(self, obj):
if isinstance(obj, datetime.datetime):
return obj.timestamp()
if isinstance(obj, Decimal):
return float(obj)
if isinstance(obj, bytes):
return list(obj)
return super().default(obj)


Expand Down
6 changes: 2 additions & 4 deletions projects/vdk-core/src/vdk/internal/core/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,7 @@ def report(error_type: ResolvableBy, exception: BaseException):


def report_and_throw(
exception: BaseVdkError,
resolvable_by: ResolvableBy = None,
cause: BaseException = None,
exception: BaseVdkError, resolvable_by: ResolvableBy = None
) -> None:
"""
Add exception to resolvable context and then throw it to be handled up the stack.
Expand All @@ -205,7 +203,7 @@ def report_and_throw(
exception,
)
)
raise exception from cause
raise exception


def report_and_rethrow(error_type: ResolvableBy, exception: BaseException) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ def run(job_input: IJobInput):
payload = payload_str
if payload_str == "None":
payload = None
elif payload_str == "date":
payload = {"key1": datetime.utcnow()}
elif payload_str == "unserializable":
payload = {"key1": log}

job_input.send_object_for_ingestion(
payload=payload, destination_table="object_table", method="memory"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def test_payload_verification_unserializable():
"run",
jobs_path_from_caller_directory("test-ingest-bad-payload-job"),
"--arguments",
'{"payload": "date"}',
'{"payload": "unserializable"}',
]
)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,24 +1,54 @@
# Copyright 2021-2023 VMware, Inc.
# SPDX-License-Identifier: Apache-2.0
import datetime
import json
from decimal import Decimal

from pytest import raises
from vdk.internal.builtin_plugins.ingestion.ingester_utils import DecimalJsonEncoder
from vdk.internal.builtin_plugins.ingestion.ingester_utils import IngesterJsonEncoder


def test_decimal_json_encoder():
payload_no_decimal = {"a": 1, "b": 2}
def test_ingester_json_encoder():
payload_no_specials = {"a": 1, "b": 2}
payload_with_decimal = {"a": Decimal(1), "b": Decimal(2)}
payload_with_datetime = {
"a": datetime.datetime.fromtimestamp(1700641925),
"b": datetime.datetime.fromtimestamp(1700641925),
}
payload_with_bytes = {
"a": b"enoded string bla bla",
"b": b"another encoded string, look at me, I'm so special",
}

assert json.dumps(payload_no_decimal) == '{"a": 1, "b": 2}'
assert json.dumps(payload_no_specials) == '{"a": 1, "b": 2}'

with raises(TypeError):
json.dumps(payload_with_decimal)

assert json.dumps(payload_no_decimal, cls=DecimalJsonEncoder) == '{"a": 1, "b": 2}'
with raises(TypeError):
json.dumps(payload_with_datetime)

with raises(TypeError):
json.dumps(payload_with_bytes)

assert (
json.dumps(payload_no_specials, cls=IngesterJsonEncoder) == '{"a": 1, "b": 2}'
)

assert (
json.dumps(payload_with_decimal, cls=DecimalJsonEncoder)
json.dumps(payload_with_decimal, cls=IngesterJsonEncoder)
== '{"a": 1.0, "b": 2.0}'
)

assert (
json.dumps(payload_with_datetime, cls=IngesterJsonEncoder)
== '{"a": 1700641925.0, "b": 1700641925.0}'
)

assert json.dumps(payload_with_bytes, cls=IngesterJsonEncoder) == (
'{"a": [101, 110, 111, 100, 101, 100, 32, 115, 116, 114, 105, 110, 103, 32, '
'98, 108, 97, 32, 98, 108, 97], "b": [97, 110, 111, 116, 104, 101, 114, 32, '
"101, 110, 99, 111, 100, 101, 100, 32, 115, 116, 114, 105, 110, 103, 44, 32, "
"108, 111, 111, 107, 32, 97, 116, 32, 109, 101, 44, 32, 73, 39, 109, 32, 115, "
"111, 32, 115, 112, 101, 99, 105, 97, 108]}"
)
4 changes: 2 additions & 2 deletions projects/vdk-plugins/.plugin-common.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@
- "projects/vdk-core/src/**/*"

.build-plugin-dind:
image: docker:23.0.1
image: docker:23.0.6
services:
- docker:23.0.1-dind
- docker:23.0.6-dind
variables:
DOCKER_HOST: tcp://localhost:2375
DOCKER_DRIVER: overlay2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ def handle_memory_error(self, exception, recovery_cursor):
exception,
classname_with_package="impala.error.OperationalError",
exception_message_matcher_regex=".*Memory limit exceeded:.*",
) or errors.exception_matches(
exception,
classname_with_package="impala.error.HiveServer2Error",
exception_message_matcher_regex=".*Memory limit exceeded:.*",
):
# We are going to try to increase the memory limits and see if the query passes
# But we won't do anything if the sql statement itself sets a memory limit
Expand Down
2 changes: 2 additions & 0 deletions projects/vdk-plugins/vdk-kerberos-auth/.plugin-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
- export DEBIAN_FRONTEND=noninteractive
- apt-get update
- apt-get install -y krb5-user
# the line below needs to be removed when cryptolib 1.3.1 or 1.4.0 is released https://community.snowflake.com/s/article/Python-Connector-fails-to-connect-with-LibraryNotFoundError-Error-detecting-the-version-of-libcrypto
- pip install https://github.com/wbond/oscrypto/archive/d5f3437ed24257895ae1edd9e503cfb352e635a8.zip
- !reference [.build-plugin, script]
extends: .build-plugin-dind

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ def setUp(self) -> None:
def test_no_authentication(self):
with mock.patch.dict(
os.environ,
{"VDK_KRB_AUTH_FAIL_FAST": "true"},
{
"VDK_KRB_AUTH_FAIL_FAST": "true",
"VDK_LOG_EXECUTION_RESULT": "True",
},
):
result: Result = self.__runner.invoke(
["run", jobs_path_from_caller_directory("test-job")]
Expand Down Expand Up @@ -67,6 +70,7 @@ def test_kinit_authentication(self):
"VDK_KRB_AUTH": "kinit",
"VDK_KRB_AUTH_FAIL_FAST": "true",
"VDK_KRB5_CONF_FILENAME": krb5_conf_filename,
"VDK_LOG_EXECUTION_RESULT": "True",
},
):
result: Result = self.__runner.invoke(
Expand Down Expand Up @@ -211,6 +215,7 @@ def test_minikerberos_authentication(self):
"VDK_KEYTAB_REALM": "EXAMPLE.COM",
"VDK_KERBEROS_KDC_HOST": "localhost",
"VDK_KRB5_CONF_FILENAME": krb5_conf_filename,
"VDK_LOG_EXECUTION_RESULT": "True",
},
):
result: Result = self.__runner.invoke(
Expand Down
22 changes: 22 additions & 0 deletions projects/vdk-plugins/vdk-oracle/.plugin-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Copyright 2021-2023 VMware, Inc.
# SPDX-License-Identifier: Apache-2.0

image: "python:3.7"

.build-vdk-oracle:
variables:
PLUGIN_NAME: vdk-oracle
extends: .build-plugin-dind

build-py37-vdk-oracle:
extends: .build-vdk-oracle
image: "python:3.7"

build-py311-vdk-oracle:
extends: .build-vdk-oracle
image: "python:3.11"

release-vdk-oracle:
variables:
PLUGIN_NAME: vdk-oracle
extends: .release-plugin
111 changes: 111 additions & 0 deletions projects/vdk-plugins/vdk-oracle/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# oracle

Support for VDK Managed Oracle connection

TODO: what the project is about, what is its purpose


## Usage

```
pip install vdk-oracle
```

### Configuration

(`vdk config-help` is useful command to browse all config options of your installation of vdk)

| Name | Description | (example) Value |
|--------------------------|--------------------------------------------------|----------------------|
| oracle_user | Username used when connecting to Oracle database | "my_user" |
| oracle_password | Password used when connecting to Oracle database | "super_secret_shhhh" |
| oracle_connection_string | The Oracle connection string | "localhost/free" |

### Example

#### Ingestion

```python
import datetime
from decimal import Decimal

def run(job_input):

# Ingest object
payload_with_types = {
"id": 5,
"str_data": "string",
"int_data": 12,
"float_data": 1.2,
"bool_data": True,
"timestamp_data": datetime.datetime.fromtimestamp(1700554373),
"decimal_data": Decimal(0.1),
}

job_input.send_object_for_ingestion(
payload=payload_with_types, destination_table="test_table"
)

# Ingest tabular data
col_names = [
"id",
"str_data",
"int_data",
"float_data",
"bool_data",
"timestamp_data",
"decimal_data",
]
row_data = [
[
0,
"string",
12,
1.2,
True,
datetime.datetime.fromtimestamp(1700554373),
Decimal(1.1),
],
[
1,
"string",
12,
1.2,
True,
datetime.datetime.fromtimestamp(1700554373),
Decimal(1.1),
],
[
2,
"string",
12,
1.2,
True,
datetime.datetime.fromtimestamp(1700554373),
Decimal(1.1),
],
]
job_input.send_tabular_data_for_ingestion(
rows=row_data, column_names=col_names, destination_table="test_table"
)
```
### Build and testing

```
pip install -r requirements.txt
pip install -e .
pytest
```

In VDK repo [../build-plugin.sh](https://github.com/vmware/versatile-data-kit/tree/main/projects/vdk-plugins/build-plugin.sh) script can be used also.


#### Note about the CICD:

.plugin-ci.yaml is needed only for plugins part of [Versatile Data Kit Plugin repo](https://github.com/vmware/versatile-data-kit/tree/main/projects/vdk-plugins).

The CI/CD is separated in two stages, a build stage and a release stage.
The build stage is made up of a few jobs, all which inherit from the same
job configuration and only differ in the Python version they use (3.7, 3.8, 3.9 and 3.10).
They run according to rules, which are ordered in a way such that changes to a
plugin's directory trigger the plugin CI, but changes to a different plugin does not.
Loading

0 comments on commit f515fc9

Please sign in to comment.