Skip to content

Commit

Permalink
vdk-notebook: handle job with mixed .ipynb, .py, .sql files use-case (#…
Browse files Browse the repository at this point in the history
…2279)

What:
Added a sorting call to the StepBuilder's steps, which is invoked when
new notebook steps are added to the job.
This ensures that the steps are executed in alphanumeric order.

Why:
Currently, when we execute "vdk run", all the Python and SQL
files are executed first, and only then the notebook files. This is not
expected, since "vdk run" should execute files in alphanumeric order.

Signed-off-by: Duygu Hasan [hduygu@vmware.com](mailto:hduygu@vmware.com)
  • Loading branch information
duyguHsnHsn committed Jun 21, 2023
1 parent 9a95e14 commit 48ae6b8
Show file tree
Hide file tree
Showing 7 changed files with 92 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def register_notebook_steps(file_path: Path, context: JobContext):
)
notebook_steps.append(step)
context.step_builder.add_step(step)

context.step_builder._StepBuilder__steps.sort(key=lambda step: step.name)
log.debug(f"{len(notebook_steps)} " f"cells with vdk tag were detected!")
except json.JSONDecodeError as e:
errors.log_and_rethrow(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Remove rest_target_table if a previous run left it behind; IF EXISTS
-- keeps this statement from failing when the table is absent.
DROP TABLE IF EXISTS rest_target_table;
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "826a105f-1874-4251-8abd-75fb898ba71c",
"metadata": {
"pycharm": {
"name": "#%%\n"
},
"tags": [
"vdk"
]
},
"outputs": [],
"source": [
"job_input.execute_query(\"CREATE TABLE rest_target_table (userId, id, title, completed);\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2021-2023 VMware, Inc.
# SPDX-License-Identifier: Apache-2.0
import requests


def run(job_input):
    """Fetch one todo item from the JSONPlaceholder API and send it for ingestion.

    :param job_input: object provided by the caller; only its
        ``send_object_for_ingestion`` method is used here.
    :raises requests.HTTPError: if the endpoint answers with an error status.
    :raises requests.Timeout: if the endpoint does not answer in time.
    """
    # requests has no default timeout: without one an unresponsive server
    # would make this step hang forever instead of failing.
    response = requests.get(
        "https://jsonplaceholder.typicode.com/todos/1", timeout=30
    )
    response.raise_for_status()
    payload = response.json()

    # The decoded JSON object is sent as-is to the target table.
    job_input.send_object_for_ingestion(
        payload=payload, destination_table="rest_target_table"
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
; Supported format: https://docs.python.org/3/library/configparser.html#supported-ini-file-structure

; This is the only file required to deploy a Data Job.
; Read more to understand what each option means:

; Information about the owner of the Data Job
[owner]

; Team is a way to group Data Jobs that belong to the same team.
team = jupyter-test-jobs

[vdk]
; Key value pairs of any configuration options that can be passed to vdk.
; To list the options available in your vdk installation, run: vdk config-help
db_default_type=SQLITE
ingest_method_default=SQLITE
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Python jobs can specify extra library dependencies in requirements.txt file.
# See https://pip.readthedocs.io/en/stable/user_guide/#requirements-files
# The file is optional and can be deleted if no extra library dependencies are necessary.

requests
14 changes: 14 additions & 0 deletions projects/vdk-plugins/vdk-notebook/tests/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,17 @@ def test_failing_job_with_sql_error(self) -> None:
["run", jobs_path_from_caller_directory("rest-api-job-sql-error")]
)
cli_assert_equal(1, result)

def test_mixed_job_with_py_and_sql(self) -> None:
    """Run the "mixed-rest-api" job and check the rows in rest_target_table."""
    # The job itself must finish with exit code 0.
    run_args = ["run", jobs_path_from_caller_directory("mixed-rest-api")]
    result: Result = self.__runner.invoke(run_args)
    cli_assert_equal(0, result)

    # Expected rendering of the single ingested row, as printed by sqlite-query.
    expected_stdout = (
        " userId id title completed\n"
        "-------- ---- ------------------ -----------\n"
        " 1 1 delectus aut autem 0\n"
    )
    query_args = ["sqlite-query", "--query", "SELECT * FROM rest_target_table"]
    actual_rs: Result = self.__runner.invoke(query_args)
    assert actual_rs.stdout == expected_stdout

0 comments on commit 48ae6b8

Please sign in to comment.