In [1]:
from analytic_schema import parse_input, validate_input, OutputDoc

In [6]:
# Example 1: parameters supplied as a single CLI-style string.
# Flags are written with hyphens (e.g. --start-dtg); the parsed keys printed
# by this cell use underscores (e.g. start_dtg).
raw_cli_params = parse_input(
    " ".join(
        [
            "--input-schema-version 1.0.0",
            "--start-dtg 2025-06-01T00:00:00Z",
            "--end-dtg 2025-06-02T00:00:00Z",
            "--data-source-type file",
            "--data-source /tmp/conn.csv",
        ]
    )
)
print(raw_cli_params)

{'input_schema_version': '1.0.0', 'start_dtg': '2025-06-01T00:00:00Z', 'end_dtg': '2025-06-02T00:00:00Z', 'data_source_type': 'file', 'data_source': '/tmp/conn.csv'}


In [7]:
# Validate the parameters parsed from the CLI-style string. For this input the
# printed result is identical to the parsed dictionary.
params_from_cli = validate_input(raw_cli_params)
print(params_from_cli)

{'input_schema_version': '1.0.0', 'start_dtg': '2025-06-01T00:00:00Z', 'end_dtg': '2025-06-02T00:00:00Z', 'data_source_type': 'file', 'data_source': '/tmp/conn.csv'}


In [8]:
# Example 2: parameters supplied as a plain dictionary.
raw_dict_params = dict(
    input_schema_version="1.0.0",
    start_dtg="2025-06-01T00:00:00Z",
    end_dtg="2025-06-02T00:00:00Z",
    data_source_type="file",
    data_source="/tmp/conn.csv",
    # Nested analytic parameters are passed as a JSON-encoded string here.
    analytic_parameters='{"param_a": 123}',
)
# Run the dictionary through the same parser used for CLI input.
raw_dict_params = parse_input(raw_dict_params)
print(raw_dict_params)

{'input_schema_version': '1.0.0', 'start_dtg': '2025-06-01T00:00:00Z', 'end_dtg': '2025-06-02T00:00:00Z', 'data_source_type': 'file', 'data_source': '/tmp/conn.csv', 'analytic_parameters': '{"param_a": 123}'}


In [9]:
# Validate the dictionary-based parameters. In the printed result the JSON
# string supplied for 'analytic_parameters' appears decoded as a dict.
params_from_dict = validate_input(raw_dict_params)
print(params_from_dict)
# params_from_dict['analytic_parameters'] is now {'param_a': 123}

{'input_schema_version': '1.0.0', 'start_dtg': '2025-06-01T00:00:00Z', 'end_dtg': '2025-06-02T00:00:00Z', 'data_source_type': 'file', 'data_source': '/tmp/conn.csv', 'analytic_parameters': {'param_a': 123}}


In [10]:
# Example 3: Using --config (imagine 'my_config.json' exists)
# Contents of 'my_config.json':
# {
#   "input_schema_version": "1.0.0",
#   "start_dtg": "2025-07-01T00:00:00Z",
#   "end_dtg": "2025-07-02T00:00:00Z",
#   "data_source_type": "api endpoint",
#   "data_source": "https://api.example.com/data"
# }
# raw_params_with_config = parse_input(['--config', 'my_config.json', '--start-dtg', '2000-01-01T00:00:00Z'])
# params_from_config_file = validate_input(raw_params_with_config)
# params_from_config_file would use the values from 'my_config.json'; the start_dtg given on the CLI would be overridden.

In [None]:
# Abbreviated placeholder digest for the example.
# NOTE(review): presumably a real run passes the full SHA-256 of the raw input data — confirm.
raw_data_sha256 = "e3b0c4...55"

# Assemble the output document for this analytic run from the validated
# dictionary-based parameters.
out = OutputDoc(
    input_schema_version=params_from_dict["input_schema_version"],
    output_schema_version="1.1.0",
    analytic_id="notebooks/beacon_detection.ipynb",
    analytic_name="Beacon Detection",
    analytic_version="2.3.1",
    inputs=params_from_dict,  # the validated inputs returned by validate_input
    input_data_hash=raw_data_sha256,
    status="success",
    exit_code=0,
    findings=[],
    records_processed=0,
)
print(out)

In [12]:
# Finalise the output document built in the previous cell.
# NOTE(review): finalise() is defined in analytic_schema and not shown here — confirm what it writes or returns.
out.finalise()

None
