## HuggingFace Model Conversion to ONNX and some adjustments to the model

In [None]:
# Edit the manConfig.json file FIRST to set all variables BEFORE running any cells

## Model preparation (fixing dynamic dims, fixing versions for compatibility, etc.)

In [1]:
# --- 0) Imports ---
import os, sys, json, glob, subprocess
import getpass

import onnx
import onnxruntime as rt
from onnxruntime.tools.onnx_model_utils import make_dim_param_fixed

import teradataml as tdml

In [2]:
# --- 1) Load config ---
# Reads ./manConfig.json and unpacks it into the module-level settings used by
# the following cells. "db" and "model" are required sections; "log" is optional.
def _cfg_bool(section, key, default):
    """Read a boolean flag from a config section.

    Real JSON booleans (and any other non-string value) are passed through
    bool() unchanged. False-like strings such as "false", "0", "no" or "off"
    are treated as False, because bool("false") would otherwise be True and
    a quoted value in the hand-edited JSON would silently switch a flag on.
    """
    value = section.get(key, default)
    if isinstance(value, str) and value.strip().lower() in ("", "false", "0", "no", "n", "off"):
        return False
    return bool(value)


cfg_path = "./manConfig.json"
# JSON is UTF-8 by specification; do not rely on the platform's locale encoding.
with open(cfg_path, "r", encoding="utf-8") as f:
    cfg = json.load(f)

db = cfg["db"]
log = cfg.get("log", {})                         # optional section
model_cfg = cfg["model"]

td_host = db["hostName"]
u_name  = db["userName"]
db_name = db["dbName"]
db_logmech = db.get("logmech", "LDAP")           # LDAP by default
table_prefix = db.get("tablePrefix", "misc")
model_table  = db.get("modelTable", "embeddings_models")
tok_table    = db.get("tokenizerTable", "embeddings_tokenizers")

model_id     = model_cfg["modelNameShort"]
model_name   = model_cfg["hubModelId"]
out_dir      = model_cfg["outputDir"]
opset        = int(model_cfg.get("opset", 16))
trust_remote = _cfg_bool(model_cfg, "trustRemoteCode", True)
onnx_files   = model_cfg.get("onnxFiles", "model.onnx")   # string | list | glob(s)
apply_fixes  = _cfg_bool(model_cfg, "applyPostFixes", True)
# Fixed sizes that replace the symbolic (dynamic) dimensions of the exported model.
bs = int(model_cfg.get("fixedBatchSize", 1))
sl = int(model_cfg.get("fixedSequenceLength", 512))
ed = int(model_cfg.get("fixedEmbeddingDim", 384))

debug = _cfg_bool(log, "debug", False)

In [3]:
# --- 2) Export the model with Optimum CLI ---
# Assemble the `optimum-cli export onnx` invocation from the config values.
cli_cmd = ["optimum-cli", "export", "onnx", "-m", model_name, out_dir, "--opset", str(opset)]
cli_cmd += ["--trust-remote-code"] if trust_remote else []

print(f"[exec] {' '.join(cli_cmd)}")

# stderr is folded into stdout so the complete CLI log is printed as one stream.
res = subprocess.run(
    cli_cmd,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    text=True,
)
print(res.stdout)

# A non-zero exit code aborts the notebook here.
if res.returncode:
    raise RuntimeError("Optimum export failed. See logs above.")

# Exit code 0 counts as success; a missing success line in the log only warns.
success_marker = "exported model was saved at"
if success_marker not in res.stdout.lower():
    print("[warn] Success message not detected, but process returned 0. Proceeding...")

[exec] optimum-cli export onnx -m BAAI/bge-small-en-v1.5 bge-small-en-v1.5-onnx --opset 16 --trust-remote-code
Opset 16 is lower than the recommended minimum opset (18) to export transformer. The ONNX export may fail or the exported model may be suboptimal.
  inverted_mask = torch.tensor(1.0, dtype=dtype) - expanded_mask

[warn] Success message not detected, but process returned 0. Proceeding...


In [4]:
# --- 3) Resolve ONNX files to process ---
def resolve_onnx_paths(output_dir, spec):
    """Resolve the 'onnxFiles' setting to a list of existing ONNX file paths.

    Args:
        output_dir: Directory the export wrote its files to.
        spec: A file name, a glob pattern, or a list of either. Any other
            type falls back to the single name "model.onnx".

    Returns:
        Paths (each joined with ``output_dir``) in the order of ``spec``, with
        duplicates removed. Matches of one glob pattern are sorted so the
        order does not depend on the filesystem.

    Raises:
        FileNotFoundError: If nothing in ``spec`` resolves to an existing file.
    """
    if isinstance(spec, str):
        candidates = [spec]
    elif isinstance(spec, list):
        candidates = spec
    else:
        candidates = ["model.onnx"]

    paths = []
    for c in candidates:
        if any(ch in c for ch in ["*", "?"]):  # glob pattern
            # Keep regular files only: a directory that happens to match the
            # pattern cannot be loaded as a model later on.
            matches = sorted(
                m for m in glob.glob(os.path.join(output_dir, c)) if os.path.isfile(m)
            )
            if not matches:
                print(f"[warn] glob '{c}' did not match any files under {output_dir}")
            paths.extend(matches)
        else:
            p = os.path.join(output_dir, c)
            if not os.path.isfile(p):
                print(f"[warn] file '{c}' not found under {output_dir}")
            else:
                paths.append(p)

    # de-dupe while preserving first-seen order
    dedup = list(dict.fromkeys(paths))
    if not dedup:
        raise FileNotFoundError("No ONNX files resolved. Check 'onnxFiles' or export output.")
    print("[info] ONNX files to process:")
    for p in dedup:
        print(f"  - {p}")
    return dedup

# Resolve the configured 'onnxFiles' spec against the export directory.
onnx_paths = resolve_onnx_paths(output_dir=out_dir, spec=onnx_files)

[info] ONNX files to process:
  - bge-small-en-v1.5-onnx\model.onnx


In [5]:

# --- 4) Apply IR/opset + dimension fixes, remove token_embeddings ---
# For each exported file this step rebuilds the model with a pinned IR version
# and the configured opset, replaces symbolic dims with the fixed sizes from the
# config, drops the 'token_embeddings' output and writes the result next to the
# export as 'fixed_<name>'. It is skipped when "applyPostFixes" is false.
fixed_paths = []

if not apply_fixes:
    # Honour the config flag: hand the exported files on unchanged.
    print("[info] applyPostFixes is disabled; using exported ONNX files unchanged.")
    fixed_paths = list(onnx_paths)
else:
    for onnx_path in onnx_paths:
        base = os.path.basename(onnx_path)
        fixed_path = os.path.join(out_dir, f"fixed_{base}")

        print(f"[info] Loading ONNX: {onnx_path}")
        raw_model = onnx.load(onnx_path)

        # Set desired opset for the default operator domain
        op = onnx.OperatorSetIdProto()
        op.version = opset

        # Keep the opset imports of any non-default domains; without them a
        # graph that uses operators from such a domain would have no matching import.
        extra_opsets = [o for o in raw_model.opset_import if o.domain not in ("", "ai.onnx")]

        # Align IR/opset explicitly.
        # NOTE(review): IR version 8 is presumably the newest one the target
        # runtime accepts — confirm before changing it.
        model_ir = onnx.helper.make_model(
            raw_model.graph,
            ir_version=8,
            opset_imports=[op] + extra_opsets
        )

        # Lock dynamic dims (if symbols present)
        print(f"[info] Fixing dims for '{base}': batch_size={bs}, sequence_length={sl}, embedding_dim={ed}")
        for sym, val in [
            ("batch_size", bs),
            ("sequence_length", sl),
            ("Divsentence_embedding_dim_1", ed),
            ("sentence_embedding_dim_1", ed),
            ("embedding_dim_1", ed),
        ]:
            try:
                make_dim_param_fixed(model_ir.graph, sym, val)
            except Exception as exc:
                # Still best-effort (the loop continues), but no longer silent:
                # a failure here may mean the saved model keeps a dynamic dim.
                print(f"[warn] Could not fix dim '{sym}' to {val} in '{base}': {exc}")

        # Remove token-level outputs safely (reverse iteration keeps indices valid)
        removed = False
        outputs = model_ir.graph.output
        for i in reversed(range(len(outputs))):
            if outputs[i].name == "token_embeddings":
                del outputs[i]
                removed = True

        if removed:
            print("[info] Removed 'token_embeddings' from outputs.")
        else:
            print("[info] No 'token_embeddings' output found; nothing to remove.")

        # Optional: print the remaining outputs for sanity
        out_names = [o.name for o in model_ir.graph.output]
        print(f"[info] Remaining outputs: {out_names}")

        print(f"[info] Saving fixed ONNX: {fixed_path}")
        onnx.save(model_ir, fixed_path)
        fixed_paths.append(fixed_path)
    

[info] Loading ONNX: bge-small-en-v1.5-onnx\model.onnx
[info] Fixing dims for 'model.onnx': batch_size=1, sequence_length=512, embedding_dim=384
[info] Removed 'token_embeddings' from outputs.
[info] Remaining outputs: ['sentence_embedding']
[info] Saving fixed ONNX: bge-small-en-v1.5-onnx\fixed_model.onnx


# Model deployment to database

In [6]:
# --- 5) Connect to Teradata ---
# The password is never stored in the notebook: it is read from the TD_PASSWORD
# environment variable when set, otherwise it is prompted for without echo.
u_password = os.environ.get("TD_PASSWORD", "")
if not u_name:
    u_name = input("User Name: ")
if not u_password:
    u_password = getpass.getpass(prompt="Password: ")

print(f"[info] Connecting to Teradata host={td_host}, db={db_name}, logmech={db_logmech}")
tdml.create_context(host=td_host, username=u_name, password=u_password, database=db_name, logmech=db_logmech)

[info] Connecting to Teradata host=192.168.100.20, db=td01, logmech=TD2


Engine(teradatasql://td01:***@192.168.100.20?DATABASE=td01&LOGDATA=%2A%2A%2A&LOGMECH=%2A%2A%2A)

## Deploying the model and the tokenizer

In [7]:
# --- 6) Save model(s) & tokenizer to BYOM tables ---
# Every file is stored under `model_id`, which the config cell reads from
# "modelNameShort".
# NOTE(review): all entries of fixed_paths share this one id; if more than one
# ONNX file was resolved the ids presumably collide in the model table — confirm.

# Save each fixed ONNX file
for p in fixed_paths:
    print(f"[info] Saving model file to '{model_table}': {p}")
    tdml.save_byom(model_id, p, model_table)

# Save tokenizer (only if the export produced a tokenizer.json)
tok_path = os.path.join(out_dir, "tokenizer.json")
tokenizer_saved = os.path.isfile(tok_path)
if tokenizer_saved:
    print(f"[info] Saving tokenizer to '{tok_table}': {tok_path}")
    tdml.save_byom(model_id, tok_path, tok_table)
else:
    print(f"[warn] tokenizer.json not found under {out_dir}. If Optimum produced a different tokenizer file, load that instead.")

# Report what was actually deployed instead of always claiming both.
if tokenizer_saved:
    print("[done] Model(s) and tokenizer deployed to Teradata BYOM tables.")
else:
    print("[done] Model(s) deployed to Teradata BYOM tables; tokenizer was NOT deployed.")


[info] Saving model file to 'embeddings_models': bge-small-en-v1.5-onnx\fixed_model.onnx
Created the model table 'embeddings_models' as it does not exist.
Model is saved.
[info] Saving tokenizer to 'embeddings_tokenizers': bge-small-en-v1.5-onnx\tokenizer.json
Created the model table 'embeddings_tokenizers' as it does not exist.
Model is saved.
[done] Model(s) and tokenizer deployed to Teradata BYOM tables.


In [8]:
# Tear down the teradataml context opened by tdml.create_context in step 5.
tdml.remove_context()

True