# SafeVinci Fine-tuning Notebook (H100)

This notebook helps you:
- Verify GPU availability and environment
- Prepare chat-format datasets
- Run baseline inference with the base OmniVinci model
- Fine-tune with QLoRA (BF16 on H100) using TRL/PEFT (optionally DeepSpeed)
- Inspect training outputs and logs
- Evaluate the fine-tuned adapter vs. baseline

Prerequisites:
- Run `scripts/setup_h100.sh` once (or equivalent manual steps)
- Base model present at `models/omnivinci/`
- Dataset under `src/data/` with `train.jsonl`, `test.jsonl` and `annotations/`


In [None]:
# GPU / environment checks
import os, sys, json, subprocess, torch
print("Python:", sys.version)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("CUDA device count:", torch.cuda.device_count())
    print("GPU name:", torch.cuda.get_device_name(0))
    print("BF16 supported:", torch.cuda.is_bf16_supported())
    print("Total mem (GB):", round(torch.cuda.get_device_properties(0).total_memory/1e9, 2))
    !nvidia-smi || true
else:
    print("No GPU detected.")


In [None]:
# Paths and basic config
#
# Every location below is derived from the directory the notebook is started in,
# so run it from the repository root.
from pathlib import Path

PROJECT_ROOT = Path(".").resolve()

# Base model checkpoint directory.
MODEL_PATH = PROJECT_ROOT.joinpath("models", "omnivinci")

# Chat-format train / test splits (JSON Lines).
TRAIN_CHAT = PROJECT_ROOT.joinpath("src", "data", "train_chat.jsonl")
TEST_CHAT = PROJECT_ROOT.joinpath("src", "data", "test_chat.jsonl")

# Destination for fine-tuning artifacts, and the DeepSpeed config passed to training.
OUTPUT_DIR = PROJECT_ROOT.joinpath("outputs", "sft-omnivinci")
DEEPSPEED_CFG = PROJECT_ROOT.joinpath("deepspeed_config.json")

# Echo the configuration and flag missing inputs early.
print("PROJECT_ROOT:", PROJECT_ROOT)
print("MODEL_PATH:", MODEL_PATH)
print("TRAIN_CHAT exists:", TRAIN_CHAT.exists())
print("TEST_CHAT exists:", TEST_CHAT.exists())
print("OUTPUT_DIR:", OUTPUT_DIR)
print("DEEPSPEED_CFG exists:", DEEPSPEED_CFG.exists())


In [None]:
# Optional: build chat datasets (safe to rerun)
import os
os.environ["PYTHONPATH"] = str(PROJECT_ROOT)
!python -m src.data_utils.build_sft_dataset


In [None]:
# Baseline inference with base model (schema-JSON)
#
# Loads the base model and its processor from MODEL_PATH, takes the video
# referenced by the first record of TEST_CHAT, asks for a JSON-only safety
# analysis of it, and prints the first decoded output sequence.
import json
from transformers import AutoProcessor, AutoModelForCausalLM
import torch

prompt = (
    "Analyze this construction safety video and return strict JSON with keys "
    "incident_type, safety_status, probability, safety_response, action_plan. Reply with JSON only."
)

video_path = None
# Pick the video of the first non-blank record in the test set.
# Blank lines are skipped so a stray leading newline does not crash json.loads.
# Assumes each record looks like {"conversation": [{"content": [{"video": ...}, ...]}, ...]}.
with open(TEST_CHAT, 'r', encoding="utf-8") as f:
    for line in f:
        if not line.strip():
            continue
        item = json.loads(line)
        video_path = item["conversation"][0]["content"][0]["video"]
        break

# Explicit raise instead of `assert`, so the check survives `python -O`.
if video_path is None:
    raise ValueError("No test sample found.")

# Prefer bfloat16 when the GPU supports it; otherwise fall back to float16.
dtype = torch.bfloat16 if (torch.cuda.is_available() and torch.cuda.is_bf16_supported()) else torch.float16
# trust_remote_code=True executes Python shipped with the checkpoint; only use a trusted MODEL_PATH.
model = AutoModelForCausalLM.from_pretrained(str(MODEL_PATH), trust_remote_code=True, torch_dtype=dtype, device_map="auto")
processor = AutoProcessor.from_pretrained(str(MODEL_PATH), trust_remote_code=True)
# The same settings are applied to both the model config and the processor config.
# NOTE(review): presumably 64 is the number of frames sampled per video and
# "max_3600" caps the audio length -- confirm against the model's documentation.
model.config.num_video_frames = 64
processor.config.num_video_frames = 64
model.config.audio_chunk_length = "max_3600"
processor.config.audio_chunk_length = "max_3600"

# Single-turn request: the sample video plus the instruction prompt.
conversation = [{
    "role": "user",
    "content": [
        {"type": "video", "video": video_path},
        {"type": "text", "text": prompt}
    ]
}]
text = processor.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
inputs = processor([text])
# `media` / `media_config` are forwarded only if the processor produced them.
# do_sample=False selects non-sampling decoding.
out_ids = model.generate(
    input_ids=inputs.input_ids,
    media=getattr(inputs, 'media', None),
    media_config=getattr(inputs, 'media_config', None),
    max_new_tokens=256,
    do_sample=False,
)
print(processor.tokenizer.batch_decode(out_ids, skip_special_tokens=True)[0])


In [None]:
# Fine-tune with QLoRA (BF16 on H100) - single GPU
import os
os.environ["PYTHONPATH"] = str(PROJECT_ROOT)
!torchrun --standalone --nproc_per_node=1 -m src.train.train_sft --deepspeed {DEEPSPEED_CFG}


In [None]:
# Visualize training logs (if any)
#
# Prints the output directory and then up to 50 of the paths found beneath it,
# in sorted order. Nothing is listed when the directory is missing or empty.
from pathlib import Path

log_dir = OUTPUT_DIR
print("Output dir:", log_dir)

# List saved files (recursive walk, first 50 entries after sorting)
for p in sorted(Path(log_dir).rglob("*"))[:50]:
    print(p)


In [None]:
# Evaluate fine-tuned adapter vs. baseline
import json, os
os.environ["PYTHONPATH"] = str(PROJECT_ROOT)

# Evaluate base
print("Evaluating base model...")
!python -m src.eval.infer --model_path {MODEL_PATH} --test_path {TEST_CHAT}

# Evaluate adapter
print("Evaluating fine-tuned adapter...")
!python -m src.eval.infer --model_path {OUTPUT_DIR} --base_model_path {MODEL_PATH} --test_path {TEST_CHAT}


In [None]:
# Load and preview eval predictions JSON
#
# Reads outputs/eval_preds.json (relative to the current working directory) if
# it exists, then prints the number of entries and the first three.
import json
from pathlib import Path


def load_predictions(path):
    """Return the parsed JSON content of *path*.

    The file is opened as UTF-8 inside a context manager so the handle is
    always closed, including when parsing fails.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(path, encoding="utf-8") as fh:
        return json.load(fh)


preds_path = Path("outputs/eval_preds.json")
if preds_path.exists():
    data = load_predictions(preds_path)
    print("Num predictions:", len(data))
    # Slicing assumes the top-level JSON value is an array -- TODO confirm
    # against what the evaluation script writes.
    print(json.dumps(data[:3], indent=2))
else:
    print("Predictions file not found:", preds_path)
