In [4]:
import os
from huggingface_hub import login, whoami

# Use an env var if you have set one, else you will be prompted in the notebook.
token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
if token:
    login(token=token, add_to_git_credential=True)
else:
    print("No HF token in env. You will be prompted. Create one at https://huggingface.co/settings/tokens")
    login(add_to_git_credential=True)

print("Authenticated as:", whoami().get("name") or whoami().get("email") or "unknown")

No HF token in env. You will be prompted. Create one at https://huggingface.co/settings/tokens


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Authenticated as: chinu-codes


In [5]:
# Step 1.1 — Create a new public HF model repo for your adapters
from huggingface_hub import whoami, create_repo
import os, pathlib

# Config — change the name if you prefer
REPO_NAME   = "safe-prompt-llama-3_2-3b-lora"   # e.g. "safe-prompt-v1"
ADAPTER_DIR = "./outputs/safe-prompt-3b-lora"   # where your adapters were saved

# Sanity checks
assert pathlib.Path(ADAPTER_DIR).exists(), f"Adapter dir not found: {ADAPTER_DIR}"
print("Adapter directory exists:", ADAPTER_DIR)

# Resolve your namespace
me = whoami()
namespace = me.get("name") or (me.get("orgs") or [None])[0]
assert namespace, "Could not resolve your HF namespace. Are you logged in?"

repo_id = f"{namespace}/{REPO_NAME}"
print("Target repo:", repo_id)

# Create (idempotent)
repo_url = create_repo(repo_id=repo_id, repo_type="model", private=False, exist_ok=True)
print("Repo ready:", repo_url)


Adapter directory exists: ./outputs/safe-prompt-3b-lora
Target repo: chinu-codes/safe-prompt-llama-3_2-3b-lora
Repo ready: https://huggingface.co/chinu-codes/safe-prompt-llama-3_2-3b-lora


In [6]:
# Overwrite ./outputs/safe-prompt-3b-lora/handler.py to include pluggable validators
import os, pathlib, textwrap

ADAPTER_DIR = "./outputs/safe-prompt-3b-lora"
pathlib.Path(ADAPTER_DIR).mkdir(parents=True, exist_ok=True)

handler_py = """
import os, re, json, torch
from typing import Any, Dict, List, Tuple
from dataclasses import dataclass
from transformers import AutoTokenizer, BitsAndBytesConfig, pipeline
from peft import AutoPeftModelForCausalLM

SAFE_OPEN  = "<safe>"
SAFE_CLOSE = "</safe>"

SYSTEM = (
    "You redact personal or secret information from user text. "
    "Return the SAME text but replace only the sensitive VALUES with placeholders. "
    "Do not change surrounding words like 'IMEI', 'Email', 'Phone', or punctuation. "
    "Allowed placeholders include dataset-style tags like [EMAIL], [PHONEIMEI], [FIRSTNAME], etc. "
    "Output ONLY the redacted text between <safe> and </safe>. No other text."
)

# --------------------------
# Pluggable validators (seatbelts)
# --------------------------
@dataclass
class Detector:
    name: str
    pattern: re.Pattern
    placeholder: str

DETECTORS: List[Detector] = [
    Detector("email", re.compile(r"\\b[^\\s@]+@[^\\s@]+\\.[^\\s@]+\\b"), "[EMAIL]"),
    Detector("phone", re.compile(r"\\b(?:\\+?\\d{1,3}[\\s.\\-]?)?(?:\\(?\\d{3}\\)?[\\s.\\-]?)?\\d{3}[\\s.\\-]?\\d{4}\\b"), "[PHONE]"),
    # Add more when needed, e.g.:
    # Detector("ipv4", re.compile(r"\\b(?:(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\b"), "[IP]"),
    # Detector("ssn_us", re.compile(r"\\b\\d{3}-\\d{2}-\\d{4}\\b"), "[SSN]"),
]

def apply_validators(text: str, mode: str = "enforce") -> Tuple[str, List[str]]:
    \"\"\"mode = off | warn | enforce\"\"\"
    hits: List[str] = []
    out = text
    for d in DETECTORS:
        if d.pattern.search(out):
            hits.append(d.name)
            if mode == "enforce":
                out = d.pattern.sub(d.placeholder, out)
    # In 'warn' mode we do not modify output, just record hits
    return out, hits

# --------------------------
# Endpoint handler
# --------------------------
class EndpointHandler:
    def __init__(self, path: str = ""):
        # 'path' is the mounted repo dir (contains adapters + adapter_config.json)
        self.repo_path = path
        self.seq_len   = int(os.getenv("SEQ_LEN", "512"))
        self.max_new   = int(os.getenv("MAX_NEW_TOKENS", "128"))
        self.dtype     = torch.float16
        self.validate_mode = os.getenv("VALIDATE_MODE", "enforce").lower()  # off|warn|enforce

        # Read base model id from adapter_config.json if present
        adapter_cfg = os.path.join(path, "adapter_config.json")
        if os.path.exists(adapter_cfg):
            with open(adapter_cfg, "r", encoding="utf-8") as f:
                cfg = json.load(f)
            self.base_model_id = cfg.get("base_model_name_or_path", "meta-llama/Llama-3.2-3B-Instruct")
        else:
            self.base_model_id = "meta-llama/Llama-3.2-3B-Instruct"

        # Tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(self.base_model_id, use_fast=True)
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        self.tokenizer.padding_side = "right"
        self.tokenizer.model_max_length = self.seq_len

        # Model (try 4-bit, fall back to fp16)
        kwargs = dict(torch_dtype=self.dtype, device_map="auto")
        try:
            bnb_cfg = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_use_double_quant=True,
                bnb_4bit_compute_dtype=self.dtype,
            )
            kwargs["quantization_config"] = bnb_cfg
        except Exception:
            pass

        self.model = AutoPeftModelForCausalLM.from_pretrained(path, **kwargs)
        self.model.config.use_cache = False
        self.model.eval()

        self.gen = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer)

    def _make_prompt(self, text: str) -> str:
        messages = [
            {"role": "system", "content": SYSTEM},
            {"role": "user", "content": text},
            {"role": "assistant", "content": SAFE_OPEN},
        ]
        try:
            return self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        except Exception:
            return f"<|system|>\\n{SYSTEM}\\n<|user|>\\n{text}\\n<|assistant|>\\n{SAFE_OPEN}"

    def _strip_tags(self, s: str) -> str:
        # Remove any accidental nested or echoed tags
        return s.replace(SAFE_OPEN, "").replace(SAFE_CLOSE, "").strip()

    def __call__(self, data: Any) -> str:
        # Accept { "inputs": "..." } or raw str/list[str]
        text = None
        if isinstance(data, dict):
            text = data.get("inputs")
        elif isinstance(data, str):
            text = data
        elif isinstance(data, list) and data and isinstance(data[0], str):
            text = data[0]

        if not text:
            return f"{SAFE_OPEN}{SAFE_CLOSE}"

        prompt = self._make_prompt(text)
        out = self.gen(
            prompt,
            max_new_tokens=self.max_new,
            do_sample=False,
            pad_token_id=self.tokenizer.eos_token_id,
            return_full_text=False,
        )[0]["generated_text"]

        # Trim at first </safe>, strip tags, then validators
        if SAFE_CLOSE in out:
            out = out.split(SAFE_CLOSE, 1)[0]
        out = self._strip_tags(out)

        out, hits = apply_validators(out, mode=self.validate_mode)
        # Optional: print hits in warn mode to logs without changing output
        if hits and self.validate_mode == "warn":
            print(f"[validator hits] {hits}")

        return f"{SAFE_OPEN}{out}{SAFE_CLOSE}"
"""

with open(os.path.join(ADAPTER_DIR, "handler.py"), "w", encoding="utf-8") as f:
    f.write(handler_py)

print("Updated handler with pluggable validators at:", os.path.join(ADAPTER_DIR, "handler.py"))

Updated handler with pluggable validators at: ./outputs/safe-prompt-3b-lora/handler.py


In [7]:
# Step 1.3 — Write requirements.txt for the endpoint container
import os, pathlib, textwrap

ADAPTER_DIR = "./outputs/safe-prompt-3b-lora"
pathlib.Path(ADAPTER_DIR).mkdir(parents=True, exist_ok=True)

reqs = textwrap.dedent("""
transformers==4.57.1
peft==0.17.1
bitsandbytes==0.48.1
accelerate==1.11.0
safetensors>=0.4.5
""").strip() + "\n"

with open(os.path.join(ADAPTER_DIR, "requirements.txt"), "w", encoding="utf-8") as f:
    f.write(reqs)

print("Wrote:", os.path.join(ADAPTER_DIR, "requirements.txt"))


Wrote: ./outputs/safe-prompt-3b-lora/requirements.txt


In [8]:
# Step 1.5 — Upload adapters + handler + requirements + README to Hugging Face
import os, pathlib
from huggingface_hub import HfApi, upload_folder, whoami

ADAPTER_DIR = "./outputs/safe-prompt-3b-lora"
REPO_ID     = "chinu-codes/safe-prompt-llama-3_2-3b-lora"

# Sanity checks
assert pathlib.Path(ADAPTER_DIR).exists(), f"Adapter dir not found: {ADAPTER_DIR}"
for must in ["handler.py", "requirements.txt", "README.md"]:
    p = pathlib.Path(ADAPTER_DIR) / must
    assert p.exists(), f"Missing file: {p}"

print("Uploading folder to:", REPO_ID)
upload_folder(
    repo_id=REPO_ID,
    repo_type="model",
    folder_path=ADAPTER_DIR,
    commit_message="Add PEFT adapters + custom handler + requirements + README",
    ignore_patterns=["*.ipynb_checkpoints*", "*.DS_Store"],
)
print("Upload complete.")

# List files to verify
api = HfApi()
files = api.list_repo_files(REPO_ID, repo_type="model")
print("\nFiles in repo:")
for f in files:
    print("-", f)


Uploading folder to: chinu-codes/safe-prompt-llama-3_2-3b-lora


- empty or missing yaml metadata in repo card


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

Upload complete.

Files in repo:
- .gitattributes
- README.md
- adapter_config.json
- adapter_model.safetensors
- chat_template.jinja
- checkpoint-113/README.md
- checkpoint-113/adapter_config.json
- checkpoint-113/adapter_model.safetensors
- checkpoint-113/chat_template.jinja
- checkpoint-113/optimizer.pt
- checkpoint-113/rng_state.pth
- checkpoint-113/scaler.pt
- checkpoint-113/scheduler.pt
- checkpoint-113/special_tokens_map.json
- checkpoint-113/tokenizer.json
- checkpoint-113/tokenizer_config.json
- checkpoint-113/trainer_state.json
- checkpoint-113/training_args.bin
- handler.py
- requirements.txt
- special_tokens_map.json
- tokenizer.json
- tokenizer_config.json
- training_args.bin
