# In [None]:
# Meta Llama 3‑1B – RAG Prototype with Strict JSON Output
# ===============================================================
# **This notebook now targets Meta’s official Llama 3‑1B checkpoint.**
# Nothing else changed: we still enforce JSON via grammar and hit
# Postgres with pgvector.  Follow every section top‑to‑bottom.
#
# --------------------------------------------------------------
# 0. One‑time prerequisites (outside Python)
# --------------------------------------------------------------
#  a. Install / rebuild libraries (run in PowerShell *inside* the
#     `llms` Conda env).  CUDA is optional – delete the two `$env:`
#     lines if you only need CPU.
#
#     $env:CMAKE_ARGS = "-DGGML_CUDA=on"
#     $env:FORCE_CMAKE = "1"
#     python -m pip install --upgrade --no-cache-dir --force-reinstall `
#         llama-cpp-python sentence-transformers psycopg2-binary `
#         huggingface-hub
#
#  b. **Hugging Face login + license acceptance**
#
#     1. `huggingface-cli login`  (paste your HF token)
#     2. Go to <https://huggingface.co/meta-llama/Meta-Llama-3-1B> and
#        click **“Agree and access repository”** (license gating step).
#
# --------------------------------------------------------------
# 1. Download the safetensors weights & convert → GGUF
# --------------------------------------------------------------
# The following Python cell will:
#   • Grab the 2‑part safetensors checkpoint & tokenizer JSONs
#   • Use `convert.py` from llama.cpp to make a Q4_0 GGUF file
#     (≈600 MB) in `C:\Users\turgu\models\llama3-1b`.
#   • Skip the conversion on subsequent runs if the GGUF already
#     exists.
#
# Requires: llama.cpp repo cloned somewhere.  If you’ve never cloned
# it, the cell tries to grab it automatically under `%USERPROFILE%\src`.

import os, subprocess, sys, textwrap, shutil, json
from pathlib import Path
from huggingface_hub import hf_hub_download

# ----------------------------- Paths -----------------------------
# NOTE(review): confirm this repo id and the file names requested below
# against the actual file listing on the Hugging Face Hub.
HF_REPO = "meta-llama/Meta-Llama-3-1B"
MODEL_DIR = Path(r"C:/Users/turgu/models/llama3-1b")
MODEL_DIR.mkdir(parents=True, exist_ok=True)
GGUF_PATH = MODEL_DIR / "llama3-1b.Q4_0.gguf"

# 1‑A. Download safetensors & tokenizer if GGUF missing
if not GGUF_PATH.is_file():
    print("GGUF not found – downloading base weights from Hugging Face…")
    for filename in [
        "model-00001-of-00002.safetensors",
        "model-00002-of-00002.safetensors",
        "model.safetensors.index.json",
        "tokenizer.json",
        "tokenizer.model",
        "config.json",
    ]:
        local_path = hf_hub_download(repo_id=HF_REPO, filename=filename, local_dir=MODEL_DIR)
        print("‣", local_path)

    # 1‑B. Ensure llama.cpp repo present (for the conversion script)
    # USERPROFILE is normally only defined on Windows.  A literal "~" is never
    # expanded by Path(), so fall back to the real home directory instead.
    SRC_DIR = Path(os.environ.get("USERPROFILE") or Path.home()) / "src"
    LLAMA_CPP_DIR = SRC_DIR / "llama.cpp"
    if not LLAMA_CPP_DIR.exists():
        print("Cloning llama.cpp repo for conversion script…")
        subprocess.check_call(["git", "clone", "https://github.com/ggerganov/llama.cpp", str(LLAMA_CPP_DIR)])

    # 1‑C. Run the conversion script → GGUF (Q4_0)
    # `convert.py` is tried first to keep the previous behaviour on checkouts
    # that still ship it; the other names cover checkouts where the HF
    # converter lives under a different file name.
    convert_script = next(
        (
            candidate
            for candidate in (
                LLAMA_CPP_DIR / "convert.py",
                LLAMA_CPP_DIR / "convert_hf_to_gguf.py",
                LLAMA_CPP_DIR / "convert-hf-to-gguf.py",
            )
            if candidate.is_file()
        ),
        None,
    )
    if convert_script is None:
        raise FileNotFoundError(
            f"No llama.cpp conversion script found in {LLAMA_CPP_DIR} "
            "(looked for convert.py, convert_hf_to_gguf.py, convert-hf-to-gguf.py)."
        )

    # Write to a temporary name and rename only after the converter succeeded,
    # so an aborted run cannot leave a truncated file that the `is_file()`
    # check above would accept on the next run.
    partial_path = GGUF_PATH.with_name(GGUF_PATH.name + ".partial")

    # NOTE(review): the llama.cpp converters may only accept float / q8_0
    # style values for --outtype.  If "q4_0" is rejected, convert to f16 here
    # and quantize in a second step – confirm against the checked-out version.
    print("Converting to GGUF (Q4_0)… this may take a few minutes.")
    convert_cmd = [
        sys.executable,
        str(convert_script),
        "--outfile", str(partial_path),
        "--outtype", "q4_0",
        str(MODEL_DIR),  # directory containing *.safetensors & config.json
    ]
    subprocess.check_call(convert_cmd)
    partial_path.replace(GGUF_PATH)
else:
    print("✅ GGUF already present →", GGUF_PATH)

# --------------------------------------------------------------
# 2. Prepare tag list (smart_tag_groups.json)
# --------------------------------------------------------------
# The tag file maps group names to collections of tag strings; only the
# flattened, de-duplicated, sorted tag names are used afterwards.
TAG_FILE = Path("smart_tag_groups.json")
if TAG_FILE.is_file():
    with TAG_FILE.open("r", encoding="utf-8") as f:
        tag_data = json.load(f)
else:
    raise FileNotFoundError("Place smart_tag_groups.json in the notebook folder or update TAG_FILE path.")

# Union of every group's tags, then sorted for a stable order.
all_tags = sorted(set().union(*tag_data.values()))
print("Loaded", len(all_tags), "tags.")

# --------------------------------------------------------------
# 3. Spin‑up Llama 3‑1B (GGUF)
# --------------------------------------------------------------
from llama_cpp import Llama, LlamaGrammar

# Use every logical core reported by the OS; os.cpu_count() may return None,
# in which case 8 threads are requested.
_n_threads = os.cpu_count()
if not _n_threads:
    _n_threads = 8

# Load the GGUF file with a 2048-token context window.
llm = Llama(model_path=str(GGUF_PATH), n_ctx=2048, n_threads=_n_threads)
print("Model loaded ✔️")

# --------------------------------------------------------------
# 4. Build system prompt & GBNF grammar
# --------------------------------------------------------------
import textwrap, re

# Only a preview of the approved tags is spelled out in the prompt to keep it
# short.  The total and the "… and N more" suffix are derived from `all_tags`
# so they stay correct when the tag file changes, and the suffix is dropped
# when every tag already fits in the preview (no negative "more" count).
_PROMPT_TAG_PREVIEW = 50
_preview_tags = all_tags[:_PROMPT_TAG_PREVIEW]
_hidden_tag_count = len(all_tags) - len(_preview_tags)
_tag_preview = ", ".join(_preview_tags)
if _hidden_tag_count > 0:
    _tag_preview += f", … and {_hidden_tag_count} more."

SYSTEM_PROMPT = textwrap.dedent(f"""
You are a helpful assistant that converts user requests into a structured JSON query for a recipe search system.

Respond **only** with a JSON object having **exactly** these keys in this order:
- name_description (string)
- include_tags (list of strings)
- exclude_tags (list of strings)
- include_ingredients (list of strings)
- exclude_ingredients (list of strings)
- count (integer, default 5 if user gave no number)
- reason (string, ≤ 25 words)

Only use tags from the following approved list ({len(all_tags)} total):
{_tag_preview}
""")

# Auto‑generate tag alternatives for the grammar
def _gbnf_json_string_literal(value):
    """Return a GBNF string literal that matches *value* written as a JSON string.

    The inner ``json.dumps`` produces the JSON text (including its surrounding
    double quotes); the outer ``json.dumps`` escapes those quotes and any
    backslashes so the result can be pasted into a grammar as a quoted literal.
    """
    return json.dumps(json.dumps(value))


TAG_ALTS = " | ".join(_gbnf_json_string_literal(t) for t in all_tags)

# With no approved tags the `tag` rule would be empty (a grammar syntax error),
# so in that case the tag lists are restricted to the empty list.
if TAG_ALTS:
    _TAG_RULES = (
        'list_tag    ::= "[" ws? ( tag ( ws? "," ws? tag )* )? ws? "]"\n'
        "tag         ::= " + TAG_ALTS
    )
else:
    _TAG_RULES = 'list_tag    ::= "[" ws? "]"'

# The template is a *raw, non-f* string on purpose:
#   • in an f-string, `"{" kvs "}"` is parsed as a replacement field and the
#     braces vanish from the grammar;
#   • in a non-raw string, Python consumes the backslashes of `\"` and `\\`
#     before the grammar parser ever sees them.
# The tag rules are spliced in with str.replace instead of interpolation.
# Versus a naive JSON grammar this one also:
#   • forbids raw control characters inside strings and limits escapes to the
#     ones JSON defines, so json.loads() accepts every string it produces;
#   • forbids leading zeros in `count`, which JSON does not allow.
_GBNF_TEMPLATE = r'''
root        ::= object
object      ::= "{" kvs "}"
kvs         ::= kv_name "," kv_inc_t "," kv_exc_t "," kv_inc_i "," kv_exc_i "," kv_count "," kv_reason
kv_name     ::= "\"name_description\" :" ws? string
kv_inc_t    ::= "\"include_tags\" :" ws? list_tag
kv_exc_t    ::= "\"exclude_tags\" :" ws? list_tag
kv_inc_i    ::= "\"include_ingredients\" :" ws? list_str
kv_exc_i    ::= "\"exclude_ingredients\" :" ws? list_str
kv_count    ::= "\"count\" :" ws? int
kv_reason   ::= "\"reason\" :" ws? string
__TAG_RULES__
list_str    ::= "[" ws? ( string ( ws? "," ws? string )* )? ws? "]"
string      ::= "\"" chars* "\""
chars       ::= [^"\\\x00-\x1F] | escape
escape      ::= "\\" ( ["\\/bfnrt] | "u" hex hex hex hex )
hex         ::= [0-9a-fA-F]
int         ::= "0" | [1-9] digit*
digit       ::= [0-9]
ws          ::= [ \t\n\r]+
'''

GBNF = _GBNF_TEMPLATE.replace("__TAG_RULES__", _TAG_RULES)

grammar = LlamaGrammar.from_string(GBNF)
print("Grammar compiled ✔️ (length:", len(GBNF.splitlines()), "lines)")

# --------------------------------------------------------------
# 5. Example generation
# --------------------------------------------------------------
# Single-turn prompt: system instructions, the user's request, then an open
# assistant section for the model to complete.
user_input = "I'm looking for a savory pie recipe with ground beef."
prompt = f"""### SYSTEM\n{SYSTEM_PROMPT}\n\n### USER\n{user_input}\n\n### ASSISTANT\n"""

# Grammar-constrained sampling, capped at 256 new tokens.
_sampling_options = dict(grammar=grammar, max_tokens=256, temperature=0.3)
response = llm(prompt, **_sampling_options)

# The completion text of the first (only) choice is the JSON document.
_first_choice = response["choices"][0]
assistant_json = _first_choice["text"].strip()
print("Raw assistant output →", assistant_json[:120], "…")

import json, pprint
parsed = json.loads(assistant_json)
pprint.pprint(parsed, width=120)

# --------------------------------------------------------------
# 6. Postgres + pgvector retrieval helper (fill in creds)
# --------------------------------------------------------------
import psycopg2, numpy as np
from sentence_transformers import SentenceTransformer

# Sentence-embedding model used to turn the free-text description into the
# query vector for the similarity search.
EMBEDDER = SentenceTransformer("all-MiniLM-L6-v2")

# Connection string handed to psycopg2.connect().  Set the PG_CONN_STR
# environment variable to keep real credentials out of the notebook; the
# placeholder below is used when the variable is not set.
PG_CONN_STR = os.environ.get(
    "PG_CONN_STR",
    "dbname=recipes user=postgres password=yourpw",  # ← adjust
)


def search_recipes(query: dict):
    """Return the recipes nearest to the query description, after tag/ingredient filtering.

    Args:
        query: Parsed model output.  Must contain ``name_description`` (text to
            embed), ``include_tags``, ``exclude_tags``, ``include_ingredients``,
            ``exclude_ingredients`` (lists; empty lists add no filter) and
            ``count`` (used as the SQL ``LIMIT``).

    Returns:
        The ``fetchall()`` result: ``(name, description)`` rows from ``recipe``
        ordered by ``embedding <-> query vector``.

    Raises:
        KeyError: If one of the required keys is missing from ``query``.
    """
    # Each optional filter is kept next to its parameter so the SQL fragments
    # and the bound values cannot get out of step.
    # NOTE(review): assumes `tags` / `ingredients` are array columns whose
    # element type matches the arrays psycopg2 builds from Python lists.
    optional_filters = [
        (" AND tags @> %s", query["include_tags"]),
        (" AND NOT tags && %s", query["exclude_tags"]),
        (" AND ingredients @> %s", query["include_ingredients"]),
        (" AND NOT ingredients && %s", query["exclude_ingredients"]),
    ]
    active_filters = [(clause, values) for clause, values in optional_filters if values]
    where_extra = "".join(clause for clause, _ in active_filters)
    params = [values for _, values in active_filters]

    # psycopg2 has no built-in adapter for numpy arrays, so the embedding is
    # sent in pgvector's text form ("[v1,v2,...]") and cast explicitly.
    q_vec = EMBEDDER.encode(query["name_description"]).astype("float32")
    q_vec_literal = "[" + ",".join(str(component) for component in q_vec.tolist()) + "]"

    # Only the fixed fragments above are interpolated; every value is bound.
    sql = f"""
        SELECT name, description
        FROM recipe
        WHERE TRUE{where_extra}
        ORDER BY embedding <-> %s::vector
        LIMIT %s;
    """
    params += [q_vec_literal, query["count"]]

    conn = psycopg2.connect(PG_CONN_STR)
    try:
        # The cursor context manager closes the cursor; the connection is
        # closed in `finally` so neither leaks when the query raises.
        with conn.cursor() as cur:
            cur.execute(sql, params)
            return cur.fetchall()
    finally:
        conn.close()

# Show only the first column (the recipe name) of every returned row.
print("\nTop recipes →")
_top_rows = search_recipes(parsed)
for _title, *_ in _top_rows:
    print("-", _title)
