# RAG chat

https://github.com/guidance-ai/guidance/blob/ae8830f69553fd658bf32e0fdb478cd9518bab9e/notebooks/art_of_prompt_design/rag.ipynb

In [1]:
# # autoreload your package
# %load_ext autoreload
# %autoreload 2
# import stampy_nb


In [2]:
q = "What kinds of computational neuroscience techniques could be used in MechInterp?."

In [3]:
## secrets
from dotenv import load_dotenv

load_dotenv()  # take environment variables from .env.

# import warnings
# warnings.filterwarnings("ignore", ".*does not have many workers.*")

## numeric, plotting
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline
plt.style.use("ggplot")
plt.rcParams["figure.figsize"] = (7.0, 4)

## utils
from pathlib import Path
from tqdm.auto import tqdm
import logging, os, re
import collections, functools, itertools

from loguru import logger

  from .autonotebook import tqdm as notebook_tqdm


## Helpers

### Search and data

In [4]:
import os
import diskcache
import pathlib
import requests
import html
from urllib.parse import urlparse
import urllib.parse
import io
import html
import html.parser

# Base directory the cache path below is built from.
curr_dir = "./"
# On-disk cache of raw Bing API responses (read and written by bing_search).
# The path climbs one level, so the cache sits in the parent of curr_dir.
_bing_cache = diskcache.Cache(f"{curr_dir}/../bing.diskcache")


# Bing subscription key taken from the environment; raises KeyError when unset.
BING_SEARCH_KEY = os.environ["BING_SEARCH_KEY"]


class MLStripper(html.parser.HTMLParser):
    """HTML parser that throws away markup and keeps only the text nodes."""

    def __init__(self):
        # convert_charrefs=True makes the parser decode character references
        # (e.g. "&amp;") before the text reaches handle_data.
        super().__init__(convert_charrefs=True)
        self.strict = False  # legacy attribute, kept for backward compatibility
        self.text = io.StringIO()

    def handle_data(self, d):
        """Append one run of text found between tags to the buffer."""
        self.text.write(d)

    def get_data(self):
        """Return everything collected so far as a single string."""
        return self.text.getvalue()


def strip_tags(html):
    """Return ``html`` with all tags removed and character references decoded.

    Args:
        html: Markup (or plain text) to clean.

    Returns:
        The text content only.
    """
    s = MLStripper()
    s.feed(html)
    # feed() holds back trailing text that might be an unfinished character
    # reference (e.g. the "&D" in "R&D"); close() flushes it so it is not lost.
    s.close()
    return s.get_data()


def bing_search(search_terms, count=10):
    """Query the Bing Web Search API and return the raw web-page results.

    Responses are stored in ``_bing_cache`` under a key built from the search
    term and ``count``. A cached response that lacks a ``webPages`` section is
    fetched again.

    Args:
        search_terms: One query string, or an iterable of query strings.
        count: Number of results requested per query.

    Returns:
        A flat list of the ``webPages.value`` dicts of every query, in query
        order. Each dict gets an extra ``"content"`` key holding the snippet
        with HTML tags stripped.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    if isinstance(search_terms, str):
        search_terms = [search_terms]
    search_url = "https://api.bing.microsoft.com/v7.0/search"

    headers = {"Ocp-Apim-Subscription-Key": BING_SEARCH_KEY}
    search_results = []
    for search_term in search_terms:
        params = {
            "q": search_term,
            "textDecorations": True,
            "textFormat": "HTML",
            # Was misspelled "cout", so the requested size was never sent.
            "count": count,
        }
        params_key = search_term + "-___-" + str(count)
        # Read the cache entry once instead of on every access.
        payload = _bing_cache[params_key] if params_key in _bing_cache else None
        if payload is None or "webPages" not in payload:
            # A timeout keeps one stalled request from hanging the notebook.
            response = requests.get(
                search_url, headers=headers, params=params, timeout=30
            )
            response.raise_for_status()
            payload = response.json()
            _bing_cache[params_key] = payload
        if "webPages" not in payload:
            # no results
            # loguru formats with str.format-style braces, not "%s".
            logger.warning("No results found for {}", search_term)
            continue
        data = payload["webPages"]["value"]
        for r in data:
            r["content"] = strip_tags(r["snippet"])
        search_results.extend(data)
    return search_results


def top_snippets(query, n=3):
    """Return title/snippet pairs for the first ``n`` Bing hits of ``query``.

    Args:
        query: Search string (or list of strings) passed to ``bing_search``.
        n: Number of results to request and the maximum number returned.

    Returns:
        A list of ``{"title": ..., "snippet": ...}`` dicts.
    """
    hits = bing_search(query, count=n)
    snippets = []
    for hit in hits[:n]:
        snippets.append({"title": hit["name"], "snippet": hit["content"]})
    return snippets


def format_snippet(s):
    """Render one search result as three ``label: value`` lines.

    Args:
        s: Mapping with ``"url"``, ``"name"`` and ``"snippet"`` keys.

    Returns:
        The url, title and extract lines joined by newlines, with leading and
        trailing whitespace removed.
    """
    lines = (
        f"url: {s['url']}",
        f"title: {s['name']}",
        f"extrat: {s['snippet']}",
    )
    return "\n".join(lines).strip()


# q = "What are the main categories of mechinterp interventions?"
# print(bing_search(q)[0])

In [5]:
# from datasets import load_dataset
# data = load_dataset('StampyAI/alignment-research-dataset', trust_remote_code=True)
# # TODO also search google and bing https://python.langchain.com/v0.2/docs/integrations/tools/search_tools/
# data

### Prompts

In [6]:
# https://github.com/StampyAI/stampy-chat/blob/main/api/src/stampy_chat/settings.py

# SOURCE_PROMPT = (
#     "You are a helpful assistant knowledgeable about AI Alignment and Safety. "
#     "Please give a clear and coherent answer to the user's questions. (written after \"Q:\") "
#     "using the following sources. Each source is labeled with a letter. Feel free to "
#     "use the sources in any order, and try to use multiple sources in your answers.\n\n"
# )



In [7]:
import random
import guidance
from guidance import (
    models,
    gen,
    select,
    substring,
    string,
    prefix_tree,
    regex,
    user,
    assistant,
    system,
)
from guidance import silent, capture, Tool, one_or_more, any_char, commit_point

# llama2 = models.LlamaCpp("/home/marcotcr_google_com/work/models/llama-2-13b-chat.Q6_K.gguf", n_gpu_layers=-1, n_ctx=4096)
# Two OpenAI-backed guidance models: the larger one drives the pipeline below.
lm_big = models.OpenAI("gpt-4o")
lm_small = models.OpenAI("gpt-3.5-turbo")

# Smoke test that the loguru logger prints in this notebook.
logger.info("test")

[32m2024-06-29 19:52:05.165[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m21[0m - [1mtest[0m


In [8]:
def format_snippets(snippets, start=1):
    """Render snippets as a numbered list for inclusion in a prompt.

    Args:
        snippets: Iterable of dicts with ``"title"`` and ``"snippet"`` keys.
        start: Number given to the first entry.

    Returns:
        One string in which every entry is ``"[i] title"``, a newline, the
        snippet text, and a blank line.
    """
    entries = [
        f"[{number}] {item['title']}\n{item['snippet']}\n\n"
        for number, item in enumerate(snippets, start=start)
    ]
    return "".join(entries)


@guidance
def search(lm, query):
    """Run a web search for ``query`` and append the hits as an observation.

    The query is stored on the model state under ``"query"`` and the formatted
    snippets under ``"snippets"``.
    """
    # Remember the query on the model state for later steps.
    lm = lm.set("query", query)
    # The search itself happens here.
    observation = format_snippets(top_snippets(query))
    lm = lm.set("snippets", observation)
    lm += "\nObservation:\n" + lm["snippets"]
    return lm


@guidance
def rephrase(lm, query: str = "What is the main category of mechinterp interventions?"):
    """Ask the model to rewrite ``query`` as an academic search query.

    The exchange runs inside ``silent()``; the generated text is captured
    under the key ``"q_rephrased"`` on the returned model state.
    """
    system_prompt = "You are LibrarianGPT an intelligence assisant that can improve users searching, helping them find documents they missed.  Please return only the search and no commentary."
    request = f'Please draft an academic search query with synonyms and alternative phrases that will find documents to answer the following question: "{query}". Return only the search and no commentary.'
    with silent():
        with system():
            lm += system_prompt
        with user():
            lm += request
        with assistant():
            lm += gen("q_rephrased")
    return lm


@guidance
def example_answer(
    lm, query: str = "What is the main category of mechinterp interventions?"
):
    """Ask the model to draft a short example answer to ``query``.

    The exchange runs inside ``silent()``; the generated text is captured
    under the key ``"q_example_ans"`` on the returned model state.
    """
    system_prompt = "You are LibrarianGPT an intelligence assisant that can improve users searching by providing example answers that will help with vector based similarity search. Please return only the example and no commentary."
    request = f"Please draft a concrete and concise example answer that ties together all elements of the following question in a paragraph or less: {query}"
    with silent():
        with system():
            lm += system_prompt
        with user():
            lm += request
        with assistant():
            lm += gen("q_example_ans")
    return lm

### Run

In [9]:
import re
import html
import markdown2

markdowner = markdown2.Markdown()
from IPython.core.display import HTML


def doc2htmlref(d: dict, n: int):
    """Build an HTML link for citation ``n`` that shows the document on hover.

    Args:
        d: Document with ``"content"`` and ``"url"`` keys and an optional
            ``"name"``.
        n: Label displayed as the link text.

    Returns:
        An ``<a>`` element pointing at the document URL whose ``title``
        tooltip carries the name, content and URL.
    """
    # The URL comes from external search results; escape it so a quote or
    # ampersand cannot break out of the href attribute.
    href = html.escape(str(d["url"]), quote=True)
    tooltip = html.escape(
        f"### Name\n{d.get('name', '')}\n\n## Content\n{d['content']}\n\n## URL\n{d['url']}"
    ).strip()
    return f'<a href="{href}"><span title="{tooltip}">{n}</span></a>'


def format_ans(ans, docs):
    """Render an LLM answer as notebook HTML with linked, hoverable citations.

    Bracketed numeric citations such as ``[1]``, ``[1,3]`` or ``[ 2 , 11 ]``
    become links into ``docs`` (indexed by the cited number). A list of the
    documents actually cited is appended under a "References" heading.

    Args:
        ans: Markdown answer text produced by the model.
        docs: Sequence of documents with ``"url"``, ``"name"`` and
            ``"content"`` keys; citation ``k`` refers to ``docs[k]``.

    Returns:
        An ``IPython`` ``HTML`` object.
    """
    # Escape first so raw HTML in the model output is neutralised, then turn
    # the remaining markdown into HTML.
    ans = html.escape(ans)
    ans = markdowner.convert(ans)
    text2 = "<h3>Anwser</h3>" + ans

    used = set()

    def _link_citation(match):
        """Replace one ``[n, m, ...]`` group with linked reference numbers."""
        rendered = []
        # Pull the digits out directly: splitting on ", " broke on "[1,3]".
        for n in re.findall(r"\d+", match.group()):
            idx = int(n)
            if idx < len(docs):
                used.add(idx)
                rendered.append(doc2htmlref(docs[idx], n))
            else:
                # The model cited a source that does not exist; keep the
                # number as plain text instead of raising IndexError.
                rendered.append(n)
        return "[" + ", ".join(rendered) + "]"

    # Require at least one number so an empty "[]" is left untouched.
    text2 = re.sub(r"\[\s*\d+(?:\s*,\s*\d+)*\s*\]", _link_citation, text2)

    # html list references
    text2 += "<p/><p/><h3>References:</h3><p/>"
    for r in sorted(used):
        d = docs[r]
        href = html.escape(str(d["url"]), quote=True)
        text2 += f'<li><a href="{href}">[{r}]</a>: {html.escape(d["name"])} - {html.escape(d["content"][:300])}</li>'

    return HTML(text2)


# ans = """Mechanistic interpretability (mechinterp) interventions can be broadly categorized into several types, each focusing on different aspects of understanding and explaining AI models. Here are the main categories:

# 1. **Post-Hoc Interpretability Techniques**: [1, 2, 3] These techniques are applied after the model has been trained to gain insights into its behavior and decision-making processes. They include efforts to uncover general,[2, 11] transferable principles across models and tasks, as well as automating the discovery and interpretation of critical circuits in trained models [11]."""
# format_ans(ans, docs)

In [10]:
# Step 1 search
lm = lm_big

# Plain search with the question as typed.
docs1 = bing_search(q)

# Step 2: search again with an LLM-rephrased query and with an LLM-written
# example answer; surrounding quotes and spaces are trimmed from both.
q_rephrased = (lm + rephrase(q))["q_rephrased"].strip("\"' ")
docs2 = bing_search(q_rephrased)

q_example_ans = (lm + example_answer(q))["q_example_ans"].strip("\"' ")
docs3 = bing_search(q_example_ans)

# Tag every hit with the search that produced it and pool them.
# (The loop variable stays named `d`: a later cell reads it.)
docs = []
for source, hits in (
    ("bing", docs1),
    ("bing_rephrased", docs2),
    ("bing_example_ans", docs3),
):
    for d in hits:
        d["source"] = source
        docs.append(d)

# Shuffle so the pooled order carries no hint of which search found a hit.
random.shuffle(docs)

# # Step 3 rerank

In [11]:
from guidance.models._model import ConstraintException

# https://github.com/PrithivirajDamodaran/FlashRank/blob/25e58dac4276b2736586dd6edcb4c00cac1320a3/flashrank/Ranker.py#L149
"""
This reranking is taken from flashrank and there are a few lessons
- using this format is effective and forces a ranking
- using assistant messages as a separator is effective
"""


@guidance
def rank_doc2(lm, docs, query):
    """Build a RankGPT-style chat asking the model to order ``docs`` by relevance.

    Passages are sent one per user turn, numbered from 1, each acknowledged by
    a scripted assistant turn. The model's reply is captured under ``"rank"``.

    Args:
        lm: guidance model state to extend.
        docs: Sequence of dicts with ``"name"`` and ``"content"`` keys.
        query: The search query relevance is judged against.

    Returns:
        The extended model state.
    """
    num = len(docs)
    with system():
        lm += """You are RankGPT, an intelligent assistant that can rank passages based on their relevancy to the query."""
    with user():
        # NOTE(review): multi-line literal under @guidance — keep its form
        # as is; the decorator may post-process triple-quoted strings.
        lm += f"""
I will provide you with {num} passages, each indicated by number identifier []. \nRank the passages based on their relevance to query: {query}"""
    with assistant():
        lm += "Okay, please provide the passages."

    # One user/assistant exchange per passage; identifiers are 1-based.
    for i, d in enumerate(docs):
        with user():
            lm += f"[{i + 1}] Title: {d['name']}\nContent: {d['content']}"
        with assistant():
            lm += f"Received passage [{i+1}]."
    with user():
        example_ordering = "[2] > [1]"
        lm += f"Search Query: {query}.\nRank the {num} passages above based on their relevance to the search query. All the passages should be included and listed using identifiers, in descending order of relevance. The output format should be [] > [], e.g., {example_ordering}, Only respond with the ranking results, do not say any word or explain."
    with assistant():
        lm += gen("rank", temperature=0.5)
    return lm


def rerank2(lm, docs, query=None):
    """Rerank ``docs`` with the LLM and stamp each one with a ``"rank"``.

    Args:
        lm: guidance model used for ranking.
        docs: Documents with ``"name"`` and ``"content"`` keys.
        query: Query to rank against; defaults to the notebook-level ``q``.

    Returns:
        The documents ordered from least to most relevant. Each gets
        ``d["rank"]`` set to its position, so a higher rank means more
        relevant. Documents the model failed to mention are kept and treated
        as least relevant.
    """
    if query is None:
        query = q
    r = (lm + rank_doc2(docs, query))["rank"]

    # Pull identifiers out by pattern rather than an exact " > " split, so
    # stray whitespace, punctuation or commentary cannot break parsing.
    order = []
    seen = set()
    for token in re.findall(r"\d+", r):
        idx = int(token) - 1  # identifiers in the prompt are 1-based
        if 0 <= idx < len(docs) and idx not in seen:
            seen.add(idx)
            order.append(idx)
    # The model was asked to list every passage; if it skipped some, append
    # them after the ranked ones instead of silently dropping them.
    order.extend(i for i in range(len(docs)) if i not in seen)

    docs = [docs[i] for i in reversed(order)]
    for i, d in enumerate(docs):
        d["rank"] = i
    return docs


# Rerank the pooled search hits with the large model; silent() wraps the call
# so the ranking chat is not rendered in the notebook.
with silent():
    docs_all2 = rerank2(lm_big, docs)

In [14]:
# Deeper
# d = docs_all2[2]
# NOTE(review): `d` is whatever an earlier cell's loop left behind, and `url`
# does not appear to be used below — confirm before relying on it.
url = d["url"]


# Browser-like request headers sent with every scrape.
# NOTE(review): "br" is advertised in Accept-Encoding; presumably this needs
# a brotli decoder installed for requests to decode such responses — verify.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36",
    "Referer": "https://www.bing.com/",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Accept-Encoding": "gzip, deflate, br",
}
# One shared session so all scrapes reuse the headers above.
session = requests.Session()
session.headers.update(headers)


from markdownify import markdownify as md

@functools.lru_cache()
def scrape_url(url):
    """Download ``url`` and return the page converted to markdown.

    Results are memoised per URL by ``functools.lru_cache``.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page as markdown text with ``<a>`` tags stripped.

    Raises:
        requests.RequestException: On connection problems, timeouts, or an
            HTTP error status.
    """
    # Without a timeout a single unresponsive host would stall the whole
    # scraping loop indefinitely.
    r = session.get(url, timeout=30)
    r.raise_for_status()
    # Convert the HTML to markdown.
    return md(r.text, strip=["a"])

@guidance
def summarize(lm, query, content):
    """Ask the model for a query-focused summary of ``content``.

    The generated summary is captured under the key ``"summarize"`` on the
    returned model state.
    """
    system_prompt = "You are SummarizeGPT, an intelligent assistant that can summarize long passages of text."
    request = f"Please summarize the following content. Use markdown block qoutes to directly qoute \n> highly relevent material\n- briefly summarize possibly relevant material or context as markdown lists\n\nand completly ignore irrelevant material. Judge relevence by the user query. Only respond with the content.\n\n### Query: {query}\n\n### Content: {content}"
    with system():
        lm += system_prompt
    with user():
        lm += request
    with assistant():
        lm += gen("summarize")
    return lm

def deepend_docs(docs, q, N=20):
    """Scrape and summarise the first ``N`` documents, then prepend the results.

    Each of ``docs[:N]`` is fetched with ``scrape_url`` and summarised against
    ``q`` using the module-level ``lm``. Documents that fail to scrape or to
    summarise are logged and skipped.

    Args:
        docs: Documents with ``"url"``, ``"name"``, ``"rank"`` and
            ``"source"`` keys.
        q: Query the summaries are focused on.
        N: How many leading documents to process.

    Returns:
        The new summary documents (source suffixed with ``"[full]"``)
        followed by the original ``docs``.
    """
    # NOTE(review): docs[:N] takes the list as given. rerank2 appears to
    # return its list least-relevant first, in which case this processes the
    # weakest hits — confirm the intended ordering with the caller.
    deeper = []
    for d in docs[:N]:
        try:
            content = scrape_url(d["url"])
        except Exception as e:
            logger.error(f"Failed to scrape {d['url']} {e}")
            continue

        try:
            generated = (lm + summarize(q, content))["summarize"]
            summary = "Summarized content: " + generated
        except ConstraintException as e:
            logger.error(f"Failed to summarize {d['url']} {e}")
            continue

        logger.info(f"Scraped {d['url']}")
        deeper.append(
            {
                "url": d["url"],
                "name": d["name"],
                "content": summary,
                "rank": d["rank"],
                "source": d["source"] + "[full]",
            }
        )
    return deeper + docs

# Number of leading documents to scrape and summarise.
N = 20
# Summaries come first in the result, followed by the original snippet docs.
docs_all2b = deepend_docs(docs_all2, q, N)
# docs2b

[32m2024-06-29 20:02:19.489[0m | [31m[1mERROR   [0m | [36m__main__[0m:[36mdeepend_docs[0m:[36m47[0m - [31m[1mFailed to summarize https://www.nature.com/articles/s41593-018-0210-5 The model attempted to generate b'> "To learn how cognition is implemented...' after the prompt `b'...10-5)\n\n<|im_end|>\n<|im_start|>assistant\n'`, but that does
not match the given grammar constraints! Since your model is a remote API that does not support full guidance
integration we cannot force the model to follow the grammar, only flag an error when it fails to match.
You can try to address this by improving the prompt, making your grammar more flexible, rerunning with
a non-zero temperature, or using a model that supports full guidance grammar constraints.[0m


In [19]:
# [print(x['content']) for x in docs_all2b[:N]]

In [16]:
def show_search(q, docs):
    """Render search results as an HTML list for a quick visual check.

    Args:
        q: Query shown in the heading.
        docs: Result documents; when every one carries a ``"rank"`` they are
            listed from highest to lowest rank, otherwise in the given order.

    Returns:
        An ``IPython`` ``HTML`` object.
    """
    # Guard against an empty result list (a search may return no hits) and
    # against documents that were never ranked.
    if docs and all("rank" in d for d in docs):
        docs = sorted(docs, key=lambda d: d["rank"])[::-1]
    else:
        print("unranked")

    markup = f"<h4>search q={q}</h4>"
    for i, d in enumerate(docs):
        markup += f'<li>{doc2htmlref(d, i)} - {d["name"]}</li>'
    return HTML(markup)


# QC our searches
# Pooled hits from all three searches, shown under the original question.
display((show_search(q, docs)))

# Hits found with the LLM-rephrased query.
display((show_search(q_rephrased, docs2)))

# Hits found by searching for the LLM-written example answer.
display((show_search(q_example_ans, docs3)))

In [17]:
# # Step 3 summarize and generate
# see make_prompt in stampy https://github.com/StampyAI/stampy-chat/blob/990c5dcad5721484c43f6297d84208614a5bf568/api/src/stampy_chat/chat.py#L245
lm = lm_big


# Final instruction appended after all sources have been shown. Sources are
# labelled with numbers, so the citation format asked for here is numeric;
# format_ans only recognises numeric citations.
QUESTION_PROMPT = (
    "Think step by step and use the provided documents to create the most informed, well reasoned answers possible. Use markdown lists where possible. In your three alternative answers, take differen't approaches that lead to differen't content and citations if reasonably possible. "
    "Cite any claims you make back to each source using the format: [1], [2], etc. If you use multiple sources to make a claim "
    'cite all of them. For example: "AGI is concerning [3, 4, 5]."\n\n'
)


@guidance
def do_answers(lm, query, docs, history_summary="", maxlen=1000):
    """Feed the sources to the model one by one, then ask for a cited answer.

    Each document is sent in its own user turn, labelled ``[i]`` with its
    0-based position in ``docs``, and acknowledged by a scripted assistant
    turn. The answer is captured under the key ``"ans"``.

    Args:
        lm: guidance model state to extend.
        query: The user's question.
        docs: Documents with ``"url"``, ``"name"`` and ``"content"`` keys.
        history_summary: Optional conversation summary prefixed to the
            question.
        maxlen: Maximum number of characters of each document's content
            included in the prompt.

    Returns:
        The extended model state.
    """
    with system():
        lm += """You are a helpful assistant knowledgeable about AI Alignment and Safety. Please give a clear and coherent answer to the user\'s questions. (written after "Q:") using the following sources. Each source is labeled with a number. Feel free to use the sources in any order, and try to use multiple sources in your answers"""
    with user():
        lm += f'Please give a clear and coherent answer to my question. (written after "Q:") using the following sources. Each source is labeled with a number. Feel free to use the sources in any order, and try to use multiple sources in your answers. Q: "{query}". The sources are:'
    with assistant():
        lm += "Understand. I'm ready to carefully consider the first document wrt your query."
    for i, d in enumerate(docs):
        with user():
            lm += f"[{i}]\nUrl: {d['url']}\n##Title\n{d['name']}\n\n## Content\n{d['content'][:maxlen]}"
        with assistant():
            lm += f"I have considered source {i}, next please."
    # Only prefix the summary when there is one; otherwise the prompt would
    # read "Q: : <question>".
    question = f"{history_summary}: {query}" if history_summary else query
    with user():
        lm += QUESTION_PROMPT + f"Q: {question}\n "
    with assistant():
        lm += gen(
            "ans",
            stop="```",
        )
    return lm


# Keep the first 30 entries and flip them, so the head of the list ends up
# last in the prompt where it is more salient.
docs_all3 = list(reversed(docs_all2b[:30]))
r = lm + do_answers(q, docs_all3)
ans = r["ans"]
format_ans(ans, docs_all3)

In [18]:
# # now a nice html interface with tooltips and list of refs, need to replace numbers
# ans = r['ans']
# format_ans(ans, docs3)

## Scratch