# A Prompt Pattern Catalog to Enhance Prompt Engineering with ChatGPT

https://arxiv.org/abs/2302.11382

In [182]:
import json
from ast import literal_eval
import numpy as np
import pandas as pd
import google.generativeai as genai
import streamlit as st
from tqdm.auto import tqdm

# Register tqdm's pandas integration so `progress_apply` shows a progress bar.
tqdm.pandas()

# Configure the Gemini client. The API key is looked up in Streamlit's secrets
# under "GOOGLE_API_KEY", so it is not hard-coded in the notebook.
genai.configure(api_key=st.secrets["GOOGLE_API_KEY"])

In [6]:
# Print the name of every available model that supports the 'embedContent' method.
for model_info in genai.list_models():
  if 'embedContent' not in model_info.supported_generation_methods:
    continue
  print(model_info.name)

models/embedding-001
models/text-embedding-004


## Parsing the $\LaTeX$ File

In [7]:
# Read the raw LaTeX source of the paper into memory.
# The encoding is pinned so the result does not depend on the platform default.
with open("./data/2302.11382v1", "r", encoding="utf-8") as f:
  doc = f.read()
# Show a short preview only; echoing the whole file floods the cell output.
doc[:1000]



In [8]:
# Cut the bibliography off the body: `doc` keeps everything before the marker,
# `ref` keeps the marker plus everything after it. The two-name unpacking
# only succeeds when the marker occurs exactly once.
bib_marker = "\\begin{thebibliography}"
doc, ref = doc.split(bib_marker)
ref = f"{bib_marker}{ref}"

In [9]:
# Split the body on "\section" (the text before the first heading is dropped).
# Each piece starts with "{Title}": everything up to the first '}' is the
# heading (minus the opening brace and any backslashes), the rest is the body.
sections = [
  (heading[1:].replace('\\', ''), body.strip())
  for heading, _sep, body in (
    piece.partition('}') for piece in doc.split("\\section")[1:]
  )
]
pd.DataFrame(sections, columns=['section', 'text'])

Unnamed: 0,section,text
0,Introduction,\label{sec:intro}\nConversational large langua...
1,Comparing Software Patterns with Prompt Patterns,\label{pattern}\n\nThe quality of the output(s...
2,A Catalog of Prompt Patterns for Conversation...,\label{impl}\nThis section presents our catalo...
3,Related Work,\label{related}\n\nSoftware patterns~\cite{gam...
4,Concluding Remarks,\label{conclusion}\n\nThis paper presented a f...


In [10]:
# Break every section into (section, subsection, text) rows.
subsections = []
for section_name, section_text in sections:
  lead_in, *subsection_pieces = section_text.split("\\subsection")
  # Text before the first \subsection belongs to the section itself and is
  # stored, unstripped, under an empty subsection name.
  subsections.append((section_name, "", lead_in))
  for piece in subsection_pieces:
    # piece looks like "{Title}body": split at the first closing brace
    heading, _sep, body = piece.partition('}')
    subsections.append((section_name, heading[1:].replace('\\', ''), body.strip()))
pd.DataFrame(subsections, columns=['section', 'subsection', 'text'])

Unnamed: 0,section,subsection,text
0,Introduction,,\label{sec:intro}\nConversational large langua...
1,Comparing Software Patterns with Prompt Patterns,,\label{pattern}\n\nThe quality of the output(s...
2,Comparing Software Patterns with Prompt Patterns,Overview of Software Patterns,A software pattern provides a reusable solutio...
3,Comparing Software Patterns with Prompt Patterns,Overview of Prompt Patterns,\label{patternform}\nPrompt patterns are simil...
4,Comparing Software Patterns with Prompt Patterns,Evaluating Means for Defining a Prompt Pattern...,"In software patterns, the structure and partic..."
5,Comparing Software Patterns with Prompt Patterns,A Way Forward: Fundamental Contextual Statements,"An open research question, therefore, is what ..."
6,A Catalog of Prompt Patterns for Conversation...,,\label{impl}\nThis section presents our catalo...
7,A Catalog of Prompt Patterns for Conversation...,Summary of the Prompt Pattern Catalog,The classification of prompt patterns is an im...
8,A Catalog of Prompt Patterns for Conversation...,The Meta Language Creation Pattern,\label{firstpattern}\n\n\subsubsection{Intent ...
9,A Catalog of Prompt Patterns for Conversation...,The Output Automater Pattern,\subsubsection{Intent and Context}\n\nThe inte...


In [11]:
# Break every subsection into (section, subsection, subsubsection, text) rows.
subsubsections = []
for sec_name, subsec_name, subsec_text in subsections:
  lead_in, *subsub_pieces = subsec_text.split("\\subsubsection")
  # The part before the first \subsubsection keeps an empty subsubsection name.
  subsubsections.append((sec_name, subsec_name, "", lead_in))
  for piece in subsub_pieces:
    # "{Title}body" -> title without the brace/backslashes, stripped body
    heading, _sep, body = piece.partition('}')
    subsubsections.append(
      (sec_name, subsec_name, heading[1:].replace('\\', ''), body.strip())
    )
df_chunks = pd.DataFrame(
  subsubsections,
  columns=['section', 'subsection', 'subsubsection', 'text'],
)
df_chunks

Unnamed: 0,section,subsection,subsubsection,text
0,Introduction,,,\label{sec:intro}\nConversational large langua...
1,Comparing Software Patterns with Prompt Patterns,,,\label{pattern}\n\nThe quality of the output(s...
2,Comparing Software Patterns with Prompt Patterns,Overview of Software Patterns,,A software pattern provides a reusable solutio...
3,Comparing Software Patterns with Prompt Patterns,Overview of Prompt Patterns,,\label{patternform}\nPrompt patterns are simil...
4,Comparing Software Patterns with Prompt Patterns,Evaluating Means for Defining a Prompt Pattern...,,"In software patterns, the structure and partic..."
...,...,...,...,...
101,A Catalog of Prompt Patterns for Conversation...,The Recipe Pattern,Structure and Key Ideas,Fundamental contextual statements:\n\n% \begin...
102,A Catalog of Prompt Patterns for Conversation...,The Recipe Pattern,Example Implementation,An example usage of this pattern in the contex...
103,A Catalog of Prompt Patterns for Conversation...,The Recipe Pattern,Consequences,One consequence of the recipe pattern is that ...
104,Related Work,,,\label{related}\n\nSoftware patterns~\cite{gam...


In [12]:
# Keep only chunks whose text is non-empty (e.g. drop the empty lead-in left
# when a heading is directly followed by a deeper heading), then renumber rows.
has_text = df_chunks.text.str.len() > 0
df_chunks = df_chunks[has_text].reset_index(drop=True)
df_chunks

Unnamed: 0,section,subsection,subsubsection,text
0,Introduction,,,\label{sec:intro}\nConversational large langua...
1,Comparing Software Patterns with Prompt Patterns,,,\label{pattern}\n\nThe quality of the output(s...
2,Comparing Software Patterns with Prompt Patterns,Overview of Software Patterns,,A software pattern provides a reusable solutio...
3,Comparing Software Patterns with Prompt Patterns,Overview of Prompt Patterns,,\label{patternform}\nPrompt patterns are simil...
4,Comparing Software Patterns with Prompt Patterns,Evaluating Means for Defining a Prompt Pattern...,,"In software patterns, the structure and partic..."
...,...,...,...,...
88,A Catalog of Prompt Patterns for Conversation...,The Recipe Pattern,Structure and Key Ideas,Fundamental contextual statements:\n\n% \begin...
89,A Catalog of Prompt Patterns for Conversation...,The Recipe Pattern,Example Implementation,An example usage of this pattern in the contex...
90,A Catalog of Prompt Patterns for Conversation...,The Recipe Pattern,Consequences,One consequence of the recipe pattern is that ...
91,Related Work,,,\label{related}\n\nSoftware patterns~\cite{gam...


In [13]:
# Collapse every run of whitespace inside the heading columns to a single space.
for level in ('section', 'subsection', 'subsubsection'):
  df_chunks[level] = df_chunks[level].str.replace(r'\s+', ' ', regex=True)

# Prefix each chunk's text with its \section / \subsection / \subsubsection
# headings so the heading path travels with the chunk.
df_chunks['text'] = [
  f"\\section{{{sec}}}\n\\subsection{{{sub}}}\n\\subsubsection{{{subsub}}}\n{body}"
  for sec, sub, subsub, body in df_chunks.itertuples(index=False, name=None)
]

df_chunks

Unnamed: 0,section,subsection,subsubsection,text
0,Introduction,,,\section{Introduction}\n\subsection{}\n\subsub...
1,Comparing Software Patterns with Prompt Patterns,,,\section{Comparing Software Patterns with Pro...
2,Comparing Software Patterns with Prompt Patterns,Overview of Software Patterns,,\section{Comparing Software Patterns with Pro...
3,Comparing Software Patterns with Prompt Patterns,Overview of Prompt Patterns,,\section{Comparing Software Patterns with Pro...
4,Comparing Software Patterns with Prompt Patterns,Evaluating Means for Defining a Prompt Pattern...,,\section{Comparing Software Patterns with Pro...
...,...,...,...,...
88,A Catalog of Prompt Patterns for Conversationa...,The Recipe Pattern,Structure and Key Ideas,\section{A Catalog of Prompt Patterns for Con...
89,A Catalog of Prompt Patterns for Conversationa...,The Recipe Pattern,Example Implementation,\section{A Catalog of Prompt Patterns for Con...
90,A Catalog of Prompt Patterns for Conversationa...,The Recipe Pattern,Consequences,\section{A Catalog of Prompt Patterns for Con...
91,Related Work,,,\section{Related Work}\n\subsection{}\n\subsub...


In [14]:
# One row per distinct (section, subsection, subsubsection) heading path,
# rendered as a markdown table.
heading_cols = ['section', 'subsection', 'subsubsection']
unique_headings = df_chunks.loc[:, heading_cols].drop_duplicates()
toc_mkdn = unique_headings.to_markdown()
print(toc_mkdn)

|    | section                                              | subsection                                                           | subsubsection           |
|---:|:-----------------------------------------------------|:---------------------------------------------------------------------|:------------------------|
|  0 | Introduction                                         |                                                                      |                         |
|  1 | Comparing Software Patterns with Prompt Patterns     |                                                                      |                         |
|  2 | Comparing Software Patterns with Prompt Patterns     | Overview of Software Patterns                                        |                         |
|  3 | Comparing Software Patterns with Prompt Patterns     | Overview of Prompt Patterns                                          |                         |
|  4 | Comparing Software Patterns with Prompt

In [15]:
# Sampling settings for the table-of-contents request; the reply is requested as JSON.
generation_config = dict(
  temperature=0.3,
  top_p=0.95,
  top_k=64,
  max_output_tokens=8192,
  response_mime_type="application/json",
)

# Every listed safety category is set to 'block_none'.
safety_settings = dict(
  harassment='block_none',
  hate='block_none',
  sex='block_none',
  danger='block_none',
)

toc_system_instruction = "You return a simple table of contents with JSON format from a given markdown table. The JSON contains a key/value pair of strings 'table of contents' and markdown un-ordered list using `-`."

model = genai.GenerativeModel(
  model_name="gemini-1.5-flash",
  generation_config=generation_config,
  safety_settings=safety_settings,
  system_instruction=toc_system_instruction,
)

# Send the markdown heading table as the only message of a fresh chat.
chat_session = model.start_chat()
response = chat_session.send_message(toc_mkdn)

In [33]:
# The first part of the first candidate carries the JSON text: decode it and
# pull out the markdown list stored under 'table of contents'.
raw_toc_json = response.candidates[0].content.parts[0].text
toc_json = json.loads(raw_toc_json)
toc = toc_json['table of contents']
print(toc)

- Introduction
- Comparing Software Patterns with Prompt Patterns
  - Overview of Software Patterns
  - Overview of Prompt Patterns
  - Evaluating Means for Defining a Prompt Pattern's Structure and Ideas
  - A Way Forward: Fundamental Contextual Statements
- A Catalog of Prompt Patterns for Conversational LLMs
  - Summary of the Prompt Pattern Catalog
  - The Meta Language Creation Pattern
    - Intent and Context
    - Motivation
    - Structure and Key Ideas
    - Example Implementation
    - Consequences
  - The Output Automater Pattern
    - Intent and Context
    - Motivation
    - Structure and Key Ideas
    - Example Implementation
    - Consequences
  - The Flipped Interaction Pattern
    - Intent and Context
    - Motivation
    - Structure and Key Ideas
    - Example Implementation
    - Consequences
  - The Persona Pattern
    - Intent and Context
    - Motivation
    - Structure and Key Ideas
    - Example Implementation
    - Consequences
  - The Question Refinement Patte

In [34]:
# Persist the generated table of contents; the retriever section reads it back.
with open('./data/toc.txt', mode='w') as toc_file:
  toc_file.write(toc)

## Making Embeddings

In [43]:
# Embed every chunk with the document-retrieval task type.
# Only the four text columns are iterated, so the cell can be re-run after the
# `embedding` column has been added to `df_chunks`.
embds = []
chunk_rows = df_chunks[['section', 'subsection', 'subsubsection', 'text']]
for s, ss, sss, text in tqdm(chunk_rows.itertuples(index=False, name=None), total=len(chunk_rows)):
  # Use the most specific non-empty heading as the title. It is recomputed for
  # every row so a title can never carry over from the previous chunk; when
  # all three headings are empty, no title is sent.
  title = sss or ss or s or None
  embds.append(
    genai.embed_content(
      model="models/text-embedding-004",
      content=text,
      task_type="retrieval_document",
      title=title,
    )["embedding"]
  )

0it [00:00, ?it/s]

In [45]:
# Sanity check: length (dimensionality) of the first chunk's embedding vector.
len(embds[0])

768

In [46]:
# Attach the embeddings to their chunks. This relies on `embds` having been
# built by iterating `df_chunks` in row order, one vector per row.
df_chunks["embedding"] = embds

In [48]:
# Inspect the chunks together with their new `embedding` column.
df_chunks

Unnamed: 0,section,subsection,subsubsection,text,embedding
0,Introduction,,,\section{Introduction}\n\subsection{}\n\subsub...,"[-0.029222721, -0.053995773, -0.062313642, -0...."
1,Comparing Software Patterns with Prompt Patterns,,,\section{Comparing Software Patterns with Pro...,"[-0.04231489, -0.020348035, -0.0792571, 0.0032..."
2,Comparing Software Patterns with Prompt Patterns,Overview of Software Patterns,,\section{Comparing Software Patterns with Pro...,"[-0.03364847, 0.022367166, -0.017332109, 0.010..."
3,Comparing Software Patterns with Prompt Patterns,Overview of Prompt Patterns,,\section{Comparing Software Patterns with Pro...,"[-0.03786857, -0.0046783146, -0.057849854, -0...."
4,Comparing Software Patterns with Prompt Patterns,Evaluating Means for Defining a Prompt Pattern...,,\section{Comparing Software Patterns with Pro...,"[-0.049804308, -0.014273707, -0.08175934, -0.0..."
...,...,...,...,...,...
88,A Catalog of Prompt Patterns for Conversationa...,The Recipe Pattern,Structure and Key Ideas,\section{A Catalog of Prompt Patterns for Con...,"[-0.016889501, -0.0018208668, -0.00963632, -0...."
89,A Catalog of Prompt Patterns for Conversationa...,The Recipe Pattern,Example Implementation,\section{A Catalog of Prompt Patterns for Con...,"[0.023452848, -0.04145436, -0.0069110435, -0.0..."
90,A Catalog of Prompt Patterns for Conversationa...,The Recipe Pattern,Consequences,\section{A Catalog of Prompt Patterns for Con...,"[-0.018518025, -0.018591197, -0.0014663517, 0...."
91,Related Work,,,\section{Related Work}\n\subsection{}\n\subsub...,"[-0.0314918, -0.016237408, -0.07085892, -0.006..."


In [49]:
# Cache chunks + embeddings on disk. The retriever cell parses the `embedding`
# column back from text with `literal_eval` after reading this CSV.
df_chunks.to_csv("./data/2302.11382v1_embeddings.csv")

## Retriever

In [230]:
# Reload the cached chunks; any missing cell is replaced by the empty string.
df_csv = pd.read_csv("./data/2302.11382v1_embeddings.csv", index_col=0)
df_csv = df_csv.fillna('')
# Each embedding is stored as text: parse it as a Python literal, then wrap it
# in a NumPy array.
df_csv["embedding"] = df_csv["embedding"].map(
  lambda cell: np.array(literal_eval(cell))
)

# Table of contents saved earlier in the notebook.
with open('./data/toc.txt', mode='r') as toc_file:
  toc = toc_file.read()

def search_from_section_names(query:str) -> str:
  """Retrieve the LaTeX chunks of the paper named 'A Prompt Pattern Catalog to Enhance Prompt Engineering with ChatGPT' using the section, subsection and subsubsection names.
  The input is a list of three strings of the form `[section, subsection, subsubsection]`. Only the exact matches of the names will be returned.

  Args:
    query: a list of three strings of the form `[section, subsection, subsubsection]`

  Returns:
    A JSON string (pandas `to_json` layout) with the columns `section`,
    `subsection`, `subsubsection` and `text` of every matching chunk.
  """
  # NOTE(review): the annotation says `str` while the docstring and the body
  # expect a list of names. It is left unchanged because the function is
  # registered as a model tool and the annotation may feed the tool schema;
  # confirm before switching it to a list type.
  if isinstance(query, str):
    # A bare string is treated as a section name; `list("abc")` would
    # otherwise explode it into single characters and never match.
    names = [query]
  else:
    # The recorded chat history shows the argument arriving as a `list_value`,
    # so materialise whatever list-like container is passed in.
    names = list(query)
  # Pad missing trailing levels with '' (a negative count yields no padding);
  # anything beyond the third entry is ignored.
  names = names + [''] * (3 - len(names))
  section, subsection, subsubsection = names[:3]

  # Boolean indexing already returns a new frame, so no defensive copy of the
  # (embedding-carrying) `df_csv` is needed.
  is_match = (
    (df_csv['section'] == section)
    & (df_csv['subsection'] == subsection)
    & (df_csv['subsubsection'] == subsubsection)
  )
  return df_csv[is_match][['section', 'subsection', 'subsubsection', 'text']].to_json()

def search_from_text(query:str, top_n:int=5, s:float=.0) -> str:
  """Retrieve the LaTeX chunks of the paper named 'A Prompt Pattern Catalog to Enhance Prompt Engineering with ChatGPT' using cosine similarity of text.
  The input is the user's query about the contents of the paper.

  Args:
    query: the user's query string.
    top_n: the maximum number of chunks to return, most similar first.
    s: the minimum cosine similarity a chunk needs in order to be returned.

  Returns:
    A JSON string (pandas `to_json` layout) with the columns `text` and
    `similarity` of the selected chunks.
  """
  query_embedding = np.asarray(genai.embed_content(
    model="models/text-embedding-004",
    content=query,
    task_type="retrieval_query",
  )["embedding"], dtype=float)

  # Stack the per-chunk vectors into one (n_chunks, dim) matrix.
  if len(df_csv):
    chunk_matrix = np.vstack(list(df_csv["embedding"])).astype(float)
  else:
    chunk_matrix = np.empty((0, query_embedding.size))

  # True cosine similarity: dot product divided by the product of the norms.
  # A zero-length vector gets similarity 0 instead of a division error.
  dots = chunk_matrix @ query_embedding
  norms = np.linalg.norm(chunk_matrix, axis=1) * np.linalg.norm(query_embedding)
  similarity = np.divide(dots, norms, out=np.zeros_like(dots), where=norms > 0)

  # `assign` builds a new frame, so the shared `df_csv` is never modified.
  ranked = df_csv[["text"]].assign(similarity=similarity)
  ranked = ranked[ranked.similarity >= s].sort_values("similarity", ascending=False)
  # NOTE(review): tool-call arguments may arrive as floats (e.g. 5.0) — confirm;
  # `head` needs an int either way.
  return ranked.head(int(top_n)).to_json()

In [231]:
# Smoke test: fetch the lead-in text of the catalog section
# (empty subsection and subsubsection names).
search_from_section_names(['A Catalog of Prompt Patterns for Conversational LLMs', '', ''])

'{"section":{"6":"A Catalog of Prompt Patterns for Conversational LLMs"},"subsection":{"6":""},"subsubsection":{"6":""},"text":{"6":"\\\\section{A Catalog of Prompt Patterns  for Conversational LLMs}\\n\\\\subsection{}\\n\\\\subsubsection{}\\n\\\\label{impl}\\nThis section presents our catalog of prompt patterns that have been applied to solve common problems in the domain of conversational LLM interaction and output generation for automating software tasks. Each prompt pattern is accompanied by concrete implementation samples and examples with and without the prompt.\\n\\n"}}'

In [217]:
# Sampling settings for the question-answering chat; replies are plain text.
generation_config = dict(
  temperature=1.0,
  top_p=0.95,
  top_k=64,
  max_output_tokens=8192,
  response_mime_type="text/plain",
)

# Every listed safety category is set to 'block_none'.
safety_settings = dict(
  harassment='block_none',
  hate='block_none',
  sex='block_none',
  danger='block_none',
)

# System prompt; the saved table of contents is appended at the end.
qa_system_instruction = f"""You are an experienced prompt engineer.
You can retrieve the contents of the paper titled 'A Prompt Pattern Catalog to Enhance Prompt Engineering with ChatGPT'.

If you are not sure, then just say you don't know; never make up a story.
When you use the function `search_from_section_names`, first, you try fill all the three `[section, subsection, subsubsection]` names to get one or two chunks.
If you think we need more chunks, then ask the user want to get more.

You have to use Korean (한국어) only if the user asks in Korean (한국어).
Otherwise you must use English.

Table of Contents (section, subsection, subsubsection):\n{toc}"""

# Both retriever functions are exposed to the model as tools.
model = genai.GenerativeModel(
  model_name="gemini-1.5-flash",
  generation_config=generation_config,
  safety_settings=safety_settings,
  system_instruction=qa_system_instruction,
  tools=[search_from_section_names, search_from_text],
)

# Automatic function calling lets the chat session run the tools itself.
chat_session = model.start_chat(enable_automatic_function_calling=True)
response = chat_session.send_message("Categorize prompt patterns based on the subsection Summary of the Prompt Pattern Catalog.")

In [218]:
# Print the text of the first part of the first candidate (the model's answer).
print(response.candidates[0].content.parts[0].text)

The prompt patterns are classified into five categories: **Input Semantics**, **Output Customization**, **Error Identification**, **Prompt Improvement**, and **Interaction**.

**Input Semantics** focuses on how the LLM understands and translates input into something it can use to generate output. This category includes the *Meta Language Creation* pattern.

**Output Customization** focuses on tailoring the types, formats, structure, or properties of the LLM's output. This category includes the *Output Automater*, *Persona*, *Visualization Generator*, *Recipe*, and *Template* patterns.

**Error Identification** focuses on identifying and resolving errors in the LLM's output. This category includes the *Fact Check List* and *Reflection* patterns.

**Prompt Improvement** focuses on improving the quality of input and output. This category includes the *Question Refinement*, *Alternative Approaches*, *Cognitive Verifier*, and *Refusal Breaker* patterns.

**Interaction** focuses on the inter

In [219]:
# Inspect the recorded turns: the user message, the model's function call and
# the function response are all kept in the history.
chat_session.history

[parts {
   text: "Categorize prompt patterns based on the subsection Summary of the Prompt Pattern Catalog."
 }
 role: "user",
 parts {
   function_call {
     name: "search_from_section_names"
     args {
       fields {
         key: "query"
         value {
           list_value {
             values {
               string_value: "A Catalog of Prompt Patterns for Conversational LLMs"
             }
             values {
               string_value: "Summary of the Prompt Pattern Catalog"
             }
             values {
               string_value: ""
             }
           }
         }
       }
     }
   }
 }
 role: "model",
 parts {
   function_response {
     name: "search_from_section_names"
     response {
       fields {
         key: "result"
         value {
           string_value: "{\"section\":{\"7\":\"A Catalog of Prompt Patterns for Conversational LLMs\"},\"subsection\":{\"7\":\"Summary of the Prompt Pattern Catalog\"},\"subsubsection\":{\"7\":\"\"},\"text\":{\

In [220]:
# Remove the most recent request/response pair from the chat history; the
# removed pair is returned, which is what the cell displays.
chat_session.rewind()

(parts {
   function_response {
     name: "search_from_section_names"
     response {
       fields {
         key: "result"
         value {
           string_value: "{\"section\":{\"7\":\"A Catalog of Prompt Patterns for Conversational LLMs\"},\"subsection\":{\"7\":\"Summary of the Prompt Pattern Catalog\"},\"subsubsection\":{\"7\":\"\"},\"text\":{\"7\":\"\\\\section{A Catalog of Prompt Patterns  for Conversational LLMs}\\n\\\\subsection{Summary of the Prompt Pattern Catalog}\\n\\\\subsubsection{}\\nThe classification of prompt patterns is an important consideration in documenting the patterns. Table~\\\\ref{class:table} outlines the initial classifications for the catalog of prompt patterns we identified in our work with ChatGPT thus far.\\n\\n\\\\begin{table}[htbp]\\n\\\\caption{Classifying Prompt Patterns}\\n{\\\\large\\\\raggedright\\n\\\\centering\\n\\\\begin{tabular}{|p{3.5cm}|p{4.4cm}|}\\n\\\\hline \\\\cellcolor[gray]{0.8}\\\\textbf{Pattern Category} & \\\\cellcolor[gray]{0.8

In [223]:
# After the rewind only the user message and the model's function call remain.
chat_session.history

[parts {
   text: "Categorize prompt patterns based on the subsection Summary of the Prompt Pattern Catalog."
 }
 role: "user",
 parts {
   function_call {
     name: "search_from_section_names"
     args {
       fields {
         key: "query"
         value {
           list_value {
             values {
               string_value: "A Catalog of Prompt Patterns for Conversational LLMs"
             }
             values {
               string_value: "Summary of the Prompt Pattern Catalog"
             }
             values {
               string_value: ""
             }
           }
         }
       }
     }
   }
 }
 role: "model"]

In [224]:
# Wipe the whole conversation by emptying the session's history list in place.
# NOTE(review): `_history` is a private attribute, not public API — this may
# break with another SDK version; starting a new chat is the safer alternative.
chat_session._history.clear()

In [225]:
# Confirm that the history is now empty.
chat_session.history

[]