# 程序辅助语言模型（PAL）链

实现程序辅助语言模型（Program-Aided Language Models，PAL），参考论文 <https://arxiv.org/pdf/2211.10435.pdf>。


In [1]:
from langchain_experimental.pal_chain import PALChain
from langchain_openai import OpenAI

In [None]:
# LLM shared by every PAL chain built below (temperature=0, max_tokens=512).
llm = OpenAI(temperature=0, max_tokens=512)

## 数学提示

In [2]:
# Build a PAL chain from the library's math prompt; with verbose=True the
# generated program is printed while the chain runs.
# NOTE(review): newer langchain_experimental releases may require
# allow_dangerous_code=True here — confirm against the installed version.
pal_chain = PALChain.from_math_prompt(llm, verbose=True)

In [None]:
# Word problem handed to the math-prompt chain (pet-counting arithmetic).
question = "Jan 的宠物数量是 Marcia 的三倍。Marcia 比 Cindy 多两只宠物。如果 Cindy 有四只宠物，三个人总共有多少只宠物？"

In [4]:
# Chain.run is deprecated in LangChain. invoke() returns the full output dict,
# so select the "result" entry to keep displaying only the answer string.
pal_chain.invoke({"question": question})["result"]



[1m> Entering new PALChain chain...[0m
[32;1m[1;3mdef solution():
    """Jan has three times the number of pets as Marcia. Marcia has two more pets than Cindy. If Cindy has four pets, how many total pets do the three have?"""
    cindy_pets = 4
    marcia_pets = cindy_pets + 2
    jan_pets = marcia_pets * 3
    total_pets = cindy_pets + marcia_pets + jan_pets
    result = total_pets
    return result[0m

[1m> Finished chain.[0m


'28'

## 彩色物品

In [5]:
# Rebuild the chain with the library's colored-object prompt; with
# verbose=True the generated program is printed while the chain runs.
pal_chain = PALChain.from_colored_object_prompt(llm, verbose=True)

In [None]:
# Object-counting question for the colored-object chain.
question = "在桌子上，你看到两个蓝色小册子、两个紫色小册子和两副黄色太阳镜。如果我从桌子上移走所有太阳镜，还剩下多少个紫色物品？"

In [7]:
# Chain.run is deprecated in LangChain. invoke() returns the full output dict,
# so select the "result" entry to keep displaying only the answer string.
pal_chain.invoke({"question": question})["result"]



[1m> Entering new PALChain chain...[0m
[32;1m[1;3m# Put objects into a list to record ordering
objects = []
objects += [('booklet', 'blue')] * 2
objects += [('booklet', 'purple')] * 2
objects += [('sunglasses', 'yellow')] * 2

# Remove all pairs of sunglasses
objects = [object for object in objects if object[0] != 'sunglasses']

# Count number of purple objects
num_purple = len([object for object in objects if object[1] == 'purple'])
answer = num_purple[0m

[1m> Finished PALChain chain.[0m


'2'

## 中间步骤
你也可以将 `return_intermediate_steps` 标志设为 `True`，让链在返回答案的同时，一并返回为得到该答案而执行的代码。

In [5]:
# Same colored-object chain, but with return_intermediate_steps=True so the
# chain output also carries an "intermediate_steps" entry (read further below).
pal_chain = PALChain.from_colored_object_prompt(
    llm, verbose=True, return_intermediate_steps=True
)

In [None]:
# Object-counting question used for the intermediate-steps demo.
question = "在桌子上，你看到两个蓝色小册子、两个紫色小册子和两副黄色太阳镜。如果我从桌子上移走所有太阳镜，还剩下多少个紫色物品？"

In [8]:
# Run the chain through invoke(); calling the chain object directly is
# deprecated in LangChain. The returned value is the same output dict, which
# is needed here (rather than a bare answer) to read the intermediate steps.
result = pal_chain.invoke({"question": question})



[1m> Entering new PALChain chain...[0m
[32;1m[1;3m# Put objects into a list to record ordering
objects = []
objects += [('booklet', 'blue')] * 2
objects += [('booklet', 'purple')] * 2
objects += [('sunglasses', 'yellow')] * 2

# Remove all pairs of sunglasses
objects = [object for object in objects if object[0] != 'sunglasses']

# Count number of purple objects
num_purple = len([object for object in objects if object[1] == 'purple'])
answer = num_purple[0m

[1m> Finished chain.[0m


In [11]:
# Show the program text the chain generated and executed to reach the answer.
result["intermediate_steps"]

"# Put objects into a list to record ordering\nobjects = []\nobjects += [('booklet', 'blue')] * 2\nobjects += [('booklet', 'purple')] * 2\nobjects += [('sunglasses', 'yellow')] * 2\n\n# Remove all pairs of sunglasses\nobjects = [object for object in objects if object[0] != 'sunglasses']\n\n# Count number of purple objects\nnum_purple = len([object for object in objects if object[1] == 'purple'])\nanswer = num_purple"

In [None]:
from IPython.display import HTML, display


def plt_img_base64(img_base64):
    """Render a base64-encoded image as HTML output.

    Args:
        img_base64: Base64 payload of the image; it is embedded as-is in a
            ``data:image/jpeg;base64,`` URI.
    """
    # Build the data URI first, then wrap it in an <img> tag.
    data_uri = f"data:image/jpeg;base64,{img_base64}"
    markup = f'<img src="{data_uri}" />'

    # Hand the markup to IPython so it is shown as rendered HTML.
    display(HTML(markup))


# Fetch documents for a sample query, then show each one: base64 payloads are
# rendered as images, everything else is printed as text.
docs = retriever.invoke("Woman with children", k=10)
for doc in docs:
    if not is_base64(doc.page_content):
        print(doc.page_content)
        continue
    plt_img_base64(doc.page_content)

In [None]:
from operator import itemgetter

from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI


def prompt_func(data_dict):
    """Assemble the multimodal prompt sent to the chat model.

    Args:
        data_dict: Mapping with a "question" entry and a "context" entry.
            The context provides "texts" (joined with newlines) and
            "images" (only the first one is used, embedded as a base64
            JPEG data URI).

    Returns:
        A one-element list holding a HumanMessage whose content is the
        optional image part followed by the text part.
    """
    context = data_dict["context"]

    # Collapse the retrieved text snippets into a single string.
    formatted_texts = "\n".join(context["texts"])

    # Attach the first image, if any were retrieved.
    images = context["images"]
    image_parts = (
        [
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{images[0]}"},
            }
        ]
        if images
        else []
    )

    # Instruction text for the analysis, followed by the keywords and texts.
    question = data_dict["question"]
    instructions = (
        "作为一位专业的艺术评论家和历史学家，您的任务是分析和解释图像，"
        "考虑其历史和文化意义。除了图像之外，您还将获得相关文本以提供背景。"
        "这两者都将根据用户输入的关键词从向量存储中检索。请运用您渊博的知识和分析技能，"
        "提供一个全面的总结，包括：\n"
        "- 图像中视觉元素的详细描述\n"
        "- 图像的历史和文化背景\n"
        "- 对图像象征和含义的解释\n"
        "- 图像与相关文本之间的联系\n\n"
    )
    text_part = {
        "type": "text",
        "text": (
            instructions
            + f"用户提供的关键词：{question}\n\n"
            + "文本和/或表格：\n"
            + f"{formatted_texts}"
        ),
    }

    return [HumanMessage(content=[*image_parts, text_part])]


# Vision-capable chat model that answers over the retrieved image and text.
# "gpt-4-vision-preview" has been shut down by OpenAI, so requests against it
# fail; "gpt-4o" is the multimodal replacement and accepts the same
# image_url content parts produced by prompt_func.
model = ChatOpenAI(temperature=0, model="gpt-4o", max_tokens=1024)

# RAG pipeline: retrieve context, build the multimodal prompt, call the model,
# and parse the reply with StrOutputParser.
chain = (
    {
        # Retriever output passed through split_image_text_types; prompt_func
        # reads it as data_dict["context"].
        "context": retriever | RunnableLambda(split_image_text_types),
        # The chain input, forwarded as data_dict["question"].
        "question": RunnablePassthrough(),
    }
    | RunnableLambda(prompt_func)
    | model
    | StrOutputParser()
)