### 1.  连接大模型

In [3]:
from utils import get_qwen_models
# Unpack the three objects returned by get_qwen_models(). The cells visible in this
# notebook use `llm` (invoked with plain strings / PromptTemplate output) and `chat`
# (invoked for message-style results); `embed` is not referenced in the cells shown here.
llm, chat, embed = get_qwen_models()

### 2. 输出解析器
- 规范化大模型的输出
- 方便下游任务的处理

### 3. StrOutputParser

In [4]:
from langchain_core.output_parsers import StrOutputParser

In [6]:
parser = StrOutputParser()

In [11]:
result = chat.invoke("你是谁？")

In [13]:
result.content

'我是来自阿里云的超大规模语言模型，我叫通义千问。'

In [18]:
parser.invoke(result)

'我是来自阿里云的超大规模语言模型，我叫通义千问。'

In [16]:
chain = chat | parser

In [17]:
chain.invoke(input="hello")

'Hello there! How can I assist you today?'

### 4. CommaSeparatedListOutputParser

In [21]:
from langchain_core.output_parsers import CommaSeparatedListOutputParser

In [20]:
llm.invoke("请列出3种健康的生活习惯")

'好的，这里有三种健康的生活习惯： \n\n1. 保持充足的睡眠：每晚至少7-8小时的睡眠可以帮助身体恢复活力，并有助于提高记忆力和注意力。 \n\n2. 均衡饮食：多吃蔬菜、水果、全谷物等食物，少吃油腻、油炸、甜食等不健康的食品。 \n\n3. 锻炼身体：每天坚持进行有氧运动，如慢跑、游泳、骑自行车等，以增强肌肉力量和耐力。'

In [22]:
parser = CommaSeparatedListOutputParser()

In [24]:
parser.get_format_instructions()

'Your response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`'

In [25]:
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate

In [34]:
# Prompt for the comma-separated-list demo: `num` stays an input variable to be
# supplied at invoke time, while the parser's format instructions are bound now
# as the partial variable `format_instruction`.
prompt = PromptTemplate.from_template(
    template="请列出{num}种健康的生活习惯！请使用中文输出\n{format_instruction}",
    partial_variables=dict(format_instruction=parser.get_format_instructions()),
)

In [35]:
prompt

PromptTemplate(input_variables=['num'], partial_variables={'format_instruction': 'Your response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`'}, template='请列出{num}种健康的生活习惯！请使用中文输出\n{format_instruction}')

In [36]:
print(prompt.invoke(input={"num": 2}).text)

请列出2种健康的生活习惯！请使用中文输出
Your response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`


In [37]:
chain = prompt | llm

In [44]:
result= chain.invoke(input={"num": 10})

In [45]:
parser.parse(result)

['早睡早起',
 '均衡饮食',
 '定期运动',
 '保持乐观',
 '戒烟限酒',
 '充足水分',
 '定期体检',
 '适当休息',
 '个人卫生',
 '心理健康']

In [46]:
from langchain_core.prompts import SystemMessagePromptTemplate
from langchain_core.prompts import HumanMessagePromptTemplate

In [84]:
prompt = ChatPromptTemplate.from_messages(messages=[
    SystemMessagePromptTemplate.from_template(template="你是一个养生专家！"),
    HumanMessagePromptTemplate.from_template(template="请列出{num}种健康的生活习惯！\n请返回用逗号分开的一些列结果，比如：`苹果, 香蕉, 西瓜`")
])

In [85]:
prompt

ChatPromptTemplate(input_variables=['num'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='你是一个养生专家！')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['num'], template='请列出{num}种健康的生活习惯！\n请返回用逗号分开的一些列结果，比如：`苹果, 香蕉, 西瓜`'))])

In [86]:
chain = prompt | chat | StrOutputParser()

In [94]:
result = chain.invoke(input=dict(num=2))

In [95]:
result

'定期锻炼, 均衡饮食'

In [96]:
parser.parse(result)

['定期锻炼', '均衡饮食']

### 5. DatetimeOutputParser

In [97]:
from langchain.output_parsers import DatetimeOutputParser

In [131]:
parser = DatetimeOutputParser(format='%Y-%m-%d %H:%M:%S')

In [132]:
print(parser.get_format_instructions())

Write a datetime string that matches the following pattern: '%Y-%m-%d %H:%M:%S'.

Examples: 1504-10-15 16:31:04, 0770-07-07 02:21:51, 0193-08-22 03:34:37

Return ONLY this string, no other words!


In [133]:
from langchain_core.prompts import PromptTemplate

In [134]:
# Prompt for the datetime demo: the caller supplies `question`; the datetime
# parser's format instructions are pre-bound under the `format` placeholder.
prompt = PromptTemplate.from_template(
    template="{question}\n{format}",
    partial_variables=dict(format=parser.get_format_instructions()),
)

In [135]:
prompt

PromptTemplate(input_variables=['question'], partial_variables={'format': "Write a datetime string that matches the following pattern: '%Y-%m-%d %H:%M:%S'.\n\nExamples: 0018-08-16 13:46:02, 1646-01-20 23:53:22, 0141-10-07 09:45:43\n\nReturn ONLY this string, no other words!"}, template='{question}\n{format}')

In [136]:
chain = prompt | llm

In [137]:
result = chain.invoke(input={"question": "新中国成立是什么时间？"})
result

'1949-10-01 00:00:00'

In [138]:
parser.parse(result)

datetime.datetime(1949, 10, 1, 0, 0)

In [139]:
result = chain.invoke(input={"question": "2008年中秋节是什么时间？"})

In [140]:
result

'2008-09-14 00:00:00'

In [141]:
parser.parse(result)

datetime.datetime(2008, 9, 14, 0, 0)

In [142]:
# Unlike the two earlier questions, this one asks for a time of day ("几点"),
# so the answer is expected to carry a meaningful hour component.
result = chain.invoke(input={"question": "北京奥运会开幕式是几点？"})

In [143]:
result

'2008-08-08 20:00:00'

In [144]:
parser.parse(result)

datetime.datetime(2008, 8, 8, 20, 0)

### 6. 枚举输出

In [146]:
from langchain.output_parsers import EnumOutputParser

In [147]:
from enum import Enum

In [166]:
class Color(Enum):
    """Closed set of colour labels handed to EnumOutputParser as the allowed answers."""
    # The member values (not the member names) are what the parser lists in its
    # format instructions: "blue, red, green", in declaration order.
    BLUE = "blue"
    RED = "red"
    GREEN = "green"

In [150]:
Color.BLUE

<Color.BLUE: 'blue'>

In [151]:
parser = EnumOutputParser(enum=Color)

In [153]:
parser.get_format_instructions()

'Select one of the following options: blue, red, green'

In [169]:
# Prompt for the enum demo: `item` is supplied at invoke time; the enum parser's
# option list is pre-bound under `format`, followed by an explicit instruction
# to answer with the option only.
prompt = PromptTemplate.from_template(
    template="{item}是什么颜色的？\n{format}！Return ONLY your option, no other words!",
    partial_variables=dict(format=parser.get_format_instructions()),
)

In [170]:
prompt

PromptTemplate(input_variables=['item'], partial_variables={'format': 'Select one of the following options: blue, red, green'}, template='{item}是什么颜色的？\n{format}！Return ONLY your option, no other words!')

In [171]:
chain = prompt | llm

In [175]:
result= chain.invoke(input={"item":"中国国旗"})

In [176]:
result

'red'

In [177]:
parser.parse(result)

<Color.RED: 'red'>

### 7. StructuredOutputParser
- 可以用于转换JSON

In [180]:
from langchain.output_parsers import StructuredOutputParser
from langchain_core.prompts import PromptTemplate
from langchain.output_parsers import ResponseSchema

In [182]:
# One ResponseSchema per expected output field, built from (name, description)
# pairs. In the cells shown, the parser created from these schemas is only used
# to print its format instructions.
response_schemas = [
    ResponseSchema(name=field_name, description=field_description)
    for field_name, field_description in (
        ("country", "国家"),
        ("population", "这个国家对应的人口数量"),
    )
]

In [184]:
parser = StructuredOutputParser(response_schemas=response_schemas)

In [186]:
print(parser.get_format_instructions())

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"country": string  // 国家
	"population": string  // 这个国家对应的人口数量
}
```


```json
{
	"country": string  // 国家
	"population": string  // 这个国家对应的人口数量
}
```

In [255]:
format = '''The output should be a list containing many JSON objects, just like the following:
[{
	"Country": string  // 国家
	"GDP": string  // 这个国家对应的人口数量
}]
Remeber: ONLY OUTPUT this list, NO other words! 请用中文输出！
'''

In [256]:
prompt = PromptTemplate.from_template(template="请列出世界上GPD排名前5的国家及其GDP。\n{format}",
                                     partial_variables={"format": format})

In [257]:
prompt

PromptTemplate(input_variables=[], partial_variables={'format': 'The output should be a list containing many JSON objects, just like the following:\n[{\n\t"Country": string  // 国家\n\t"GDP": string  // 这个国家对应的人口数量\n}]\nRemeber: ONLY OUTPUT this list, NO other words! 请用中文输出！\n'}, template='请列出世界上GPD排名前5的国家及其GDP。\n{format}')

In [258]:
chain = prompt | llm

In [259]:
result = chain.invoke(input = {})

In [260]:
result

'[{\n\t"Country": "美国",\n\t"GDP": "21.43万亿美元"\n}, {\n\t"Country": "中国",\n\t"GDP": "14.14万亿美元"\n}, {\n\t"Country": "日本",\n\t"GDP": "5.15万亿美元"\n}, {\n\t"Country": "德国",\n\t"GDP": "4.16万亿美元"\n}, {\n\t"Country": "英国",\n\t"GDP": "2.62万亿美元"\n}]'

In [261]:
import json

In [262]:
json.loads(s=result)

[{'Country': '美国', 'GDP': '21.43万亿美元'},
 {'Country': '中国', 'GDP': '14.14万亿美元'},
 {'Country': '日本', 'GDP': '5.15万亿美元'},
 {'Country': '德国', 'GDP': '4.16万亿美元'},
 {'Country': '英国', 'GDP': '2.62万亿美元'}]

### 8. PydanticOutputParser
- 作用类似于 TypeScript 之于 JavaScript
- 对动态语言中的数据类型，进行约束
- 增强代码的鲁棒性，在开发阶段就发现问题，而不是等到运行时才暴露

In [263]:
from langchain.output_parsers import PydanticOutputParser

In [293]:
from langchain.pydantic_v1 import BaseModel
from langchain.pydantic_v1 import Field

In [313]:
# Declare the model's fields as annotated class attributes.
class ResultEntity(BaseModel):
    """
    自定义结果输出
    """
    # The docstring above reads "custom result output".
    # NOTE(review): it is deliberately left untranslated — pydantic may surface a
    # model's docstring in the generated JSON schema, which would make it part of
    # the prompt text sent to the model; confirm before editing it.
    #
    # Each field accepts either a single string or a list. The schema printed
    # below shows this as anyOf[string, array], and the parsed result in this
    # notebook holds one list per field.
    # The `description` strings are emitted into the schema, so they are runtime
    # text (including the leading space in the second one).
    country: str | list  = Field(description="这个国家的名字")
    population: str | list = Field(description=" 这个国家对应的人口")

In [299]:
parser = PydanticOutputParser(pydantic_object=ResultEntity)

In [300]:
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"country": {"title": "Country", "description": "\u8fd9\u4e2a\u56fd\u5bb6\u7684\u540d\u5b57", "anyOf": [{"type": "string"}, {"type": "array", "items": {}}]}, "population": {"title": "Population", "description": " \u8fd9\u4e2a\u56fd\u5bb6\u5bf9\u5e94\u7684\u4eba\u53e3", "anyOf": [{"type": "string"}, {"type": "array", "items": {}}]}}, "required": ["country", "population"]}
```


In [301]:
format_instructions = parser.get_format_instructions()

In [302]:
# Prompt for the pydantic demo: no runtime inputs — the single `format`
# placeholder is pre-bound to the parser's schema-based format instructions.
prompt = PromptTemplate.from_template(
    template="请列出世界上人口数量排名前5的国家及其人口。\n{format}",
    partial_variables=dict(format=format_instructions),
)

In [303]:
prompt

PromptTemplate(input_variables=[], partial_variables={'format': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"country": {"title": "Country", "description": "\\u8fd9\\u4e2a\\u56fd\\u5bb6\\u7684\\u540d\\u5b57", "anyOf": [{"type": "string"}, {"type": "array", "items": {}}]}, "population": {"title": "Population", "description": " \\u8fd9\\u4e2a\\u56fd\\u5bb6\\u5bf9\\u5e94\\u7684\\u4eba\\u53e3", "anyOf": [{"type": "string"}, {"type": "array", "items": {}}]}}, "required": ["country", "population"]}\n```'}, template='请列出世界上人口数量排名前5的国家及其人口。\n{format}')

In [304]:
chain = prompt | llm

In [305]:
result = chain.invoke(input={})

In [306]:
json.loads(s=result)

{'country': ['China', 'India', 'United States', 'Indonesia', 'Pakistan'],
 'population': ['1,444,216,107',
  '1,393,409,038',
  '332,556,431',
  '276,361,783',
  '225,199,937']}

In [307]:
result

'{\n  "country": ["China", "India", "United States", "Indonesia", "Pakistan"],\n  "population": ["1,444,216,107", "1,393,409,038", "332,556,431", "276,361,783", "225,199,937"]\n}'

In [308]:
parser.parse(result)

ResultEntity(country=['China', 'India', 'United States', 'Indonesia', 'Pakistan'], population=['1,444,216,107', '1,393,409,038', '332,556,431', '276,361,783', '225,199,937'])

In [311]:
final_result = parser.parse(result).dict()

In [312]:
final_result

{'country': ['China', 'India', 'United States', 'Indonesia', 'Pakistan'],
 'population': ['1,444,216,107',
  '1,393,409,038',
  '332,556,431',
  '276,361,783',
  '225,199,937']}