from typing import Optional

from langchain_community.chat_models import QianfanChatEndpoint
from pydantic import BaseModel, Field


# Describe the expected return type with pydantic so the model can be
# asked for structured output instead of free text.
class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline of the joke")
    rating: Optional[int] = Field(
        default=None, description="How funny the joke is, from 1 to 10"
    )


# QianfanChatEndpoint is used here; any other chat model that supports
# structured output would work the same way.
llm = QianfanChatEndpoint(model="ERNIE-3.5-8K")

# with_structured_output wraps the model so invoke() returns a Joke
# instance parsed from the model's reply.
structured_llm = llm.with_structured_output(Joke)
structured_llm.invoke("Tell me a joke about cats")
返回结果
1
Joke(setup='A cat is sitting in front of a mirror and sees another cat. What does the cat think?', punchline='The cat thinks it is time for lunch!', rating=5)
from typing import Optional

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field


# Joke again describes the structure the parser should produce.
class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline of the joke")
    rating: Optional[int] = Field(
        default=None, description="How funny the joke is, from 1 to 10"
    )
Answer the user query. The output should be formatted as a JSON instance that conforms to the JSON schema below.
As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]} the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.
Here is the output schema: ``` {"description": "Joke to tell user.", "properties": {"setup": {"description": "The setup of the joke", "title": "Setup", "type": "string"}, "punchline": {"description": "The punchline of the joke", "title": "Punchline", "type": "string"}, "rating": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "description": "How funny the joke is, from 1 to 10", "title": "Rating"}}, "required": ["setup", "punchline"]} ``` Tell me a joke about cats
最终结果:
1
Joke(setup='Why did the cat sit on the computer?', punchline='It wanted to keep an eye on the mouse!', rating=7)
from typing import Optional

from langchain_community.chat_models import QianfanChatEndpoint
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from pydantic import BaseModel, Field


class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline of the joke")
    rating: Optional[int] = Field(
        default=None, description="How funny the joke is, from 1 to 10"
    )


# Build the JSON parser from the Joke schema. NOTE(review): the original
# snippet referenced `parser` without defining it while importing
# JsonOutputParser unused — this line restores the missing definition.
parser = JsonOutputParser(pydantic_object=Joke)

# Prompt template that injects the parser's format instructions so the
# model knows the exact JSON shape to emit.
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

llm = QianfanChatEndpoint(model="ERNIE-3.5-8K")

# Compose prompt -> model -> parser; invoke returns a plain dict parsed
# from the model's JSON reply.
chain = prompt | llm | parser
chain.invoke({"query": "Tell me a joke about cats"})
结果:
1 2 3
{'setup': 'Why did the cat sit on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse!', 'rating': 7}
自定义解析
当然,我们也可以仿照之前的例子,自己来编写提示词和配套的解析器
提示词中结构描述,还可以继续借助 pydantic 来生成对应的schema
1 2 3 4 5 6 7 8 9
# pydantic can also generate the JSON schema used in hand-written
# prompts, so the structure description stays in one place.
class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline of the joke")
    rating: Optional[int] = Field(
        default=None, description="How funny the joke is, from 1 to 10"
    )


# Emit the JSON schema for the Joke model (shown in the output below).
Joke.model_json_schema()
结果:
1 2 3 4 5 6 7 8 9 10 11 12 13 14
{'description': 'Joke to tell user.', 'properties': {'setup': {'description': 'The setup of the joke', 'title': 'Setup', 'type': 'string'}, 'punchline': {'description': 'The punchline of the joke', 'title': 'Punchline', 'type': 'string'}, 'rating': {'anyOf': [{'type': 'integer'}, {'type': 'null'}], 'default': None, 'description': 'How funny the joke is, from 1 to 10', 'title': 'Rating'}}, 'required': ['setup', 'punchline'], 'title': 'Joke', 'type': 'object'}
from typing import Optional

from langchain.output_parsers import OutputFixingParser
from langchain_core.exceptions import OutputParserException
from pydantic import BaseModel, Field


class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline of the joke")
    rating: Optional[int] = Field(
        default=None, description="How funny the joke is, from 1 to 10"
    )


# Deliberately malformed output: single quotes are not valid JSON, so a
# plain parser raises OutputParserException on this string.
misformatted = "{'setup': 'Why did the cat sit on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse!', 'rating': 7}"
# 这里是开始的错误信息输出 Invalid json output: {'setup': 'Why did the cat sit on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse!', 'rating': 7} For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE
# 这里是使用 OutputFixingParser 处理后,解析成功的结果 Joke(setup='Why did the cat sit on the computer?', punchline='Because it wanted to keep an eye on the mouse!', rating=7)
from typing import Optional

from langchain.output_parsers import RetryOutputParser
from langchain_community.chat_models import QianfanChatEndpoint
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from pydantic import BaseModel, Field


class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline of the joke")
    rating: Optional[int] = Field(
        default=None, description="How funny the joke is, from 1 to 10"
    )


# Build the parser from the Joke schema.
parser = PydanticOutputParser(pydantic_object=Joke)

# Prompt template that injects the parser's format instructions.
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Simulate a broken model response: punchline and rating are missing.
bad_response = "{'setup': 'Why did the cat sit on the computer?'}"

# Keep temperature very low so the model completes the missing fields
# without rewriting the content that is already present.
llm = QianfanChatEndpoint(model="ERNIE-3.5-8K", temperature=0.01)

# RetryOutputParser re-asks the model using the original prompt plus the
# bad output, then parses the repaired reply.
retry_parser = RetryOutputParser.from_llm(parser=parser, llm=llm)

# Rebuild the full prompt for the original question.
prompt_value = prompt.format_prompt(query="Tell me a joke about cats")

# Hand the bad response and the prompt to the retry parser for repair.
retry_parser.parse_with_prompt(bad_response, prompt_value)
响应如下,可以看到大模型帮我们补全了剩余的内容,并且没有改变原始已有的值
1
Joke(setup='Why did the cat sit on the computer?', punchline='It wanted to keep an eye on the mouse!', rating=7)
from langchain_community.chat_models import QianfanChatEndpoint
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field


# Prompt that asks the model to extract only the properties declared in
# the Classification schema from the given passage.
tagging_prompt = ChatPromptTemplate.from_template(
    """
Extract the desired information from the following passage.
Only extract the properties mentioned in the 'Classification' function.
Passage:
{input}
"""
)


# Fields to extract, each constrained to an enumerated set of values.
class Classification(BaseModel):
    """Classification"""

    sentiment: str = Field(..., enum=["happy", "neutral", "sad"])
    aggressiveness: int = Field(
        ...,
        description="describes how aggressive the statement is, the higher the number the more aggressive",
        enum=[1, 2, 3, 4, 5],
    )
    language: str = Field(
        ..., enum=["Spanish", "english", "french", "german", "italian"]
    )


# NOTE(review): the original snippet used `llm` without defining it while
# importing QianfanChatEndpoint unused — reconstructed here. Binding the
# Classification schema via with_structured_output makes invoke() return
# a Classification instance; confirm against the original tutorial.
llm = QianfanChatEndpoint(model="ERNIE-3.5-8K").with_structured_output(Classification)

inp = "Estoy increiblemente contento de haberte conocido! Creo que seremos muy buenos amigos!"
prompt = tagging_prompt.invoke({"input": inp})
llm.invoke(prompt)