In [1]:
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder

# Chat model shared by the memory (cell 2) and the chain (cell 3).
# A low temperature (0.1) is used for the model's sampling.
llm = ChatOpenAI(temperature=0.1, model="gpt-3.5-turbo")

In [2]:
# Conversation memory built on the shared `llm`.
# - max_token_limit=80: token budget for the buffer (presumably older turns are
#   summarized by `llm` once it is exceeded -- confirm against the LangChain docs).
# - return_messages=True: history is handed back as messages, which is what the
#   MessagesPlaceholder in the prompt consumes.
# - memory_key is left at its default ("history"); pass memory_key="chat_history"
#   to store the history under a different key.
memory = ConversationSummaryBufferMemory(
    return_messages=True,
    max_token_limit=80,
    llm=llm,
)

# Prompt layout: fixed system message, then the conversation history injected
# under the "history" variable, then the user's current question.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI talking to a Human"),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{question}"),
])

In [3]:
from langchain.schema.runnable import RunnablePassthrough


# Helper that fetches the memory variable for the chain.
# Chain steps have an input and an output, so load_memory also takes an input.
def load_memory(_):
    """Return the conversation history currently held in ``memory``.

    Args:
        _: The chain input passed in by the runnable; it is ignored.

    Returns:
        The value that ``memory.load_memory_variables({})`` stores under the
        memory's configured key.
    """
    # Index by the memory's own key rather than a hard-coded "history" so this
    # keeps working (no KeyError) if the memory is built with a custom memory_key.
    return memory.load_memory_variables({})[memory.memory_key]


# The assigned function runs before the prompt is formatted.
# load_memory's output goes into "history"; it is combined with the user's
# input (question) and passed on to the prompt.
chain = (
    RunnablePassthrough.assign(history=load_memory)
    | prompt
    | llm
)


def invoke_chain(question):
    """Run one conversational turn and record it in memory.

    Invokes ``chain`` with the question, saves the question/answer pair to
    ``memory`` with ``save_context``, and prints the chain's result.

    Args:
        question: The user's message for this turn.
    """
    ai_message = chain.invoke({"question": question})  # the chain's result is an AI message

    # Persist both sides of the exchange so the next turn can see them.
    human_turn = {"input": question}
    ai_turn = {"output": ai_message.content}
    memory.save_context(human_turn, ai_turn)

    print(ai_message)

In [4]:
# First turn: introduce a name so it is saved into the conversation memory.
invoke_chain(question="My name is Jimin")

content='Hello Jimin! How can I assist you today?'


In [5]:
# Second turn: ask for the name back; the answer relies on the history saved by
# the previous invoke_chain call.
invoke_chain(question="What is my name?")

content='Your name is Jimin.'
