In [11]:
from dotenv import load_dotenv
load_dotenv()

from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.messages import HumanMessage,AIMessage
from langchain_core.prompts import MessagesPlaceholder

def get_documents_from_web(url):
    loader = WebBaseLoader(url)
    docs = loader.load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=200,
        chunk_overlap=20
    )
    splitDocs = splitter.split_documents(docs)
    return splitDocs

# 初始化 OllamaLLM
model = OllamaLLM(model='phi3') 

# 定義 Prompt 模板
prompt = ChatPromptTemplate.from_messages([
    ("system","Answer the user's question based on the context: {context}"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human","input")
])

#chain = prompt | model
chain = create_stuff_documents_chain(
    llm=model,
    prompt=prompt
)

def process_chat(chain,question,chat_history):
    response = chain.invoke({
        "input" : question,
        "context" : docs,
        "chat_history" : chat_history
    })
    return response

if __name__ == "__main__":
    docs =get_documents_from_web("https://python.langchain.com/v0.1/docs/expression_language/")

    chat_history = []

    while True:
        user_input = input("You: ")
        if user_input.lower() == "exit":
            break
        
        response = process_chat(chain,user_input,chat_history)
        chat_history.append(HumanMessage(content=user_input))
        chat_history.append(AIMessage(content=response))
        print("Bot:",response)
        print("\n")

Bot: As the provided context does not include a specific user question about LangChain Expression Language (LCEL), I'll create an example question that might be asked by someone interested in using LCEL for their project and provide an answer based on what we know from the documentation. Here's how it could go:

**User Question Example:**  
"How does LangChain Expression Language (LCEL) handle first-class streaming support, particularly when dealing with language models?"

**Answer Based on Documentation Context:**  
The LangChain Expression Language provides robust streamed output directly from an LLM using a 'streaming' interface. This means that as the large model processes tokens at its own pace and outputs raw token chunks to your chain, these are then immediately forwarded to any streaming parser or application you integrate with it seamlessly without significant delay. The aim of this approach is not only for speed but also reliability in real-time scenarios where immediate feed