Config environment

In [8]:
import os
import openai
import sys
# Add the directory two levels up to the module search path.
sys.path.append('../..')

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

# Raises KeyError if OPENAI_API_KEY is not present in the process environment.
openai.api_key  = os.environ['OPENAI_API_KEY']

Select LLM

In [9]:
import datetime

# Before 2023-09-02 the pinned "-0301" snapshot is selected; from that date on
# the plain "gpt-3.5-turbo" alias is used.
current_date = datetime.datetime.now().date()
llm_name = (
    "gpt-3.5-turbo-0301"
    if current_date < datetime.date(2023, 9, 2)
    else "gpt-3.5-turbo"
)
print(llm_name)

gpt-3.5-turbo


In [10]:
from langchain.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA,  ConversationalRetrievalChain, LLMChain
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.chains.question_answering import load_qa_chain

In [11]:
def build_and_persist_db(path,chunk_size, chunk_overlap,persist_directory):
    """Load a PDF, split it into chunks, embed them and persist a Chroma store.

    Args:
        path: Path of the PDF file handed to ``PyPDFLoader``.
        chunk_size: ``chunk_size`` forwarded to the recursive text splitter.
        chunk_overlap: ``chunk_overlap`` forwarded to the recursive text splitter.
        persist_directory: Directory passed to Chroma as ``persist_directory``.

    Returns:
        The Chroma vector store built from the split documents.
    """
    pdf_pages = PyPDFLoader(path).load()

    splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", " ", ""],
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = splitter.split_documents(pdf_pages)

    store = Chroma.from_documents(
        documents=chunks,
        embedding=OpenAIEmbeddings(),
        persist_directory=persist_directory,
    )
    # Persist the database to disk.
    store.persist()
    return store


In [29]:
def load_chroma_create_chain( persist_directory, k, chain_type):
    """Open a persisted Chroma DB and wrap it in a ConversationalRetrievalChain.

    The chat model name is read from the module-level ``llm_name``.

    Args:
        persist_directory: Directory passed to ``Chroma`` as ``persist_directory``.
        k: Number of documents requested from the retriever (``search_kwargs["k"]``).
        chain_type: ``chain_type`` forwarded to ``load_qa_chain`` together with a
            custom ``prompt``. NOTE(review): the notebook only uses "stuff";
            other chain types may not accept a ``prompt`` argument — confirm
            before passing anything else.

    Returns:
        A ``ConversationalRetrievalChain`` that returns source documents.
        Chat history is not stored in the chain; callers pass it on each call.
    """
    # Prompt that condenses (chat history, follow-up) into a standalone question.
    # The previous template still carried leftover "#" comment markers on every
    # line, which were sent verbatim to the model; they are removed here.
    question_gen_prompt=PromptTemplate.from_template("""
        Given the chat history and a follow-up question, rephrase the question to be standalone.
        Chat History:
        {chat_history}

        Follow-up question:
        {question}

        Standalone question:
        """)
    question_generator= LLMChain(llm=ChatOpenAI(model_name=llm_name, temperature=0), prompt= question_gen_prompt)

    # Prompt used to answer the question from the retrieved context.
    custom_prompt= PromptTemplate.from_template("""
        Use the following pieces of context to answer the question at the end. If you don't know the answer, \
        just say that you don't know, don't try to make up an answer. Use 6 sentences maximum. 
        
        Context:
        {context}

        Question:
        {question}

        Answer:
    """)
    qa_chain = load_qa_chain(
        llm=ChatOpenAI(model_name=llm_name),
        chain_type=chain_type,
        prompt= custom_prompt
    )

    embedding=OpenAIEmbeddings()
    vectordb=Chroma(persist_directory=persist_directory, embedding_function=embedding)
    # Define the retriever ("mmr" search type, k results).
    retriever= vectordb.as_retriever(search_type="mmr", search_kwargs={"k":k})

    # Create the chatbot chain. Memory is managed externally.
    qa = ConversationalRetrievalChain(
        combine_docs_chain=qa_chain,
        question_generator=question_generator,
        retriever=retriever,
        return_source_documents=True,
    )
    return qa
    

In [38]:
import panel as pn
import param
# Needed when running inside a Jupyter notebook; can be omitted when running via `panel serve`.
pn.extension()
class cbfs(param.Parameterized):
    """Chatbot controller connecting Panel widgets to a retrieval QA chain.

    Holds the chat history, the last standalone DB query, the retrieved source
    documents and the last answer as ``param`` parameters, and exposes methods
    that Panel renders as views (conversation, DB query, sources, history,
    list of previously loaded files).
    """

    chat_history = param.List([])
    answer = param.String("")
    db_query  = param.String("")
    db_response = param.List([])
    db_files=param.List([])
    file_updated = param.Integer(0)  # bumped after each upload to refresh the file list view

    def __init__(self, file_input, button_load, button_clearhistory,inp, **params):
        """Store the widgets and set the default retrieval configuration.

        Args:
            file_input: File input widget used to upload a PDF.
            button_load: Button that triggers loading/building the DB.
            button_clearhistory: Button that clears the chat history.
            inp: Text input widget holding the user's question.
            **params: Forwarded to ``param.Parameterized``.
        """
        super (cbfs, self).__init__(**params)
        self.file_input=file_input
        self.button_load=button_load
        self.button_clearhistory=button_clearhistory
        self.inp=inp
        self.persist_directory="docs/chroma/"
        self.record_file = self.persist_directory + "loaded_files.txt"
        self.panels=[]
        self.k=3
        self.chain_type="stuff"
        # Created lazily: by call_load_create_chain, or on the first question.
        self.qa=None

    def _load_chain(self):
        """Build the conversational chain over the persisted Chroma DB."""
        return load_chroma_create_chain(persist_directory=self.persist_directory,
                                        k=self.k,
                                        chain_type=self.chain_type)

    def file_load_history(self, record_file):
        """Return an HTML snippet listing the file names stored in ``record_file``.

        Args:
            record_file: Path of the text file holding one loaded file name per line.

        Returns:
            A ``<ul>`` with one ``<li>`` per recorded name, or a ``<p>``
            placeholder when the file is missing or empty.
        """
        from html import escape  # local import: file names are user-supplied

        # Read the record of previously loaded files.
        if os.path.exists(record_file):
            with open(record_file, "r", encoding="utf-8") as fh:
                file_list = fh.read().strip().splitlines()
        else:
            file_list = []
        if not file_list:
            return "<p>No recorded files found.</p>"
        # Escape names so an uploaded file name cannot inject markup.
        items = "".join(f"<li>{escape(name)}</li>" for name in file_list)
        return "<ul>" + items + "</ul>"

    def new_file_record(self, record_file,loaded_file):
        """Append ``loaded_file`` as a new line to ``record_file`` for later display."""
        record_dir = os.path.dirname(record_file)
        if record_dir:
            # Make sure the target directory exists before appending.
            os.makedirs(record_dir, exist_ok=True)
        with open(record_file, "a", encoding="utf-8") as fh:
            fh.write(loaded_file + "\n")
        return

    def call_load_create_chain(self, count):
        """Load the persisted DB, or build it from the uploaded PDF, and create the chain.

        Args:
            count: Click count of the load button; ``0`` means no click yet.

        Returns:
            A Markdown pane describing what was loaded.
        """
        if count==0 or self.file_input.value is None:
            # No file uploaded -> load the persisted Chroma DB.
            # The chain is kept on the instance because every conversation turn reuses it.
            self.qa = self._load_chain()
            # Everything above is backend work; the returned pane tells the user it finished.
            return pn.pane.Markdown(f"Loaded persisted DB from: `{self.persist_directory}` \n\n")

        self.file_input.save("temp.pdf")
        self.loaded_file = self.file_input.filename
        self.button_load.button_style="outline"
        try:
            # Build a new Chroma DB from the uploaded file.
            build_and_persist_db(path="temp.pdf",
                                 chunk_size=1000,
                                 chunk_overlap=100,
                                 persist_directory=self.persist_directory)
            # Record the newly loaded file name in the .txt record.
            self.new_file_record(self.record_file,self.loaded_file)
            self.file_updated += 1  # bump to trigger a refresh of the file list view
            self.qa = self._load_chain()
        finally:
            # Restore the button style even if building the DB failed.
            self.button_load.button_style = "solid"
        self.clr_history()
        return pn.pane.Markdown(f"Uploaded and built DB for: `{self.loaded_file}`\n\n")

    def convchain(self, query):
        """Answer ``query`` with the chain and return the updated conversation view.

        Args:
            query: The user's question; an empty value yields an empty placeholder view.

        Returns:
            A scrollable ``pn.WidgetBox`` with all user/bot exchanges so far.
        """
        if not query:
            return pn.WidgetBox(pn.Row('User:', pn.pane.Markdown("", width=600)), scroll=True)
        if getattr(self, "qa", None) is None:
            # A question arrived before the load callback ran: fall back to the persisted DB.
            self.qa = self._load_chain()
        # Generate the standalone question separately so it can be shown in the Database tab.
        stand_alone_question=self.qa.question_generator.run({"question": query, "chat_history": self.chat_history})
        result = self.qa({"question": query, "chat_history": self.chat_history})
        self.chat_history.extend([(query, result["answer"])])

        self.db_query = stand_alone_question

        self.db_response = result["source_documents"]
        self.answer = result['answer']
        self.panels.extend([
            pn.Row('User:', pn.pane.Markdown(query, width=600)),
            pn.Row('ChatBot:', pn.pane.Markdown(self.answer, width=600, styles={'background-color': '#F6F6F6'}))
        ])
        self.inp.value = ''  #clears loading indicator when cleared
        return pn.WidgetBox(*self.panels,scroll=True)

    @param.depends('file_updated')  # refresh automatically after each DB load
    def get_file_history(self):
        """Return a view listing the previously loaded files."""
        # Use this instance's own state rather than a module-level global.
        file_display=self.file_load_history(self.record_file)
        return pn.Column(
            pn.pane.Markdown("### Previously Loaded Files:", styles={"color": "#444"}),
            pn.pane.HTML(file_display, width=500, styles={"background-color": "#f9f9f9", "padding": "10px"})
        )

    # Show the last query sent to the DB, or a placeholder when there has been none.
    @param.depends('db_query', )
    def get_lquest(self):
        """Return a view of the last standalone question sent to the DB."""
        if not self.db_query :
            return pn.Column(
                pn.Row(pn.pane.Markdown(f"Last question to DB:", styles={'background-color': '#F6F6F6'})),
                pn.Row(pn.pane.Str("no DB accesses so far"))
            )
        return pn.Column(
            pn.Row(pn.pane.Markdown(f"DB query:", styles={'background-color': '#F6F6F6'})),
            pn.pane.Str(self.db_query )
        )

    # Re-rendered whenever db_response changes (set from result["source_documents"]).
    @param.depends('db_response', )
    def get_sources(self):
        """Return a scrollable view of the retrieved source documents, or None if empty."""
        if not self.db_response:
            return
        # First row is a caption; the retrieved documents are listed below it.
        rlist=[pn.Row(pn.pane.Markdown(f"Result of DB lookup:", styles={'background-color': '#F6F6F6'}))]
        for doc in self.db_response:
            rlist.append(pn.Row(pn.pane.Str(doc)))
        # Wrap every row in a vertically scrollable box.
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    # Declared as depending on convchain and clr_history; rebuilt from the current chat_history.
    @param.depends('convchain', 'clr_history')
    def get_chats(self):
        """Return a scrollable view of the current ``chat_history`` contents."""
        if not self.chat_history:
            return pn.WidgetBox(pn.Row(pn.pane.Str("No History Yet")), width=600, scroll=True)
        rlist=[pn.Row(pn.pane.Markdown(f"Current Chat History variable", styles={'background-color': '#F6F6F6'}))]
        for exchange in self.chat_history:
            rlist.append(pn.Row(pn.pane.Str(exchange)))
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    def clr_history(self,count=0):
        """Clear the chat history.

        Args:
            count: Unused; accepts the event passed by the button's ``on_click``.
        """
        self.chat_history=[]
        return
    
        


ChatBot

In [40]:


# --- Widgets -----------------------------------------------------------------
file_input = pn.widgets.FileInput(accept='.pdf')
button_load = pn.widgets.Button(name="Load DB", button_type='primary')
button_clearhistory = pn.widgets.Button(name="Clear History", button_type='warning')
inp = pn.widgets.TextInput( placeholder='Enter text here…')

# --- Controller and callbacks -------------------------------------------------
cb= cbfs(file_input, button_load, button_clearhistory,inp)
button_clearhistory.on_click(cb.clr_history)

# pn.bind(function, value passed to the function)
bound_button_load = pn.bind(cb.call_load_create_chain, button_load.param.clicks)
conversation = pn.bind(cb.convchain, inp)

jpg_pane = pn.pane.Image( './pic/chatbot/tmp.jpg')

# --- Tabs ---------------------------------------------------------------------
# Conversation: question input on top, running dialogue below.
tab1 = pn.Column(
    pn.Row(inp),
    pn.layout.Divider(),
    pn.panel(conversation,  loading_indicator=True, height=300),
    pn.layout.Divider(),
)

# Database: last standalone query and the documents retrieved for it.
tab2= pn.Column(
    pn.panel(cb.get_lquest),
    pn.layout.Divider(),
    pn.panel(cb.get_sources ),
)

# Chat History: raw contents of the chat history list.
tab3= pn.Column(
    pn.panel(cb.get_chats),
    pn.layout.Divider(),
)

# Configure: upload/load controls, history reset, loaded-file list and diagram.
tab4=pn.Column(
    pn.Row( file_input, button_load, bound_button_load),
    pn.Row(
        button_clearhistory,
        pn.pane.Markdown("Clears chat history. Can use to start a new topic" ),
    ),
    pn.layout.Divider(),
    pn.panel(cb.get_file_history),
    # Clone the image pane so its size can be changed here without affecting other users of it.
    pn.Row(jpg_pane.clone(width=400)),
)

# --- Dashboard ----------------------------------------------------------------
dashboard = pn.Column(
    pn.Row(pn.pane.Markdown('# My_Chat_Bot v1.1')),
    pn.Tabs(
        ('Conversation', tab1),
        ('Database', tab2),
        ('Chat History', tab3),
        ('Configure', tab4),
    ),
)
dashboard

