In [None]:
# Installation des packages
import os
import sys
! pip install langchain 
! pip install pypdf
!pip install sentence-transformers
!pip install chromadb
!{sys.executable} -m pip install --upgrade pip setuptools wheel
!{sys.executable} -m pip install --disable-pip-version-check torch torchdata
!pip install -U transformers
!pip install -U datasets==2.14.6
!pip install fsspec==2023.9.2
!pip install lark
!pip install gpt4all
! pip install accelerate
# au terminal : huggingface-cli login puis token

In [None]:
# Importation
from langchain.document_loaders import TextLoader
from langchain.text_splitter import MarkdownHeaderTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo
import torch
from datasets import load_dataset
import transformers
from transformers import AutoModelForSeq2SeqLM
from transformers import AutoTokenizer
from transformers import GenerationConfig
from transformers import pipeline
from transformers import pipeline
from langchain import HuggingFacePipeline
import os
import s3fs
from langchain.globals import set_verbose
set_verbose(True)
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

In [None]:
# Partie split

In [None]:
# Read the source text file into a list of LangChain documents.
loader = TextLoader("bdc.txt")
pages_txt = loader.load()

In [None]:
# (header marker, metadata key) pairs handed to the markdown splitter.
headers_to_split_on = [("###", "Header 1"), ("##", "Header 2"), ("#", "Header 3")]

In [None]:
# Splitter configured with the header markers defined above.
markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)

In [None]:
# Split the text of the first loaded document on the configured markdown headers.
md_header_splits = markdown_splitter.split_text(pages_txt[0].page_content)

In [None]:
# Number of chunks produced by the header split.
print(len(md_header_splits))

In [None]:
# Report the character length of every chunk.
for idx, chunk in enumerate(md_header_splits):
    print(f'Contenu n° {idx} : {len(chunk.page_content)}')

In [None]:
# Partie embedding

In [None]:
# Disabled alternative: an explicitly multilingual sentence-transformers model.
#embeddings_multilingual = HuggingFaceEmbeddings(model_name='sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
# Embeddings built with the wrapper's default model.
# NOTE(review): the questions are in French — confirm the default model is
# adequate, otherwise switch to the multilingual model above.
embeddings = HuggingFaceEmbeddings()

In [None]:
# Directory passed to Chroma as its persistence location.
persist_directory = 'chroma/'

In [None]:
# Empty the database folder by hand, or run this cell when needed.
# shutil.rmtree replaces `!rm -rf` so the cell does not depend on a POSIX
# shell; ignore_errors=True mirrors `-f` (no failure if the folder is absent).
import shutil

shutil.rmtree("./chroma", ignore_errors=True)  # remove old database files if any

In [None]:
# Embed every chunk and store the vectors in a Chroma database.
vectordb = Chroma.from_documents(
    persist_directory=persist_directory,
    embedding=embeddings,
    documents=md_header_splits,
)

In [None]:
# Number of entries in the underlying collection (read through the private
# `_collection` attribute of the vector store).
print(vectordb._collection.count())

In [None]:
# Sample user question (in French) reused by the retrieval and QA cells below.
question = "Je n'arrive pas à obtenir mon avis de situation"

In [None]:
# Similarity search for the question, asking for k=5 results.
docs = vectordb.similarity_search(question,k=5)

In [None]:
# MMR search: a balance between relevance and diversity of the retrieved documents
docs_mmr = vectordb.max_marginal_relevance_search(question,k=2, fetch_k=3)

In [None]:
# Utiliser automatiquement les infos présentes dans les metadata : 
# we can use `SelfQueryRetriever`, which uses an LLM to extract:
# 1. The `query` string to use for vector search
# 2. A metadata filter to pass in as well

In [None]:
# Write each description carefully: it is what the LLM reads.
metadata_field_info = [
    AttributeInfo(name=field_name, description=field_description, type="string")
    for field_name, field_description in (
        ("Header 1", "Le thème général auquel la question se rattache"),
        ("Header 2", "La catégorie au sein du thème général"),
        ("Header 3", "La sous-catégorie à laquelle la question est rattachée"),
    )
]

In [None]:
# Build prompt llama chat
# Template with two placeholders, {question} and {context}, wrapped in
# [INST] / <<SYS>> markers.
# The "\n" escapes sit inside a regular (non-raw) string, so each one adds a
# newline on top of the literal line break that follows it.
# NOTE(review): the leading "<s>" may duplicate the BOS token if the tokenizer
# already adds one — confirm.
template_chat = """<s>[INST] <<SYS>>
\n
Vous êtes un assistant conversationnel cordial et honnête, qui répond, uniquement en langue française, aux questions ou aux problèmes posés par un usager. Si vous ne connaissez pas la réponse, répondez simplement que vous ne savez pas, n'essayez pas d'inventer la réponse. 
\n<</SYS>>
\n
À l'aide du contexte ci-dessous, répondez, uniquement en langue française, au problème suivant posé par un usager : {question}
\n\n
Contexte : 
\n
{context}
[/INST]"""
QA_CHAIN_PROMPT_chat = PromptTemplate.from_template(template_chat)

In [None]:
# Build prompt
# Plain template (no chat markers) with two placeholders, {question} and
# {context}. The "\n" escape is inside a regular string, so it adds one extra
# newline before the context.
template = """Vous êtes un assistant conversationnel cordial et honnête, qui répond, uniquement en langue française, aux questions ou aux problèmes posés par un usager. Si vous ne connaissez pas la réponse, répondez simplement que vous ne savez pas, n'essayez pas d'inventer la réponse.

À l'aide du contexte ci-dessous, répondez au problème suivant posé par un usager : {question}

Contexte : 
\n
{context}
"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

In [None]:
# Si on utilise un modèle téléchargé, par exemple nous-hermes-llama2-13b

In [None]:
# S3 client pointed at the endpoint named by the AWS_S3_ENDPOINT variable.
S3_ENDPOINT_URL = f"https://{os.environ['AWS_S3_ENDPOINT']}"
fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=S3_ENDPOINT_URL))

In [None]:
# List the contents of the "vlapegue" bucket.
fs.ls("vlapegue")

In [None]:
# Keep only the last three entries of the bucket listing.
# NOTE(review): later cells select files[1] by position, which depends on the
# listing order — confirm it is the intended .gguf file.
bucket = 'vlapegue/nous-hermes-llama2-13b'
files = fs.ls(bucket)[-3:]

In [None]:
# Show which remote file is about to be downloaded.
print(files[1])

In [None]:
# Download the selected remote file to a local .gguf file.
fs.download(files[1],'nous-hermes-llama2-13b.Q4_0.gguf')

In [None]:
from langchain.llms import GPT4All

# LangChain wrapper around the locally downloaded GGUF file, temp set to 0.1.
llm = GPT4All(model='./nous-hermes-llama2-13b.Q4_0.gguf', temp=0.1)

In [None]:
# Build the RetrievalQA chain: retriever from the vector store, custom prompt,
# source documents returned alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectordb.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
    verbose=True,
)

In [None]:
# Run the chain on the question; the chain takes its input under the "query" key.
result = qa_chain({"query": question})

In [None]:
# Display the "result" entry of the chain output.
result["result"]

In [None]:
# The chain also exposes which documents were most relevant to the answer.
for source_doc in result["source_documents"]:
    print(source_doc, '\n\n')

In [None]:
# Si on utilise un modèle via le pipeline de HuggingFace, par exemple bigscience/bloomz-560m

In [None]:
# HuggingFace text-generation pipeline wrapped as a LangChain LLM.
gen_bloom = pipeline(
    'text-generation',
    model='bigscience/bloomz-560m',
    min_new_tokens=60,
)
llm = HuggingFacePipeline(pipeline=gen_bloom)

In [None]:
# Rebuild the RetrievalQA chain around the LLM currently bound to `llm`.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectordb.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
    verbose=True,
)

In [None]:
# Run the chain on the question; the chain takes its input under the "query" key.
result = qa_chain({"query": question})

In [None]:
# Display the "result" entry of the chain output.
result["result"]

In [None]:
# The chain also exposes which documents were most relevant to the answer.
for source_doc in result["source_documents"]:
    print(source_doc, '\n\n')

In [None]:
# Run the chain with an orca mini model
# Instantiates the native gpt4all client from a model file name.
# NOTE(review): presumably done to fetch the model file before the LangChain
# wrapper loads it; `GPT4All` is re-imported from langchain in the next cell — confirm.
from gpt4all import GPT4All
model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")


In [None]:
from langchain.llms import GPT4All

# LangChain wrapper around the orca mini GGUF model, temp set to 0.1.
llm = GPT4All(model='orca-mini-3b-gguf2-q4_0.gguf', temp=0.1)

In [None]:
# Rebuild the RetrievalQA chain around the LLM currently bound to `llm`.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectordb.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
    verbose=True,
)

In [None]:
# Run the chain on the question; the chain takes its input under the "query" key.
result = qa_chain({"query": question})

In [None]:
# Display the "result" entry of the chain output.
result["result"]

In [None]:
# The chain also exposes which documents were most relevant to the answer.
for source_doc in result["source_documents"]:
    print(source_doc, '\n\n')

In [None]:
# On télécharge llama2

In [None]:
# Load the tokenizer and the causal-LM weights from the same checkpoint.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-2-7b-hf"
)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf"
)

In [None]:
# Text-generation pipeline over the loaded model, wrapped as a LangChain LLM.
gen_llama = pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    min_new_tokens=60,
)
llm = HuggingFacePipeline(pipeline=gen_llama)

In [None]:
# Rebuild the RetrievalQA chain around the LLM currently bound to `llm`.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectordb.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
    verbose=True,
)

In [None]:
# Run the chain on the question; the chain takes its input under the "query" key.
result = qa_chain({"query": question})

In [None]:
# Display the "result" entry of the chain output.
result["result"]

In [None]:
# Avec llama2 chat

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Tokenizer and model are loaded from the same chat checkpoint.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
# NOTE(review): do_sample / temperature are supplied at load time here; confirm
# they are actually honoured at generation time (generation options are usually
# given to pipeline() / generate()).
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf",do_sample=True, temperature=0.1)

In [None]:
# Text-generation pipeline over the chat model, wrapped as a LangChain LLM.
# The sampling options are given to the pipeline itself, which forwards extra
# keyword arguments to generation, so the low temperature is applied when the
# text is produced rather than relying on load-time settings.
gen_llama_chat = pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    do_sample=True,
    temperature=0.1,
)
llm = HuggingFacePipeline(pipeline=gen_llama_chat)

In [None]:
# Rebuild the RetrievalQA chain for the chat model, using the chat prompt.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectordb.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT_chat},
    return_source_documents=True,
    verbose=True,
)

In [None]:
# Rebind the question, then run the chain on it (input key "query").
question = "Où puis-je obtenir mon avis de situation ?"
result = qa_chain({"query": question})

In [None]:
# Display the "result" entry of the chain output.
result["result"]

In [None]:
# Gestion de la mémoire

In [None]:
# Keep the message history in memory.
# return_messages=True stores past messages as a list
# rather than as a single block of text.
from langchain.memory import ConversationBufferMemory
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
)  # closing parenthesis was missing, which made the cell a SyntaxError

In [None]:
# ConversationalRetrievalChain takes care of the conversation memory.
from langchain.chains import ConversationalRetrievalChain

retriever = vectordb.as_retriever()
qa_memory = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [None]:
# ConversationalRetrievalChain reads the user input under the "question" key
# (not "query" as RetrievalQA does); chat_history is supplied by the memory.
question = "Où puis-je obtenir mon avis de situation ?"
result = qa_memory({"question": question})

In [None]:
# Display the "answer" entry of the conversational chain output.
result['answer']

In [None]:
# Follow-up question; the input key must be "question" for
# ConversationalRetrievalChain (chat_history is supplied by the memory).
question = "Je ne sais pas si mes données sont diffusées"
result = qa_memory({"question": question})

In [None]:
# Display the "answer" entry of the conversational chain output.
result['answer']

In [None]:
# Create a chatbot that works on your documents

In [None]:
# The chatbot code has been updated a bit since filming. 
# The GUI appearance also varies depending on the platform it is running on.

In [None]:
# The whole pipeline, from the question to the chatbot's answer, is defined here.
# For ergonomic reasons the memory is NOT attached to this
# ConversationalRetrievalChain; the caller passes the chat history itself.
def load_db(chain_type, k):
    """Build a conversational retrieval chain over "bdc.txt".

    The file is loaded, split on markdown headers, embedded and indexed in a
    fresh Chroma store; a similarity retriever returning ``k`` documents is
    then combined with the module-level ``llm``.

    Args:
        chain_type: value forwarded as ``chain_type`` to
            ``ConversationalRetrievalChain.from_llm``.
        k: number of documents requested from the retriever.

    Returns:
        The chain, configured to return source documents and the generated
        question. No memory is attached; chat history is managed by the caller.

    Note:
        Relies on a global ``llm`` that must be defined before this is called.
    """
    # load documents
    loader = TextLoader("bdc.txt")
    documents = loader.load()
    # split documents - optionally add separator="(?<=\. )" to the options
    headers_to_split_on = [
        ("###", "Header 1"),
        ("##", "Header 2"),
        ("#", "Header 3"),
    ]
    markdown_splitter = MarkdownHeaderTextSplitter(
        headers_to_split_on=headers_to_split_on
    )
    docs = markdown_splitter.split_text(documents[0].page_content)
    # define embedding
    embeddings = HuggingFaceEmbeddings()
    # create vector database from data
    vectordb = Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
    )
    # define retriever (the original referenced an undefined name `db` here)
    retriever = vectordb.as_retriever(search_type="similarity", search_kwargs={"k": k})
    # create a chatbot chain. Memory is managed externally.
    # the model to use (`llm`) must be defined beforehand
    qa = ConversationalRetrievalChain.from_llm(
        llm=llm,
        chain_type=chain_type,
        retriever=retriever,
        return_source_documents=True,
        return_generated_question=True,
    )
    return qa


In [None]:
import panel as pn
import param

class cbfs(param.Parameterized):
    """State and callbacks behind the Panel chatbot.

    Parameters (param):
        chat_history: list of ``(query, answer)`` tuples passed back to the chain.
        answer: last answer returned by the chain.
        db_query: last question generated by the chain.
        db_response: source documents returned with the last answer.

    The callbacks read the module-level widgets ``file_input``, ``button_load``
    and ``inp``; these must exist before the callbacks are triggered.
    """
    chat_history = param.List([])
    answer = param.String("")
    db_query  = param.String("")
    db_response = param.List([])

    def __init__(self, **params):
        """Initialise the rendered panel list and build the retrieval chain."""
        super().__init__(**params)
        self.panels = []
        self.qa = load_db("stuff", 4)

    def call_load_db(self, count):
        """Reload the chain when the load button is clicked.

        ``count`` is the button click count; nothing is reloaded on the initial
        call (0) or when no file has been selected.
        """
        if count == 0 or file_input.value is None:  # init or no file specified
            return pn.pane.Markdown("Loaded File")
        # Toggle the button style around the reload.
        button_load.button_style = "outline"
        self.qa = load_db("stuff", 4)
        button_load.button_style = "solid"
        self.clr_history()
        return pn.pane.Markdown("Loaded File")

    # This is where the memory is added: the chat history is passed explicitly.
    def convchain(self, query):
        """Send ``query`` to the chain and return the updated conversation view."""
        if not query:
            return pn.WidgetBox(pn.Row('User:', pn.pane.Markdown("", width=600)), scroll=True)
        result = self.qa({"question": query, "chat_history": self.chat_history})
        self.chat_history.extend([(query, result["answer"])])
        self.db_query = result["generated_question"]
        self.db_response = result["source_documents"]
        self.answer = result['answer']
        self.panels.extend([
            pn.Row('User:', pn.pane.Markdown(query, width=600)),
            # `styles=` (not `style=`) to match every other pane in this class.
            pn.Row('ChatBot:', pn.pane.Markdown(self.answer, width=600, styles={'background-color': '#F6F6F6'}))
        ])
        inp.value = ''  # clears loading indicator when cleared
        return pn.WidgetBox(*self.panels, scroll=True)

    # Dependency name must match the parameter exactly (no trailing space).
    @param.depends('db_query')
    def get_lquest(self):
        """Return a view of the last question sent to the database."""
        if not self.db_query:
            return pn.Column(
                pn.Row(pn.pane.Markdown("Last question to DB:", styles={'background-color': '#F6F6F6'})),
                pn.Row(pn.pane.Str("no DB accesses so far"))
            )
        return pn.Column(
            pn.Row(pn.pane.Markdown("DB query:", styles={'background-color': '#F6F6F6'})),
            pn.pane.Str(self.db_query)
        )

    @param.depends('db_response')
    def get_sources(self):
        """Return a view of the last source documents, or None if there are none."""
        if not self.db_response:
            return
        rlist = [pn.Row(pn.pane.Markdown("Result of DB lookup:", styles={'background-color': '#F6F6F6'}))]
        for doc in self.db_response:
            rlist.append(pn.Row(pn.pane.Str(doc)))
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    @param.depends('convchain', 'clr_history')
    def get_chats(self):
        """Return a view listing every exchange stored in ``chat_history``."""
        if not self.chat_history:
            return pn.WidgetBox(pn.Row(pn.pane.Str("No History Yet")), width=600, scroll=True)
        rlist = [pn.Row(pn.pane.Markdown("Current Chat History variable", styles={'background-color': '#F6F6F6'}))]
        for exchange in self.chat_history:
            rlist.append(pn.Row(pn.pane.Str(exchange)))
        return pn.WidgetBox(*rlist, width=600, scroll=True)

    def clr_history(self, count=0):
        """Reset ``chat_history`` to an empty list; ``count`` is accepted but unused."""
        self.chat_history = []
        return


In [None]:
# Create a chatbot

In [None]:
# Instantiating cbfs builds the retrieval chain (its __init__ calls load_db).
cb = cbfs()

# Widgets. file_input, button_load and inp are read as module-level globals
# by the cbfs callbacks, so these names must not change.
file_input = pn.widgets.FileInput(accept='.txt')
button_load = pn.widgets.Button(name="Load DB", button_type='primary')
button_clearhistory = pn.widgets.Button(name="Clear History", button_type='warning')
button_clearhistory.on_click(cb.clr_history)
inp = pn.widgets.TextInput( placeholder='Posez votre question ici…')

# Bind the callbacks to the widgets: the load callback receives the button
# click count, the conversation callback receives the text input.
bound_button_load = pn.bind(cb.call_load_db, button_load.param.clicks)
conversation = pn.bind(cb.convchain, inp) 

jpg_pane = pn.pane.Image( './img/LogoInsee.jpg')

# Tab 1: question input and conversation.
tab1 = pn.Column(
    pn.Row(inp),
    pn.layout.Divider(),
    pn.panel(conversation,  loading_indicator=True, height=300),
    pn.layout.Divider(),
)
# Tab 2: last generated question and the retrieved source documents.
tab2= pn.Column(
    pn.panel(cb.get_lquest),
    pn.layout.Divider(),
    pn.panel(cb.get_sources ),
)
# Tab 3: chat history.
tab3= pn.Column(
    pn.panel(cb.get_chats),
    pn.layout.Divider(),
)
# Tab 4: file selection, database reload and history reset.
tab4=pn.Column(
    pn.Row( file_input, button_load, bound_button_load),
    pn.Row( button_clearhistory, pn.pane.Markdown("Clears chat history. Can use to start a new topic" )),
    pn.layout.Divider(),
    pn.Row(jpg_pane.clone(width=400))
)
dashboard = pn.Column(
    pn.Row(pn.pane.Markdown('# ChatWithYourData_Bot')),
    pn.Tabs(('Conversation', tab1), ('Database', tab2), ('Chat History', tab3),('Configure', tab4))
)
# Bare last expression: renders the dashboard as the cell output.
dashboard