In [None]:
import openai
import pandas as pd 
import llama_index
from openai import AzureOpenAI
import qdrant_client
from typing import Any, List
from llama_index.llms.openai_like import OpenAILike
from llama_index.core.embeddings import BaseEmbedding
from llama_index.core.bridge.pydantic import PrivateAttr
from llama_index.core.prompts import PromptTemplate
from llama_index.vector_stores.qdrant import QdrantVectorStore

from llama_index.core.evaluation import (
    FaithfulnessEvaluator,
    RelevancyEvaluator,
    CorrectnessEvaluator,
    RetrieverEvaluator
)
from llama_index.core import Settings
from llama_index.core import Document, VectorStoreIndex

import os
from tqdm import tqdm
import nest_asyncio
nest_asyncio.apply()

In [None]:
import os

# Connection settings for the inference server, declared once and reused below.
# FIXME(security): the API key is committed in clear text; load it from a
# secret store or a pre-set environment variable before sharing this notebook.
LLM_SERVER_HOST = "10.156.254.10"
LLM_API_KEY = "dtnumds"
LLM_API_BASE = "http://10.156.254.10:8000/v1"

# Exclude the server host from proxying. Kept ahead of the `openai` import,
# exactly as in the original statement order.
os.environ["no_proxy"] = LLM_SERVER_HOST

import openai

# Module-level openai settings.
# NOTE(review): `api_base` is the pre-1.0 attribute name; confirm it is still
# honoured by the installed openai version.
openai.api_key = LLM_API_KEY
openai.api_base = LLM_API_BASE

# Same values exported as environment variables; presumably read by clients
# that are created without explicit credentials (TODO confirm).
os.environ["OPENAI_API_KEY"] = LLM_API_KEY
os.environ["OPENAI_API_BASE"] = LLM_API_BASE

In [None]:
# Client used for the raw completion and embedding calls below.
# The key and endpoint are read back from the environment variables exported by
# the configuration cell above, so the credential literal is not repeated here.
client = AzureOpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
    azure_endpoint=os.environ["OPENAI_API_BASE"],
    api_version="2023-07-01-preview",
)

In [None]:
for model in client.models.list().data :
    print(model.id)

In [None]:
prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Tu es un assistant intelligent <|eot_id|><|start_header_id|>user<|end_header_id|>

Donne moi des idées de voyages en europe<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 """

In [None]:
client.completions.create(prompt = prompt, model="llama3-70b", max_tokens=2048, temperature=0.0)

In [None]:
# `_` is IPython's "last cell output" variable: this only yields the generated
# text when run immediately after the completion cell above. Under any other
# execution order `_` refers to a different object.
_.choices[0].text

In [None]:
len(client.embeddings.create(input="test", model="dgfip-e5-large").data[0].embedding)

In [None]:

class DGFIPEmbeddings(BaseEmbedding):
    """LlamaIndex embedding model backed by an OpenAI-style embeddings endpoint.

    Every vector is obtained by calling ``embeddings.create`` on the client
    supplied at construction time, using the configured model name.
    """

    _model_name: str = PrivateAttr()
    _openai_client: Any = PrivateAttr()

    def __init__(
        self,
        openai_client,
        model_name: str = "dgfip-e5-large",
        **kwargs: Any,
    ) -> None:
        """Store the client and the embedding model name.

        Args:
            openai_client: client object exposing ``embeddings.create(input=..., model=...)``.
            model_name: model (or deployment) name sent with every request.
            **kwargs: forwarded unchanged to ``BaseEmbedding``.
        """
        # Initialise the pydantic base model first, then assign the private
        # attributes: this is the order required by pydantic v2 and it also
        # works with the v1 bridge.
        super().__init__(**kwargs)
        self._model_name = model_name
        self._openai_client = openai_client

    @classmethod
    def class_name(cls) -> str:
        """Return the identifier LlamaIndex uses for this embedding class."""
        return "DGFIPEmbedding"

    def _request_embeddings(self, payload: Any) -> List[List[float]]:
        """Send ``payload`` (one string or a list of strings) to the endpoint.

        Returns one embedding vector per item of the response's ``data`` list,
        in the order the response provides them.
        """
        response = self._openai_client.embeddings.create(
            input=payload,
            model=self._model_name,  # model = "deployment_name".
        )
        return [item.embedding for item in response.data]

    def _get_query_embedding(self, query: str) -> List[float]:
        """Embed a single query string."""
        return self._request_embeddings(query)[0]

    def _get_text_embedding(self, text: str) -> List[float]:
        """Embed a single document text."""
        return self._request_embeddings(text)[0]

    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Embed a batch of texts with a single request."""
        return self._request_embeddings(texts)

    async def _aget_query_embedding(self, query: str) -> List[float]:
        """Async variant of ``_get_query_embedding``.

        Delegates to the synchronous call, so the request itself is not awaited.
        """
        return self._get_query_embedding(query)

    async def _aget_text_embedding(self, text: str) -> List[float]:
        """Async variant of ``_get_text_embedding`` (delegates to the sync call)."""
        return self._get_text_embedding(text)


In [None]:
Settings.embed_model = DGFIPEmbeddings(openai_client = client, model_name="dgfip-e5-large")
Settings.llm  = OpenAILike(model='mixtral-instruct', max_tokens=2048, timeout=600)


## Load data

In [None]:
# Load the dataframe stored at the pickle path below.
# NOTE(security): unpickling can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
df = pd.read_pickle('../../../MoDR/data/preprocess/baco_data.pkl')

In [None]:
def extract_folder(x):
    """Return the second '/'-separated component of the path string ``x``.

    Raises ``IndexError`` when ``x`` contains no '/'.
    """
    segments = x.split('/')
    return segments[1]

In [None]:
df['folder'] = df['file_name'].apply(extract_folder)

In [None]:
df = df[df['folder']=="bacorh"]

### Mise en place d'un pipeline simple

In [None]:
baco_text = df.file_content.to_list()
baco_filename = df.file_name.to_list()
baco_section = df.section.to_list()
baco_question = df.question.to_list()
baco_title = df.title.to_list()
baco_n_words = df.n_words.to_list()

In [None]:
# Build one Document per row by walking the six column lists in lockstep
# (all of them are extracted from the same dataframe in the cell above).
documents = [
    Document(
        text=text,
        metadata={
            "filename": filename,
            "section": section,
            "question": question,
            "title": title,
            "n_words": n_words,
        },
    )
    for text, filename, section, question, title, n_words in zip(
        baco_text,
        baco_filename,
        baco_section,
        baco_question,
        baco_title,
        baco_n_words,
    )
]

In [None]:


client_qdrant = qdrant_client.QdrantClient(
    "10.156.254.10:6335",
)

In [None]:
vector_store = QdrantVectorStore(client=client_qdrant, collection_name="demo")

In [None]:
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

In [None]:
from llama_index.core.vector_stores.types import ExactMatchFilter, MetadataFilters, MetadataFilter
from llama_index.core.vector_stores import FilterOperator, FilterCondition
filters = {
    "index":"6669965c92c1832f69f4e931"
}
llama_index_filters =  MetadataFilters(filters=[MetadataFilter(key=key, value=value) for key, value in filters.items()], condition=FilterCondition.AND )

In [None]:
retriever = index.as_retriever(similarity_top_k=5)

In [None]:
retriever.retrieve('Quel est le taux de paiement dématérialisé')

In [None]:
eval_df = pd.read_csv("../../../data/nausicaa_parsed_eval_dataset.csv")


In [None]:
eval_df

In [None]:
eval_df.value_counts(subset=['languages'])["['fra']"]

In [None]:
# Small throw-away frame, apparently used to try out `value_counts` in the next cell.
# NOTE(review): this rebinds `df`, the name earlier cells use for the loaded
# dataset; re-running those cells after this one will operate on this frame.
df = pd.DataFrame({"test": [0,0,0,0,1,1,1]})

In [None]:
df.value_counts()[0]

In [None]:
eval_queries = eval_df.eval_question.to_list()
eval_response_eid = eval_df.element_id.to_list()
eval_text = eval_df.text.to_list()

In [None]:
def evaluate_retriver(eval_queries, response_eid, retriever, precision=1):
    """Compute the hit rate of ``retriever`` on a set of labelled queries.

    A query counts as a hit when its expected element id appears among the
    ``element_id`` metadata values of the first ``precision`` nodes returned by
    ``retriever.retrieve(query)``.

    Args:
        eval_queries: sequence of query strings.
        response_eid: expected element id for each query, aligned by position
            with ``eval_queries``.
        retriever: object exposing ``retrieve(query)`` and returning a list of
            nodes whose ``metadata`` mapping contains ``'element_id'``.
        precision: number of top-ranked results inspected per query.

    Returns:
        Fraction of queries that are hits (float in [0, 1]); ``0.0`` when
        ``eval_queries`` is empty.
    """
    n_queries = len(eval_queries)
    if n_queries == 0:
        # Nothing to evaluate: avoid dividing by zero.
        return 0.0

    correct_matches = 0
    # enumerate() has no length, so give tqdm the total to display progress/ETA.
    for i, query in tqdm(enumerate(eval_queries), total=n_queries):
        top_nodes = retriever.retrieve(query)[:precision]
        top_eids = [n.metadata['element_id'] for n in top_nodes]

        # Hit if the expected id is among the top-`precision` retrieved nodes.
        if response_eid[i] in top_eids:
            correct_matches += 1

    return correct_matches / n_queries


In [None]:
score = evaluate_retriver(eval_queries = eval_queries, response_eid=eval_response_eid, retriever=retriever, precision=5)

In [None]:
from llama_index.core.schema import TextNode
train_nodes = [TextNode(id_=id_, text=text) for id_, text in zip(eval_response_eid, eval_text)]

In [None]:
#from llama_index.core import StorageContext
#storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [None]:
#index = VectorStoreIndex.from_documents(
#    documents,
#    storage_context=storage_context,
#)

In [None]:
query_engine = index.as_query_engine(filters=llama_index_filters)

In [None]:
# Customize the question-answering prompt used by the LlamaIndex query engine
rag_prompt = """
[INST]
Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query in French.
Query: {query_str}
Answer: 
[/INST]
"""
qa_template = PromptTemplate(rag_prompt)

query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": qa_template}
)

In [None]:
# Send each sample question to the query engine; responses keep the order of `query`.
query = ["Qu'est que le club des médiateurs internes ? ", "test", "test2"]
answer = [query_engine.query(q) for q in query]


In [None]:
[a.source_nodes for a in answer]

In [None]:
import numpy as np
L= np.array([9,2,2,3,4])

In [None]:

def node_parser(nodes: List[Any]) -> str:
    """Concatenate the ``text`` of every node, each followed by a blank line.

    Returns an empty string when ``nodes`` is empty.
    """
    return "".join(node.text + "\n\n" for node in nodes)

In [None]:
# `node_parser` requires the list of nodes; `answer` is a list of responses,
# so build the context string from the source nodes of the first one.
node_parser(answer[0].source_nodes)

In [None]:
# `answer` is a list of responses (one per query), so index the first response
# before reaching for its source nodes.
answer[0].source_nodes[0].metadata.keys()

In [None]:
"Nom du fichier :" + answer.source_nodes[0].metadata['filename'] + " / Texte :" + answer.source_nodes[0].text 

### Méthode d'évaluation  

In [None]:
eval_dataset = pd.read_csv('data/benchmark_bacorh_min.csv', sep='|')

In [None]:
eval_question = eval_dataset['question'].to_list()
eval_response = eval_dataset['ground_truths'].to_list()

In [None]:
faithfulness_eval = FaithfulnessEvaluator()
relevancy_eval = RelevancyEvaluator()
correctness_eval = CorrectnessEvaluator()


In [None]:
faithfulness_eval_prompt = """
[INST]
Please tell if a given piece of information is supported by the context.
You need to answer with either YES or NO.
Answer YES if any of the context supports the information, even if most of the context is unrelated. Some examples are provided below. 

Information: Apple pie is generally double-crusted.
Context: An apple pie is a fruit pie in which the principal filling ingredient is apples. 
Apple pie is often served with whipped cream, ice cream ('apple pie à la mode'), custard or cheddar cheese.
It is generally double-crusted, with pastry both above and below the filling; the upper crust may be solid or latticed (woven of crosswise strips).
Answer: YES
Information: Apple pies tastes bad.
Context: An apple pie is a fruit pie in which the principal filling ingredient is apples. 
Apple pie is often served with whipped cream, ice cream ('apple pie à la mode'), custard or cheddar cheese.
It is generally double-crusted, with pastry both above and below the filling; the upper crust may be solid or latticed (woven of crosswise strips).
Answer: NO
Information: {query_str}
Context: {context_str}
Answer: 
[/INST]
"""
faithfulness_refine_prompt = """
[INST]
We want to understand if the following information is present in the context information: {query_str}
We have provided an existing YES/NO answer: {existing_answer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
------------
{context_msg}
------------
If the existing answer was already YES, still answer YES. If the information is present in the new context, answer YES. Otherwise answer NO.
[/INST]

"""

faithfulness_eval_template = PromptTemplate(faithfulness_eval_prompt)
faithfulness_refine_template = PromptTemplate(faithfulness_refine_prompt)


faithfulness_eval.update_prompts(
    {"eval_template": faithfulness_eval_template,
     "refine_template": faithfulness_refine_template}
)


In [None]:
# Prompt templates for the relevancy evaluator, wrapped in [INST] ... [/INST] tags.
# The evaluation prompt uses the {query_str} and {context_str} placeholders.
relevancy_eval_prompt = """
[INST]
Your task is to evaluate if the response for the query     is in line with the context information provided.
You have two options to answer. Either YES/ NO.
Answer - YES, if the response for the query     is in line with context information otherwise NO.
Query and Response: 
 {query_str}
Context: 
 {context_str}
Answer: 
[/INST]
"""
# The refine prompt uses {query_str}, {existing_answer} and {context_msg}.
# Fixed the "isin line" typo in its first sentence.
relevancy_refine_prompt = """
[INST]
We want to understand if the following query and response is in line with the context information: 
 {query_str}
We have provided an existing YES/NO answer: 
 {existing_answer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
------------
{context_msg}
------------
If the existing answer was already YES, still answer YES. If the information is present in the new context, answer YES. Otherwise answer NO.
[/INST]

"""

relevancy_eval_template = PromptTemplate(relevancy_eval_prompt)
relevancy_refine_template = PromptTemplate(relevancy_refine_prompt)


# Replace the evaluator's eval and refine prompts with the templates above.
relevancy_eval.update_prompts(
    {"eval_template": relevancy_eval_template,
     "refine_template": relevancy_refine_template}
)


In [None]:
def get_faithfulness(eval_query, faithfulness_evaluator, query_engine):
    """Answer ``eval_query`` with ``query_engine`` and judge the answer's faithfulness.

    The engine's response is passed to
    ``faithfulness_evaluator.evaluate_response(response=...)`` and the
    ``passing`` attribute of the evaluation result is returned.
    """
    answer = query_engine.query(eval_query)
    verdict = faithfulness_evaluator.evaluate_response(response=answer)
    return verdict.passing

In [None]:
def get_relevancy(eval_query, relevancy_evaluator, query_engine):
    """Answer ``eval_query`` with ``query_engine`` and judge the answer's relevancy.

    The engine's response and the original query are passed to
    ``relevancy_evaluator.evaluate_response(response=..., query=...)`` and the
    ``passing`` attribute of the evaluation result is returned.
    """
    answer = query_engine.query(eval_query)
    verdict = relevancy_evaluator.evaluate_response(query=eval_query, response=answer)
    return verdict.passing

In [None]:
from tqdm import tqdm

# Faithfulness rate: share of evaluation questions whose generated response
# passes the faithfulness evaluator.
faithful_flags = [
    get_faithfulness(eval_query=eq, faithfulness_evaluator=faithfulness_eval, query_engine=query_engine)
    for eq in tqdm(eval_question)
]
fres = sum(1 for flag in faithful_flags if flag) / len(eval_question)

In [None]:
fres

In [None]:
from tqdm import tqdm

# Relevancy rate: share of evaluation questions whose generated response
# passes the relevancy evaluator.
relevant_flags = [
    get_relevancy(eval_query=eq, relevancy_evaluator=relevancy_eval, query_engine=query_engine)
    for eq in tqdm(eval_question)
]
rres = sum(1 for flag in relevant_flags if flag) / len(eval_question)

In [None]:
rres

In [None]:
# Wrap each reference answer in a single-element list.
# NOTE(review): not idempotent; running this cell twice nests the lists again.
# NOTE(review): `get_correctness` below passes `str(reference)`, so the
# evaluator receives the list's repr (brackets and quotes included); confirm
# this is intended.
eval_response = [[a] for a in eval_response]

In [None]:
def get_eval_results(key, eval_results):
    """Print and return the share of passing results stored under ``key``.

    ``eval_results[key]`` must be a sized collection of objects exposing a
    ``passing`` attribute; a result counts when that attribute is truthy.
    Raises ``ZeroDivisionError`` when the collection is empty.
    """
    results = eval_results[key]
    n_passing = sum(1 for result in results if result.passing)
    score = n_passing / len(results)
    print(f"{key} Score: {score}")
    return score

In [None]:
# NOTE(review): `eval_results` is not defined in any cell visible above; on a
# fresh kernel this raises NameError unless it is created elsewhere.
# This also rebinds `score`, previously the retriever accuracy.
score = get_eval_results("relevancy", eval_results)

In [None]:
correctness_prompt = """
[INST]
system: 
You are an expert evaluation system for a question answering chatbot.

You are given the following information:
- a user query, and
- a generated answer

You may also be given a reference answer to use for reference in your evaluation.

Your job is to judge the relevance and correctness of the generated answer.
Output a single score that represents a holistic evaluation.
You must return your response in a line with only the score.
Do not return answers in any other format.
On a separate line provide your reasoning for the score as well.

Follow these guidelines for scoring:
- Your score has to be between 1 and 5, where 1 is the worst and 5 is the best.
- If the generated answer is not relevant to the user query, you should give a score of 1.
- If the generated answer is relevant but contains mistakes, you should give a score between 2 and 3.
- If the generated answer is relevant and fully correct, you should give a score between 4 and 5.

Example Response:
4.0
The generated answer has the exact same metrics as the reference answer,     but it is not as concise.


user: 
## User Query
{query}

## Reference Answer
{reference_answer}

## Generated Answer
{generated_answer}

assistant: 
[/INST]
"""
correctness_prompt_template = PromptTemplate(correctness_prompt)


correctness_eval.update_prompts(
    {"eval_template": correctness_prompt_template,
     }
)


In [None]:
def get_correctness(eval_query, reference, correctness_evaluator, query_engine):
    """Answer ``eval_query`` and grade the answer against ``reference``.

    The engine's response and the reference are both converted with ``str()``
    before being handed to ``correctness_evaluator.evaluate``; the evaluator's
    result object is returned unchanged.
    """
    generated = str(query_engine.query(eval_query))
    return correctness_evaluator.evaluate(
        query=eval_query,
        response=generated,
        reference=str(reference),
    )

In [None]:
correctness = get_correctness(eval_question[1], eval_response[1], correctness_eval, query_engine)

In [None]:
# Mean correctness score over the benchmark questions.
cres = 0
# zip() has no length, so pass the total explicitly to let tqdm display
# progress and an ETA.
for eq, er in tqdm(zip(eval_question, eval_response), total=len(eval_question)):
    # NOTE(review): presumably `.score` is always numeric here; a None score
    # would make the addition below fail. Confirm against the evaluator.
    score = get_correctness(eq, er, correctness_eval, query_engine).score
    cres += score
cres = cres/len(eval_question)

In [None]:
cres

In [None]:
import yaml
import os

# Path to the YAML prompt file. It can come from a config, be hard-coded here,
# or be passed directly to `read_template`; the choice is up to you.

yaml_dir_path = "data.yaml"

def read_template(yaml_dir_path):
    """Load and parse the YAML file located at ``yaml_dir_path``.

    Args:
        yaml_dir_path: path of the YAML file to read (despite the name, it is
            opened as a file, not as a directory).

    Returns:
        The parsed YAML content, or ``''`` when the file cannot be parsed
        (the parse error is printed).

    Raises:
        OSError: if the file cannot be opened.
    """
    yaml_content = ''

    # Decode as UTF-8 explicitly instead of relying on the platform default.
    with open(yaml_dir_path, "r", encoding="utf-8") as f:
        try:
            yaml_content = yaml.safe_load(f)
        except yaml.YAMLError as e:
            # Best-effort: report the problem and fall through to the '' default.
            print(f"Error parsing {yaml_dir_path}: {e}")

    return yaml_content

def get_prompt(yaml_dir_path, task, subtask):
    """Return the prompt text stored for ``task`` / ``subtask`` in a YAML file.

    Args:
        yaml_dir_path (str): path of the YAML file, forwarded to ``read_template``.
        task (str): top-level key in the file (e.g. "relevancy").
        subtask (str): key under the task's ``prompts`` entry (e.g. "refine").

    Returns:
        The value found at ``content[task]["prompts"][subtask]["prompt"]``.

    Raises:
        KeyError: if one of the keys is missing from the parsed mapping.
        TypeError: if the file could not be parsed (``read_template`` then
            returns ``''``, which cannot be indexed by key).
    """
    yaml_content = read_template(yaml_dir_path)
    return yaml_content[task]["prompts"][subtask]["prompt"]


In [None]:
content = read_template("data.yaml")

In [None]:
test = get_prompt("data.yaml","relevancy", "refine")

In [None]:
test

In [None]:
test_template = PromptTemplate(test)

In [None]:
test_template.format(context_str="test", query_str="this is a test")