In [1]:
from haystack import Document
import json
import pandas as pd
import os

In [2]:
from os import environ
import google.auth
from google.oauth2 import service_account
from google.auth.transport.requests import AuthorizedSession


In [3]:
SERVICE_ACCOUNT_FILE = "./creds.json"


In [None]:
SEARCH = os.environ["SEARCH"]

In [4]:
def get_token():
    credentials = service_account.IDTokenCredentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE, target_audience=SEARCH)
    request = google.auth.transport.requests.Request()
    credentials.refresh(request)
    token = credentials.token

    return token

In [5]:
a = get_token()

In [6]:
SEARCH = os.environ["SEARCH"]

In [7]:
hed = get_token()

In [62]:
a = hed["Authorization"]

In [18]:
with open("docs_with_q_4o-mini.json", "rt") as f_in:
    ds_gpt = json.load(f_in)

In [8]:
def prepare_docs(prepared_docs, docs):
    for ds in docs:
        prepared_docs.append(
            Document(content=ds["content"], meta={"source": ds["source"], "headline": ds["headline"], "ids": ds["id"], "length":ds["length"], "question": ds["question"]})
        )
    return prepared_docs

In [9]:
from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
from haystack.components.embedders import SentenceTransformersTextEmbedder, SentenceTransformersDocumentEmbedder
from haystack.components.writers import DocumentWriter
from haystack.document_stores.types import DuplicatePolicy
from haystack import Pipeline
from haystack_integrations.components.retrievers.elasticsearch import ElasticsearchEmbeddingRetriever

In [10]:
def prepare_document_store(model, docs, index_name):
    document_store = ElasticsearchDocumentStore(hosts = SEARCH, embedding_similarity_function="cosine", index=index_name,request_timeout=3000,    bearer_auth=a)
    document_embedder = SentenceTransformersDocumentEmbedder(model=model,
                                                         meta_fields_to_embed=["headline", "question"])
    document_writer = DocumentWriter(document_store=document_store, policy=DuplicatePolicy.OVERWRITE)
    indexing = Pipeline()
    indexing.add_component(instance=document_embedder, name="document_embedder")
    indexing.add_component(instance=document_writer, name="document_writer")
    indexing.connect("document_embedder.documents", "document_writer.documents")
    indexing.run({"document_embedder": {"documents": docs}}),
    return document_store

In [11]:
def prepare_retrieval_pipeline(model, document_store):
    retrieval_pipeline = Pipeline()
    retrieval_pipeline.add_component("text_embedder", SentenceTransformersTextEmbedder(model=model, progress_bar=False))
    retrieval_pipeline.add_component("retriever", ElasticsearchEmbeddingRetriever(document_store=document_store, top_k=5))
    retrieval_pipeline.connect("text_embedder", "retriever")
    return retrieval_pipeline

In [12]:
prepared_docs = []

In [13]:
docs = prepare_docs(prepared_docs, ds_gpt)

In [14]:
import pandas as pd

In [15]:

document_store = prepare_document_store(model="./transformers", docs=docs, index_name="germany")

Batches:   0%|          | 0/21 [00:00<?, ?it/s]

In [32]:
from elasticsearch import Elasticsearch, helpers


In [None]:
test = Elasticsearch()

In [101]:
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator

In [15]:
import os

In [None]:
from getpass import getpass

os.environ["OPENAI_API_KEY"]

In [189]:
template = """
You're a FAQ database assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

Context:
{% for document in documents %}
    {{ document.content }}
{% endfor %}

Question: {{question}}
Answer:
"""

prompt_builder = PromptBuilder(template=template)

generator = OpenAIGenerator(model="gpt-4o-mini")

In [177]:
from haystack.components.builders.answer_builder import AnswerBuilder

In [91]:
prepared_docs = []

In [15]:
docs = prepare_docs(prepared_docs, ds_gpt)

In [175]:
rag_pipeline = None

In [190]:

rag_pipeline = Pipeline()
# Add components to your pipeline



rag_pipeline.add_component("text_embedder", SentenceTransformersTextEmbedder(model="sentence-transformers/multi-qa-distilbert-cos-v1", progress_bar=False))
rag_pipeline.add_component("retriever", ElasticsearchEmbeddingRetriever(document_store=document_store, top_k=5))
rag_pipeline.add_component("prompt_builder", prompt_builder)
rag_pipeline.add_component("llm", generator)
rag_pipeline.add_component(instance=AnswerBuilder(), name="answer_builder")





rag_pipeline.connect("text_embedder", "retriever")
rag_pipeline.connect("retriever", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "llm")
rag_pipeline.connect("llm.replies", "answer_builder.replies")
rag_pipeline.connect("llm.meta", "answer_builder.meta")
rag_pipeline.connect("retriever", "answer_builder.documents")

<haystack.core.pipeline.pipeline.Pipeline object at 0x7f7aa4628090>
🚅 Components
  - text_embedder: SentenceTransformersTextEmbedder
  - retriever: ElasticsearchEmbeddingRetriever
  - prompt_builder: PromptBuilder
  - llm: OpenAIGenerator
  - answer_builder: AnswerBuilder
🛤️ Connections
  - text_embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> prompt_builder.documents (List[Document])
  - retriever.documents -> answer_builder.documents (List[Document])
  - prompt_builder.prompt -> llm.prompt (str)
  - llm.replies -> answer_builder.replies (List[str])
  - llm.meta -> answer_builder.meta (List[Dict[str, Any]])

In [6]:
import pandas as pd

In [7]:
df_ground_truth = pd.read_csv("gp4o-mini-questions.csv")
ground_truth = df_ground_truth.to_dict(orient="records")


In [12]:
ground_truth[0]

{'question': 'What is the total length of the Autobahn network in Germany?',
 'headline': 'How safe is the Autobahn?',
 'content': '9d8370cf-a2c8-4c54-9f9c-476b9c09a933'}

In [19]:
ds_gpt[0]

{'source': 'https://www.reddit.com/r/germany/wiki/autobahn_safety',
 'content': 'The Autobahn is a [network of interstate highways in Germany](https://en.m.wikipedia.org/wiki/Autobahn#/media/File%3AAutobahnen_in_Deutschland.svg) with a total length of more than 8,000 miles. [65%](https://en.wikipedia.org/wiki/Autobahn#Speed_limits) of the Autobahn has no speed limit. How safe can that be?\nVehicles traveled 147 billion miles on the Autobahn in 2015. 322 people died = 2.19 deaths per billion miles.\nIn the US, vehicles travelled 757 billion miles on interstate highways. 3,837 people died = 5.07 deaths per billion miles.\nThat means: If you drive on the interstate, your likelihood to die is 131% higher than for the same distance on the Autobahn.\n*sources:*\nStatistisches Bundesamt: [Unfallentwicklung auf deutschen Straßen 2015](https://www.destatis.de/DE/PresseService/Presse/Pressekonferenzen/2016/Unfallentwicklung_2015/Pressebroschuere_unfallentwicklung.pdf?__blob=publicationFile)\nNat

In [110]:
aydi =ds_gpt[0]["id"]

In [23]:
combined_list = list(map(lambda gt: {**gt, **next((ds for ds in ds_gpt if ds['id'] == gt['content']), {

    "test" : gt["content"]
})}, ground_truth))

In [24]:
combined_list = [
    {**gt, **next((ds for ds in ds_gpt if ds['id'] == gt['content']), {})}
    for gt in ground_truth
]

In [25]:
new_ds = [
    {
        'question': gt['question'],
        'id': gt['content'],  
        'content': ds['content']
    }
    for gt in ground_truth
    for ds in ds_gpt
    if gt['content'] == ds['id']  
]


In [26]:
for gt in ground_truth[:6]:
    print(gt["question"])
    print(gt["content"])

What is the total length of the Autobahn network in Germany?
9d8370cf-a2c8-4c54-9f9c-476b9c09a933
How many deaths occurred on the Autobahn per billion miles in 2015?
9d8370cf-a2c8-4c54-9f9c-476b9c09a933
What were the safety statistics for US interstate highways in comparison to the Autobahn?
9d8370cf-a2c8-4c54-9f9c-476b9c09a933
How many miles did vehicles travel on US interstate highways in 2015?
9d8370cf-a2c8-4c54-9f9c-476b9c09a933
What percentage of the Autobahn has no speed limit?
9d8370cf-a2c8-4c54-9f9c-476b9c09a933
What are the primary methods for obtaining German citizenship if you have ancestors from Germany?
6ef3b8e4-f20b-4893-bf9e-f58f800afb82


In [27]:
new_ds[:6]

[{'question': 'What is the total length of the Autobahn network in Germany?',
  'id': '9d8370cf-a2c8-4c54-9f9c-476b9c09a933',
  'content': 'The Autobahn is a [network of interstate highways in Germany](https://en.m.wikipedia.org/wiki/Autobahn#/media/File%3AAutobahnen_in_Deutschland.svg) with a total length of more than 8,000 miles. [65%](https://en.wikipedia.org/wiki/Autobahn#Speed_limits) of the Autobahn has no speed limit. How safe can that be?\nVehicles traveled 147 billion miles on the Autobahn in 2015. 322 people died = 2.19 deaths per billion miles.\nIn the US, vehicles travelled 757 billion miles on interstate highways. 3,837 people died = 5.07 deaths per billion miles.\nThat means: If you drive on the interstate, your likelihood to die is 131% higher than for the same distance on the Autobahn.\n*sources:*\nStatistisches Bundesamt: [Unfallentwicklung auf deutschen Straßen 2015](https://www.destatis.de/DE/PresseService/Presse/Pressekonferenzen/2016/Unfallentwicklung_2015/Pressebr

In [10]:
from tqdm.auto import tqdm

In [None]:
question = "test"

response = rag_pipeline.run(
{
    "text_embedder": {"text" : question},
    "prompt_builder": {"question": question},
    "answer_builder": {"query": question},
}
)
response["answer_builder"]["answers"][0].data


In [178]:
contexts = []
responses = []
questions = []



for run in tqdm(new_ds):
    question = run["question"]
    context = run["content"]

    questions.append(question)
    contexts.append([context])

    response = rag_pipeline.run(
    {
        "text_embedder": {"text" : question},
        "prompt_builder": {"question": question},
        "answer_builder": {"query": question},
    }
    )
    responses.append(response["answer_builder"]["answers"][0].data)
    

  0%|          | 0/3305 [00:00<?, ?it/s]

In [28]:
from haystack import Pipeline
from haystack_integrations.components.evaluators.ragas import RagasEvaluator, RagasMetric

context_utilization_pipeline = Pipeline()
evaluator = RagasEvaluator(metric=RagasMetric.CONTEXT_UTILIZATION)
context_utilization_pipeline.add_component("evaluator", evaluator)

In [None]:
test["contenxt"] = contexts
test["responses"] = responses
test["questions"] = questions

In [186]:
test.to_csv("./eval_docs.csv", header=True, sep=";")

In [7]:
test = pd.read_csv("./eval_docs.csv", sep=";")

In [21]:
t = test.sample(50)


In [22]:
con = t["contenxt"].to_list()

In [23]:
contexts = []

In [24]:
for c in con:
    contexts.append(list(c))

In [25]:
t.head()

Unnamed: 0.1,Unnamed: 0,contenxt,responses,questions
1233,1233,"['The Schengen tourist visa is intended for, w...","Yes, you may study for the duration of the Sch...",Is it possible to study while holding a Scheng...
2499,2499,"['Additionally, irrespective of what contractu...",It is recommended to join a Mieterschutzverein...,Why is it recommended to join a Mieterschutzve...
455,455,['Related threads in /r/Germany subreddit.\n* ...,Several factors contribute to the reluctance o...,What factors contribute to the reluctance of G...
2313,2313,['The big three network providers tend to offe...,When choosing between a big three network prov...,What factors should I consider when choosing b...
94,94,['* Shake hands with everyone you meet (assumi...,"In towns, it is not necessary to greet strange...",Is it necessary to greet strangers in towns or...


In [26]:
responses = t["responses"].to_list()

In [27]:
questions = t["questions"].to_list()

In [28]:
import random

In [29]:
from haystack import Pipeline
from haystack_integrations.components.evaluators.ragas import RagasEvaluator, RagasMetric

answer_relevancy_pipeline = Pipeline()
evaluator = RagasEvaluator(
    metric=RagasMetric.ANSWER_RELEVANCY,
    metric_params={
        "strictness": 2
    })
answer_relevancy_pipeline.add_component("evaluator", evaluator)


In [30]:
evaluation_results = answer_relevancy_pipeline.run(
    {"evaluator": {"questions": questions, "contexts":  contexts, "responses": responses}}
)


Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

In [31]:
sums = []

In [32]:
for score in (evaluation_results["evaluator"]["results"]):
    sums.append(score[0]["score"])


In [36]:
pd.DataFrame(sums).describe()

Unnamed: 0,0
count,50.0
mean,0.930749
std,0.194594
min,0.0
25%,0.951582
50%,0.97883
75%,0.992876
max,1.0


In [21]:
pd.DataFrame(sums).describe()

Unnamed: 0,0
count,50.0
mean,0.934164
std,0.194037
min,0.0
25%,0.955874
50%,0.975092
75%,0.996937
max,1.0


In [20]:
pd.DataFrame(sums).describe()

Unnamed: 0,0
count,50.0
mean,0.910511
std,0.234884
min,0.0
25%,0.93146
50%,0.98104
75%,0.996109
max,1.0


In [33]:
pd.DataFrame(sums).describe()

Unnamed: 0,0
count,50.0
mean,0.883394
std,0.266121
min,0.0
25%,0.928617
50%,0.971967
75%,0.989022
max,1.0
