# Setup

In [None]:
# ! pip install langchain langchain-community faiss-cpu sentence-transformers octoai-sdk langchain-text-splitters lxml tiktoken python-dotenv 'arize-phoenix[evals]' openai

In [None]:
import os

from dotenv import load_dotenv

# Load variables from a local .env file (if any) before reading credentials.
load_dotenv()

# Indexing os.environ directly makes a missing credential fail here with a
# KeyError instead of surfacing later inside a library call.
OCTOAI_API_TOKEN = os.environ["OCTOAI_API_TOKEN"]
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

In [None]:
import phoenix as px
# Start the Phoenix app for this notebook session and keep the returned handle.
# The recorded output below prints the local URL where the app can be viewed.
session = px.launch_app()

  from .autonotebook import tqdm as notebook_tqdm


🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [None]:
from phoenix.trace.langchain import LangChainInstrumentor

# Instrument LangChain up front, before any chain is built or invoked, so the
# runs further down are captured; the eval cells later read them back through
# px.Client().
LangChainInstrumentor().instrument()

# Ingest Data

In [None]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document

In [None]:
# Directory holding one plain-text file per city.
CITY_DATA_DIR = "./city_data"

# Build the splitter once: its configuration does not depend on the file being
# processed, so there is no reason to re-create it on every loop iteration.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=800, chunk_overlap=200, separator="."
)

# os.listdir returns entries in arbitrary order and includes hidden files and
# sub-directories. Sort for a reproducible chunk order and keep only regular,
# non-hidden files so stray entries such as .DS_Store are not read as text.
files = sorted(
    name
    for name in os.listdir(CITY_DATA_DIR)
    if not name.startswith(".")
    and os.path.isfile(os.path.join(CITY_DATA_DIR, name))
)

file_texts = []
for file in files:
    # Explicit encoding so the result does not depend on the platform default.
    with open(os.path.join(CITY_DATA_DIR, file), encoding="utf-8") as f:
        file_text = f.read()

    # Strip only the final extension; splitting on the first "." would
    # truncate titles that themselves contain a dot.
    doc_title = os.path.splitext(file)[0]

    texts = text_splitter.split_text(file_text)
    for i, chunked_text in enumerate(texts):
        file_texts.append(
            Document(
                page_content=chunked_text,
                metadata={"doc_title": doc_title, "chunk_num": i},
            )
        )

WARNI [langchain_text_splitters.base] Created a chunk of size 1311, which is longer than the specified 800
WARNI [langchain_text_splitters.base] Created a chunk of size 1043, which is longer than the specified 800
WARNI [langchain_text_splitters.base] Created a chunk of size 1110, which is longer than the specified 800
WARNI [langchain_text_splitters.base] Created a chunk of size 925, which is longer than the specified 800
WARNI [langchain_text_splitters.base] Created a chunk of size 831, which is longer than the specified 800
WARNI [langchain_text_splitters.base] Created a chunk of size 990, which is longer than the specified 800
WARNI [langchain_text_splitters.base] Created a chunk of size 908, which is longer than the specified 800
WARNI [langchain_text_splitters.base] Created a chunk of size 992, which is longer than the specified 800
WARNI [langchain_text_splitters.base] Created a chunk of size 928, which is longer than the specified 800
WARNI [langchain_text_splitters.base] Creat

In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings
# FAISS lives in langchain_community; the `langchain.vectorstores` path is only
# a deprecated re-export of it.
from langchain_community.vectorstores import FAISS

In [None]:
# Embedding model used to index the chunks; no model name is passed, so the
# library default is used.
# NOTE(review): the recorded output of this cell shows a deprecation warning —
# check which replacement the installed LangChain version recommends.
embeddings = HuggingFaceEmbeddings()

  warn_deprecated(


In [None]:
# Embed every chunk and build the FAISS index over them.
vector_store = FAISS.from_documents(file_texts, embedding=embeddings)

In [None]:
# Sanity check: number of chunk Documents produced by the ingestion cell.
len(file_texts)

541

# Search the Data

In [None]:
from langchain_community.llms.octoai_endpoint import OctoAIEndpoint

# Generation model for the RAG chain.
# The model is passed as `model_name`: with `model=...` the recorded run warned
# that the argument was not a declared field and had been moved into
# `model_kwargs`.
llm = OctoAIEndpoint(
    model_name="mistral-7b-instruct-v0.3",
    max_tokens=2000,
    presence_penalty=0,
    temperature=0.0,  # deterministic-as-possible answers for evaluation
    top_p=0.9,
)

                model was transferred to model_kwargs.
                Please confirm that model is what you intended.


In [None]:
# Wrap the FAISS index as a retriever; no arguments are passed, so the
# library's default search settings apply.
retriever = vector_store.as_retriever()

In [None]:
from langchain.prompts import ChatPromptTemplate

# Prompt with two placeholders, {question} and {context}. It is assembled from
# explicit pieces so the trailing spaces and line breaks in the template are
# visible rather than hidden at the end of lines.
template = (
    "You are a tour guide. Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise.\n"
    "Question: {question} \n"
    "Context: {context} \n"
    "Answer:"
)
prompt = ChatPromptTemplate.from_template(template)

In [None]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser


def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the list of Document objects is interpolated into the
    prompt as-is, so the model sees the list's repr (metadata included)
    instead of clean passage text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# question -> {retrieved context, question} -> prompt -> LLM -> plain string
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Run one question through the full chain (retrieve -> prompt -> LLM -> string).
# NOTE(review): the recorded answer below is longer than the three sentences
# the prompt asks for.
chain.invoke("Where is the best food in the world?")

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


' Berlin is known for its diverse gastronomy scene, with a wide range of international cuisines available, including Turkish, Arab, Asian, and Italian. The city is also home to many traditional German restaurants, serving dishes such as currywurst, schnitzel, and sauerbraten. Berlin has a vibrant food culture, with many street food markets, supper clubs, and food festivals throughout the year. The city is also famous for its traditional German bakeries, offering a variety of breads and pastries. Overall, Berlin is a great destination for foodies, with a wide range of options to suit all tastes and budgets.'

# Run Evals

In [None]:
from phoenix.evals import (
    HallucinationEvaluator,
    OpenAIModel,
    QAEvaluator,
    RelevanceEvaluator,
    run_evals,
)
from phoenix.session.evaluation import get_qa_with_reference, get_retrieved_documents
from phoenix.trace import DocumentEvaluations, SpanEvaluations

In [None]:
# One client handle is enough for both queries.
phoenix_client = px.Client()

# Inputs for the evaluators: Q&A records with their reference text, and the
# documents returned by the retriever.
queries_df = get_qa_with_reference(phoenix_client)
retrieved_documents_df = get_retrieved_documents(phoenix_client)

In [None]:
# A single judge model shared by all three evaluators.
eval_model = OpenAIModel(model="gpt-4-turbo")
hallucination_evaluator = HallucinationEvaluator(eval_model)
qa_correctness_evaluator = QAEvaluator(eval_model)
relevance_evaluator = RelevanceEvaluator(eval_model)

# NOTE(review): the recorded output of this cell suggests calling
# `nest_asyncio.apply()` beforehand for faster, asynchronous eval submission.

# Evals over the Q&A records: the result is unpacked into one dataframe per
# evaluator, in the order the evaluators are listed.
span_level_evaluators = [hallucination_evaluator, qa_correctness_evaluator]
hallucination_eval_df, qa_correctness_eval_df = run_evals(
    dataframe=queries_df,
    evaluators=span_level_evaluators,
    provide_explanation=True,
)

# Eval over the retrieved documents: a single evaluator, so take the first
# (only) dataframe of the result.
relevance_results = run_evals(
    dataframe=retrieved_documents_df,
    evaluators=[relevance_evaluator],
    provide_explanation=True,
)
relevance_eval_df = relevance_results[0]

# Send all three result sets back to Phoenix in one call.
hallucination_evals = SpanEvaluations(
    eval_name="Hallucination", dataframe=hallucination_eval_df
)
qa_correctness_evals = SpanEvaluations(
    eval_name="QA Correctness", dataframe=qa_correctness_eval_df
)
relevance_evals = DocumentEvaluations(
    eval_name="Relevance", dataframe=relevance_eval_df
)
px.Client().log_evaluations(
    hallucination_evals,
    qa_correctness_evals,
    relevance_evals,
)

WARNI [phoenix.evals.executors] 🐌!! If running llm_classify inside a notebook, patching the event loop with nest_asyncio will allow asynchronous eval submission, and is significantly faster. To patch the event loop, run `nest_asyncio.apply()`.
run_evals |██████████| 10/10 (100.0%) | ⏳ 01:21<00:00 |  8.10s/it
WARNI [phoenix.evals.executors] 🐌!! If running llm_classify inside a notebook, patching the event loop with nest_asyncio will allow asynchronous eval submission, and is significantly faster. To patch the event loop, run `nest_asyncio.apply()`.
run_evals |██████████| 20/20 (100.0%) | ⏳ 02:53<00:00 |  8.68s/it
