# Setup

In [1]:
! pip install langchain langchain-community faiss-cpu sentence-transformers octoai-sdk langchain-text-splitters lxml tiktoken python-dotenv 'arize-phoenix[evals]' openai


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [2]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment.
load_dotenv()
# Indexing os.environ (rather than .get) raises KeyError right here if a
# credential is missing, instead of failing later inside an API call.
OCTOAI_API_TOKEN = os.environ["OCTOAI_API_TOKEN"]
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

In [3]:
import phoenix as px
# Start the Phoenix app and keep the returned session handle.
session = px.launch_app()
# Alternative: relaunch with a previously saved trace dataset, loaded by its ID.
# session = px.launch_app(trace=px.TraceDataset.load("5f612e9f-e796-469d-8a5c-16aa2ea234c8"))

  from .autonotebook import tqdm as notebook_tqdm
E0809 17:08:10.109394985   51825 chttp2_server.cc:1097]                UNKNOWN:No address added out of total 1 resolved for '[::]:4317' {created_time:"2024-08-09T17:08:10.109290551+00:00", children:[UNKNOWN:Failed to add any wildcard listeners {created_time:"2024-08-09T17:08:10.109282636+00:00", children:[UNKNOWN:Unable to configure socket {created_time:"2024-08-09T17:08:10.109102229+00:00", fd:89, children:[UNKNOWN:Address already in use {created_time:"2024-08-09T17:08:10.109080108+00:00", errno:98, os_error:"Address already in use", syscall:"bind"}]}, UNKNOWN:Unable to configure socket {fd:89, created_time:"2024-08-09T17:08:10.10927949+00:00", children:[UNKNOWN:Address already in use {created_time:"2024-08-09T17:08:10.109272216+00:00", errno:98, os_error:"Address already in use", syscall:"bind"}]}]}]}
ERROR:    Traceback (most recent call last):
  File "/home/codespace/.python/current/lib/python3.10/site-packages/starlette/routing.py"

🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [4]:
from phoenix.trace.langchain import LangChainInstrumentor

# Instrument LangChain before any chain is built or invoked.
# NOTE(review): presumably this is what lets the eval cells read queries and
# retrieved documents back from Phoenix — confirm.
LangChainInstrumentor().instrument()

# Ingest Data

In [5]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document

In [6]:
# Read every file in the city data directory, split it into overlapping
# chunks, and wrap each chunk in a Document tagged with its source title and
# position within that file.
CITY_DATA_DIR = "../city_data"

# The splitter settings never change, so build it once instead of once per file.
# Chunk size and overlap are measured with the tiktoken encoder; text is split on ".".
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=800, chunk_overlap=200, separator="."
)

# os.listdir returns entries in arbitrary order; sort them so the chunk order
# (and therefore the index built from it) is the same on every run. Entries
# that are not regular files are skipped because open() would fail on them.
files = sorted(
    name
    for name in os.listdir(CITY_DATA_DIR)
    if os.path.isfile(os.path.join(CITY_DATA_DIR, name))
)

file_texts = []
for file in files:
    # Explicit encoding: do not depend on the platform default when reading text.
    with open(os.path.join(CITY_DATA_DIR, file), encoding="utf-8") as f:
        file_text = f.read()
    texts = text_splitter.split_text(file_text)
    # Strip only the extension, so a file name that itself contains "." keeps
    # its full title (split(".")[0] would cut it at the first dot).
    doc_title = os.path.splitext(file)[0]
    for i, chunked_text in enumerate(texts):
        file_texts.append(
            Document(
                page_content=chunked_text,
                metadata={"doc_title": doc_title, "chunk_num": i},
            )
        )

In [7]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

In [8]:
# No model name is passed, so the library's default embedding model is used.
embeddings = HuggingFaceEmbeddings()



In [9]:
# Embed every chunk Document and build the FAISS index used for retrieval below.
vector_store = FAISS.from_documents(file_texts, embedding=embeddings)

In [10]:
# Sanity check: number of chunk Documents produced by the ingest loop.
len(file_texts)

543

# Search the Data

In [11]:
from langchain_community.chat_models.octoai import ChatOctoAI

# Chat model that answers the questions: temperature 0.0, at most 2000 tokens
# per response; the remaining sampling options go through model_kwargs.
llm = ChatOctoAI(
    model_name="meta-llama-3.1-8b-instruct",
    max_tokens=2000,
    temperature=0.0,
    model_kwargs={"presence_penalty": 0, "top_p": 0.9},
)

In [12]:
# Expose the FAISS index as a retriever; no arguments are passed, so the
# library's default search settings apply.
retriever = vector_store.as_retriever()

In [13]:
from langchain.prompts import ChatPromptTemplate
# Prompt with two placeholders: {question} (the user query) and {context}
# (whatever the retrieval step supplies). The template text is sent to the
# model verbatim, so its whitespace is left untouched.
template="""You are a helpful tour guide. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:"""
prompt = ChatPromptTemplate.from_template(template)

In [14]:
# Show the parsed prompt, including the input variables detected in the template.
print(prompt)

input_variables=['context', 'question'] messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are a helpful tour guide. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))]


In [15]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
# Fan the incoming question out to both prompt inputs: the retriever receives
# it and supplies `context`, while RunnablePassthrough hands it on as `question`.
rag_inputs = {"context": retriever, "question": RunnablePassthrough()}

# inputs -> filled prompt -> chat model -> plain string answer
chain = rag_inputs | prompt | llm | StrOutputParser()

In [16]:
# Load the test questions, one per line.
# The encoding is explicit so the read does not depend on the platform default,
# and blank lines are dropped so an empty question is never sent through the chain.
with open("test_qs.txt", "r", encoding="utf-8") as f:
    qs = [line for line in f.read().splitlines() if line.strip()]
qs

['List the cities from oldest to youngest: Paris, Berlin, San Francisco.',
 'Which historical monuments should I visit in Cairo?',
 'Is Chicago more or less populated than New York?',
 'Compare and contrast night life in Houston and Moscow.',
 'Which city has a more active tech scene? San Francisco or Lisbon?',
 'Which city has a more active financial sector? London or Boston?',
 'Where is the Eiffel Tower located?',
 'When should I visit the Empire State Building in Houston?',
 'Who is Yujian Tang?']

In [17]:
# Run each test question through the chain, keeping the answers in question order.
responses = []
for question in qs:
    responses.append(chain.invoke(question))

# Run Evals

In [18]:
from phoenix.evals import (
    HallucinationEvaluator,
    OpenAIModel,
    QAEvaluator,
    RelevanceEvaluator,
    run_evals,
)
from phoenix.session.evaluation import get_qa_with_reference, get_retrieved_documents
from phoenix.trace import DocumentEvaluations, SpanEvaluations

In [19]:
# Pull the data to be evaluated back out of the running Phoenix app.
# NOTE(review): presumably both frames are built from the traces recorded
# while the chain was invoked above — confirm.
queries_df = get_qa_with_reference(px.Client())
retrieved_documents_df = get_retrieved_documents(px.Client())

In [20]:
import nest_asyncio

# Patch asyncio so an event loop can be re-entered from inside the notebook.
nest_asyncio.apply()

# One judge model shared by all three evaluators.
eval_model = OpenAIModel(model="gpt-4-turbo")
hallucination_evaluator = HallucinationEvaluator(eval_model)
qa_correctness_evaluator = QAEvaluator(eval_model)
relevance_evaluator = RelevanceEvaluator(eval_model)

# Evals over the question/answer frame: run_evals returns one result frame
# per evaluator, in the order the evaluators are listed.
hallucination_eval_df, qa_correctness_eval_df = run_evals(
    dataframe=queries_df,
    evaluators=[hallucination_evaluator, qa_correctness_evaluator],
    provide_explanation=True,
)

# Eval over the retrieved-documents frame: a single evaluator, so only the
# first result frame is kept.
relevance_results = run_evals(
    dataframe=retrieved_documents_df,
    evaluators=[relevance_evaluator],
    provide_explanation=True,
)
relevance_eval_df = relevance_results[0]

# Send all three result sets back to Phoenix under their display names.
px.Client().log_evaluations(
    SpanEvaluations(eval_name="Hallucination", dataframe=hallucination_eval_df),
    SpanEvaluations(eval_name="QA Correctness", dataframe=qa_correctness_eval_df),
    DocumentEvaluations(eval_name="Relevance", dataframe=relevance_eval_df),
)

# Save the trace dataset and keep the returned ID so it can be reloaded later.
trace_id = px.Client().get_trace_dataset().save()

run_evals |██████████| 16/16 (100.0%) | ⏳ 01:24<00:00 |  5.26s/it
run_evals |██████████| 36/36 (100.0%) | ⏳ 01:42<00:00 |  2.85s/it


💾 Trace dataset saved to under ID: 9f817ea5-e554-44bf-9134-cedbd23c12ad
📂 Trace dataset path: /home/codespace/.phoenix/trace_datasets/trace_dataset-9f817ea5-e554-44bf-9134-cedbd23c12ad.parquet


First example

💾 Trace dataset saved to under ID: 5f612e9f-e796-469d-8a5c-16aa2ea234c8

📂 Trace dataset path: /Users/yujian/.phoenix/trace_datasets/trace_dataset-5f612e9f-e796-469d-8a5c-16aa2ea234c8.parquet
