## CrewAI RAG Agent with OpenAI for Long-Term Care Policies

### 1. Install Dependencies

In [None]:
!pip install -qU crewai ipywidgets pypdf langchain-community langchain-openai langchain faiss-cpu python-dotenv openai

### 2. RAG Agent Setup and Agent Definition

In [1]:
import html
import os
import re
from pathlib import Path

import ipywidgets as widgets
from crewai import Agent, Task, Crew
from crewai.tools import BaseTool
from dotenv import load_dotenv
from IPython.display import display, HTML
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from pypdf import PdfReader

# Read secrets from the .env file in the user's home directory instead of
# hardcoding them in the notebook.
env_path = Path('~/.env').expanduser()
load_dotenv(dotenv_path=env_path)

# Stop immediately when the OpenAI key is missing or empty.
if not os.environ.get("OPENAI_API_KEY"):
    raise ValueError("OPENAI_API_KEY not found in ~/.env file")

# Chat model handed to the agent defined later in the notebook.
llm = ChatOpenAI(model="gpt-4o-mini")

def get_pdf_filenames_from_group(group_file_path):
    """Extract the PDF filenames listed in a group file.

    The file is read as UTF-8 and scanned for entries of the form
    ``Processing paper <n> - <name>.pdf, <m> characters``. The ``<name>.pdf``
    part of each entry is collected in order of appearance. The filename
    part may not contain a comma.

    Args:
        group_file_path: Path of the text file to scan.

    Returns:
        List of captured filename strings; empty when nothing matches.
    """
    entry_pattern = re.compile(r'Processing paper \d+ - ([^,]+?\.pdf), \d+ characters')
    with open(group_file_path, 'r', encoding='utf-8') as handle:
        group_text = handle.read()
    filenames = [entry.group(1) for entry in entry_pattern.finditer(group_text)]
    print(f"Identified PDF filenames: {filenames}")
    return filenames

def read_pdf_content(file_path):
    """Return the text of all pages of a PDF joined together, or None on error.

    Pages for which text extraction yields nothing contribute an empty
    string. Any exception raised while opening or reading the file is
    reported with ``print`` and turned into a ``None`` return value, so one
    unreadable file does not abort the caller.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The concatenated page text, or ``None`` if reading failed.
    """
    try:
        pdf_pages = PdfReader(file_path).pages
        text = "".join(page.extract_text() or '' for page in pdf_pages)
        print(f"Read {len(text)} characters from {file_path}")
        return text
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return None

# Locations of the group listing and of the PDF corpus.
# NOTE(review): these are absolute, machine-specific paths; adjust them when
# running the notebook on another machine.
group_file = '/Users/ytchen/Documents/projects/research/NYCU/ltc/classified_articles/Long-Term Care Policies and Programs.txt'
pdf_filenames = get_pdf_filenames_from_group(group_file)
pdf_dir = '/Users/ytchen/Documents/projects/research/NYCU/ltc/data/pdfs/'

# Collect the extracted text of every listed PDF that exists on disk.
documents = []
for filename in pdf_filenames:
    file_path = os.path.join(pdf_dir, filename.strip())
    file_exists = os.path.exists(file_path)  # checked once, reused below
    print(f"Checking file: {file_path}, Exists: {file_exists}")
    if not file_exists:
        continue
    content = read_pdf_content(file_path)
    if content and content.strip(): # Ensure content is not empty or just whitespace
        documents.append(content)

print(f"Total documents with content: {len(documents)}")
# Building a FAISS index from zero chunks fails with an opaque error deep in
# the library, so stop here with a message that names the actual problem.
if not documents:
    raise ValueError(
        f"No readable PDF text found for the files listed in {group_file}; "
        "cannot build the vector store."
    )

# Split each document into overlapping chunks sized for embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.create_documents(documents)
print(f"Total text splits created: {len(splits)}")

# Embed the chunks, index them in FAISS, and expose the index as a retriever.
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(splits, embeddings)
retriever = vectorstore.as_retriever()

# CrewAI tool wrapping the module-level `retriever`: given a question, it
# returns the text of the retrieved document chunks.
class LTC_RAG_Tool(BaseTool):
    name: str = "Long-Term Care Policy RAG Tool"
    description: str = "Searches and returns relevant information about long-term care policies. Use it to answer questions about policies, regulations, and procedures."

    def _run(self, question: str) -> str:
        """Query the retriever and return the matching chunks as one string.

        Args:
            question: Text passed to ``retriever.invoke``.

        Returns:
            The ``page_content`` of every returned document, separated by
            blank lines (an empty string when nothing is returned).
        """
        matches = retriever.invoke(question)
        passages = [match.page_content for match in matches]
        return "\n\n".join(passages)

Identified PDF filenames: ['2.獎勵布建住宿式長照機構資源計畫-114年度待獎勵區域(1140502).pdf', '問答集1.pdf', '照顧實務指導員訓練(公告).pdf', '家庭照顧者支持服務原則(公告).pdf', '附件1-申請流程圖（114.04.09修正版）.pdf', '附件4、住宿機構照顧品質獎勵計畫-懶人包.pdf']
Checking file: /Users/ytchen/Documents/projects/research/NYCU/ltc/data/pdfs/2.獎勵布建住宿式長照機構資源計畫-114年度待獎勵區域(1140502).pdf, Exists: True
Read 3789 characters from /Users/ytchen/Documents/projects/research/NYCU/ltc/data/pdfs/2.獎勵布建住宿式長照機構資源計畫-114年度待獎勵區域(1140502).pdf
Checking file: /Users/ytchen/Documents/projects/research/NYCU/ltc/data/pdfs/問答集1.pdf, Exists: True
Read 1293 characters from /Users/ytchen/Documents/projects/research/NYCU/ltc/data/pdfs/問答集1.pdf
Checking file: /Users/ytchen/Documents/projects/research/NYCU/ltc/data/pdfs/照顧實務指導員訓練(公告).pdf, Exists: True
Read 1309 characters from /Users/ytchen/Documents/projects/research/NYCU/ltc/data/pdfs/照顧實務指導員訓練(公告).pdf
Checking file: /Users/ytchen/Documents/projects/research/NYCU/ltc/data/pdfs/家庭照顧者支持服務原則(公告).pdf, Exists: True
Read 1798 characters from /Users/y

In [3]:
# Instantiate the retrieval tool so it can be placed in the agent's tool list.
rag_tool = LTC_RAG_Tool()


In [4]:
# The single agent used by this notebook: it answers through the shared `llm`
# and has `rag_tool` as its only tool. verbose=True is passed to the agent.
ltc_policy_expert = Agent(
    role='Long-Term Care Policy Expert',
    goal='Provide accurate information about long-term care policies based on the provided documents.',
    backstory='An expert in Taiwanese long-term care policies, skilled at interpreting official documents and answering questions clearly.',
    tools=[rag_tool],
    llm=llm,
    verbose=True
)

### 3. Simple GUI for Multi-Turn Conversation

In [5]:
# Conversation memory shared across calls: a list of (question, result) tuples.
history = []


def _format_history(turns):
    """Render earlier (question, answer) turns as plain text for the prompt."""
    if not turns:
        return "(none)"
    return "\n".join(
        f"User: {past_question}\nAgent: {str(past_answer)}"
        for past_question, past_answer in turns
    )


def ask_question(question):
    """Answer one question with the policy agent and record the exchange.

    A fresh single-task crew is built for each call. Earlier turns stored in
    the module-level ``history`` are embedded in the task description as
    readable "User:/Agent:" lines. Interpolating the raw list would instead
    embed the ``repr()`` of every stored result object, not its answer text.

    Args:
        question: The user's question.

    Returns:
        The value returned by ``crew.kickoff()``. The same value is appended
        to ``history`` together with the question.
    """
    task = Task(
        description=(
            f'Answer the following question: {question}. '
            'Use the provided context to answer. '
            f'Conversation history:\n{_format_history(history)}'
        ),
        expected_output='A clear and concise answer to the question.',
        agent=ltc_policy_expert
    )

    crew = Crew(agents=[ltc_policy_expert], tasks=[task], verbose=True)
    result = crew.kickoff()
    history.append((question, result))
    return result

In [6]:
# Chat UI: a bordered, scrollable transcript above a text box and an "Ask" button.
output_area = widgets.Output(layout={'border': '1px solid black', 'height': '300px', 'overflow_y': 'auto'})
input_text = widgets.Text(placeholder='Ask a question about long-term care policies...')
submit_button = widgets.Button(description='Ask')


def _as_html(text):
    """Escape text for embedding in HTML and keep its line breaks visible."""
    return html.escape(str(text)).replace("\n", "<br>")


def on_submit(b):
    """Send the current input to the agent and append the exchange to the transcript.

    Args:
        b: The widget that triggered the event (unused; ``None`` when called
            from the Enter-key handler).

    Blank input is ignored, so an accidental click does not start an agent
    run. Both the question and the answer are HTML-escaped before display,
    so characters such as ``<`` or ``&`` are shown literally, not as markup.
    """
    question = input_text.value.strip()
    input_text.value = ''
    if not question:
        return
    with output_area:
        display(HTML(f"<b>You:</b> {_as_html(question)}"))
        response = ask_question(question)
        display(HTML(f"<b>Agent:</b> {_as_html(response)}"))


submit_button.on_click(on_submit)
# NOTE(review): Text.on_submit is deprecated in recent ipywidgets releases. It
# is kept because the suggested replacement (observing `value` with
# continuous_update=False) appears to also fire when the box loses focus —
# TODO confirm before migrating.
input_text.on_submit(lambda x: on_submit(None)) # Allow pressing Enter

display(widgets.VBox([output_area, widgets.HBox([input_text, submit_button])]))

  input_text.on_submit(lambda x: on_submit(None)) # Allow pressing Enter


VBox(children=(Output(layout=Layout(border_bottom='1px solid black', border_left='1px solid black', border_rig…

In [8]:
# Example usage: call ask_question directly (without the widget UI) and print
# the answer. ask_question also appends this exchange to `history`.
question = "What are the key principles of family caregiver support services?"
response = ask_question(question)
print(f"Agent's Response: {response}")


Output()

Output()

Agent's Response: 家庭照顧者支持服務原則主要包括以下幾個要點： 
壹、目的：發展多元支持措施，提升照顧服務品質，減輕家庭照顧者的照顧負荷。 
貳、依據：依據長期照顧服務法第十三條第二項規定。 
參、服務對象：指長期照顧服務法第三條第三款所定的家庭照顧者，即於家庭中對身心失能者提供規律性照顧的主要親屬或家人。 
肆、支持原則： 
1. 協助家庭照顧者減輕照顧壓力，提供支持服務。 
2. 支持服務應根據個別需求以定點、到宅等方式提供，並連結相關服務。 
3. 維護家庭照顧者的身心健康、經濟安全與就業權利。 
4. 尊重家庭照顧者的自主性與選擇。 
5. 提供充分的照顧安排資訊與資源。 
6. 提供減輕照顧者負荷及壓力的支持性服務。 
伍、主要服務項目包括服務資訊提供及轉介、長照知識及技能訓練、喘息服務、情緒支持及團體服務之轉介，以及其他提升家庭照顧者能力和生活品質的服務。


In [9]:
# Second example question. Exchanges already stored in `history` are included
# in the task description that ask_question builds.
question = "What are the requirements for long-term care facilities to receive quality improvement incentives?"
response = ask_question(question)
print(f"Agent's Response: {response}")


Output()

Output()

Agent's Response: The requirements for long-term care facilities to receive quality improvement incentives include that the facility must have the following conditions: 
1. The facility must have passed its most recent evaluation as qualified or rated B or above and be within the evaluation validity period. 
2. The incentive amounts are based on the number of beds as follows:
   - For fewer than 49 beds: 1,010,000 TWD
   - For 50-75 beds: 1,500,000 TWD
   - For 76-99 beds: 2,000,000 TWD
   - For 100-149 beds: 2,200,000 TWD
   - For over 150 beds: 2,400,000 TWD
3. The facility must achieve the targets set within seven key indicators which include:
   - Emergency disaster response (mandatory)
   - Individualized support plan execution
   - Strengthening personnel management
   - Enhancing professional knowledge
   - Building care information systems
   - Use of smart supportive care technology
   - Safeguarding the rights of service recipients
The application period for these incentives 