In [11]:
import pandas as pd
import minsearch

In [13]:
df = pd.read_csv("data/taiwan_hiking_data.csv")

In [14]:
df.to_dict(orient='records')

[{'id': 'c51b0dba91466efa',
  'type': 'C01-Application Regulations',
  'title': '《Important Notices on Application for Entry into the Park Ecological Protection Areas and Other Restricted Zones》',
  'link_content': 'Modified Date: 2024/08/12\r\n                \r\n                Released by Taroko National Park Headquarters',
  'link': 'https://hike.taiwan.gov.tw/en/news_7_1.aspx?ID=2552',
  'released_by': 'Taroko National Park Headquarters',
  'modified_date': '2024/08/12'},
 {'id': '418c732ed38f0842',
  'type': 'C15-Online Application System',
  'title': 'Instructions',
  'link_content': 'Modified Date: 2018/09/10\r\n                \r\n                Released by CPAMI Position introduction: Trail head: mountaineering entrance Mountain house or camp: the last position every day General position: except the Trail head and camping sites Step 1: Select the position. Step 2: Select the position until the route of first day is completed, press [End a day] Please note: the last position 

In [21]:
documents = df.to_dict(orient='records')

In [1]:
from elasticsearch import Elasticsearch

In [6]:
from openai import OpenAI

client = OpenAI(
    base_url='http://localhost:11434/v1/',
    api_key='ollama'
)

In [54]:
response = client.chat.completions.create(
    model='llama3.2',
    messages=[{"role": "user", "content": "Why is the sky blue?"}]
)

InternalServerError: Error code: 500 - {'error': {'message': 'model requires more system memory (5.4 GiB) than is available (5.0 GiB)', 'type': 'api_error', 'param': None, 'code': None}}

In [9]:
response.choices[0].message.content

"<think>\n\n</think>\n\nThe sky appears blue because of Rayleigh scattering. When sunlight enters the Earth's atmosphere, it interacts with molecules and small particles in the air—primarily nitrogen and oxygen. Sunlight consists of a spectrum of colors, but when it scatters through the atmosphere at smaller angles (like directly overhead), most of red, orange, yellow light is scattered away. Blue light has a shorter wavelength, which is scattered more often by the molecules in the air.\n\nAs a result, we see blue when looking towards the horizon or upwards rather than downwards. The phenomenon explains why sunsets are red—because sunlight has to pass through more atmosphere layers to reach our eyes at lower altitudes where the Sun is setting, scattering those longer wavelengths of light away."

In [18]:
es_client = Elasticsearch('http://localhost:9200')

In [19]:
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "type": {"type": "text"},
            "title": {"type": "text"},
            "link_content": {"type": "text"},
            "released_by": {"type": "text"},
            "id": {"type": "keyword"} 
        }
    }
}

index_name = "mountain-questions"

es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'mountain-questions'})

In [20]:
from tqdm.auto import tqdm

In [22]:
for doc in tqdm(documents):
    es_client.index(index=index_name, document=doc)

  0%|          | 0/92 [00:00<?, ?it/s]

In [58]:
def elastic_search(query):
    search_query = {
        "size": 5,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["link_content", "title", "released_by"],
                        "type": "cross_fields"
                    }
                }
            }
        }
    }

    response = es_client.search(index=index_name, body=search_query)

    result_docs = []

    for hit in response['hits']['hits']:
        result_docs.append(hit['_source'])

    return result_docs

In [48]:
elastic_search("How do I apply the access for Jade mountain?")

[{'id': 'd35d29bdff69b2b4',
  'type': 'C07-Foreign Visitor',
  'title': 'What is the difference between an Emergency Coordinator and an Emergency Contact? Does my Emergency Coordinator/Contact have to be a Taiwanese Citizen?',
  'link_content': 'Modified Date: 2019/04/02\r\n                \r\n                Released by Taroko National Park Headquarters An Emergency Contact is the team member’s next of kin. The Emergency Coordinator refers to a close friend or family member who does not hike with the team and are responsible to keep in constant contact with the team to monitor their progress. The emergency contact and emergency coordinator can be the same person if they are also the next of kin for one or more of the team members.\xa0If the team encounters difficulties and needs help or rescue, the emergency coordinator will be able to notify the authorities immediately. For the Park Entry Permit: you are free to provide an emergency contact/coordinator that is not a Taiwanese citizen

In [29]:
index = minsearch.Index(
    text_fields=["link_content", "title", "released_by"],
    keyword_fields=["id"]
)

In [30]:
index.fit(documents)

<minsearch.Index at 0x7146f89412e0>

In [31]:
def search(query):
    boost = {'link_content': 2.0, 'title': 0.5, "released_by": 1}

    results = index.search(
        query=query,
        boost_dict=boost,
        num_results=5
    )

    return results

In [32]:
search("How do I apply the access for Jade mountain?")

[{'id': 'bcb7e6d2fd5c8a92',
  'type': 'C03-Application Progress Inquiry',
  'title': 'If I already received a Park Entry Permit, how do I apply for a mountain entry permit?',
  'link_content': 'Modified Date: 2024/09/20\r\n                \r\n                Released by Taroko National Park Headquarters After receiving your Park Entry Permit, you may need to apply for a Mountain Entry Permit.\xa0Please see "Do I need a permit for my trail itinerary?" for specific information on which trails need which permits. You can apply for the Mountain Entry Permit in three ways: If you have already received your Park Entry Permit, you can automatically transfer your details from the Park Permit application to the Police Mountain Permit application. This is the most common method. To do this, please go on our website to: Online Application > Check Status/Print Permit/Apply for Mountain Permit > [Enter in your details] > Apply Mountain (Police) Permit. This option should be the second of four, on t

In [59]:
def build_prompt(query, search_results):
    prompt_template = """
    You're a mountain ranger assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
    Use only the facts from the CONTEXT when answering the QUESTION.
    
    QUESTION: {question}
    
    CONTEXT: 
    {context}
    """.strip()

    context = ""
    
    for doc in search_results:
        context = context + f"section: {doc['type']}\nquestion: {doc['title']}\nanswer: {doc['link_content']}\n\n"
    
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

def llm(prompt):
    response = client.chat.completions.create(
        model='llama3.2',
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [56]:
def rag(query):
    search_results = elastic_search(query)
    prompt = build_prompt(query, search_results)
    answer = llm(prompt)
    return answer

In [60]:
query = "How do I apply the access for Jade mountain?"
rag(query)

'To answer your question "How do I apply the access for Jade mountain?" there is no information available in the context about \'Jade mountain.\' There\'s more info related to other mountains, like Qilai Mountain, Nanhu, or other trails within Taroko National Park.\n\nHowever, According to section C01-Application Regulations, both a Mountain Access Permit and a Park Access Permit are  required for certain trails within the park.'

In [36]:
r = search("How do I apply the access for Jade mountain?")

In [38]:
r[0]['title']

'If I already received a Park Entry Permit, how do I apply for a mountain entry permit?'

In [52]:
search_results = elastic_search("How do I apply the access for Jade mountain?")
build_prompt("How do I apply the access for Jade mountain?", search_results)

'You\'re a mountain ranger assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.\n    QUESTION and CONTEXT are for reference. They are supporting you answering the question.\n    \n    QUESTION: How do I apply the access for Jade mountain?\n    \n    CONTEXT: \n    section: C07-Foreign Visitor\nquestion: What is the difference between an Emergency Coordinator and an Emergency Contact? Does my Emergency Coordinator/Contact have to be a Taiwanese Citizen?\nanswer: Modified Date: 2019/04/02\r\n                \r\n                Released by Taroko National Park Headquarters An Emergency Contact is the team member’s next of kin. The Emergency Coordinator refers to a close friend or family member who does not hike with the team and are responsible to keep in constant contact with the team to monitor their progress. The emergency contact and emergency coordinator can be the same person if they are also the next of kin for one or more of the team members.\xa0If the team e

In [53]:
llm("How do I apply the access for Jade mountain?")

InternalServerError: Error code: 500 - {'error': {'message': 'model requires more system memory (5.4 GiB) than is available (5.1 GiB)', 'type': 'api_error', 'param': None, 'code': None}}