<a href="https://colab.research.google.com/github/waishun78/hungry-rag/blob/main/hungry_rag.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# import locale
# def getpreferredencoding(do_setlocale = True):
#     return "UTF-8"
# locale.getpreferredencoding = getpreferredencoding

In [None]:
# Installing the necessary packages
!pip install --upgrade pip
!pip install 'farm-haystack[all-gpu]' ## or 'all-gpu' for the GPU-enabled dependencies

!pip install -U accelerate
!pip install bitsandbytes
!pip install SentencePiece
!pip install evaluate
!pip install bert_score
!pip install transformers

!pip install googlemaps

!pip install bert_score
# !apt install libgraphviz-dev
# !pip install pygraphviz

In [None]:
!nvidia-smi

In [None]:
# Import necessary modules
import os
import torch
import torch.nn as nn
import bitsandbytes as bnb
import accelerate
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM
from transformers import LlamaTokenizer

In [None]:
# Model configuration: the checkpoint name is shared by the model and the
# tokenizer so that both are always loaded from the same repository.
load_in_8bit = True
model_name_l = "lmsys/vicuna-7b-v1.5"

# Load the causal LM once; every LLM-backed node below receives this instance.
# The flag above is forwarded as `load_in_8bit`, weights are requested in
# float16, and `device_map="auto"` leaves device placement to the library.
model_l = AutoModelForCausalLM.from_pretrained(
    model_name_l,
    torch_dtype=torch.float16,
    load_in_8bit=load_in_8bit,
    device_map="auto"
)
# Tokenizer loaded from the same checkpoint as the model.
tokenizer_l = LlamaTokenizer.from_pretrained(model_name_l)

In [None]:
# !pip install farm-haystack[all]
# !pip install --upgrade pip
# !pip install 'farm-haystack[all]' ## or 'all-gpu' for the GPU-enabled dependencies

In [None]:
from haystack.nodes.base import BaseComponent

class Query_Rewriter(BaseComponent):
    """Haystack node that asks the LLM for clarifying follow-up questions.

    The node wraps the user's raw query in a one-shot prompt and samples a
    numbered list of follow-up questions from the causal language model.
    """

    outgoing_edges = 1

    def __init__(self, model, tokenizer):
        """Store the generation model and its tokenizer.

        Args:
            model: Causal language model exposing ``generate``.
            tokenizer: Tokenizer that matches ``model``.
        """
        self.model = model
        self.tokenizer = tokenizer

    def _complete_prompt(self, prompt, temperature, max_new_tokens):
        """Sample a continuation of ``prompt`` and return only the new text.

        Args:
            prompt: Full prompt text sent to the model.
            temperature: Sampling temperature passed to ``generate``.
            max_new_tokens: Upper bound on the number of generated tokens.

        Returns:
            The decoded continuation, without the prompt and without special
            tokens.
        """
        # Follow the model's own device rather than assuming "cuda"; fall back
        # to "cuda" for model objects that do not expose a ``device`` attribute.
        device = getattr(self.model, "device", "cuda")
        input_ids = self.tokenizer(prompt, return_tensors='pt')["input_ids"].to(device)
        with torch.no_grad():
            generation_output = self.model.generate(
                input_ids=input_ids,
                temperature=temperature,
                top_p=1.0,
                do_sample=True,
                return_dict_in_generate=True,
                max_new_tokens=max_new_tokens,
            )
        # The returned sequence starts with the prompt tokens; keep only what
        # was generated after them.
        new_tokens = generation_output.sequences[0][len(input_ids[0]):]
        # skip_special_tokens keeps markers such as the end-of-sequence token
        # out of the text that later nodes paste into their own prompts.
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True)

    def run(self, query: str):
        """Generate clarifying questions for ``query``.

        Args:
            query: The user's original question.

        Returns:
            A ``(output, "output_1")`` tuple where ``output`` is a dict with the
            keys ``"query"`` (the input), ``"prompt"`` (the full prompt sent to
            the model) and ``"output"`` (the generated follow-up questions).
        """
        prompt=f"""Generate some follow up questions I need to better answer your query
                  Example:
                  Question: Where to get Japanese food in Marina Bay Sands that is fine-dining and has a romantic atmosphere at night?
                  Answer:
                  1. What type of Japanese food are you looking for? e.g. sushi, ramen, teppanyaki, etc.
                  2. Do you have a preference for specific dishes or would you like to try a variety of options?
                  3. Are you looking for a specific price range for the food?
                  4. What are you looking for in a romantic atmsphere?
                  5. How late at night are you eating?
                  Question: {query}
                  Answer:"""
        output = self._complete_prompt(prompt, temperature=0.7, max_new_tokens=200)

        # Release cached GPU blocks between pipeline stages, as the other
        # LLM-backed nodes in this notebook do.
        torch.cuda.empty_cache()

        return {"query": query, "prompt": prompt, "output": output}, "output_1"

    def run_batch(self, queries):
        """Run :meth:`run` once per entry of ``queries``.

        Returns:
            A ``(results, "output_1")`` tuple in which ``results`` is a list
            holding the ``(output_dict, edge_name)`` tuple that :meth:`run`
            returned for each query, in input order.
        """
        # NOTE(review): Haystack batch pipelines may expect a dict here rather
        # than a list of run() tuples - confirm before using run_batch in a
        # pipeline.
        output = [self.run(query) for query in queries]
        return output, "output_1"

In [None]:
# Instantiate the custom Query_Rewriter node on top of the shared model and
# tokenizer loaded earlier in the notebook.
query_rewriter = Query_Rewriter(model_l, tokenizer_l)

In [None]:
from haystack import Pipeline

# First-stage Haystack pipeline: a single node that turns the user's query
# into clarifying follow-up questions.
p = Pipeline()
# "Query" is the pipeline input, so the rewriter receives the raw query.
p.add_node(component=query_rewriter, name="QueryRewriter", inputs=["Query"])

In [None]:
# Example user question used to drive both pipelines in this notebook.
indian_query = "Where to get Indian food in Singapore that is near the Esplanade and suitable for a group of 10 people?"

In [None]:
# Run the first-stage pipeline; the result is read below through its
# "query" and "output" keys.
indian_result = p.run(query = indian_query)

In [None]:
# Show the original question next to the clarifying questions the model
# generated for it.
print(f'''
  query: {indian_result["query"]}
  output: {indian_result["output"]}
  ''')

In [None]:
# Input for the second pipeline: the original question, the generated
# clarifying questions, and a numbered list of answers to them.
# NOTE(review): the answers look hand-written and matched to the clarifying
# questions by position - confirm the count matches the generated questions.
updated_indian_query = {
    "question":indian_result["query"],
    "clarifier": indian_result["output"],
    "additional_user_criteria":
      """
      1. North Indian
      2. Naan
      3. Cheap
      4. NIL
      """
}

In [None]:
# korean_query = "I want to buy bubble tea and then have Korean fried chicken. Can you give me some suggestions?"

In [None]:
# korean_result = p.run(query = korean_query)

In [None]:
# print(f'''
#   query: {korean_result["query"]}
#   output: {korean_result["output"]}
#   ''')

In [None]:
# updated_korean_query = {
#     "question":korean_result["query"],
#     "clarifier": korean_result["output"],
#     "additional_user_criteria":
#       """
#       1. NIL
#       2. NIL
#       3. NIL
#       4. Sit-down
#       5. Should be in the same mall
#       """
# }

In [None]:
class Annotate_Needs(BaseComponent):
    """Haystack node that labels the user's requirements with categories.

    The node combines the question, the clarifying questions and the user's
    answers into a one-shot prompt and asks the LLM for a comma-separated list
    of requirements tagged with a category such as ``[Cuisine]`` or
    ``[Price]``.
    """

    outgoing_edges = 1

    def __init__(self, model, tokenizer):
        """Store the generation model and its tokenizer.

        Args:
            model: Causal language model exposing ``generate``.
            tokenizer: Tokenizer that matches ``model``.
        """
        self.model = model
        self.tokenizer = tokenizer

    def _complete_prompt(self, prompt, temperature, max_new_tokens):
        """Sample a continuation of ``prompt`` and return only the new text.

        Args:
            prompt: Full prompt text sent to the model.
            temperature: Sampling temperature passed to ``generate``.
            max_new_tokens: Upper bound on the number of generated tokens.

        Returns:
            The decoded continuation, without the prompt and without special
            tokens.
        """
        # Follow the model's own device rather than assuming "cuda"; fall back
        # to "cuda" for model objects that do not expose a ``device`` attribute.
        device = getattr(self.model, "device", "cuda")
        input_ids = self.tokenizer(prompt, return_tensors='pt')["input_ids"].to(device)
        with torch.no_grad():
            generation_output = self.model.generate(
                input_ids=input_ids,
                temperature=temperature,
                top_p=1.0,
                do_sample=True,
                return_dict_in_generate=True,
                max_new_tokens=max_new_tokens,
            )
        # The returned sequence starts with the prompt tokens; keep only what
        # was generated after them.
        new_tokens = generation_output.sequences[0][len(input_ids[0]):]
        # skip_special_tokens keeps markers such as the end-of-sequence token
        # out of the requirements that later nodes paste into their prompts.
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True)

    def run(self, query: dict):
        """Annotate the user's requirements and store them on ``query``.

        Args:
            query: Dict with the keys ``"question"``, ``"clarifier"`` and
                ``"additional_user_criteria"``.

        Returns:
            A ``(query, "output_1")`` tuple. ``query`` is the same dict object
            that was passed in, updated in place with a ``"requirements"`` key
            holding the generated annotation.
        """
        print(f'Summarizing user needs.... using query:{query}\n\n')
        prompt=f"""Generate the user's query requirements annotated with the requirement labels. Here are the categories:
                    Price, Opening, Location, Cuisine, Dietary Restrictions, Ratings, Reservations, Landmark, General (Atmosphere, idea-related needs)

                  Example:
                  Question: Where to get Japanese food in Marina Bay Sands that is fine-dining and has a romantic atmosphere at night?
                  Clarifying Questions:
                  1. What type of Japanese food are you looking for? e.g. sushi, ramen, teppanyaki, etc.
                  2. Do you have a preference for specific dishes or would you like to try a variety of options?
                  3. Are you looking for a specific price range for the food?
                  4. What are you looking for in a romantic atmsphere?
                  5. How late at night are you eating?
                  Additional User Criteria:
                  1. Sushi
                  2. Mostly just sushi
                  3. $$
                  4. Intimate with nice lighting, Inside Marina Bay Sands
                  5. 22:00 minimally
                  Answer:
                  Sushi[Cuisine], $$ [Price], Intimate with nice lighting [General-Atmosphere], Inside Marina Bay Sands [Location], 22:00 minimally [Opening]

                  ________________________________________________________________________________________________________
                  Question: {query["question"]}
                  Clarifying Questions: {query["clarifier"]}
                  Additional User Criteria:{query["additional_user_criteria"]}
                  Answer:"""
        query["requirements"] = self._complete_prompt(prompt, temperature=0.4, max_new_tokens=200)

        # The generation tensors went out of scope with the helper call, so the
        # cached GPU blocks can be released before the next stage runs.
        torch.cuda.empty_cache()

        return query, "output_1"

    def run_batch(self, queries):
        """Run :meth:`run` once per entry of ``queries``.

        Returns:
            A ``(results, "output_1")`` tuple in which ``results`` is a list
            holding the ``(query_dict, edge_name)`` tuple that :meth:`run`
            returned for each query, in input order.
        """
        # NOTE(review): Haystack batch pipelines may expect a dict here rather
        # than a list of run() tuples - confirm before using run_batch in a
        # pipeline.
        output = [self.run(query) for query in queries]
        return output, "output_1"

In [None]:
import re

class Search_Term_Generator(BaseComponent):
    """Haystack node that turns the annotated requirements into a search term.

    The node asks the LLM for a search query based on the question, the
    clarifying questions, the user's answers and the annotated requirements,
    then keeps the first candidate term from the model's reply.
    """

    outgoing_edges = 1

    def __init__(self, model, tokenizer):
        """Store the generation model and its tokenizer.

        Args:
            model: Causal language model exposing ``generate``.
            tokenizer: Tokenizer that matches ``model``.
        """
        self.model = model
        self.tokenizer = tokenizer
        # TODO: Might need to change the temperature to be less random and favour more "safe" search terms
        # TODO: Might reduce the max tokens generated as well

    def _complete_prompt(self, prompt, temperature, max_new_tokens):
        """Sample a continuation of ``prompt`` and return only the new text.

        Args:
            prompt: Full prompt text sent to the model.
            temperature: Sampling temperature passed to ``generate``.
            max_new_tokens: Upper bound on the number of generated tokens.

        Returns:
            The decoded continuation, without the prompt and without special
            tokens.
        """
        # Follow the model's own device rather than assuming "cuda"; fall back
        # to "cuda" for model objects that do not expose a ``device`` attribute.
        device = getattr(self.model, "device", "cuda")
        input_ids = self.tokenizer(prompt, return_tensors='pt')["input_ids"].to(device)
        with torch.no_grad():
            generation_output = self.model.generate(
                input_ids=input_ids,
                temperature=temperature,
                top_p=1.0,
                do_sample=True,
                return_dict_in_generate=True,
                max_new_tokens=max_new_tokens,
            )
        # The returned sequence starts with the prompt tokens; keep only what
        # was generated after them.
        new_tokens = generation_output.sequences[0][len(input_ids[0]):]
        # skip_special_tokens keeps markers such as the end-of-sequence token
        # out of the term that is later sent to the places search.
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True)

    @staticmethod
    def _split_search_terms(text):
        """Split raw model output into cleaned, non-empty candidate terms.

        Candidates are separated by a newline, a comma, or an inline ``2.``
        list marker. The dot is escaped and the marker must not be part of a
        longer number, so values such as "2200" or "2.5" stay intact (the
        previous pattern ``2.`` matched "2" followed by any character).
        A leading ``1.`` list marker is removed from each candidate.
        """
        pieces = re.split(r'\n|,|\b2\.(?!\d)', text)
        cleaned = (re.sub(r'^1\.(?!\d)\s*', '', piece.strip()).strip() for piece in pieces)
        return [piece for piece in cleaned if piece]

    def run(self, query: dict):
        """Generate a search term and store it on ``query``.

        Args:
            query: Dict with the keys ``"question"``, ``"clarifier"``,
                ``"additional_user_criteria"`` and ``"requirements"``.

        Returns:
            A ``(query, "output_1")`` tuple. ``query`` is the same dict object
            that was passed in, updated in place with a ``"search_terms"`` key
            holding the first non-empty candidate term (an empty string when
            the model produced none).
        """
        print(f'Generating Search Query.... with query:{query}\n\n')
        prompt=f"""Generate a search query based on the example given below and the actual inputs. Here are the categories and increasing importance (1-5), include more important terms more often in queries:
                    Price - 3, Opening - 4, Location - 5, Cuisine - 5, Dietary Restrictions - 5, Ratings - 2, Reservations - 2, Landmark - 2 , General - 1 (Atmosphere, idea-related needs)

                  Example:
                  Question:
                  Where to get Japanese food in Marina Bay Sands that is fine-dining and has a romantic atmosphere at night?
                  Clarifying Questions:
                  1. What type of Japanese food are you looking for? e.g. sushi, ramen, teppanyaki, etc.
                  2. Do you have a preference for specific dishes or would you like to try a variety of options?
                  3. Are you looking for a specific price range for the food?
                  4. What are you looking for in a romantic atmsphere?
                  5. How late at night are you eating?
                  Additional User Input:
                  1. Sushi
                  2. Mostly just sushi
                  3. $$
                  4. Intimate with nice lighting, Inside Marina Bay Sands
                  5. 22:00 minimally
                  Requirements:
                  Sushi[Cuisine], $$ [Price], Intimate with nice lighting [General-Atmosphere], Inside Marina Bay Sands [Location], 22:00 minimally [Opening]
                  Answer:
                  Romantic Japanese Sushi restaurant in Marina Bay Sands open till 2200 

                  ________________________________________________________________________________________________________
                  Actual:
                  Question:
                  {query["question"]}
                  Clarifying Questions:
                  {query["clarifier"]}
                  Additional User Input:
                  {query["additional_user_criteria"]}
                  Requirements:
                  {query["requirements"]}
                  Answer:
                  """
        s = self._complete_prompt(prompt, temperature=0.4, max_new_tokens=100)
        print(f'\nPREPROCESSED SEARCH TERM:{s}____\n')
        s_ls = self._split_search_terms(s)
        print(f'\nPROCESSED SEARCH TERM LS:{s_ls}____\n')

        # The prompt ends with a newline and indentation, so the reply can
        # start with blank pieces; those were filtered out above.
        first_term = s_ls[0] if s_ls else ""
        query["search_terms"]= first_term
        print(f'\n____SEARCH_TERMS:____\n{first_term}\n\n')

        # The generation tensors went out of scope with the helper call, so the
        # cached GPU blocks can be released before the next stage runs.
        torch.cuda.empty_cache()

        return query, "output_1"

    def run_batch(self, queries):
        """Run :meth:`run` once per entry of ``queries``.

        Returns:
            A ``(results, "output_1")`` tuple in which ``results`` is a list
            holding the ``(query_dict, edge_name)`` tuple that :meth:`run`
            returned for each query, in input order.
        """
        # NOTE(review): Haystack batch pipelines may expect a dict here rather
        # than a list of run() tuples - confirm before using run_batch in a
        # pipeline.
        output = [self.run(query) for query in queries]
        return output, "output_1"

In [None]:
import urllib.parse
import json
import requests
import googlemaps

class GoogleNode(BaseComponent):
    """Haystack node that retrieves restaurant context from Google Places.

    For the first search term on the incoming query, the node fetches the top
    three text-search results, enriches each with place details (phone,
    delivery/dine-in flags, opening hours, up to three reviews) and stores the
    combined records as the query's context.
    """

    outgoing_edges = 1

    def __init__(self, api_key):
        """Store the Google Maps API key used to create clients.

        Args:
            api_key: Google Maps Platform API key.
        """
        self.api_key = api_key

    def top_results(self, searchquery):
        """Return summary records for the top three places matching the search.

        Args:
            searchquery: Free-text query sent to the places text search,
                restricted to region ``"SG"``.

        Returns:
            A list of at most three dicts with the keys ``place_id``, ``name``,
            ``address``, ``rating`` (as a string) and ``price_level``. The list
            is empty when the search returns no results.
        """
        gmaps = googlemaps.Client(self.api_key)
        places_result = gmaps.places(searchquery, region="SG")

        # A response without a "results" entry is treated as "no matches".
        top3_results = places_result.get('results', [])[:3]

        formatted_results = []
        for result in top3_results:
            formatted_results.append({
                'place_id': result['place_id'],
                'name': result['name'],
                'address': result.get('formatted_address'),
                # Not every place carries a rating; avoid a KeyError that would
                # abort the whole pipeline run.
                'rating': str(result.get('rating', 'Rating Not Available')),
                'price_level': result.get('price_level')
            })
        return formatted_results

    def business_info_retrieval(self, top3_results):
        """Fetch place details for each record produced by :meth:`top_results`.

        Args:
            top3_results: List of dicts that each contain a ``place_id``.

        Returns:
            A list, in the same order as the input, of dicts with the keys
            ``place_id``, ``phone_number``, ``delivery``, ``dine_in``,
            ``opening_hours`` (weekday lines joined by newlines) and
            ``reviews`` (the text of up to three reviews).
        """
        gmaps = googlemaps.Client(self.api_key)
        top3_business_info = []

        for result in top3_results:
            business_result = gmaps.place(result['place_id'])
            details = business_result.get('result', {})

            opening_hours = details.get('opening_hours', {})
            opening_hours_text = "\n".join(opening_hours.get('weekday_text', []))

            # Places without reviews have no "reviews" entry at all.
            reviews = details.get('reviews', [])[:3]
            review_texts = [review.get('text', '') for review in reviews]

            top3_business_info.append({
                'place_id': result['place_id'],
                'phone_number': details.get('formatted_phone_number'),
                'delivery': details.get('delivery'),
                'dine_in': details.get('dine_in'),
                'opening_hours': opening_hours_text,
                'reviews': review_texts
            })

        return top3_business_info

    def final_context_generator(self, top3_results, top3_business_info):
        """Merge search summaries and place details into context records.

        Args:
            top3_results: Output of :meth:`top_results`.
            top3_business_info: Output of :meth:`business_info_retrieval`, in
                the same order as ``top3_results``.

        Returns:
            A list of dicts, one per place, numbered from 1 under ``'Option'``.
        """
        final_context_ls = []

        for i in range(len(top3_results)):
            combined_info = {**top3_results[i], **top3_business_info[i]}

            # The 'price_level' key is always present (possibly None), so a
            # .get() default would never apply. Test for None explicitly so
            # that level 0 is kept as a real value.
            price_level = combined_info.get('price_level')
            if price_level is None:
                price_level = 'Price Data Not Available'

            final_context_ls.append({
                'Option': i + 1,
                'Name': combined_info['name'],
                'Address': combined_info['address'],
                'Phone': combined_info['phone_number'],
                'Delivery Available': 'Yes' if combined_info.get('delivery') else 'No',
                'Dine-In Available': 'Yes' if combined_info.get('dine_in') else 'No',
                'Opening Hours': combined_info['opening_hours'],
                'Price Level': price_level,
                'Rating': combined_info['rating'],
                'Reviews': combined_info['reviews']
            })

        return final_context_ls

    def run(self, query: dict):
        """Look up places for the query's search term and attach the context.

        Args:
            query: Dict containing ``"search_terms"``; only the part before the
                first comma is searched.

        Returns:
            A ``(output, "output_1")`` tuple where ``output`` is
            ``{"query": query, "context": context}``. ``query`` is the same
            dict object that was passed in, updated in place with a
            ``"context"`` key.
        """
        search_terms = query["search_terms"]
        first_search_term = search_terms.split(",")[0]

        print(f'Generating Google Response.... with query: {query} and the FIRST is {first_search_term}')

        top_results = self.top_results(first_search_term)
        business_info = self.business_info_retrieval(top_results)
        context_ls = self.final_context_generator(top_results, business_info)

        query['context'] = context_ls

        print(f'\n____CONTEXT:____\n{query["context"]}\n\n')

        return {"query": query, "context": context_ls}, "output_1"

    def run_batch(self, queries: [dict]):
        """Run :meth:`run` once per entry of ``queries``.

        Returns:
            A ``(results, "output_1")`` tuple in which ``results`` is a list
            holding the ``(output_dict, edge_name)`` tuple that :meth:`run`
            returned for each query, in input order.
        """
        # NOTE(review): Haystack batch pipelines may expect a dict here rather
        # than a list of run() tuples - confirm before using run_batch in a
        # pipeline.
        output = [self.run(query) for query in queries]
        return output, "output_1"


In [None]:
class Reader(BaseComponent):
    """Haystack node that ranks the retrieved restaurants with the LLM.

    The node builds a one-shot prompt from the question, the annotated
    requirements and the retrieved context, and asks the model for a ranked,
    justified list of the top three options.
    """

    outgoing_edges = 1

    def __init__(self, model, tokenizer):
        """Store the generation model and its tokenizer.

        Args:
            model: Causal language model exposing ``generate``.
            tokenizer: Tokenizer that matches ``model``.
        """
        self.model = model
        self.tokenizer = tokenizer

    def _complete_prompt(self, prompt, temperature, max_new_tokens):
        """Sample a continuation of ``prompt`` and return only the new text.

        Args:
            prompt: Full prompt text sent to the model.
            temperature: Sampling temperature passed to ``generate``.
            max_new_tokens: Upper bound on the number of generated tokens.

        Returns:
            The decoded continuation, without the prompt and without special
            tokens.
        """
        # Follow the model's own device rather than assuming "cuda"; fall back
        # to "cuda" for model objects that do not expose a ``device`` attribute.
        device = getattr(self.model, "device", "cuda")
        input_ids = self.tokenizer(prompt, return_tensors='pt')["input_ids"].to(device)
        with torch.no_grad():
            generation_output = self.model.generate(
                input_ids=input_ids,
                temperature=temperature,
                top_p=1.0,
                do_sample=True,
                return_dict_in_generate=True,
                max_new_tokens=max_new_tokens,
            )
        # The returned sequence starts with the prompt tokens; keep only what
        # was generated after them.
        new_tokens = generation_output.sequences[0][len(input_ids[0]):]
        # skip_special_tokens keeps markers such as the end-of-sequence token
        # out of the answer shown to the user.
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True)

    def run(self, query: dict):
        """Generate the ranked answer and store it on the query dict.

        Args:
            query: Either the ``{"query": ..., "context": ...}`` envelope
                returned by ``GoogleNode.run`` or the inner query dict itself.
                The dict that is used must contain ``"question"``,
                ``"requirements"`` and ``"context"``.

        Returns:
            A ``(query, "output_1")`` tuple where ``query`` is the (unwrapped)
            query dict, updated in place with a ``"generated_answer"`` key.
        """
        print(query)
        # Unwrap the envelope when there is one; a plain query dict without a
        # "query" key is used as-is instead of raising KeyError.
        query = query.get("query", query)
        print(f'LLM doing RAG.... with information:{query}\n\n')
        # The worked example below quotes Sen of Japan's rating as 4.4 so that
        # it matches the rating listed in the example context (it used to say
        # 4.3, contradicting the context the model is told to cite).
        prompt=f"""Answer the question based on the following additional requirements and context, explain with references and then rank the top 3 restaurant options:
                  Example:
                  Question:
                  Where to get Japanese food in Marina Bay Sands that is fine-dining and has a romantic atmosphere at night?
                  Requirements:
                  Sushi[Cuisine], $$ [Price], Intimate with nice lighting [General-Atmosphere], Inside Marina Bay Sands [Location], 22:00 minimally [Opening]
                  Context:
                        Option: 1:
                        Name: Waku Ghin
                        Address: Level 2 Dining, L2-03 The Shoppes at, 2 Bayfront Ave, Marina Bay Sands, 018956


                        Price Level: 4 / 4 (0- Free, 1- Inexpensive, 2- Moderate, 3- Expensive, 4- Very Expensive, None- Price Data Not Available)
                        Rating: 4.1
                        Review 1: Absolutely amazing food; had the Omakase at the chef’s table. The umami and creamy sea urchin balanced with the sweetness of the shrimp and slight saltiness of the caviar to produce a full flavour profile with each mouthful. Loved the perfectly (teppanyaki) seared abalone with risoni and the A5 Kobe wagyu dipped in the egg yolk (it was so silky, tender and flavourful from the marbling). Service was excellent as well; they were jovial and attentive and made the experience delightful.
                        Review 2: This is a late review. I came here during valentines weekend. Both my friend and I had food poisoning and I was bed ridden for three days. Only decided to review now because my friend told me that her group of friends also got food poisoning from the same group of restaurants. So totally not worth it even tho the food was great. Spent 750 per pax to get food poisoning.. really ridiculous..
                        Review 3: We had some Izakaya food and beer before going for show. Food must be good considering the price is not cheap for beer food, but its a tad a bit too salty for me. Service is great of course. My favorite will be the lobster spaghetti, a bit of fusion.

                        Option: 2:
                        Name: Sen of Japan
                        Address: 2 Bayfront Ave, #01 - 86, Singapore 018972

                        Price Level: 3 / 4 (0- Free, 1- Inexpensive, 2- Moderate, 3- Expensive, 4- Very Expensive, None- Price Data Not Available)
                        Rating: 4.4
                        Review 1: Lunch with a view.

                        The marugo pizza is really nice and thin.

                        Pork jowl is thinly slice. The thinnest I’ve seen. Nice in paring with the sauce

                        Black Cod fish is soft and nicely cook too.

                        V worth it if you have American Express card , to enjoy 50% disc on the bill. This meal of 3 fish cause only $80+.

                        They also have some pretty affordable lunch set.
                        Review 2: Five stars for everything (except maybe the receptionist that received us.. kind of rude). The chefs were super friendly (sat at the counter), and I had nice small talk with them while eating.
                        Review 3: Excellent upscale Japanese restaurant great for a date.
                        Lobster uni maki was out of this world. Amazing flavours and textures in a single mouthful.

                        Option: 3:
                        Name: KOMA Singapore
                        Address: 2 Bayfront Ave, # B1 - 67, Singapore 018972

                        Price Level: None / 4 (0- Free, 1- Inexpensive, 2- Moderate, 3- Expensive, 4- Very Expensive, None- Price Data Not Available)
                        Rating: 4.3
                        Review 1: Really nice restaurant with great food and perfect service. The atmosphere can get quite uncomfortable because it is always packed. Prices are reasonable for quality and location. Very recommended for date nights.
                        Review 2: Obviously, the main attraction would be the interior lighting. Can't deny the entire place makes a great backdrop for IG-worthy photos.
                        Review 3: Celebrated my wife birthday there recently, was a very nice place. Food was truly amazing and the staffs were very friendly and helpful in many ways.
                  Answer:
                  1. Sen of Japan is the best choice as it has serves sushi ('Lobster uni maki'), on top of that it has the highest rating of 4.4 out of the other options. Lastly, it was 'great for a date' as mentioned in the review. It is also open till 22:00 as needed.
                  2. A close scond would be KOMA Singapore. It has a great great interior lighting for a romantic atmospher and quality food. It also has a good rating of 4.3 but did not explicitly mention if sushi is served.
                  3. Waku Ghin is the third choice as it is more expensive than the other choices. It is 'totally not worth it even tho the food was great' as quoted from a review.
                
                  ________________________________________________________________________________________________________
                  Question:
                  {query["question"]}
                  Requirements:
                  {query["requirements"]}
                  Context:
                  {query["context"]}
                  Answer:
                  """
        query["generated_answer"]= self._complete_prompt(prompt, temperature=0.8, max_new_tokens=200)
        print(f'\n____GENERATED ANSWER:_________\n{query["generated_answer"]} \n\n')

        # The generation tensors went out of scope with the helper call, so the
        # cached GPU blocks can be released now.
        torch.cuda.empty_cache()

        return query, "output_1"

    def run_batch(self, queries):
        """Run :meth:`run` once per entry of ``queries``.

        Returns:
            A ``(results, "output_1")`` tuple in which ``results`` is a list
            holding the ``(query_dict, edge_name)`` tuple that :meth:`run`
            returned for each query, in input order.
        """
        # NOTE(review): Haystack batch pipelines may expect a dict here rather
        # than a list of run() tuples - confirm before using run_batch in a
        # pipeline.
        output = [self.run(query) for query in queries]
        return output, "output_1"

In [None]:
import os
from getpass import getpass

# Read the Google Maps key from the environment and fall back to an
# interactive prompt, so the credential is never stored in the notebook.
# The key that used to be hard-coded here has been committed to source
# control and should be revoked and replaced.
google_maps_api_key = os.environ.get("GOOGLE_MAPS_API_KEY") or getpass("Google Maps API key: ")

# All LLM-backed nodes share the single model/tokenizer pair loaded earlier.
annotater = Annotate_Needs(model_l, tokenizer_l)
search_terms_generator = Search_Term_Generator(model_l, tokenizer_l)
google_node = GoogleNode(google_maps_api_key)
reader = Reader(model_l, tokenizer_l)


# Second-stage Haystack pipeline, wired as a linear chain:
# annotate requirements -> generate search term -> fetch places -> rank answer
p3 = Pipeline()
p3.add_node(component=annotater, name="Annotate_Needs", inputs=["Query"])
p3.add_node(component=search_terms_generator, name="Search_Term_Generator", inputs=["Annotate_Needs"])
p3.add_node(component=google_node, name="Google_Node", inputs=["Search_Term_Generator"])
p3.add_node(component=reader, name="Reader", inputs=["Google_Node"])

In [None]:
# Run the full second-stage pipeline on the question plus the user's answers
# to the clarifying questions.
result = p3.run(query = updated_indian_query)
# Optional debug dump of the intermediate fields (kept disabled):
# print(f'''
#   query: {result["query"]}
#   clarifier: {result["clarifier"]}
#   additional_user_criteria: {result["additional_user_criteria"]}
#   requirements:{result["requirements"]}
#   search_terms: {result["search_terms"]}
#   context: {result["context"]}
#   ''')

In [None]:
# Final ranked recommendation written by the Reader node.
print(result['generated_answer'])

In [None]:
# List the fields available on the pipeline result.
result.keys()