In [349]:
import os
import base64
import ast
import json

from groq import Groq
from llama_index.embeddings.openai import OpenAIEmbedding
from dotenv import load_dotenv
from llama_index.core.node_parser import LangchainNodeParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.core import VectorStoreIndex, Document, Settings, StorageContext
from llama_index.llms.openai_like import OpenAILike
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.readers import StringIterableReader
import faiss


In [350]:
# Load API keys from a local .env file into the process environment
# before any client reads them.
load_dotenv()

# Model identifiers: `model_name` is served through the Groq client,
# `model2_name` through the OpenAI-compatible Nebius endpoint.
model_name = "llama-3.2-90b-vision-preview"
model2_name = "meta-llama/Meta-Llama-3.1-70B-Instruct"

# Groq chat-completions client.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# llama_index LLM wrapper used for completions and as the query-engine LLM.
client2 = OpenAILike(
    api_base="https://api.studio.nebius.ai/v1/",
    api_key=os.environ.get("NEBIUS_API_KEY"),
    model=model2_name,
    is_chat_model=True,
    temperature=0,
)

In [351]:
# testing
response = client.chat.completions.create(
    model=model_name,
    messages=[{"role": "user",
               "content": "Who was the first PM in Canada?"}]
)
print(response.choices[0].message.content)


Sir John A. Macdonald was the first Prime Minister of Canada, serving from July 1, 1867, to November 5, 1873.


## 1. Provide a travel idea along with the destination

In [304]:
# Ask the vision model to describe the uploaded photo; the resulting text
# is later interpolated into the travel-assistant prompt.
image_path = './pics/french_cuisine.jpg'
photo_prompt = "Identify and name the main object in the photo and describe it in a short paragraph."

# Only reading the bytes needs the file handle, so it is closed before the
# (potentially slow) network request is made.
with open(image_path, "rb") as image_file:
    enc_image = base64.b64encode(image_file.read()).decode('utf-8')

# The image is sent inline as a base64 data URL alongside the text prompt.
photo_response = client.chat.completions.create(
    model=model_name,
    messages=[{"role": "user",
               "content": [
                   {"type": "text", "text": photo_prompt},
                   {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{enc_image}"}}
               ]}],
    temperature=0,
)

# Keep the response object and the extracted text under separate names so
# `photo_description` is always a string.
photo_description = photo_response.choices[0].message.content
print(photo_description)

The main object in the photo is a white, ceramic escargot dish with nine indentations. The dish is filled with cooked snails covered in a green sauce, and there is a silver fork resting on top of it. The dish is placed on a tablecloth with a green and brown striped pattern.


In [305]:
# user_prompt = "I want to travel to Paris and try authentic French cuisine, including the dish in the photo. Give me spot recommendations most tourists don't know about."

In [372]:
# Active request for this run. No photo accompanies it, so the photo
# description is set to a fixed placeholder string.
user_prompt = (
    "I wanna go to Rio de Janeiro and go to local craft shops possibly not know by many. "
    "I'm interested in local culture and art."
)
photo_description = "No photo uploaded"

In [373]:
# Prompt for the query-generation call: it embeds the user's request and the
# photo description, then instructs the model to answer with exactly one
# search-engine query written in the language of the travel destination.
assistant_prompt = f"""
User prompt:
{user_prompt}

Descriptions of uploaded photos:
{photo_description}

Instructions:
You are a travelling assistant. Find out what kind of places the user wants to visit by finding out keywords in the user prompt. If there are uploaded photos, try to find out how they relates to the user's intent and find key words in it. Next, create ONE concise query you'd put in a search engine to find suitable local places which are less-known to tourists for the user to visit according to the user's requests. For example, if the user wants to visit museums in London, the search query would be "London local museums". Output this ONE query in the language used in the user's travel destination. ONLY output the query and nothing else. DO NOT include your thought process. 
"""

In [374]:
# Generate the destination-language search query from the assistant prompt.
response = client.chat.completions.create(
    model=model_name,
    messages=[{"role": "user", "content": assistant_prompt}],
    temperature=0,
)

# The model tends to wrap its answer in quotation marks. Remove surrounding
# whitespace and one pair of matching wrapping quotes so the quotes are not
# carried into the prompts that reuse this query.
search_query = response.choices[0].message.content.strip()
if (
    len(search_query) >= 2
    and search_query[0] == search_query[-1]
    and search_query[0] in "\"'"
):
    search_query = search_query[1:-1].strip()
print(search_query)

"lojas de artesanato locais em Rio de Janeiro"


In [355]:
# Condense the generated search query to roughly six keywords.
p2 = f"Given this search query: {search_query}, locate keywords in it and shorten it to around 6 words. ONLY output the shortened search query and nothing else."
response = client.chat.completions.create(
    model=model_name,
    messages=[{"role": "user", "content": p2}],
    # Pin the sampling temperature, as the other generation cells do, so
    # re-running the notebook does not reshuffle the shortened query.
    temperature=0,
)
# Drop stray leading/trailing whitespace or newlines from the reply.
search_query_short = response.choices[0].message.content.strip()
print(search_query_short)

Artesanato local Rio de Janeiro


In [356]:
# Detect the language the user wrote in; later prompts ask the model to
# translate its answers into this language.
response = client.chat.completions.create(
    model=model_name,
    messages=[{"role": "user",
               "content": f"User prompt:\n{user_prompt}\n\nGet the language of the user prompt. Return only this language and nothing else, so return ONE word."}],
    # Deterministic decoding: this one-word answer is interpolated into
    # later prompts, so it should not vary between runs.
    temperature=0,
)
# Strip surrounding whitespace so the value is a clean single token.
user_language = response.choices[0].message.content.strip()
print(user_language)

English


In [357]:
# Ask the model to pick the language code to search in.
# The Italian entry previously read "lang_it", which did not match the bare
# code format of every other entry in the list; it is now "it".
# NOTE(review): confirm the consumer of `language_code` expects bare codes.
search_prompt = f"""Given the list of languages choose one that you want to find search results in. ONLY provide the language code.

User prompt:
{user_prompt}

The list of languages:
- en
- fr
- de
- es
- it
- pt-pt
- pt-br
- th
- hi
"""
response = client.chat.completions.create(
    model=model_name,
    messages=[{"role": "user", "content": search_prompt}],
    # Deterministic decoding keeps the chosen code stable across re-runs.
    temperature=0,
)
# Strip whitespace/newlines so the code can be used verbatim.
language_code = response.choices[0].message.content.strip()
print(language_code)

pt-br


In [358]:
# Ask the model for the two-letter country code to search in.
# Changes to the prompt: "BR" was listed twice, and the instruction asked for
# "the countries" (plural) even though exactly one code is expected.
# NOTE(review): the language list in the previous cell includes "th" but no
# matching country appears here; confirm whether one should be added.
coun_code_prompt = f"""Given the list of countries choose one that you want to find search results in. ONLY provide the country code.

User prompt:
{user_prompt}

The list of countries:
GB, US, CA, NZ, AU, FR, DE, ES, IT, PT, BR, IN
"""
response = client.chat.completions.create(
    model=model_name,
    messages=[{"role": "user", "content": coun_code_prompt}],
    temperature=0,
)
# Strip whitespace so the code can be used verbatim.
country_code = response.choices[0].message.content.strip()
print(country_code)

# Separately ask for the human-readable country name.
response = client.chat.completions.create(
    model=model_name,
    messages=[{"role": "user",
               "content": f"Here is user prompt: {user_prompt}\n\nWhich country is the user interested in? Only output country name and nothing else."}],
    temperature=0,
)
country_name = response.choices[0].message.content.strip()
print(country_name)

BR
Brazil


## 2. Set up RAG for scraped contents

In [375]:
# Load the text of the five scraped pages (text_0.txt .. text_4.txt).
contents = []
for i in range(5):
    # Read with an explicit encoding instead of the platform default, matching
    # how the place-details JSON files are opened later in the notebook.
    # NOTE(review): assumes the scraper wrote these files as UTF-8 -- confirm.
    with open(f'./scraped_website/text_{i}.txt', 'r', encoding='utf-8') as f:
        contents.append(f.read())
print(len(contents))

5


In [376]:
# Chunk the scraped pages two ways with the same splitter:
#   * chunks_text -- plain string chunks, one list per page
#   * chunks      -- llama_index nodes for the vector index
splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=32)
chunks_text = [splitter.split_text(page_text) for page_text in contents]

# Wrap each page in a Document, then split the documents into nodes.
content_docs = StringIterableReader().load_data(texts=contents)
print(len(content_docs))

parser = LangchainNodeParser(splitter)
chunks = parser.get_nodes_from_documents(content_docs)
print(len(chunks))

5
334


In [377]:
# Register the LLM and the embedding model on llama_index's global Settings.
embeddings = OpenAIEmbedding()
Settings.llm = client2
Settings.embed_model = embeddings

In [378]:
# Embed a throwaway string once to find the embedding dimensionality,
# which the FAISS index needs at construction time.
probe_embedding = embeddings.get_text_embedding("Hello world")
faiss_dim = len(probe_embedding)
faiss_index = faiss.IndexFlatL2(faiss_dim)

# Wrap the FAISS index as a llama_index vector store and storage context.
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)

# Build the index over the chunk nodes.
vector_index = VectorStoreIndex(
    chunks,
    storage_context=storage_context,
)

## 3. Extract all recommended travel spots

In [380]:
# For each scraped page, show the LLM up to `num_chunks` of its text chunks
# and ask for a Python-list literal of (spot name, category) tuples.
num_chunks = 100
res_str_list = []
chunk_divider = f"\n{'-' * 100}\n"
for page_chunks in chunks_text:
    # Number the chunks from 1 and join them with a dashed divider.
    numbered_chunks = []
    for position, chunk_body in enumerate(page_chunks[:num_chunks], start=1):
        numbered_chunks.append(f"Chunk {position}:\n\n" + chunk_body)
    context_prompt = chunk_divider.join(numbered_chunks)

    loc_prompt = f"""
        User prompt:
        {user_prompt}

        Context prompt:
        {context_prompt}

        Extract at most 5 recommended travel spots the user wants to find according to the user prompt. Only mention the NAMES of the spots and nothing else. Every spot should be a physical location and NOT an event or carnival. DO NOT repeat any names. Return a python list with each element containing a tuple, (spot name, category). Category is the type of spot (eg restaurant, park, museum, zoo, shrine, statue, etc). If there are no new spots in the context prompt, output an empty list. Return only a python list.
        """
    completion = client2.complete(loc_prompt)
    res_str_list.append(str(completion))
print(res_str_list)

['[]', '[]', '[]', "[('Shopping da Rua Siqueira Campos 143', 'Shopping'), ('Shopping da Siqueira n. 43', 'Shopping'), ('O Sol', 'Artisan Shop'), ('Casa de Artesanato do Estado do Rio de Janeiro', 'Artisan Shop'), ('Cidade do Samba', 'Cultural Space')]", '[]']


In [381]:
def _parse_spot_list(raw):
    """Parse one LLM reply into a list of (spot name, category) tuples.

    The reply is expected to be a Python list literal, but LLM output is not
    guaranteed to be well-formed. A surrounding markdown code fence is
    removed, and a reply that still cannot be parsed yields an empty list
    (with a printed notice) instead of aborting the whole cell.

    Args:
        raw: The raw text returned by the LLM for one scraped page.

    Returns:
        A list of 2-tuples of strings; entries of any other shape are dropped.
    """
    text = raw.strip()
    if text.startswith("```"):
        # Remove the fence markers and an optional "python" language tag.
        text = text.strip("`").strip()
        if text.lower().startswith("python"):
            text = text[len("python"):].strip()
    try:
        # literal_eval only evaluates Python literals; it never executes code.
        parsed = ast.literal_eval(text)
    except (ValueError, SyntaxError):
        print(f"Skipping unparsable LLM reply: {raw!r}")
        return []
    if not isinstance(parsed, (list, tuple)):
        return []
    spots = []
    for item in parsed:
        is_pair = isinstance(item, (list, tuple)) and len(item) == 2
        if is_pair and all(isinstance(part, str) for part in item):
            spots.append(tuple(item))
    return spots


# Merge the per-page replies into one list of spots.
res_list = []
for res_str in res_str_list:
    res_list.extend(_parse_spot_list(res_str))

# Drop repeats while keeping first-seen order, so the later per-spot loops
# do not query the same place twice.
res_list = list(dict.fromkeys(res_list))
res_set = set(res_list)
print(res_set)

{('O Sol', 'Artisan Shop'), ('Cidade do Samba', 'Cultural Space'), ('Shopping da Rua Siqueira Campos 143', 'Shopping'), ('Shopping da Siqueira n. 43', 'Shopping'), ('Casa de Artesanato do Estado do Rio de Janeiro', 'Artisan Shop')}


## 4. Retrieve descriptions and reviews for every recommended spot

In [382]:
# Retriever returning the two most similar chunks per query, plus a query
# engine built on top of that retriever.
retriever = vector_index.as_retriever(
    similarity_top_k=2,
)
query_engine = RetrieverQueryEngine(
    retriever=retriever,
)

In [383]:
# Build a ten-word description for each of the first six extracted spots,
# keyed by spot name.
short_desc_map = {}
doc_divider = f"\n{'-' * 100}\n"
for restaurant, _ in res_list[:6]:
    # Fetch the chunks most similar to the spot name.
    retrieved_docs = retriever.retrieve(restaurant)

    # Number the retrieved passages from 1 and join them with a dashed divider.
    doc_sections = []
    for doc_number, hit in enumerate(retrieved_docs, start=1):
        doc_sections.append(f"Document {doc_number}:\n\n" + hit.text)
    retrieved_docs_text = doc_divider.join(doc_sections)

    details_prompt = f"""
        User prompt:
        {user_prompt}

        Context prompt:
        {retrieved_docs_text}
        
        Give descriptions for the spot: "{restaurant}". Give a description in ten words. Translate to the user's language if necessary: {user_language}. Display only the description and nothing else.
    """
    answer = query_engine.query(details_prompt)
    description = str(answer)
    short_desc_map[restaurant] = description
    print(description)
    print('\n---\n')

Local craft shop in old Copacabana shopping center nearby metro.

---
Similar craft shop on same street, different building number.

---
Dynamic space with craft shop and art courses available.

---
Local craft shop showcasing northeastern Brazilian art and culture.

---
Cultural space showcasing local art and carnival-related souvenirs nearby.

---


In [384]:
# Build a one-paragraph description for every extracted spot, keyed by
# spot name.
desc_map = {}
dash_rule = '-' * 100
for restaurant, _ in res_list:
    retrieved_docs = retriever.retrieve(restaurant)
    # Retrieved passages, numbered from 1 and separated by a dashed rule.
    retrieved_docs_text = f"\n{dash_rule}\n".join(
        f"Document {idx}:\n\n" + doc.text
        for idx, doc in enumerate(retrieved_docs, 1)
    )
    details_prompt = f"""
        User prompt:
        {user_prompt}

        Context prompt:
        {retrieved_docs_text}
        
        Give descriptions for the spot: "{restaurant}". Give a description in one paragraph. Translate to the user's language if necessary: {user_language}. Display only the description and nothing else.
    """
    description = str(query_engine.query(details_prompt))
    desc_map[restaurant] = description
    # Same output as two separate prints: the description, then the marker.
    print(description, '\n---\n', sep='\n')

This shopping center in Copacabana is a hidden gem for those interested in local culture and art. Located at Rua Siqueira Campos 143, it's situated next to the Siqueira Campos metro station, making it easily accessible. Inside, you'll find unique shops selling unfinished wooden, MDF, and plaster pieces, just waiting for your creative touch to be completed with paint, collages, and more.

---
Shopping da Siqueira n. 43 is a shopping center located on Rua Siqueira Campos in Copacabana, Rio de Janeiro. Although it's not the primary location mentioned, it's worth noting that this shopping center also features stores selling unfinished craft items made of wood, MDF, and plaster, similar to those found at the nearby Shopping da Rua Siqueira Campos 143.

---
O Sol is a dynamic space that houses a store with various products and offers courses in handicrafts.

---
Unfortunately, the provided context does not mention "Casa de Artesanato do Estado do Rio de Janeiro". However, based on the contex

In [386]:
# Summarise the stored reviews for each place, keyed by place name.
review_map = {}
review_rest_list = ['Shopping Cidade Copacabana', 'The Sun Crafts', 'Loja de Artesanato Arte Palha RJ', 'Samba City']
for restaurant in review_rest_list:
    # Each place has a JSON file named after it; this cell reads its
    # "reviews" and "localRating" keys.
    with open(f"./place_details_br/{restaurant}.json", "r", encoding="utf-8") as file:
        data = json.load(file)
    reviews = data.get("reviews", [])
    # NOTE(review): assumes every review entry is a plain string -- confirm
    # against the files in place_details_br.
    review_text = "\n\n".join(reviews)
    review_prompt = f"""
        Reviews:
        {review_text}
        
        Give a summary of the reviews for the spot: {restaurant}. Translate to the user's language: {user_language}. Display ONLY the summary and nothing else. Don't say "here's the summary" or anything similar.
    """
    # Call the LLM directly rather than through the retrieval query engine.
    # The query engine would first retrieve chunks from the scraped-website
    # index and add them as context, letting unrelated web text leak into a
    # summary that should be based only on the reviews in the prompt.
    review_summary = str(client2.complete(review_prompt))
    review_map[restaurant] = review_summary
    print(f"{restaurant}: Rating: {data.get('localRating')}/5\n\n")
    print(review_summary)
    print('\n---\n')

Shopping Cidade Copacabana: Rating: 4.2/5


The Shopping Cidade Copacabana is a traditional and well-located spot with a variety of shops, services, and attractions, including antique stores, art exhibitions, a theater, restaurants, cafes, and a supermarket. Reviewers praise its unique and eclectic offerings, making it an excellent destination for those seeking one-of-a-kind items for their homes or offices. The spot is also commended for its convenient location, security, and amenities, although the parking area is noted to be in need of improvement. Overall, it is a great place to find something special, enjoy a meal or coffee, and take in the local culture.

---
The Sun Crafts: Rating: 5.0/5


The Sun Crafts is a special place with a wide variety of high-quality, exclusive artisanal products, including sculptures, ceramics, textiles, and more. Reviewers praise the friendly staff, beautiful products, and the organization's social and educational work, which supports people in need. V