In [1]:
from interface.inference import get_answer
# from model.inference_model import LlaMA2
from model.embedding_model import Encoder
from RAG.utils import cos_similarity
from pinecone import Pinecone
import torch
import pandas as pd
import os
from RAG.chunk_data import sliding_window
from interface.get_response import retrieve_context

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
cards = ['magician_reversed', 'empress_upright', 'moon_upright']
question = f'''Crystal, 24 year-old, single, who just left a company. She wanted to ask a question what she should do for her next job?'''

In [4]:
answer, context = get_answer(question=question, cards = cards)

Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.13s/it]


In [5]:
answer

("As the Magician Reversed appears in your reading, it suggests that you may have been relying too heavily on your logical, rational mind in your search for your next job. While this is important, it's also crucial to tap into your intuition and creativity when making decisions. Consider taking some time to meditate or engage in other mindfulness practices to connect with your inner wisdom. This will help you make decisions that are in alignment with your true desires and values.\n\n    As the Empress appears in your reading, it's a sign that abundance and pleasure are on the horizon for you. You may be feeling more creative and inspired in your work, and you may be more attractive to potential employers. Trust in your own worth and value, and don't be afraid to ask for what you want.\n\n    As the Moon appears in your reading, it's a reminder that your intuition is a powerful tool in your job search. Pay attention to your gut feelings and inner wisdom, as they may be guiding you towar

In [9]:
answer[0].split('\",')[0].strip('\"')

"As the Magician Reversed appears in your reading, it suggests that you may have been relying too heavily on your logical, rational mind in your search for your next job. While this is important, it's also crucial to tap into your intuition and creativity when making decisions. Consider taking some time to meditate or engage in other mindfulness practices to connect with your inner wisdom. This will help you make decisions that are in alignment with your true desires and values.\n\n    As the Empress appears in your reading, it's a sign that abundance and pleasure are on the horizon for you. You may be feeling more creative and inspired in your work, and you may be more attractive to potential employers. Trust in your own worth and value, and don't be afraid to ask for what you want.\n\n    As the Moon appears in your reading, it's a reminder that your intuition is a powerful tool in your job search. Pay attention to your gut feelings and inner wisdom, as they may be guiding you toward

In [3]:
# context = '''
# '#0 life. Connect with your senses through taste, touch, sound, smell and sight. 
# Draw on these senses to experience pleasure and deep fulfilment. 
# Treat yourself to a day spa, learn massage, enjoy a fine restaurant or spend more time with your partner. 
# Discover different approaches to expressing yourself creatively, be it through painting, music, drama or other art forms. 
# This may be the perfect chance to take up a new hobby that enables you to access this part of yourself. 
# The Empress signifies abundance. You are surrounded by life’s pleasures and luxuries and have everything you need to live a 
# comfortable lifestyle. You are in a period of growth, in which all you have dreamed of is now coming to fruition. 
# When The Empress appears in your Tarot readings, take a moment to reflect on the bounty that surrounds you and 
# offer gratitude for all you have created so you can continue to build on this energy and create even more abundance in 
# your life. As the Mother Earth archetype, The Empress urges you to venture out into nature to ground your energy and be in 
# flow with the earth. Take a trip to your favorite natural setting, be it a forest, beach, mountain or lake, and sit for a 
# couple of minutes, hours or even days to breathe in the energy that surrounds you while marveling in the beauty of your 
# surroundings. Allow yourself the time and the space to enter a different frame of mind and receive the grounding spirit of 
# nature into your heart and consciousness. When you do this, you can reach higher planes of consciousness. When you are in 
# tune with the energy of The Empress, you will naturally take on her mothering nature. You feel a strong urge to nurture 
# and care for others, from \n#1 relationships. The arrival of this card in a Tarot reading shows that you have a beautiful,
#  soul-honoring connection with a loved one. You may believe you have found your soul mate or life partner, 
#  and the sexual energy between you both goes way beyond instant gratification and lust to something that is very
#    spiritual and almost Tantric. While The Lovers card typically refers to a romantic tie, it can also represent a close friendship or family relationship where love, respect and compassion flow. The Lovers is a card of open communication and raw honesty. Given that the man and woman are naked, they are both willing to be in their most vulnerable states and have learned to open their hearts to one another and share their truest feelings. They shape the container from which trust and confidence can emerge, and this makes for a powerful bond between the two. In a reading, this card is a sign that by communicating openly and honestly with those you care about, you will create a harmonious and fulfilling relationship built on trust and respect. On a more personal level, The Lovers card represents getting clear about your values and beliefs. You are figuring out what you stand for and your philosophy. Having gone through the indoctrination of The Hierophant, you are now ready to establish your belief system and decide what is and what is not essential to you. It’s time to go into the big wide world and make choices for yourself, staying true to who you are and being authentic and genuine in all your endeavors. At its heart, The Lovers is about choice. The choice about who you want to be in this lifetime, 
# how you connect with others and on what level, and about what you will and won’t \n'

# '''

In [4]:
from transformers import LlamaForCausalLM, LlamaTokenizer
import torch
import re
import os

class LlaMA2:
    """
    A class for generating results given questions using a LLaMA-2 model.
    """
    def __init__(self, checkpoint, max_seq_len = 512, device='cuda:0',API_KEY=None):
        self.device = torch.device(device if torch.cuda.is_available() else 'cpu')
        self.tokenizer = LlamaTokenizer.from_pretrained(checkpoint)
        self.tokenizer.add_special_tokens({'pad_token': '[PAD]'})
        if API_KEY is not None:
            self.model = LlamaForCausalLM.from_pretrained(checkpoint,torch_dtype=torch.float16, token = API_KEY).to(self.device)  
        else:
            self.model = LlamaForCausalLM.from_pretrained(checkpoint,torch_dtype=torch.float16).to(self.device)
        self.max_seq_len = max_seq_len

    def predict(self, text):
        inputs = self.tokenizer.encode(text, return_tensors='pt', max_length=self.max_seq_len, truncation=True)
        inputs = inputs.to(self.device)
        
        # Calculate the maximum length for the model generation
        output_max_length = len(inputs[0]) + self.max_seq_len
        
        outputs = self.model.generate(inputs, max_length=output_max_length)
        full_output =  self.tokenizer.decode(outputs[0], skip_special_tokens=True)

        # post-process the model outputs to find a proper answer
        answer = full_output.split('Context:')[1]
        answer_prefix = "Response to the Question:"
        start = answer.find(answer_prefix)

        # Extract the answer
        if start != -1:
            start += len(answer_prefix)
            answer = answer[start:].strip()
        else:
            answer = "Please try again."
        
        return answer, full_output, text

In [5]:
def ask_question(question, context, inference_model, cards):
    """
    Generates an answer to a given question using a few-shot learning approach and provided context.

    :param question: The question to be answered.
    :param few_shots: Examples provided to guide the model in the format of few-shot learning.
    :param context: The context or background information relevant to the question.
    :param inference_model: The model used to infer or predict the answer based on the query.

    :return: The predicted answer as a string, expected to be one word among [A, B, C, D].
    """
    
    query = f"""You are an AI Clairvoyant. The retrieved context may help answer the question.

    Context: {context}

    Question: {question}

    Tarot Cards: {cards}

    Response to the Question:
    """
    
    answer = inference_model.predict(query)
    
    return answer

In [6]:
inference_model_path = "meta-llama/Llama-2-7b-chat-hf"
# embedding_model_path = 'BAAI/bge-large-en-v1.5',
embedding_model_path = inference_model_path
inference_model = LlaMA2(inference_model_path, API_KEY=os.environ.get("HF_TOKEN"),max_seq_len=512)
embedding_model = Encoder(embedding_model_path, 512)
embedding_df = pd.read_parquet("./data/embeddings.parquet")

Loading checkpoint shards: 100%|██████████| 2/2 [01:17<00:00, 38.69s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.70s/it]


OutOfMemoryError: CUDA out of memory. Tried to allocate 64.00 MiB. GPU 0 has a total capacity of 15.77 GiB of which 23.06 MiB is free. Process 13222 has 15.75 GiB memory in use. Of the allocated memory 15.44 GiB is allocated by PyTorch, and 9.38 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
context = retrieve_context(question, cards, embedding_df, embedding_model, top_k=2)

In [None]:
answer, full_output, text = ask_question(question, context, inference_model,cards)

In [None]:
answer

"Crystal, the combination of cards you've drawn suggests that you should take a creative and intuitive approach to finding your next job. The Magician Reversed indicates that you may have been too focused on practicality and logic in your previous job search, and it's time to tap into your inner wisdom and creativity. The Empress Upright represents abundance and nurturing, which aligns with your desire to find a fulfilling and comfortable lifestyle. The Moon Upright symbolizes intuition and the subconscious, suggesting that you should trust your gut instincts and listen to your inner voice when exploring new career opportunities.\n\nTo manifest your next job, you may want to consider exploring creative fields such as art, music, or writing. These fields align with your intuition and creativity, and can provide a sense of fulfillment and abundance. Additionally, you may want to consider taking a course or workshop to develop your skills in these areas, as this can help you tap into your

In [None]:
full_output

"You are an AI Clairvoyant. The retrieved context may help answer the question.\n\n    Context: \n'#0 life. Connect with your senses through taste, touch, sound, smell and sight. \nDraw on these senses to experience pleasure and deep fulfilment. \nTreat yourself to a day spa, learn massage, enjoy a fine restaurant or spend more time with your partner. \nDiscover different approaches to expressing yourself creatively, be it through painting, music, drama or other art forms. \nThis may be the perfect chance to take up a new hobby that enables you to access this part of yourself. \nThe Empress signifies abundance. You are surrounded by life’s pleasures and luxuries and have everything you need to live a \ncomfortable lifestyle. You are in a period of growth, in which all you have dreamed of is now coming to fruition. \nWhen The Empress appears in your Tarot readings, take a moment to reflect on the bounty that surrounds you and \noffer gratitude for all you have created so you can continu

In [None]:
text

"You are an AI Clairvoyant. The retrieved context may help answer the question.\n\n    Context: \n'#0 life. Connect with your senses through taste, touch, sound, smell and sight. \nDraw on these senses to experience pleasure and deep fulfilment. \nTreat yourself to a day spa, learn massage, enjoy a fine restaurant or spend more time with your partner. \nDiscover different approaches to expressing yourself creatively, be it through painting, music, drama or other art forms. \nThis may be the perfect chance to take up a new hobby that enables you to access this part of yourself. \nThe Empress signifies abundance. You are surrounded by life’s pleasures and luxuries and have everything you need to live a \ncomfortable lifestyle. You are in a period of growth, in which all you have dreamed of is now coming to fruition. \nWhen The Empress appears in your Tarot readings, take a moment to reflect on the bounty that surrounds you and \noffer gratitude for all you have created so you can continu

In [None]:
# post-process the model outputs to find a proper answer
answer = full_output.split('Context:')[1]
answer_prefix = "Response to the Question: "
start = answer.find(answer_prefix)

In [17]:
full_output.find("Response to the Question:")

3754

In [16]:
full_output

"You are an AI Clairvoyant. The retrieved context may help answer the question.\n\n    Context: \n'#0 life. Connect with your senses through taste, touch, sound, smell and sight. \nDraw on these senses to experience pleasure and deep fulfilment. \nTreat yourself to a day spa, learn massage, enjoy a fine restaurant or spend more time with your partner. \nDiscover different approaches to expressing yourself creatively, be it through painting, music, drama or other art forms. \nThis may be the perfect chance to take up a new hobby that enables you to access this part of yourself. \nThe Empress signifies abundance. You are surrounded by life’s pleasures and luxuries and have everything you need to live a \ncomfortable lifestyle. You are in a period of growth, in which all you have dreamed of is now coming to fruition. \nWhen The Empress appears in your Tarot readings, take a moment to reflect on the bounty that surrounds you and \noffer gratitude for all you have created so you can continu

In [2]:
import os

In [3]:
import pandas as pd
from tqdm import tqdm


def sliding_window(tarrot_data, chunck_size = 20, step_size=10):
    
    """
    Dividing text into chunks using a sliding window approach.

    :param chunck_size: The number of words in each text chunk (default is 20).
    :param step_size: The step size for the sliding window to move over the text (default is 10).

    :return: A tuple of two lists: 
             - The first list contains chunks of text.
             - The second list contains the corresponding topics for each chunk.
    """
    chunks = []
    chunks_topic =  []
    sub_topics = ['upright_keywords','reverse_keywords','description','upright','reversed']

    for i in tqdm(range(len(tarrot_data))):
        card = tarrot_data[i][1]
        upright_keywords = tarrot_data[i][2]
        reverse_keywords = tarrot_data[i][3]
        description = tarrot_data[i][4]
        upright = tarrot_data[i][5]
        reversed = tarrot_data[i][6]

        topic_upright = card + '_upright'
        text_upright = upright_keywords + ' ' + description + ' ' + upright
        words_upright = text_upright.split()
    
        for i in range(0, len(words_upright) - chunck_size + 1, step_size):
            chunks.append(" ".join(words_upright[i:i + chunck_size]))
            chunks_topic.append(topic_upright)
        
        topic_reversed = card + '_reversed'
        text_reversed = reverse_keywords + ' ' + description + ' ' + reversed
        words_reversed = text_reversed.split()
    
        for i in range(0, len(words_reversed) - chunck_size + 1, step_size):
            chunks.append(" ".join(words_reversed[i:i + chunck_size]))
            chunks_topic.append(topic_reversed)

    return chunks, chunks_topic

In [4]:
# Load the data
tarrot_data = pd.read_csv("data/tarot.csv").values.tolist()

# Create chunks of text
chunks, chunks_topic = sliding_window(tarrot_data)


100%|██████████| 78/78 [00:00<00:00, 6546.52it/s]


In [5]:
import torch
import numpy as np


def cos_similarity(a, b):
    """
    Computes the cosine similarity.
    """
    if not isinstance(a, torch.Tensor):
        a = torch.tensor(np.array(a)).float()

    if not isinstance(b, torch.Tensor):
        b = torch.tensor(np.array(b)).float()

    if len(a.shape) == 1:
        a = a.unsqueeze(0)

    if len(b.shape) == 1:
        b = b.unsqueeze(0)
    a_norm = torch.nn.functional.normalize(a, p=2, dim=1)
    b_norm = torch.nn.functional.normalize(b, p=2, dim=1)
    
    return torch.mm(a_norm, b_norm.transpose(0, 1))

In [26]:
from pinecone import Pinecone, ServerlessSpec

def store_embedding(chunks_embedding, chunks, chunks_topic, index_name, API_key):
    
    """
    Stores embeddings with their associated text and topics into a Pinecone index.
    :return: The created Pinecone index object.
    """
    pc = Pinecone(api_key=API_key)
    
    pc.create_index(
        name=index_name,
        dimension=chunks_embedding.shape[1],
        metric="cosine",
        spec=ServerlessSpec(
            cloud = "aws",
            region='us-east-1'
        ) 
    )
    
    # index data int pinecone
    index = pc.Index(index_name)
    for i in tqdm(range(len(chunks_embedding))):
        index.upsert(
            vectors=[
                {
                    'id': f'vec_{i}',
                    'values': chunks_embedding[i],
                    'metadata': {"text":chunks[i], "topic":chunks_topic[i]}
                }
            ],
        )
    return index

def retrieve_all_embedding(index, num_embed):
    """
    Retrieves embeddings and their associated metadata from a Pinecone index.

    :return: A Pandas DataFrame containing the embeddings and their associated metadata. 
             The DataFrame has columns 'id', 'values', and 'text'.
    """
    # retrieve embeddings from vector database
    embeddings_data = {"id":[], "values":[], "text":[]}
    embeddings = index.fetch([f'vec_{i}' for i in range(num_embed)])
    for i in range(num_embed):
        embeddings_data["id"].append(i)
        idx = f"vec_{i}"
        embeddings_data["text"].append(embeddings['vectors'][idx]['metadata']['text'])
        embeddings_data["values"].append(embeddings['vectors'][idx]['values'])
        
    return pd.DataFrame(embeddings_data)

In [16]:
from transformers import AutoModel, AutoTokenizer
from torch.utils.data import DataLoader
from datasets import Dataset
from tqdm import tqdm
import torch
import torch.nn.functional as F
import numpy as np




class Encoder:
    
    """
    A class for encoding text sentences into embeddings using a transformer model.
    """
    def __init__(self, checkpoint, MAX_SEQ_LEN, device="cuda:0"):
        self.device = device
        self.checkpoint = checkpoint
        self.model = AutoModel.from_pretrained(checkpoint).to(self.device).half()
        self.tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        self.sentences= None
        self.MAX_SEQ_LEN = MAX_SEQ_LEN

    def transform(self, batch):
        tokens = self.tokenizer(batch["text"], truncation=True, padding=True, return_tensors="pt", max_length=self.MAX_SEQ_LEN)
        return tokens.to(self.device)  

    def get_dataloader(self, sentences, batch_size=32):
        sentences = ["Represent this sentence for searching relevant passages: " + x for x in sentences]
        self.sentences= sentences
        dataset = Dataset.from_dict({"text": sentences})
        dataset.set_transform(self.transform)
        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
        return dataloader

    def encode(self, sentences, show_progress_bar=False, batch_size=32):
        dataloader = self.get_dataloader(sentences, batch_size=batch_size)
        pbar = tqdm(dataloader) if show_progress_bar else dataloader
        embeddings = []
        for batch in pbar:
            with torch.no_grad():
                e = self.model(**batch).pooler_output
                e = F.normalize(e, p=2, dim=1)
                embeddings.append(e.detach().cpu().numpy())
        embeddings = np.concatenate(embeddings, axis=0)
        return embeddings

In [8]:
from transformers import LlamaForCausalLM, LlamaTokenizer
import torch

class LlaMA2:
    """
    A class for generating results given questions using a LLaMA-2 model.
    """
    def __init__(self, checkpoint, max_seq_len = 512, device='cuda:0'):
        self.device = torch.device(device if torch.cuda.is_available() else 'cpu')
        self.tokenizer = LlamaTokenizer.from_pretrained(checkpoint)
        self.tokenizer.add_special_tokens({'pad_token': '[PAD]'})
        self.model = LlamaForCausalLM.from_pretrained(checkpoint,torch_dtype=torch.float16).to(self.device)  
        self.max_seq_len = max_seq_len

    def predict(self, text):
        inputs = self.tokenizer.encode(text, return_tensors='pt', max_length=self.max_seq_len, truncation=True)
        inputs = inputs.to(self.device)
        
        # Calculate the maximum length for the model generation
        output_max_length = len(inputs[0]) + self.max_seq_len
        
        outputs = self.model.generate(inputs, max_length=output_max_length)
        full_output =  self.tokenizer.decode(outputs[0], skip_special_tokens=True)

        # post-process the model outputs to find a proper answer
        answer = full_output.split('Context:')[1]
        answer_prefix = "### Answer: "
        start = answer.find(answer_prefix)

        # Extract the answer
        if start != -1:
            start += len(answer_prefix)
            answer = answer[start:].strip()
        else:
            answer = "Answer not found."
        
        return answer

In [27]:
chunks, chunks_topic = sliding_window(tarrot_data, chunck_size = 200, step_size =50)

index_name = "astrology-bot"

embedding_model_path = 'BAAI/bge-large-en-v1.5',
inference_model_path = 'chloeliu/llama-2-7b-chat-horoscope',

print("Starting chunks embedding!")
# load LLM as embedding encoder
model = Encoder(checkpoint='BAAI/bge-large-en-v1.5', MAX_SEQ_LEN = 256)
# embed chunks 
chunks_embedding = model.encode(chunks, show_progress_bar=True)


print("Indexing chunks embedding!")
# create new index if you do not have well-build index
try:
    pc = Pinecone(api_key=os.environ['PINECONE'])
    index = pc.Index(index_name)
except:
    index = store_embedding(chunks_embedding, chunks, chunks_topic, index_name, os.environ['PINECONE'])


100%|██████████| 78/78 [00:00<00:00, 5846.14it/s]


Starting chunks embedding!


100%|██████████| 34/34 [00:03<00:00,  8.88it/s]

Indexing chunks embedding!





In [28]:
index

<pinecone.data.index.Index at 0x7f8c34929ae0>

In [30]:
df = retrieve_all_embedding(index, 1000)

KeyError: 'vec_0'

In [None]:
import argparse
import pandas as pd
from model.embedding_model import Encoder
from model.inference_model import LlaMA2
# from dataset.mmlu_data import load_data
from interface.get_response import retrieve_context, ask_question
import numpy as np
from tqdm import tqdm
import os
import sys
import torch
import random
sys.path.insert(1, os.getcwd())

def seed_everything(seed: int):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
    
seed_everything(32)


def get_answer(
    embedding_model_path = "BAAI/bge-large-en-v1.5",
    inference_model_path = "chloeliu/llama-2-7b-chat-horoscope",
    top_k = 2,
    question = None,
    ):

    """
    Retrieves an answer for a given question or computes the QA accuracy on a test dataset using LLaMA-2-7B model.

    :param embedding_model_path: Path to the embedding model.
    :param inference_model_path: Path to the LLaMA-2-7B inference model.
    :param top_k: The number of top contexts to retrieve for the question.
    :param question: The question to be answered. If None, the function computes QA accuracy on a test dataset.

    :return: If a question is provided, returns a tuple (answer, context). 
             If no question is provided, returns the QA accuracy as a float.
    """

    
    # get embedding model and LLaMA-2-&B model
    embedding_model = Encoder(embedding_model_path, 512)
    inference_model = LlaMA2(inference_model_path)
    
    # load embeddings
    embedding_df = pd.read_parquet("./data/embeddings.parquet")
    test_df, few_shots_df = load_data()
    
    
    # merge few_shot samples into prompt
    few_shots = []
    for i in range(3):
        prompt = few_shots_df.iloc[[i], :].input.values[0]
        a, b, c, d = few_shots_df.iloc[[i], 1:-1].values[0]
        answer = few_shots_df.iloc[[i], -1].values[0]
        question_shot = f"""
            Question: {prompt}\n
            ### Answer: {answer}\n
            """
        few_shots.append(question_shot)

    few_shots = " \n ".join(few_shots)


    # if a question is not provided, the function will compute QA accuracy 
    if question is None:
        responses = []
        right_answers = []
        
        for i in tqdm(range(len(test_df))):
            
            
            prompt = test_df.iloc[[i], :].input.values[0]
            a, b, c, d = test_df.iloc[[i], 1:-1].values[0]
            right_answer = test_df.iloc[[i], -1].values[0]
            
            
            right_answers.append(right_answer)


            question = f"""
                Question: {prompt}
                """
            context = retrieve_context(question, embedding_df, embedding_model, top_k)
            answer = ask_question(question, few_shots, context, inference_model)
            responses.append(answer)
            
            if right_answer == answer:
                print(prompt)
                print(context)
            
        return (np.array(right_answers) == np.array(responses)).sum() / len(right_answers)
            
    else:
        context = retrieve_context(question, embedding_df, embedding_model, top_k)
        answer = ask_question(question, few_shots, context, inference_model)
        
        return answer, context