In [5]:
import os
import time
# import openai
import tiktoken 
import cohere
import chromadb
import tempfile
import google.generativeai as genai
from llama_index.llms import OpenAI, Gemini
from llama_index.memory import ChatMemoryBuffer
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, StorageContext, PromptHelper, LLMPredictor, load_index_from_storage
from llama_index.embeddings import OpenAIEmbedding, GeminiEmbedding
from llama_index.vector_stores import ChromaVectorStore
from llama_index.vector_stores.google.generativeai import GoogleVectorStore, set_google_config, genai_extension as genaix
from llama_index.indices.postprocessor import SentenceEmbeddingOptimizer, LLMRerank, CohereRerank, LongContextReorder
from llama_index import download_loader
from llama_index.text_splitter import TokenTextSplitter
from llama_index.node_parser import SimpleNodeParser
from flask import Flask, jsonify, flash, request, redirect, render_template, url_for, Response, stream_with_context
from google.oauth2 import service_account
import subprocess



## Configs

In [2]:
GOOGLE_API_KEY = ''
os.environ["GOOGLE_API_KEY"]  = GOOGLE_API_KEY

In [3]:
genai.configure(
    api_key=GOOGLE_API_KEY,
    client_options={"api_endpoint": "generativelanguage.googleapis.com"},
)

In [227]:
# set_google_config(api_endpoint=GOOGLE_API_KEY)

credentials = service_account.Credentials.from_service_account_file(
    "../backend/credentials.json",
    scopes=[
        "https://www.googleapis.com/auth/cloud-platform",
        "https://www.googleapis.com/auth/generative-language.retriever",
    ],
)
set_google_config(auth_credentials=credentials)

In [4]:
for m in genai.list_models():
    if "generateContent" in m.supported_generation_methods:
        print(m.name)

models/gemini-pro
models/gemini-pro-vision


In [4]:
CHROMADB_HOST = "localhost"
COHERE_RERANK_KEY = ''
ALLOWED_EXTENSIONS = {'txt', 'htm', 'html', 'pdf', 'doc', 'docx', 'ppt', 'pptx', 'csv'}


To start the chroma server, run the following in terminal:

`chroma run --path ./src/vector_db`

## Helpers

#### Custom Reader for `.yml` and `.sql` files

In [16]:
from llama_index import SimpleDirectoryReader
from llama_index.readers.base import BaseReader
from llama_index.schema import Document


class YMLReader(BaseReader):
    def load_data(self, file, extra_info=None):
        with open(file, "r") as f:
            print(file)
            text = f.read()
        # load_data returns a list of Document objects
        return [Document(text=text + "Foobar", extra_info={"filename": str(file), "file_type": ".yml"})]
        
class SQLReader(BaseReader):
    def load_data(self, file, extra_info=None):
        with open(file, "r") as f:
            print(file)
            text = f.read()
        # load_data returns a list of Document objects
        return [Document(text=text + "Foobar", extra_info={"filename": str(file), "file_type": ".sql"})]

In [208]:
def generate_vector_embedding_chroma(index_name, temp_dir):
        
    try:
        # initialize client, setting path to save data
        # db = chromadb.PersistentClient(path="./chroma_db")
        print("Connecting to Chroma database...")
        db = chromadb.HttpClient(host=CHROMADB_HOST, port=8000)
    except:
        return {'statusCode': 400, 'status': 'Could not connect to chroma database'}

    try:
        # create collection
        print("Creating vector embeddings......")
        print("Index name: ", index_name)
        start_time = time.time()
        chroma_collection = db.get_or_create_collection(
            name=index_name,
            metadata={"hnsw:space": "cosine"} # default: L2; used before: ip
            )
    except Exception as e:
        print("Error : : :", e)
        return {'statusCode': 400, 'status': 'A knowledge base with the same name already exists'}

    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

    # setup our storage (vector db)
    storage_context = StorageContext.from_defaults(
        vector_store=vector_store
    )

    llm = Gemini(api_key=GOOGLE_API_KEY, model='models/gemini-pro', temperature=0.6)
    embed_model = GeminiEmbedding(api_key=GOOGLE_API_KEY)
    node_parser = SimpleNodeParser.from_defaults(
        # text_splitter=TokenTextSplitter(chunk_size=1024, chunk_overlap=20)
        chunk_size=1024,
        chunk_overlap=20
        )

    service_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        node_parser=node_parser,
        )

    documents = SimpleDirectoryReader(input_dir=temp_dir,
                                      file_extractor={".yml": YMLReader(), ".sql": SQLReader()} # extra custom extractor
                                    ).load_data()
    
    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
        service_context=service_context
    )

    # temp_dir.cleanup() # delete document temp dir

    print(f"Vector embeddings created in {time.time() - start_time} seconds.")

    response = {
        'statusCode': 200,
        'status': 'Chroma embedding complete',
    }
    return response

In [245]:
def generate_vector_embedding_google(index_name, temp_dir):

    start_time = time.time()
    vector_store = GoogleVectorStore.create_corpus(corpus_id=index_name) # param:display_name

    llm = Gemini(model='models/gemini-pro', temperature=0)
    embed_model = GeminiEmbedding(api_key=GOOGLE_API_KEY)
    node_parser = SimpleNodeParser.from_defaults(
        chunk_size=1024,
        chunk_overlap=20
        )

    service_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        node_parser=node_parser,
        )

    # documents = SimpleDirectoryReader(input_dir=temp_dir,
    #                                   file_extractor={".yml": YMLReader(), ".sql": SQLReader()} # extra custom extractor
    #                                 ).load_data()
    
    documents = SimpleDirectoryReader(input_dir=temp_dir).load_data()

    index = VectorStoreIndex.from_documents(
        documents,
        vector_store=vector_store,
        service_context=service_context
    )

    # temp_dir.cleanup() # delete document temp dir

    print(f"Vector embeddings created in {time.time() - start_time} seconds.")

    response = {
        'statusCode': 200,
        'status': 'Chroma embedding complete',
    }
    return response

In [170]:
def get_index_from_vector_db(index_name):
    
    try:
        # initialize client
        db = chromadb.HttpClient(host=CHROMADB_HOST, port=8000)
    except Exception as e:
        print('<<< get_index_from_vector_db() >>> Could not connect to database!\n', e)
        return None, None
    
    # get collection and embedding size
    try:
        chroma_collection = db.get_collection(index_name)
        doc_size = chroma_collection.count()
        print('Computing knowledge base size...', doc_size)
    except Exception as e:
        print(e)
        return None, None

    start_time = time.time()
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    # Experimented settings for large docs versus small. Don't change except you have tested extensively!!!
    # context_window=65000 if doc_size>200 else 16384
    context_window=32000 if doc_size>300 else 16000
    embed_model = GeminiEmbedding(api_key=GOOGLE_API_KEY)
    llm = Gemini(api_key=GOOGLE_API_KEY, model='models/gemini-pro', temperature=0)
    print_msg = "Using Gemini Pro..."
    print(print_msg)

    # node_parser = SimpleNodeParser.from_defaults(
    #     text_splitter=TokenTextSplitter(chunk_size=1024, chunk_overlap=20)
    #     )

    service_context = ServiceContext.from_defaults(
        llm=llm,
        context_window=context_window, 
        embed_model=embed_model,
        chunk_size=1024,
        chunk_overlap=20
    )

    print('Retrieving knowledge base index from ChromaDB...')
    index = VectorStoreIndex.from_vector_store(
        vector_store=vector_store, 
        storage_context=storage_context,
        service_context=service_context
    )

    print(f'Index retrieved from ChromaDB in {time.time() - start_time} seconds.')
    return index, doc_size

In [315]:
def get_index_from_vector_db_google(index_name):
    
    start_time = time.time()
    vector_store = GoogleVectorStore.from_corpus(corpus_id=index_name)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    # doc_size = len(list(genaix.get_document(corpus_id=index_name, client=credentials)))
    # print(doc_size)

    # context_window=32000 if doc_size>300 else 16000
    embed_model = GeminiEmbedding(api_key=GOOGLE_API_KEY)
    llm = Gemini(api_key=GOOGLE_API_KEY, model='models/gemini-pro', temperature=0)
    print_msg = "Using Gemini Pro..."
    print(print_msg)
 
    # node_parser = SimpleNodeParser.from_defaults(
    #     text_splitter=TokenTextSplitter(chunk_size=1024, chunk_overlap=20)
    #     )

    service_context = ServiceContext.from_defaults(
        llm=llm,
        # context_window=context_window, 
        embed_model=embed_model,
        chunk_size=1024,
        chunk_overlap=20
    )

    print('Retrieving knowledge base index from Google vector store...')
    index = VectorStoreIndex.from_vector_store(
        vector_store=vector_store, 
        storage_context=storage_context,
        service_context=service_context
    )

    doc_size = len(list(index.index_struct.nodes_dict.values()))
    print(doc_size)

    print(f'Index retrieved from ChromaDB in {time.time() - start_time} seconds.')
    return index, doc_size

In [10]:
def parse_choice_select_answer_fn(
    answer: str, num_choices: int, raise_error: bool = False
):
    """Default parse choice select answer function."""
    answer_lines = answer.split("\n")
    # print(answer_lines)
    answer_nums = []
    answer_relevances = []
    for answer_line in answer_lines:
        line_tokens = answer_line.split(",")
        if len(line_tokens) != 2:
            if not raise_error:
                continue
            else:
                raise ValueError(
                    f"Invalid answer line: {answer_line}. "
                    "Answer line must be of the form: "
                    "answer_num: <int>, answer_relevance: <float>"
                )
        if len(line_tokens[0].split(":"))>1 and line_tokens[0].split(":")[1].strip().isdigit():
            answer_num = int(line_tokens[0].split(":")[1].strip())
            if answer_num > num_choices:
                continue
            answer_nums.append(answer_num)
            answer_relevances.append(float(line_tokens[1].split(":")[1].strip()))
    # print(answer_nums)
    return answer_nums, answer_relevances

In [9]:
def postprocessor_args(doc_size):
    if doc_size<30:
        return None
    
    print('Optimising context information...')
    
    # fastest postprocessor
    cohere_rerank = CohereRerank(api_key=COHERE_RERANK_KEY, top_n=30)

    # slower postprocessor
    embed_model = GeminiEmbedding(api_key=GOOGLE_API_KEY)
    service_context = ServiceContext.from_defaults(llm=None, embed_model=embed_model, chunk_size=256, chunk_overlap=20) # use llama_index default MockLLM (faster)

    rank_postprocessor = LLMRerank(
        choice_batch_size=10, top_n=100,
        service_context=service_context,
        parse_choice_select_answer_fn=parse_choice_select_answer_fn
    ) \
        if doc_size>100 \
            else None
    
    # node postprocessors run in the specified order
    node_postprocessors = [
        rank_postprocessor,
        cohere_rerank,
    ] \
        if doc_size>100 \
            else [cohere_rank]

    return node_postprocessors

In [331]:
def answer_query_stream(query, index_name, chat_history, prompt_style):

    index, doc_size = get_index_from_vector_db(index_name)
    # prompt_header = prompt_style()

    if index is None:
        response = "Requested information not found"
        return response
    else:
        node_postprocessors = postprocessor_args(doc_size)
        # similarity_top_k = 50 if doc_size>500 else 10
        similarity_top_k = 200 if doc_size>200 else doc_size
        chat_engine = index.as_chat_engine(chat_mode="context", 
                                            memory=chat_history,
                                            system_prompt=prompt_style, 
                                            similarity_top_k=similarity_top_k,
                                            verbose=True, 
                                            # streaming=True,
                                            function_call="query_engine_tool",
                                            node_postprocessors=node_postprocessors
                                            )

        message_body = f"""\nUse the tool to answer:\n{query}\n"""
        response = chat_engine.chat(message_body)
        # print(get_formatted_sources(response) if response.source_nodes else None)
        
        if response is None:
            print("Index retrieved but cannot stream response...")
            chat_response = "I'm sorry I couldn't find an answer to the requested information in your knowledge base. Please rephrase your question and try again."
            # for token in chat_response.split():
            #     print(token, end=" ")
            #     yield f"""{token} """
            return chat_response
        else:
            print('Starting response stream...\n...........................\n...........................')
            # return response.response
            # for token in response.response_gen:
            #     print(token, end="")
            #     yield f"""{token}"""
            return response.response

In [12]:

def get_formatted_sources(response, length=100, trim_text=True) -> str:
    """Get formatted sources text."""
    from llama_index.utils import truncate_text
    texts = []
    for source_node in response.source_nodes:
        fmt_text_chunk = source_node.node.get_content()
        if trim_text:
            fmt_text_chunk = truncate_text(fmt_text_chunk, length)
        # node_id = source_node.node.node_id or "None"
        node_id = source_node.node.metadata['page_label'] or "None"
        source_text = f"> Source (Page no: {node_id}): {fmt_text_chunk}"
        texts.append(source_text)
    return "\n\n".join(texts)

In [141]:
def semantic_prompt_style(): 

    prompt_header = f"""Your name is Alpha, a highly intelligent system for conversational business intelligence.
    Your task is to use the provided knowledege base, containing semantic models of a business dataset,
    to determine if my question can be answered based on the semantic knowledge. 
    If the semantic knowledge contains the parameter(s) useful for answering my question, respond with a Yes, and No otherwise.
    """

    return prompt_header   

In [328]:
def query_gen_prompt_style(): 

    with open("./assistants/mf_few_shot.txt", "r") as f:
        # print(file)
        few_shot_examples = f.read()

    prompt_header = f"""Your name is Alpha, a highly intelligent system for 
    conversational business intelligence. As an SQL expert, your goal is to use the provided knowledege base, 
    containing metrics and semantic models of a business dataset, to generate a MetricFlow query command needed to answer the question.
    The general syntax for the MetricFlow query command is:
    `query --metrics <measure(s)> --group-by <table-1__dimension-1, table-1__dimension-2,..., table-n__dimension-n> --limit <int> --order <table__dimension> --where <condition>`
    
    To generate the correct MetricFlow query command, determine the following:
    1. Which metrics are needed to answer the question?
    2. Which tables contain the required data?
    3. Which dimensions in the tables contain the required data?
    4. Do the results need to be filtered by a specific condition?
    5. Do the results need to be ordered by a specific dimension?
    6. Is there a limit on the number of records requested?

    Here's some examples of how a MetricFlow query command is generated using the information in the knowledge base.
    {few_shot_examples}

    The measures, dimensions, tables and other parameters referenced in the above examples are 
    obtained from the knowledege base provided below. Following these examples, generate a single-line query command that answers the question. 
    Return only this command or return "Null" if the provided information is not sufficient to answer the question.
    """

    return prompt_header   

In [391]:
def returned_data_prompt_style():

    prompt_header = f"""Your name is Alpha, a highly intelligent system for 
    conversational business intelligence. Your task is to use the entirety of the 
    provided data, which has been fetched from a database, to answer my question.
    """

    return prompt_header   

In [383]:
def llm_run_query(llm_query_input: str):
    output = subprocess.run("mf "+ str(llm_query_input), 
                            cwd="./semantics/",
                            # shell=True,
                            # text=True,
                            capture_output=True,)
    print(output.stdout)
    return output.stdout

In [375]:
def fetch_data(user_query, llm_query_input):
    from llama_index.schema import Document

    data = llm_run_query(llm_query_input)
    doc = Document(text=str(data))
    # doc.doc_id = "llm-output"
    # node_parser = SimpleNodeParser.from_defaults(
    #     chunk_size=1024,
    #     chunk_overlap=20
    #     )

    service_context = ServiceContext.from_defaults(
        llm=Gemini(model='models/gemini-pro', temperature=0),
        embed_model=GeminiEmbedding(),
        # node_parser=node_parser,
        chunk_size=1024,
        chunk_overlap=20
        )
    
    index = VectorStoreIndex.from_documents(doc, service_context=service_context)

    chat_engine = index.as_chat_engine(chat_mode="context", 
                                        memory=chat_history,
                                        system_prompt=returned_data_prompt_style(), 
                                        similarity_top_k=30,
                                        verbose=True, 
                                        # streaming=True,
                                        function_call="query_engine_tool",
                                        )

    message_body = f"""\nUse the tool to answer:\n{user_query}\n"""
    response = chat_engine.chat(message_body)
    
    if response is None:
        print("Index retrieved but couldn't stream response...")
        chat_response = "I'm sorry I couldn't find an answer to the requested information in your knowledge base. Please rephrase your question and try again."
        # for token in chat_response.split():
        #     print(token, end=" ")
        #     yield f"""{token} """
        return chat_response
    else:
        print('Starting response stream...\n...........................\n...........................')
        return response.response

## Embedding

#### Semantic layer embedding

In [320]:
project_name = "semantic-layer-6"
index_name = project_name.lower() #+ '_embeddings'

In [321]:
generate_vector_embedding(index_name, './semantics/models/semantic_models/')

Connecting to Chroma database...
Creating vector embeddings......
Index name:  semantic-layer-6
semantics\models\semantic_models\customers.yml
semantics\models\semantic_models\metricflow_time_spine.sql
semantics\models\semantic_models\orders.yml
semantics\models\semantic_models\products.yml
('filename', 'semantics\\models\\semantic_models\\customers.yml')
('file_type', '.yml')
('_node_content', '{"id_": "330aee08-4639-47f2-9690-e07c190f2e36", "embedding": null, "metadata": {"filename": "semantics\\\\models\\\\semantic_models\\\\customers.yml", "file_type": ".yml"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "f17a0c27-60a2-4e7c-bc67-0e34c5b96ad8", "node_type": "4", "metadata": {"filename": "semantics\\\\models\\\\semantic_models\\\\c

{'statusCode': 200, 'status': 'Chroma embedding complete'}

In [322]:
generate_vector_embedding_google(index_name, './semantics/models/semantic_models/')

ResourceExhausted: 429 Project has the maximum number of Corpora (5).

#### Metric layer embedding

In [103]:
project_name = "metric_layer"
index_name = project_name.lower() + '_embeddings'

In [91]:
generate_vector_embedding(index_name, './semantics/models/metrics/')

Connecting to Chroma database...
Creating vector embeddings......
Index name:  metric_layer_embeddings
semantics\models\metrics\discount_amount_average.yml
semantics\models\metrics\discount_amount_total.yml
semantics\models\metrics\price_average.yml
semantics\models\metrics\price_total.yml
semantics\models\metrics\product_cost_average.yml
semantics\models\metrics\product_cost_total.yml
semantics\models\metrics\quantity_average.yml
semantics\models\metrics\quantity_total.yml
semantics\models\metrics\revenue_average.yml
semantics\models\metrics\revenue_total.yml
('filename', 'semantics\\models\\metrics\\discount_amount_average.yml')
('file_type', '.yml')
('_node_content', '{"id_": "11388b69-5857-4ae0-af93-ab89040171d3", "embedding": null, "metadata": {"filename": "semantics\\\\models\\\\metrics\\\\discount_amount_average.yml", "file_type": ".yml"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "exclud

{'statusCode': 200, 'status': 'Chroma embedding complete'}

In [108]:
generate_vector_embedding(index_name, './semantics/models/marts/')

Connecting to Chroma database...
Creating vector embeddings......
Index name:  metric_layer_embeddings
semantics\models\marts\customers.sql
semantics\models\marts\orders.sql
semantics\models\marts\products.sql
('filename', 'semantics\\models\\marts\\customers.sql')
('file_type', '.sql')
('_node_content', '{"id_": "5cbc9e54-2cb2-4e3b-9b89-5481fa69e991", "embedding": null, "metadata": {"filename": "semantics\\\\models\\\\marts\\\\customers.sql", "file_type": ".sql"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "1864f422-3536-4520-a0ee-b3502ade74a6", "node_type": "4", "metadata": {"filename": "semantics\\\\models\\\\marts\\\\customers.sql", "file_type": ".sql"}, "hash": "7f9e93ad9eaa4628059a72fd5a30c3bd3c92af45eb810c41c7fa5ae29280598d",

{'statusCode': 200, 'status': 'Chroma embedding complete'}

#### Semantic and metric layers embedding

In [199]:
project_name = "semantic_metric_layer"
index_name = project_name.lower() + '_embeddings'

In [147]:
for i in ["marts","metrics","semantic_models"]:
    generate_vector_embedding(index_name, f'./semantics/models/{i}/')

Connecting to Chroma database...
Creating vector embeddings......
Index name:  semantic_metric_layer_embeddings
semantics\models\marts\customers.sql
semantics\models\marts\orders.sql
semantics\models\marts\products.sql
('filename', 'semantics\\models\\marts\\customers.sql')
('file_type', '.sql')
('_node_content', '{"id_": "126c0ec9-1157-448e-b50d-bc1e1ab036ae", "embedding": null, "metadata": {"filename": "semantics\\\\models\\\\marts\\\\customers.sql", "file_type": ".sql"}, "excluded_embed_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "excluded_llm_metadata_keys": ["file_name", "file_type", "file_size", "creation_date", "last_modified_date", "last_accessed_date"], "relationships": {"1": {"node_id": "c4ee99b2-11e6-449c-975d-1195d11bc436", "node_type": "4", "metadata": {"filename": "semantics\\\\models\\\\marts\\\\customers.sql", "file_type": ".sql"}, "hash": "7f9e93ad9eaa4628059a72fd5a30c3bd3c92af45eb810c41c7fa5ae29

## Retrieval and Chat

In [332]:
def message_thread(memory, reset=None):
    if reset:
        new_conversation_state = init_chat_history()
        return new_conversation_state
    return memory

def init_chat_history():
    new_conversation_state = ChatMemoryBuffer.from_defaults(token_limit=50000)
    return new_conversation_state


chat_history = init_chat_history()

In [333]:
query = "total revenue?"
answer_query_stream(query, index_name, chat_history, semantic_prompt_style())
    # for token in response.response_gen:
    # print(token, end="")

Computing knowledge base size... 17
Using Gemini Pro...
Retrieving knowledge base index from ChromaDB...
Index retrieved from ChromaDB in 1.403778076171875 seconds.
Starting response stream...
...........................
...........................


'Yes'

## Function Calling

In [380]:
chat_history = init_chat_history()

In [399]:
project_name = "semantic_metric_layer"
index_name = project_name.lower() + '_embeddings'

query = 'how much did we make in January?'
mf_command = answer_query_stream(query, index_name, chat_history, query_gen_prompt_style())
mf_command

Computing knowledge base size... 17
Using Gemini Pro...
Retrieving knowledge base index from ChromaDB...
Index retrieved from ChromaDB in 2.2684946060180664 seconds.
Starting response stream...
...........................
...........................


'query --metrics revenue_total --group-by sales_key__order_date --where "strftime(\'%m\', sales_key__order_date) = \'01\'"'

In [386]:
fetch_data(query, mf_command)

b'\r\r- Initiating query\xe2\x80\xa6\r\r\\ Initiating query\xe2\x80\xa6\r\r| Initiating query\xe2\x80\xa6\r\r/ Initiating query\xe2\x80\xa6\r\r- Initiating query\xe2\x80\xa6\r\r\\ Initiating query\xe2\x80\xa6\r\r| Initiating query\xe2\x80\xa6\r\r/ Initiating query\xe2\x80\xa6\r\r- Initiating query\xe2\x80\xa6\r\r\\ Initiating query\xe2\x80\xa6\r\r| Initiating query\xe2\x80\xa6\r\r/ Initiating query\xe2\x80\xa6\r\r- Initiating query\xe2\x80\xa6\r\r\\ Initiating query\xe2\x80\xa6\r\r| Initiating query\xe2\x80\xa6\r\r/ Initiating query\xe2\x80\xa6\r\r- Initiating query\xe2\x80\xa6\r\r\\ Initiating query\xe2\x80\xa6\r\r| Initiating query\xe2\x80\xa6\r\r/ Initiating query\xe2\x80\xa6\r\r- Initiating query\xe2\x80\xa6\r\r\\ Initiating query\xe2\x80\xa6\r\r| Initiating query\xe2\x80\xa6\r\r/ Initiating query\xe2\x80\xa6\r\r- Initiating query\xe2\x80\xa6\r\r\\ Initiating query\xe2\x80\xa6\r\r| Initiating query\xe2\x80\xa616:48:46  BigQuery adapter: https://console.cloud.google.com/bigquery?pro

AttributeError: 'tuple' object has no attribute 'get_doc_id'

## Evaluation

In [3]:
import os
os.environ["OPENAI_API_KEY"] = ""

In [4]:
from trulens_eval import Feedback, Tru, TruLlama
from trulens_eval.feedback import Groundedness
from trulens_eval.feedback.provider.openai import OpenAI

tru = Tru()

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.


In [405]:
def answer_query_stream(query, index_name):

    index, doc_size = get_index_from_vector_db(index_name)

    # node_postprocessors = postprocessor_args(doc_size)
    similarity_top_k = 200 if doc_size>200 else doc_size
    chat_engine = index.as_chat_engine(chat_mode="context", 
                                        system_prompt=query_gen_prompt_style(), 
                                        similarity_top_k=similarity_top_k,
                                        verbose=True, 
                                        # streaming=True,
                                        function_call="query_engine_tool",
                                        )

    message_body = f"""\nUse the tool to answer:\n{query}\n"""
    response = chat_engine.chat(message_body)
    return index, response.response

In [402]:
query = 'how any orders have been shipped'
# mf_command = answer_query_stream(query, "semantic_metric_layer_embeddings", chat_history, query_gen_prompt_style())

index, response = answer_query_stream(query, 'semantic_metric_layer_embeddings')
print(response)

Computing knowledge base size... 17
Using Gemini Pro...
Retrieving knowledge base index from ChromaDB...
Index retrieved from ChromaDB in 1.0494778156280518 seconds.
query --metrics quantity_total --group-by order_date --where "ship_date IS NOT NULL"


In [403]:
import numpy as np

# Initialize provider class
openai = OpenAI()

grounded = Groundedness(groundedness_provider=OpenAI())

# Define a groundedness feedback function
f_groundedness = Feedback(grounded.groundedness_measure_with_cot_reasons).on(
    TruLlama.select_source_nodes().node.text.collect()
    ).on_output(
    ).aggregate(grounded.grounded_statements_aggregator)

# Question/answer relevance between overall question and answer.
f_qa_relevance = Feedback(openai.relevance).on_input_output()

# Question/statement relevance between question and each context chunk.
f_qs_relevance = Feedback(openai.qs_relevance).on_input().on(
    TruLlama.select_source_nodes().node.text
    ).aggregate(np.mean)

✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In qs_relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In qs_relevance, input statement will be set to __record__.app.query.rets.source_nodes[:].node.text .


In [404]:
tru_query_engine_recorder = TruLlama(query_engine,
    app_id='LlamaIndex_App1',
    feedbacks=[f_groundedness, f_qa_relevance, f_qs_relevance])



In [413]:
# or as context manager
query_engine = index.as_query_engine(system_prompt=query_gen_prompt_style(),
                                        similarity_top_k=10,
                                        function_call="query_engine_tool",)
with tru_query_engine_recorder as recording:
    query_engine.query("how any orders have been shipped?")

A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x2c177c93370 is calling an instrumented method <function BaseQueryEngine.query at 0x000002C168B75BD0>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x2c174543d30) using this function.


A new object of type <class 'llama_index.query_engine.retriever_query_engine.RetrieverQueryEngine'> at 0x2c177c93370 is calling an instrumented method <function RetrieverQueryEngine.retrieve at 0x000002C16D620DC0>. The path of this call may be incorrect.
Guessing path of new object is app based on other object (0x2c174543d30) using this function.
A new object of type <class 'llama_index.indices.vector_store.retrievers.retriever.VectorIndexRetriever'> at 0x2c175a84340 is calling an instrumented method <function BaseRetriever.retrieve at 0x000002C168B74E50>. The path of this call may be incorrect.
Guessing path of new object is app.retriever based on other object (0x2c174543340) using this function.
A new object of type <class 'llama_index.response_synthesizers.compact_and_refine.CompactAndRefine'> at 0x2c175b5f0d0 is calling an instrumented method <function BaseSynthesizer.synthesize at 0x000002C1698593F0>. The path of this call may be incorrect.
Guessing path of new object is app._resp

In [414]:
tru.get_records_and_feedback(app_ids=[])[0] 

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,relevance,groundedness_measure_with_cot_reasons,qs_relevance,relevance_calls,groundedness_measure_with_cot_reasons_calls,qs_relevance_calls,latency,total_tokens,total_cost
0,LlamaIndex_App1,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_22762836f0873b6288489f235c54e5bf,"""how any orders have been shipped?""","""This context does not mention anything about ...",-,"{""record_id"": ""record_hash_22762836f0873b62884...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2023-12-22T18:17:24.780966"", ""...",2023-12-22T18:17:31.556437,1.0,0.0,0.22,[{'args': {'prompt': 'how any orders have been...,[{'args': {'source': ['WITH products AS (\n ...,[{'args': {'question': 'how any orders have be...,6,0,0.0
1,LlamaIndex_App1,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_8ecd255e7ca1d761622b4e5e6c1efe72,"""how any orders have been shipped?""","""This context does not mention anything about ...",-,"{""record_id"": ""record_hash_8ecd255e7ca1d761622...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2023-12-22T18:17:38.947006"", ""...",2023-12-22T18:17:45.110991,1.0,0.0,0.2,[{'args': {'prompt': 'how any orders have been...,[{'args': {'source': ['WITH products AS (\n ...,[{'args': {'question': 'how any orders have be...,6,0,0.0
2,LlamaIndex_App1,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_be64e3d6ec56d48f87ecce7f72ec5e59,"""how any orders have been shipped?""","""This context does not mention anything about ...",-,"{""record_id"": ""record_hash_be64e3d6ec56d48f87e...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2023-12-22T18:24:51.393517"", ""...",2023-12-22T18:24:58.720887,1.0,,,[{'args': {'prompt': 'how any orders have been...,,,7,0,0.0


In [None]:
'''query --metrics revenue_total,quantity_total --group-by order_date --where "order_date >= DATE('now', '-7 days')"'''
'''query --metrics quantity_total,revenue_total --group-by product__product_name --limit 10 --order -quantity_total --where "product__sell_start_date >= '2022-12-01'"'''

In [66]:
import subprocess

In [74]:
output = subprocess.run("mf query --metrics revenue_total --group-by metric_time --start-time '2017-08-01' --end-time '2017-08-31' >" +  "../retrieval/data/output.txt", 
                cwd="../semantics/",
                shell=True, text=True,
                capture_output=True)

In [73]:
print(output.stdout)




In [60]:
decompose_metricflow_command("mf query --metrics revenue_total --group-by revenue_total,metric_time --start-time '2017-08-01' --end-time '2017-08-31' --where 'product_order>6' ")

<re.Match object; span=(9, 32), match='--metrics revenue_total'>


{'metrics': ['revenue_total'],
 'dimensions': ['revenue_total', 'metric_time'],
 'start_time': '2017-08-01',
 'end_time': '2017-08-31',
 'where': 'product_order>6'}

In [61]:
from metricflow import MetricFlowClient as mfc
from warnings import f

In [52]:
import re

def decompose_metricflow_command(command_string):
   """Decomposes a MetricFlow command string into input parameters for a query function.

   Args:
       command_string: The MetricFlow command string to parse.

   Returns:
       A dictionary containing the extracted parameters:
           metrics: A list of metrics.
           dimensions: A list of dimensions.
           limit: An integer for the limit (if present).
           start_time: The start time (if present).
           end_time: The end time (if present).
           where: The WHERE clause (if present).
           order: The ORDER clause (if present).
   """

   params = {}

   # Extract metrics
   metrics_match = re.search(r"--metrics (\S+)", command_string)
   print(metrics_match)
   if metrics_match:
       params["metrics"] = metrics_match.group(1).split(",")

   # Extract dimensions
   dimensions_match = re.search(r"--group-by (\S+)", command_string)
   if dimensions_match:
       params["dimensions"] = dimensions_match.group(1).split(",")

   # Extract limit
   limit_match = re.search(r"--limit (\d+)", command_string)
   if limit_match:
       params["limit"] = int(limit_match.group(1))

   # Extract start_time
   start_time_match = re.search(r"--start-time '(\S+)'", command_string)
   if start_time_match:
       params["start_time"] = start_time_match.group(1)

   # Extract end_time
   end_time_match = re.search(r"--end-time '(\S+)'", command_string)
   if end_time_match:
       params["end_time"] = end_time_match.group(1)

   # Extract where clause
   where_match = re.search(r"--where '(.+)'", command_string)
   if where_match:
       params["where"] = where_match.group(1)

   # Extract order clause
   order_match = re.search(r"--order (\S+)", command_string)
   if order_match:
       params["order"] = order_match.group(1)

   return params

In [435]:
llm_query_input = "query --metrics quantity_total --group-by sales_key__order_date --start-time '2017-08-01' --end-time '2017-08-30'"
output = subprocess.run("mf "+ llm_query_input, 
                        cwd="./semantics/",
                        shell=True,
                        text=True,
                        capture_output=True)

In [436]:
print(output.stdout)




- Initiating query…

\ Initiating query…

| Initiating query…

/ Initiating query…

- Initiating query…

\ Initiating query…

| Initiating query…

/ Initiating query…

- Initiating query…

\ Initiating query…

| Initiating query…

/ Initiating query…

- Initiating query…

\ Initiating query…

| Initiating query…

/ Initiating query…

- Initiating query…

\ Initiating query…

| Initiating query…

/ Initiating query…

- Initiating query…

\ Initiating query…

| Initiating query…

/ Initiating query…

- Initiating query…

\ Initiating query…

| Initiating query…

/ Initiating query…
v Success 🦄 - query completed after 3.49 seconds
| sales_key__order_date__day   |   quantity_total |
|:-----------------------------|-----------------:|
| 2017-08-02                   |                8 |
| 2017-08-03                   |                6 |
| 2017-08-04                   |                9 |
| 2017-08-05                   |                7 |
| 2017-08-06                   |               12 