[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vectara/example-notebooks/blob/main/notebooks/retrieval-demo.ipynb)

In [None]:
from langchain.embeddings import OpenAIEmbeddings, CohereEmbeddings
from langchain.chat_models.openai import ChatOpenAI

from llama_index import ServiceContext
from llama_index.llms import OpenAI
from llama_index.node_parser import SentenceWindowNodeParser
from llama_index import VectorStoreIndex
from llama_index.indices import VectaraIndex
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor
from llama_index.schema import Document

from deep_translator import GoogleTranslator
from functools import cache
from ratelimiter import RateLimiter

import pandas as pd
from IPython.display import display
import time
import json
import requests

from pathlib import Path
from llama_hub.file.unstructured.base import UnstructuredReader

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

# Setup

In [2]:
# Path of the PDF that is loaded, indexed and queried in the cells below.
file_name = '../data/llama2.pdf'
# Chat model handed to the llama_index ServiceContext in get_index();
# temperature is pinned to 0.0.
llm = ChatOpenAI(model_name='gpt-3.5-turbo-16k', temperature=0.0)

In [3]:
# The default pyPDF loader fails badly on llama2.pdf, so we use UnstructuredReader to load the text properly.
def read_file(fname):
    """Load a file with ``UnstructuredReader`` and return the resulting documents.

    Args:
        fname: Location of the file to load; it is wrapped in ``Path`` before
            being handed to the reader.

    Returns:
        Whatever ``UnstructuredReader.load_data`` produces for that file.
    """
    return UnstructuredReader().load_data(file=Path(fname))
    
def get_index(documents, embedding):
    """Build a ``VectorStoreIndex`` over sentence-window nodes of ``documents``.

    The documents are split by a ``SentenceWindowNodeParser`` configured with
    ``window_size=3``; the window is stored under the ``"window"`` metadata key
    and the original sentence under ``"original_text"``.

    Args:
        documents: Documents to split into nodes and index.
        embedding: Embedding model passed to the service context as
            ``embed_model``.

    Returns:
        The ``VectorStoreIndex`` built from the parsed nodes. The module-level
        ``llm`` is used as the service context's LLM.
    """
    sentence_parser = SentenceWindowNodeParser.from_defaults(
        window_size=3,
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )
    service_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embedding,
        node_parser=sentence_parser,
    )
    return VectorStoreIndex(
        sentence_parser.get_nodes_from_documents(documents),
        service_context=service_context,
    )

def get_vectara_index(documents):
    """Return a ``VectaraIndex`` created from ``documents``.

    Args:
        documents: Documents passed straight to ``VectaraIndex.from_documents``.

    Returns:
        The index object returned by ``VectaraIndex.from_documents``.
    """
    return VectaraIndex.from_documents(documents)

def get_answer(index, query):
    """Query ``index`` and return the response as a string.

    The query engine retrieves the top 5 matches and runs them through a
    ``MetadataReplacementPostProcessor`` targeting the ``"window"`` metadata
    key before the answer is produced.

    Args:
        index: Index exposing ``as_query_engine``.
        query: Question text to ask.

    Returns:
        ``str()`` of the query engine's response.
    """
    window_postprocessor = MetadataReplacementPostProcessor(
        target_metadata_key="window"
    )
    engine = index.as_query_engine(
        similarity_top_k=5,
        node_postprocessors=[window_postprocessor],
    )
    return str(engine.query(query))

def get_vectara_answer(index, query):
    """Query a Vectara index and return the response as a string.

    The query engine is created with ``similarity_top_k=5`` and
    ``n_sentences_before`` / ``n_sentences_after`` both set to 3.

    Args:
        index: Index exposing ``as_query_engine``.
        query: Question text to ask.

    Returns:
        ``str()`` of the query engine's response.
    """
    engine = index.as_query_engine(
        similarity_top_k=5,
        n_sentences_before=3,
        n_sentences_after=3,
    )
    answer = engine.query(query)
    return str(answer)

def get_context(index, query_tr):
    """Return the passages retrieved from ``index`` for ``query_tr``.

    The query engine retrieves the top 5 matches and applies a
    ``MetadataReplacementPostProcessor`` targeting the ``"window"`` metadata
    key, the same configuration ``get_answer`` uses.

    Args:
        index: Index exposing ``as_query_engine``.
        query_tr: The (possibly translated) query text to run.

    Returns:
        A list with one ``(original_text, window)`` tuple per source node of
        the response, both values read from the node's metadata.

    Raises:
        KeyError: If a source node lacks the ``'original_text'`` or
            ``'window'`` metadata entry.
    """
    query_engine = index.as_query_engine(
        similarity_top_k=5,
        node_postprocessors=[
            MetadataReplacementPostProcessor(target_metadata_key="window")
        ],
    )
    # Query with the function's own argument; the previous code read a
    # module-level ``query`` variable and ignored ``query_tr`` entirely.
    response = query_engine.query(query_tr)
    return [
        (hit.node.metadata['original_text'], hit.node.metadata['window'])
        for hit in response.source_nodes
    ]

def get_vectara_context(index, query_tr):
    """Run ``query_tr`` against a Vectara index and return the raw response.

    The query engine is created with ``similarity_top_k=5`` and
    ``n_sentences_before`` / ``n_sentences_after`` both set to 1.

    Args:
        index: Index exposing ``as_query_engine``.
        query_tr: The (possibly translated) query text to run.

    Returns:
        The response object returned by the query engine (callers read its
        ``source_nodes``).
    """
    query_engine = index.as_query_engine(
        similarity_top_k=5,
        n_sentences_before=1,
        n_sentences_after=1,
    )
    # Query with the function's own argument; the previous code read a
    # module-level ``query`` variable and ignored ``query_tr`` entirely.
    return query_engine.query(query_tr)

In [4]:
# Shared limiter guarding the translation calls (configured as max_calls=1, period=2).
rate_limiter = RateLimiter(max_calls=1, period=2)


@cache
def translate(s, lang):
    """Translate ``s`` into ``lang`` with ``GoogleTranslator``.

    The source language is auto-detected (``source='auto'``). Results are
    memoized by ``functools.cache`` on the ``(s, lang)`` pair, so a repeated
    request does not reach the translator again. Uncached calls run inside
    the shared ``rate_limiter``.

    Args:
        s: Text to translate.
        lang: Target language code passed as ``target``.

    Returns:
        The value returned by ``GoogleTranslator.translate``.
    """
    with rate_limiter:
        translator = GoogleTranslator(source='auto', target=lang)
        return translator.translate(s)

In [5]:
# English questions about the indexed document; the evaluation loop asks each
# one in every language listed in `languages`.
queries = [
    'What learning rate was used for pre-training?',
    'Was RLHF used?',
    'which models are released for commercial use?',
    'was red teaming used?', 
]

In [6]:
# Language code -> display name. The codes (keys) are what gets passed to
# translate() as the target language and stored in the results' 'lang' column.
languages = {
    'en': 'English',
    'iw': 'Hebrew',
    'tr': 'Turkish',
    'ar': 'Arabic',
    'de': 'German',
    'ur': 'Urdu',
    'ja': 'Japanese',
}

In [7]:
# Load the PDF once and reuse the same documents for all three indexes.
documents = read_file(file_name)

# Two sentence-window vector indexes that differ only in the embedding model,
# plus a Vectara index built from the same documents.
openai_index = get_index(documents, OpenAIEmbeddings())
cohere_index = get_index(documents, CohereEmbeddings())
vectara_index = get_vectara_index(documents)

[nltk_data] Downloading package punkt to /Users/ofer/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/ofer/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [8]:
# Rows of [vendor, lang, question, answer], one per (vendor, language, query).
data = []

for lang in languages:
    for q in queries:
        # Fixed 5-second pause before each query (presumably to stay clear of
        # API rate limits -- TODO confirm).
        time.sleep(5)

        # English queries are used verbatim; all other languages are translated.
        q_tr = q if lang == 'en' else translate(q, lang)
        print(f"Lang = {lang}, query = {q_tr}")

        # Ask the same (translated) question of each of the three indexes.
        r = get_answer(openai_index, q_tr)
        data.append(['openai', lang, q_tr, r])

        r = get_answer(cohere_index, q_tr)
        data.append(['cohere', lang, q_tr, r])

        r = get_vectara_answer(vectara_index, q_tr)
        data.append(['vectara', lang, q_tr, r])

df = pd.DataFrame(data, columns=['vendor', 'lang', 'question', 'answer'])

Lang = en, query = What learning rate was used for pre-training?
Lang = en, query = Was RLHF used?
Lang = en, query = which models are released for commercial use?
Lang = en, query = was red teaming used?
Lang = iw, query = באיזה קצב למידה נעשה שימוש בהכשרה מוקדמת?
Lang = iw, query = האם נעשה שימוש ב-RLHF?
Lang = iw, query = אילו דגמים משוחררים לשימוש מסחרי?
Lang = iw, query = האם נעשה שימוש בצוות אדום?
Lang = tr, query = Ön eğitim için hangi öğrenme oranı kullanıldı?
Lang = tr, query = RLHF kullanıldı mı?
Lang = tr, query = Hangi modeller ticari kullanıma sunuluyor?
Lang = tr, query = kırmızı takım kullanıldı mı?
Lang = ar, query = ما هو معدل التعلم الذي تم استخدامه للتدريب المسبق؟
Lang = ar, query = هل تم استخدام RLHF؟
Lang = ar, query = ما هي النماذج التي تم إصدارها للاستخدام التجاري؟
Lang = ar, query = هل تم استخدام الفريق الأحمر؟
Lang = de, query = Welche Lernrate wurde für das Vortraining verwendet?
Lang = de, query = Wurde RLHF verwendet?
Lang = de, query = Welche Modelle sind f

In [9]:
# Save the collected answers to CSV, omitting the DataFrame's row index.
df.to_csv('retrieval-demo.csv', index=False)

In [10]:
def summary_per_lang(lang: str):
    """Return a question-by-vendor table of answers for one query language.

    Rows of the module-level ``df`` whose ``'lang'`` equals ``lang`` are
    pivoted so that each question is a row and each vendor a column. For any
    language other than ``'en'`` every answer is passed through
    ``translate(..., 'en')`` so the table can be read in English.

    Args:
        lang: Language code to select, as stored in ``df['lang']``.

    Returns:
        A DataFrame indexed by ``'question'`` with one column per vendor.
    """
    per_lang = df[df['lang'] == lang].drop('lang', axis=1)
    summary = per_lang.pivot(index='question', columns='vendor', values='answer')

    # English queries were sent untranslated by the collection loop, so their
    # answers do not need a rate-limited round trip through the translator.
    if lang == 'en':
        return summary

    # Use the vendors actually present in the pivot rather than a fixed list,
    # so a missing vendor column does not raise KeyError.
    for col in list(summary.columns):
        summary[col] = summary[col].map(lambda x: translate(x, 'en'))
    return summary

In [11]:
# Answers to the English queries: one row per question, one column per vendor.
summary_per_lang('en')

vendor,cohere,openai,vectara
question,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Was RLHF used?,"Yes, RLHF (Reinforcement Learning from Human Feedback) was used in the study mentioned in the context information.","Yes, RLHF (Reinforcement Learning from Human Feedback) was used in the study mentioned in the context information.","Yes, RLHF (Reinforcement Learning from Human Feedback) was used in the earlier versions of the model, up to RLHF V3."
What learning rate was used for pre-training?,The learning rate used for pre-training is a constant learning rate of 10^-6.,The learning rate used for pre-training is not mentioned in the given context information.,The learning rate used for pre-training was 5 × 10−6 for the 70B parameter Llama 2-Chat and 1 × 10−5 for the rest.
was red teaming used?,"Yes, red teaming was used in the study.","Yes, red teaming was used as a proactive risk identification method in the development of the LLMs.","Yes, red teaming was used in the process."
which models are released for commercial use?,Llama 2 is the model that has been released for commercial use.,Llama 2 is the model that is released for commercial use.,Llama 2 is the model that is released for commercial use.


In [12]:
# Answers to the Hebrew ('iw') queries, translated to English for comparison.
summary_per_lang('iw')

vendor,cohere,openai,vectara
question,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
אילו דגמים משוחררים לשימוש מסחרי?,The context information does not provide any specific information about released models for commercial use.,"Noam Shazeer's ""Fast transformer decoding: One write-head is all you need"" from 2019 and ""Glu variants improve transformer"" from 2020, Gabriel Synnaeve's ""Growing up together: Structured exploration for large action spaces"" from 2019, Yarden Tal, Inbal Magar, and Roy Schwartz's ""Fewer errors, but more stereotypes? The effect of model size on gender bias"" from 2020b, Rico Sennrich, Barry Haddow, and Alexandra Birch's ""Neural machine translation of rare words with subword units"" from 2016, Uri Shaham, Elad Segal, Maor Ivgi, Avia Efrat, Ori Yoran, Adi Haviv, Ankit Gupta, Wenhan Xiong, Mor Geva, Jonathan Berant, and Omer Levy's ""SCROLLS: Standardized CompaRison over long language sequences"" from 2022, Jacob Austin, Augustus Odena, Maxwell Nye, Maarten Bosma, Henryk Michalewski, David Dohan, Ellen Jiang, Carrie Cai, Michael Terry, Quoc Le, and Charles Sutton's ""Program synthesis with large language models"" from 2021, and David Autor and Anna Salomons' ""Is automation labor-displacing? Productivity growth, employment, and the labor share"" are all released for commercial use.",Llama 2 is the model that is released for commercial use.
באיזה קצב למידה נעשה שימוש בהכשרה מוקדמת?,"According to the information in the context, there is no specific information on the rate of use of early training.",The rate at which pretraining is used is not mentioned in the given information.,The rate used in pretraining is a cosine learning rate with an initial learning rate of 2 × 10−5.
האם נעשה שימוש ב-RLHF?,"Yes, there is usage of RLHF mentioned in the context information.","Yes, RLHF (Rejection Sampling fine-tuning) is one of the algorithms explored in the context.","Yes, RLHF (Reinforcement Learning from Human Feedback) is mentioned in the context information. It is described as a powerful strategy for fine-tuning Large Language Models (LLMs) and has been used to align the models' responses more closely with human expectations and preferences. RLHF has been applied in various applications and has shown improvements in the performance of LLMs."
האם נעשה שימוש בצוות אדום?,It is not possible to determine whether a red team was used based on the given information.,"According to the information in the context, there is no specific information or answer to the question of whether a red team was used.","Yes, a red team was used during the process. The Red Team includes more than 350 people, including experts in cyber, election riots, social media information bombardment, law, policy, civil rights, ethics, software engineering, machine learning, AI credibility, and creative writing. The team also includes people from a diverse demographic of social class, gender, ethnicity, and race."


In [13]:
# Answers to the Turkish ('tr') queries, translated to English for comparison.
summary_per_lang('tr')

vendor,cohere,openai,vectara
question,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Hangi modeller ticari kullanıma sunuluyor?,It is not stated which models are available for commercial use.,Llama 2 model is being released to the general public for research and commercial use.,Llama 2 and Llama 2-Chat models are available for commercial use.
RLHF kullanıldı mı?,"Yes, RLHF was used in the study mentioned in the context information.","Yes, RLHF (Reinforcement Learning from Human Feedback) was used in the study mentioned in the context information.","Yes, RLHF (Reinforcement Learning from Human Feedback) was used in the described research. The authors trained successive versions of RLHF models, referred to as RLHF-V1, RLHF-V2, RLHF-V3, RLHF-V4, and RLHF-V5. They explored RLHF fine-tuning using two main algorithms: Proximal Policy Optimization (PPO) and Rejection Sampling fine-tuning."
kırmızı takım kullanıldı mı?,There is no information in the given context about whether the red team was used or not.,"Yes, the red team was used.","Yes, the red team was used."
Ön eğitim için hangi öğrenme oranı kullanıldı?,No information is given about the learning rate.,The learning rate used for pretraining is not specified.,10−6


In [14]:
# Inspect what each index retrieves for the third query, in English.
query = queries[2]

# get_context returns (original_text, window) pairs; print only the original
# sentence of each hit, separated by a '**' line.
c = get_context(openai_index, query)
print("--- OPENAI ---")
print('\n**\n'.join([t[0] for t in c]))

c = get_context(cohere_index, query)
print("--- COHERE ---")
print('\n**\n'.join([t[0] for t in c]))

# The Vectara helper returns the response itself; print each source node's
# text with newlines flattened to spaces.
c = get_vectara_context(vectara_index, query)
print("--- VECTARA ---")
print('\n**\n'.join([t.text.replace('\n', ' ') for t in c.source_nodes]))

--- OPENAI ---
3

We are releasing the following models to the general public for research and commercial use‡:

1. 
**
2 models and others open-source models.


**
Lastly, openly releasing these models consolidates costs and eliminates barriers to entry, allowing small businesses to leverage innovations in LLMs to explore and build text-generation use cases. 
**
Progression of Models. 
**
Collaboration will make these models better and safer. 
--- COHERE ---
2 models and others open-source models.


**
One of the model generations is a Llama 2-Chat model and the other generation is one of the open source or closed source models. 
**
3

We are releasing the following models to the general public for research and commercial use‡:

1. 
**
Lastly, openly releasing these models consolidates costs and eliminates barriers to entry, allowing small businesses to leverage innovations in LLMs to explore and build text-generation use cases. 
**
(2023), as well as closed-source models (Chat- GPT (

In [15]:
# Same inspection as for English, with the third query translated to Hebrew ('iw').
query = translate(queries[2], 'iw')

# Print only the original sentence of each (original_text, window) pair.
c = get_context(openai_index, query)
print("--- OPENAI ---")
print('\n**\n'.join([t[0] for t in c]))

c = get_context(cohere_index, query)
print("--- COHERE ---")
print('\n**\n'.join([t[0] for t in c]))

# Print each Vectara source node's text with newlines flattened to spaces.
c = get_vectara_context(vectara_index, query)
print("--- VECTARA ---")
print('\n**\n'.join([t.text.replace('\n', ' ') for t in c.source_nodes]))

--- OPENAI ---
Noam Shazeer. 
**
Noam Shazeer. 
**
Yarden Tal, Inbal Magar, and Roy Schwartz. 
**
Uri Shaham, Elad Segal, Maor Ivgi, Avia Efrat, Ori Yoran, Adi Haviv, Ankit Gupta, Wenhan Xiong, Mor Geva, Jonathan Berant, and Omer Levy. 
**
David Autor and Anna Salomons. 
--- COHERE ---
Uri Shaham, Elad Segal, Maor Ivgi, Avia Efrat, Ori Yoran, Adi Haviv, Ankit Gupta, Wenhan Xiong, Mor Geva, Jonathan Berant, and Omer Levy. 
**
Kilem L. Gwet. 
**
Yarden Tal, Inbal Magar, and Roy Schwartz. 
**
Noam Shazeer. 
**
Noam Shazeer. 
--- VECTARA ---
We also share novel observations we made during the development of Llama 2 and Llama 2-Chat, such as the emergence of tool usage and temporal organization of knowledge. 3  We are releasing the following models to the general public for research and commercial use‡:  1. Llama 2, an updated version of Llama 1, trained on a new mix of publicly available data.
**
We also share novel observations we made during the development of Llama 2 and Llama 2-Chat, s

In [16]:
# Same inspection as for English, with the third query translated to Turkish ('tr').
query = translate(queries[2], 'tr')

# Print only the original sentence of each (original_text, window) pair.
c = get_context(openai_index, query)
print("--- OPENAI ---")
print('\n**\n'.join([t[0] for t in c]))

c = get_context(cohere_index, query)
print("--- COHERE ---")
print('\n**\n'.join([t[0] for t in c]))

# Print each Vectara source node's text with newlines flattened to spaces.
c = get_vectara_context(vectara_index, query)
print("--- VECTARA ---")
print('\n**\n'.join([t.text.replace('\n', ' ') for t in c.source_nodes]))

--- OPENAI ---
2 models and others open-source models.


**
If applicable, the model can advise on legal alternatives. 
**
Model cards for model reporting. 
**
3

We are releasing the following models to the general public for research and commercial use‡:

1. 
**
Progression of Models. 
--- COHERE ---
2 models and others open-source models.


**
For Vicuna models, we use vicuna-13b-delta-v1.1 and vicuna-33b-delta-v1.3 models from lmsys. 
**
Mirac Suzgun, Nathan Scales, Nathanael Schärli, Sebastian Gehrmann, Yi Tay, Hyung Won Chung, Aakanksha Chowdhery, Quoc V Le, Ed H Chi, Denny Zhou, et al. 
**
For Falcon models, we use the Falcon-40B-Instruct model which is a chat/instruct model. 
**
All model weights were obtained from HuggingFace.


--- VECTARA ---
We also share novel observations we made during the development of Llama 2 and Llama 2-Chat, such as the emergence of tool usage and temporal organization of knowledge. 3  We are releasing the following models to the general public for 