<a href="https://colab.research.google.com/github/yz2873/Unsupervised_Learning/blob/master/LlamaIndex_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Llama_Index Basic**

In [None]:
# Sanity check: list requirements.txt to confirm it is present in the working directory.
!ls requirements.txt

requirements.txt


In [None]:
!!pip install -r requirements.txt -q

['\x1b[?25l     \x1b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m0.0/829.7 kB\x1b[0m \x1b[31m?\x1b[0m eta \x1b[36m-:--:--\x1b[0m',
 '\x1b[2K     \x1b[91m━━━━━━\x1b[0m\x1b[91m╸\x1b[0m\x1b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m143.4/829.7 kB\x1b[0m \x1b[31m4.3 MB/s\x1b[0m eta \x1b[36m0:00:01\x1b[0m',
 '\x1b[2K     \x1b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m829.7/829.7 kB\x1b[0m \x1b[31m12.2 MB/s\x1b[0m eta \x1b[36m0:00:00\x1b[0m',
 '\x1b[?25h\x1b[?25l     \x1b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m0.0/76.5 kB\x1b[0m \x1b[31m?\x1b[0m eta \x1b[36m-:--:--\x1b[0m',
 '\x1b[2K     \x1b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m76.5/76.5 kB\x1b[0m \x1b[31m9.5 MB/s\x1b[0m eta \x1b[36m0:00:00\x1b[0m',
 '\x1b[?25h\x1b[?25l     \x1b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m0.0/7.6 MB\x1b[0m \x1b[31m?\x1b[0m eta \x1b[36m-:--:--\x1b[0m',
 '\x1b[2K     \x1b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m\x1b[91

## **Load the OpenAI API key**

In [None]:
import os
from dotenv import load_dotenv, find_dotenv

In [None]:
# Locate a .env file with find_dotenv() and load its variables into the environment.
# override=True lets values from the .env file replace variables that are already set.
# Kept as the cell's last expression so the boolean result is displayed.
load_dotenv(dotenv_path=find_dotenv(), override=True)

True

In [None]:
import openai

In [None]:
# Take the key from the environment rather than hard-coding it in the notebook.
# The subscript lookup raises KeyError if OPENAI_API_KEY is not set.
openai.api_key = os.environ['OPENAI_API_KEY']

## **Import LlamaIndex modules**

In [None]:
from llama_index.llms import OpenAI
from llama_index import VectorStoreIndex, SimpleDirectoryReader

In [None]:
from IPython.display import Markdown, display

## **Load the text file from 'data' directory**

In [None]:
# Point a SimpleDirectoryReader at the local 'data' directory and load its contents.
documents = (
    SimpleDirectoryReader(input_dir='data')
    .load_data()
)

In [None]:
# documents  # uncomment to inspect what was loaded from 'data'

## **Embed the documents and create the index**

In [None]:
# Build a vector-store index over the loaded documents.
# NOTE(review): no service_context is passed, so library defaults apply for the
# LLM/embedding model — presumably OpenAI, given the API key set earlier; confirm.
index = VectorStoreIndex.from_documents(documents)

[nltk_data] Downloading package punkt to /tmp/llama_index...
[nltk_data]   Unzipping tokenizers/punkt.zip.


## **Create Query Engine**

In [None]:
# Obtain a query engine from the index; no arguments are passed, so defaults are used.
query_engine = index.as_query_engine()

In [None]:
# Ask a natural-language question and keep the result in `response` for display below.
response = query_engine.query('What did the author do growing up?')

In [None]:
# Render the answer in bold: the f-string formats `response` as text inside <b> tags,
# and Markdown/display show it as rich output.
display(Markdown(f"<b>{response}</b>"))

<b>The author worked on writing and programming outside of school before college. They wrote short stories and tried writing programs on an IBM 1401 computer using an early version of Fortran. They also mentioned getting a microcomputer, specifically a TRS-80, and started programming on it.</b>

## **Persist the index to local storage**

In [None]:
# Write the index's storage context to a local directory so the index can be
# reloaded later without rebuilding it from the documents.
# persist_dir is spelled out (it is also the library default) so that it visibly
# matches the directory this notebook reloads the index from.
index.storage_context.persist(persist_dir="./storage")

### Once the index has been persisted to local storage, you can load it back from there instead of calling `VectorStoreIndex.from_documents` again

In [None]:
from llama_index import StorageContext, load_index_from_storage

In [None]:
# Rebuild a storage context from the files persisted under ./storage ...
storage_context = StorageContext.from_defaults(
    persist_dir="./storage",
)

# ... and load the index back from it (bound to `index1`, separate from `index`).
index1 = load_index_from_storage(storage_context)

## **How to Customize**

In [None]:
from llama_index import ServiceContext, set_global_service_context

In [None]:
# Choose the LLM explicitly: gpt-3.5-turbo with temperature=0 and max_tokens=256.
llm = OpenAI(
    model='gpt-3.5-turbo',
    temperature=0,
    max_tokens=256,
)

# Bundle the LLM into a service context.
# Variant that also sets chunking parameters:
#   service_context = ServiceContext.from_defaults(llm=llm, chunk_size=1000, chunk_overlap=20)
service_context = ServiceContext.from_defaults(llm=llm)

# Option 1: hand the service context to the index when building it.
index = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context,
)

# Option 2: register the service context globally, then build without passing it:
#   set_global_service_context(service_context)
#   index = VectorStoreIndex.from_documents(documents)

In [None]:
# Alternative: use Google's PaLM model as the LLM. You need a PaLM API key for this.
from llama_index.llms import PaLM
# Note: the next two lines rebind `service_context` and `index`, replacing the
# OpenAI-backed objects created in the previous cell.
service_context = ServiceContext.from_defaults(llm=PaLM())
index = VectorStoreIndex.from_documents(documents, service_context=service_context)