### **Load Environment variables from .env file**

In [6]:
import os

import openai
from dotenv import load_dotenv
from IPython.display import display, HTML, JSON, Markdown
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import AzureOpenAI
from langchain.vectorstores import FAISS

In [7]:
# Load settings from the .env file into the process environment.
load_dotenv()

# Every setting listed here is consumed by a later cell, so fail fast with a
# readable message instead of an obscure client error several cells down.
_REQUIRED_ENV_VARS = (
    "OPENAI_API_KEY",
    "OPENAI_DEPLOYMENT_ENDPOINT",
    "OPENAI_DEPLOYMENT_NAME",
    "OPENAI_MODEL_NAME",
    "OPENAI_DEPLOYMENT_VERSION",
    "OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME",
    "OPENAI_ADA_EMBEDDING_MODEL_NAME",
)
_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if not os.getenv(name)]
if _missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variables: " + ", ".join(_missing_env_vars)
    )

# Completion / chat deployment settings.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_DEPLOYMENT_ENDPOINT = os.getenv("OPENAI_DEPLOYMENT_ENDPOINT")
OPENAI_DEPLOYMENT_NAME = os.getenv("OPENAI_DEPLOYMENT_NAME")
OPENAI_MODEL_NAME = os.getenv("OPENAI_MODEL_NAME")
OPENAI_DEPLOYMENT_VERSION = os.getenv("OPENAI_DEPLOYMENT_VERSION")

# Embedding deployment settings.
OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME = os.getenv(
    "OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME")
OPENAI_ADA_EMBEDDING_MODEL_NAME = os.getenv("OPENAI_ADA_EMBEDDING_MODEL_NAME")

# Configure the module-level OpenAI client for Azure; the direct
# openai.ChatCompletion call later in the notebook relies on these globals.
openai.api_type = "azure"
openai.api_version = OPENAI_DEPLOYMENT_VERSION
openai.api_base = OPENAI_DEPLOYMENT_ENDPOINT
openai.api_key = OPENAI_API_KEY

#### **Run ONLY ONCE to create the embeddings - Split text into chunks** 

In [3]:
# Source PDF that is indexed for retrieval.
fileName = "./data/fabric-data-engineering.pdf"

# Read the PDF and split it into LangChain documents in one step.
loader = PyPDFLoader(file_path=fileName)
pages = loader.load_and_split()

# Quick sanity check on how many documents were produced.
print("Number of pages: ", len(pages))

Number of pages:  292


#### **Run ONLY ONCE to create the embeddings - Create embeddings and save to FAISS**

In [None]:
# Folder that holds the persisted FAISS index (index.faiss + index.pkl).
_faiss_index_dir = "./dbs/documentation/faiss_index"

# On Azure the deployment name selects the endpoint and the model name
# identifies the underlying embedding model, so pass both explicitly.
# chunk_size=1 is kept from the original (one text per embedding request;
# presumably to satisfy Azure batch limits - TODO confirm).
embeddings = OpenAIEmbeddings(
    deployment=OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME,
    model=OPENAI_ADA_EMBEDDING_MODEL_NAME,
    chunk_size=1,
)

# Embedding every document is slow and billed per call, so only build the
# index when it is not already on disk. Delete the folder to force a rebuild.
if os.path.exists(os.path.join(_faiss_index_dir, "index.faiss")):
    db = FAISS.load_local(_faiss_index_dir, embeddings)
else:
    db = FAISS.from_documents(documents=pages, embedding=embeddings)
    # save the FAISS index to disk
    db.save_local(_faiss_index_dir)

#### **Initialize LLM**

In [11]:
# LangChain wrapper around the Azure OpenAI deployment; reused below both for
# the direct baseline call and as the LLM inside the RetrievalQA chain.
llm = AzureOpenAI(
    openai_api_key=OPENAI_API_KEY,
    openai_api_base=OPENAI_DEPLOYMENT_ENDPOINT,
    deployment_name=OPENAI_DEPLOYMENT_NAME,
    model_name=OPENAI_MODEL_NAME,
)

#### **Initialize retrieval API WITHOUT your data - get non-specific or wrong answers**

In [16]:
# Baseline 1: plain call through the LangChain LLM wrapper, no retrieval.
# The result is kept under its own name and shown; previously it was
# overwritten by the chat response below before anyone could see it.
completion_answer = llm("How do I choose between a lakehouse and a data warehouse in Microsoft Fabric? ")
display(HTML(completion_answer))

# Baseline 2: the same question that is later asked WITH retrieval, sent
# straight to the chat endpoint so the two answers can be compared.
# prepare prompt
messages = [{"role": "system", "content": "You are a HELPFUL assistant answering users questions. Answer in a clear and concise manner."},
            {"role": "user", "content": "Can I use PowerBI datamart for 110 GB data volume? Explain which other options are available for data volumes of 110 GB?"}]

answer = openai.ChatCompletion.create(engine=OPENAI_DEPLOYMENT_NAME,
                                      messages=messages)
display(HTML(answer.choices[0].message.content))

#### **Initialize retrieval API WITH your data - get the right answers**

In [17]:
# Query-time embeddings must come from the same deployment/model that built
# the index; vectors produced by different models are not comparable.
embeddings = OpenAIEmbeddings(
    deployment=OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME,
    model=OPENAI_ADA_EMBEDDING_MODEL_NAME,
    chunk_size=1,
)

# load the vector store to memory
# NOTE(review): load_local deserialises data stored next to the index
# (believed to be pickle-based - confirm for the installed LangChain version);
# only load index folders you created yourself.
vectorStore = FAISS.load_local("./dbs/documentation/faiss_index", embeddings)

# returns 2 most similar vectors/documents
retriever = vectorStore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2},
)

# "stuff" chain type: the retrieved documents are inserted into one prompt.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=False,
)

#### **Ask questions**

In [19]:
# Same question as the no-retrieval baseline, now answered via the RetrievalQA chain.
qa(dict(query="Can I use PowerBI datamart for 110 GB data volume? Explain which other options are available for data volumes of 110 GB?"))

{'query': 'Can I use PowerBI datamart for 110 GB data volume? Explain which other options are available for data volumes of 110 GB?',
 'result': " No, you can't use PowerBI datamart for 110 GB data volume because it has a limit of 100 GB. You can use either a data warehouse or a lakehouse for data volumes of 110 GB.<|im_end|>"}

In [8]:
# How-to question answered via the RetrievalQA chain over the indexed PDF.
qa(dict(query="What are the steps to load a CSV file to a delta table in Microsoft Fabric?"))

{'query': 'What are the steps to load a CSV file to a delta table in Microsoft Fabric?',
 'result': ' 1. In Microsoft Fabric, select Synapse Data Engineering experience. 2. Make sure you are in the desired workspace or select/create one. 3. Select the Lakehouse icon under New section in the main image. Then, you can upload your CSV file and convert it to a Delta table. From there, you can generate a Dataset and create a Power BI report.<|im_end|>'}