In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

if os.environ['OPENAI_API_KEY']:
    print("API Key is set.")

API Key is set.


In [2]:
from langchain_openai import ChatOpenAI


In [3]:
llm = ChatOpenAI(model="gpt-5-nano",temperature=0)

## **RAG IMPLEMENTATION WITH PDFs**

#### **STEP 1: Extracting Text from PDFs**

In [4]:
from langchain_community.document_loaders import PyPDFLoader


loader = PyPDFLoader("./Docs/fabric-admin.pdf")

docs = loader.load()

docs

[Document(metadata={'producer': 'Microsoft Learn PDF 1.0.25309.01', 'creator': 'Microsoft Learn', 'creationdate': '2025-12-09T19:42:08+00:00', 'title': 'fabric admin | Microsoft Learn', 'moddate': '2025-12-09T19:42:08+00:00', 'source': './Docs/fabric-admin.pdf', 'total_pages': 290, 'page': 0, 'page_label': '1'}, page_content='Tell us about your PDF experience.\nMicrosoft Fabric documentation for\nadmins\nLearn about the Microsoft Fabric admin settings, options, and tools.\nFabric in your organization\nｅOVERVIEW\nWhat is Microsoft Fabric admin?\nWhat is the admin portal?\nｂGET STARTED\nEnable Fabric for your organization\nRegion availability\nFind your Fabric home region\nｃHOW-TO GUIDE\nUnderstand Fabric admin roles\nｉREFERENCE\nGovernance documentation\nSecurity documentation\nTools and settings\nｅOVERVIEW\nAbout tenant settings\nｃHOW-TO GUIDE\nSet up git integration\nSet up item certification'),
 Document(metadata={'producer': 'Microsoft Learn PDF 1.0.25309.01', 'creator': 'Microsoft 

### **Creating own Metadata for PDF Chunks**

In [5]:
for i in docs:

    i.metadata = {"source": "fabric-admin.pdf",
                  "developer": "Microsoft"}


#### **STEP 2: Splitting the Document into CHUNKS**

In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100)

chunks = splitter.split_documents(docs)
chunks

[Document(metadata={'source': 'fabric-admin.pdf', 'developer': 'Microsoft'}, page_content='Tell us about your PDF experience.\nMicrosoft Fabric documentation for\nadmins\nLearn about the Microsoft Fabric admin settings, options, and tools.\nFabric in your organization\nｅOVERVIEW\nWhat is Microsoft Fabric admin?\nWhat is the admin portal?\nｂGET STARTED\nEnable Fabric for your organization\nRegion availability\nFind your Fabric home region\nｃHOW-TO GUIDE\nUnderstand Fabric admin roles\nｉREFERENCE\nGovernance documentation\nSecurity documentation\nTools and settings\nｅOVERVIEW\nAbout tenant settings\nｃHOW-TO GUIDE\nSet up git integration\nSet up item certification'),
 Document(metadata={'source': 'fabric-admin.pdf', 'developer': 'Microsoft'}, page_content='Configure notifications\nSet up metadata scanning\nEnable content certification\nEnable service principal authentication\nConfigure Multi-Geo support\nMonitoring and management\nｅOVERVIEW\nWhat is the admin monitoring workspace?\nｐCONCE

In [7]:
chunks[0].metadata

{'source': 'fabric-admin.pdf', 'developer': 'Microsoft'}

#### **STEP 3: Creating Embeddings for the Chunks**

In [8]:
from langchain_openai import OpenAIEmbeddings

embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

#### **STEP 4: Store Embeddings in the EXISTING Local Vector Store**

In [9]:
from langchain_community.vectorstores import Chroma

vectorstore = Chroma(persist_directory="./Vector/",
                     embedding_function=embedding_model)

  vectorstore = Chroma(persist_directory="./Vector/",


In [10]:
vectorstore.add_documents(chunks)

['8ca51445-b2c0-461e-b9fc-6d72f26c803e',
 '2ed22b02-2ffe-4c2c-8f42-940dad3eb38e',
 '952a3b93-6745-4fa3-ba0b-60606ff88140',
 '3641ca6c-2127-415e-8258-0b476a4943fd',
 '869aee40-e42b-4c65-a198-8867933afb03',
 '44865583-d751-41fa-ab1c-f0823121af71',
 '6be85019-5523-443d-99a3-2d39c7c890c1',
 '9f26343e-ba20-46f4-acd5-fb7d7be844b4',
 'da0f7b4f-20cc-4d59-bdd3-67cf4ae14f0d',
 'efcb44b9-0f0d-4852-b95e-a9e486baf708',
 '04ea9b63-77d3-4878-b13d-e174de73d758',
 'b27dc70f-633c-4520-98a2-9cdf313f046d',
 'b26d9a1e-7edc-43de-8765-4f8873b29304',
 '12cc1f46-cdda-4876-9f8d-1b229d002577',
 '1052b7d1-1430-4499-8377-e8c6861563c9',
 'ef5d8c69-1a82-4b62-be9e-ceb1cd3204ca',
 'e2f4828f-b1d5-4ff5-b5b4-62a2c945df05',
 '9c873a09-666e-40e9-bfe2-b8a05ab0a5fd',
 '221fd23e-be9f-4015-8d3f-a6537aeed28a',
 'e9b59d61-95f4-465e-9f05-64c734de1b77',
 '42bcef14-9e0a-443c-a8ea-3310b1be772a',
 '82c7af71-baa9-4a0d-af53-62b2589101ac',
 'e528e489-49b2-4f49-b948-26ba64696580',
 '7f536718-2906-42ba-8009-c4da77447f7f',
 '494ae408-6a0e-

#### **STEP 5: Semantic Search**

In [11]:
vectorstore.similarity_search("What is Microsoft Fabric admin", k= 3)

[Document(metadata={'source': 'fabric-admin.pdf', 'developer': 'Microsoft'}, page_content='What is Microsoft Fabric admin?\n07/01/2024\nMicrosoft Fabric admin is the management of the organization-wide settings that control how\nMicrosoft Fabric works. Users that are assigned to admin roles configure, monitor, and\nprovision organizational resources. This article provides an overview of admin roles, tasks, and\ntools to help you get started.\nThere are several roles that work together to administer Microsoft Fabric for your organization.\nMost admin roles are assigned in the Microsoft 365 admin portal or by using PowerShell. The\ncapacity admin roles are assigned when the capacity is created. To learn more about each of\nthe admin roles, see About admin roles. To learn how to assign admin roles, see Assign admin\nroles.\nThis section lists the Microsoft 365 admin roles and the tasks they can perform.\nGlobal administrator\nUnlimited access to all management features for the organizatio

#### **Re-Use the Vector Database**

In [4]:
vectorstore_persist = Chroma(
    persist_directory="./Vector/",
    embedding_function=embedding_model
)

  vectorstore_persist = Chroma(


In [5]:
vectorstore_persist.similarity_search("Why do we need OneLake?", k= 3)

[Document(metadata={'developer': 'Microsoft', 'source': 'fabric-onelake.pdf'}, page_content='OneLake, the OneDrive for data\nArticle• 07/25/2024\nOneLake is a single, unified, logical data lake for your whole organization. A data Lake\nprocesses large volumes of data from various sources. Like OneDrive, OneLake comes\nautomatically with every Microsoft Fabric tenant and is designed to be the single place\nfor all your analytics data. OneLake brings customers:\nOne data lake for the entire organization\nOne copy of data for use with multiple analytical engines\nBefore OneLake, it was easier for customers to create multiple lakes for different\nbusiness groups rather than collaborating on a single lake, even with the extra overhead\nof managing multiple resources. OneLake focuses on removing these challenges by\nimproving collaboration. Every customer tenant has exactly one OneLake. There can\nnever be more than one and if you have Fabric, there can never be zero. Every Fabric\ntenant au