In [1]:
import os
import requests
from dotenv import load_dotenv
from requests.auth import HTTPBasicAuth

import numpy as np
import faiss

In [2]:
# load dotenv
load_dotenv()

True

In [41]:
# Fetch confluence pages
auth = HTTPBasicAuth(os.environ.get('CONFLUENCE_USERNAME'), os.environ.get('CONFLUENCE_API_TOKEN'))
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json"
}
url=f'{os.environ.get('CONFLUENCE_DOMAIN')}/wiki/api/v2/pages?body-format=storage'
response = requests.get(url, headers=headers, auth=auth)
pages = response.json()["results"]
for page in pages:
    title = page["title"]
    content = page["body"]["storage"]["value"]
    print(title, content[:200])

Software development <ac:layout><ac:layout-section ac:type="fixed-width" ac:breakout-mode="default"><ac:layout-cell><ac:structured-macro ac:name="panel" ac:schema-version="1" ac:macro-id="022d3530-ace1-4c84-88a3-009e4aac1
Template - Product requirements <ac:structured-macro ac:name="details" ac:schema-version="1" data-layout="default" ac:local-id="a750237f-a9f2-44f7-ae29-0fd5dbd51be8" ac:macro-id="d858d8e03b5e1daef385609bd282d5dd"><ac:rich-text-body>
Template - Meeting notes <h2><ac:emoticon ac:name="blue-star" ac:emoji-shortname=":calendar_spiral:" ac:emoji-id="1f5d3" ac:emoji-fallback="\uD83D\uDDD3" />&nbsp;Date</h2><p /><h2><ac:emoticon ac:name="blue-star" ac:emoji-sho
Template - Decision documentation <ac:macro ac:name="details"><ac:rich-text-body><table data-layout="default"><colgroup><col style="width: 149.0px;" /><col style="width: 577.0px;" /></colgroup><tbody><tr><th><p><strong>Status</strong>
2025-09-14 Meeting notes <h2><ac:emoticon ac:name="blue-star" ac:emoji-shortname=":c

In [17]:
#Chunk & Embed
from langchain_text_splitters import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = splitter.split_text(content)

model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(chunks)
print(embeddings)

[[-0.06894588 -0.01874877  0.00015105 ... -0.05508795  0.01305944
   0.03978699]
 [-0.03840163 -0.02341305  0.00550727 ...  0.07953945  0.01730547
   0.01879585]
 [-0.03325803  0.0348582  -0.03294918 ... -0.03169325  0.03493221
   0.02415984]
 [-0.07013027  0.03874774 -0.09290018 ... -0.01618124  0.01417076
   0.00848898]
 [-0.06507482 -0.00230296  0.0003009  ...  0.04186031  0.00080836
   0.0856195 ]]


In [19]:
#store chunks and embeddings into vector db
import chromadb

chroma_client = chromadb.Client()
collection = chroma_client.create_collection("confluence_docs")

for i, chunk in enumerate(chunks):
    collection.add(documents=[chunk], embeddings=[embeddings[i]], ids=[str(i)])

In [39]:
# Query the results from the DB
query = "can you explain me in detail what are the pages available in my confluence"
q_embedding = model.encode([query])[0]

# Retrieve
results = collection.query(query_embeddings=[q_embedding], n_results=3)

context = "\n\n".join(results["documents"][0])

prompt = f"""You are a helpful assistant. 
Answer the question using only the context below:

Context:
{context}

Question: {query}
Answer:"""

resp = requests.post('http://localhost:11434/api/generate', json={"model": "llama3.2", "prompt": prompt, "stream": False})
data = resp.json()
print(data["response"])

Unfortunately, I don't see any information about Confluence pages in the context provided. The text only mentions a Trade Banking Web Application, educational background, certifications, and contact information, but does not mention Confluence at all.

If you could provide more context or clarify what you mean by "pages available in my Confluence", I'd be happy to try and help further!
