In [1]:
from langchain_community.vectorstores.vdms import VDMS_Client

In [2]:
from pathlib import Path

import requests

# Folder with pdf and extracted images
datapath = Path("./multimodal_files").resolve()
datapath.mkdir(parents=True, exist_ok=True)

pdf_url = "https://www.infoblox.com/wp-content/uploads/infoblox-deployment-infoblox-rest-api.pdf"
pdf_path = str(datapath / pdf_url.split("/")[-1])
# with open(pdf_path, "wb") as f:
#     f.write(requests.get(pdf_url).content)

In [3]:
pdf_path = str(datapath / pdf_url.split("/")[-1])

In [4]:
pdf_path
#conda install -c conda-forge poppler

'C:\\Users\\redzh\\Documents\\myStuff\\Tests\\PDF_IMG_RAG\\multimodal_files\\infoblox-deployment-infoblox-rest-api.pdf'

In [6]:
import os
from unstructured.partition.pdf import partition_pdf

# Set the TESSDATA_PREFIX environment variable
os.environ['TESSDATA_PREFIX'] = r'C:\Users\redzh\Downloads'

raw_pdf_elements = partition_pdf(
    filename=pdf_path,
    extract_images_in_pdf=True,
    infer_table_structure=True,
    new_after_n_chars=3800,
    combine_text_under_n_chars=2000,
    image_output_dir_path=datapath,
)

datapath = str(datapath)

Some weights of the model checkpoint at microsoft/table-transformer-structure-recognition were not used when initializing TableTransformerForObjectDetection: ['model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing TableTransformerForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TableTransformerForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [7]:
import unstructured

tables = []
texts = []
for element in raw_pdf_elements:
    if isinstance(element, unstructured.documents.elements.Table):
        tables.append(element)
    elif isinstance(element, unstructured.documents.elements.Text) or isinstance(element, unstructured.documents.elements.NarrativeText) or isinstance(element, unstructured.documents.elements.Title):
        texts.append(element)


In [9]:
from langchain_community.vectorstores.vdms import VDMS_Client
#! docker run --rm -d -p 55559:55555 --name vdms_rag_nb intellabs/vdms:latest
vdms_client = VDMS_Client(port=55559)

In [10]:
import os
import logging

from langchain_community.vectorstores import VDMS
from langchain_experimental.open_clip import OpenCLIPEmbeddings

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

logger.info("Creating OpenCLIPEmbeddings instance...")
# Create OpenCLIPEmbeddings instance
clip_embeddings = OpenCLIPEmbeddings(
    model_name="ViT-g-14", checkpoint="laion2b_s34b_b88k"
)
logger.info("OpenCLIPEmbeddings instance created successfully.")

logger.info("Creating VDMS...")
# Create VDMS
vectorstore = VDMS(
    client=vdms_client,
    collection_name="mm_rag_clip_photos",
    embedding=clip_embeddings
)

logger.info("VDMS created successfully.")

In [11]:
files = os.listdir(datapath)
logger.info(f"Found {len(files)} files in datapath.")

logger.info("Filtering for .jpg files...")

jpg_files = []
for file in files:
    if file.endswith(".jpg"):
        jpg_files.append(file)
        logger.debug(f"Added {file} to jpg_files list.")
    else:
        logger.debug(f"Skipped non-jpg file: {file}")
logger.info(f"Found {len(jpg_files)} .jpg files.")

try:
    image_uris = [os.path.join(datapath, jpg_file) for jpg_file in jpg_files]
    logger.info(f"Created {len(image_uris)} image URIs.")
except Exception as e:
    logger.error(f"Error creating image URIs: {str(e)}")
    image_uris = []

logger.info("Sorting image URIs...")
image_uris = sorted(image_uris)
logger.info("Image URIs sorted.")
logger.info(f"Found {len(image_uris)} image URIs with .jpg extension.")


In [12]:
# Add images
if image_uris:
    vectorstore.add_images(uris=image_uris)
    logger.info(f"Added {len(image_uris)} images to vectorstore.")
else:
    logger.warning("No image URIs found. Skipping adding images to vectorstore.")

if texts:
    logger.info("Adding documents to vectorstore...")
    text_contents = [text.text for text in texts]  # Extract text content
    vectorstore.add_texts(texts=text_contents)
    logger.info("Documents added to vectorstore successfully.")
else:
    logger.warning("No texts found. Skipping adding documents to vectorstore.")

logger.info("Creating retriever...")
# Make retriever
retriever = vectorstore.as_retriever()
logger.info("Retriever created successfully.")

In [13]:
## RAG

# `vectorstore.add_images` will store / retrieve images as base64 encoded strings.

import base64
from io import BytesIO

from PIL import Image


def resize_base64_image(base64_string, size=(128, 128)):
    """
    Resize an image encoded as a Base64 string.

    Args:
    base64_string (str): Base64 string of the original image.
    size (tuple): Desired size of the image as (width, height).

    Returns:
    str: Base64 string of the resized image.
    """
    # Decode the Base64 string
    img_data = base64.b64decode(base64_string)
    img = Image.open(BytesIO(img_data))

    # Resize the image
    resized_img = img.resize(size, Image.LANCZOS)

    # Save the resized image to a bytes buffer
    buffered = BytesIO()
    resized_img.save(buffered, format=img.format)

    # Encode the resized image to Base64
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def is_base64(s):
    """Check if a string is Base64 encoded"""
    try:
        return base64.b64encode(base64.b64decode(s)) == s.encode()
    except Exception:
        return False


def split_image_text_types(docs):
    """Split numpy array images and texts"""
    images = []
    text = []
    for doc in docs:
        doc = doc.page_content  # Extract Document contents
        if is_base64(doc):
            # Resize image to avoid OAI server error
            images.append(
                resize_base64_image(doc, size=(250, 250))
            )  # base64 encoded str
        else:
            text.append(doc)
    return {"images": images, "texts": text}

In [14]:
from langchain_community.llms.ollama import Ollama
from langchain_core.messages import HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough


def prompt_func(data_dict):
    # Joining the context texts into a single string
    formatted_texts = "\n".join(data_dict["context"]["texts"])
    messages = []

    # Adding image(s) to the messages if present
    if data_dict["context"]["images"]:
        image_message = {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpeg;base64,{data_dict['context']['images'][0]}"
            },
        }
        messages.append(image_message)

    # Adding the text message for analysis
    text_message = {
        "type": "text",
        "text": (
            "As an expert in Infoblox and API  and Python, your task is to design and document APIs, "
            "ensuring they are comprehensive, accurate, and user-friendly. You will use Python to script "
            "interactions with these APIs and validate their functionality. Your documentation should include:\n"
            "- Detailed descriptions of API endpoints, including parameters, request bodies, and responses.\n"
            "- Examples of Python scripts that interact with these endpoints.\n"
            "- Explanation of authentication methods used by the API.\n"
            "- Best practices for error handling and versioning in API design.\n\n"
            f"User-provided keywords: {data_dict['question']}\n\n"
            "API Documentation and Python Scripts:\n"
            f"{formatted_texts}"
        ),
    }
    messages.append(text_message)
    return [HumanMessage(content=messages)]



def multi_modal_rag_chain(retriever):
    """Multi-modal RAG chain"""

    # Multi-modal LLM
    llm_model = Ollama(
        verbose=True, temperature=0.5, model="llava", base_url="http://localhost:11434"
    )

    # RAG pipeline
    chain = (
        {
            "context": retriever | RunnableLambda(split_image_text_types),
            "question": RunnablePassthrough(),
        }
        | RunnableLambda(prompt_func)
        | llm_model
        | StrOutputParser()
    )

    return chain

In [17]:


query = "What is a network?"
docs = retriever.invoke(query, k=10)


In [18]:
docs

[Document(page_content='Add a network', metadata={'id': '9c0b290d-326e-44a7-b18e-68fae5e26948'}),
 Document(page_content='Search for a network', metadata={'id': 'c0257225-c797-44cb-8808-c2271d80979f'}),
 Document(page_content='Search for a network', metadata={'id': 'a8d2e8a7-38c7-4f54-8854-6cb5f2299727'})]

In [19]:
chain = multi_modal_rag_chain(retriever)
response = chain.invoke(query)
print(response)

 Sure, I can help with that. Here's an example of API documentation and Python scripts for Infoblox:

API Documentation:
-------------

### Add a network

#### Request

* Method: POST
* Endpoint: /networks
* Parameters:
	+ name (required): The name of the network.
	+ ip\_range (required): The IP range of the network.
	* Example request body:
```json
{
    "name": "My Network",
    "ip_range": "10.0.0/24"
}
```
#### Response

* Status code: 201 Created
* Body: The newly created network object.

### Search for a network

#### Request

* Method: GET
* Endpoint: /networks
* Parameters:
	+ q (optional): A search query.
	* Example request:
```bash
https://<hostname>/wapi/v1.0/networks?q=My Network
```
#### Response

* Status code: 200 OK
* Body: An array of network objects that match the search criteria.

Authentication Method:
---------------------

Infoblox uses API keys for authentication. To obtain an API key, you need to create a user account on the Infoblox appliance and generate an AP

In [None]:
#! docker kill vdms_rag_nb