In [1]:
from dotenv import load_dotenv

# Load environment variables from a local .env file so that later
# os.getenv() lookups (e.g. COHERE_API_KEY) can find them.
load_dotenv()

True

## Working with PDFs as images

### Approach 2: Use the entire page!

How do we mentally split PDFs? We usually think of them as a set of pages. We can treat them the same way for retrieval, by embedding each entire page as an image!

<img src="data/imgs/hai_ai_index_report_2025_chapter_2_34_of_80.jpg" width="200px" />
<img src="data/imgs/hai_ai_index_report_2025_chapter_2_58_of_80.jpg" width="200px" />
<img src="data/imgs/hai_ai_index_report_2025_chapter_2_69_of_80.jpg" width="200px" />

In [None]:
import weaviate
import os

# Request headers: the Cohere API key is read from the environment.
request_headers = {"X-Cohere-Api-Key": os.getenv("COHERE_API_KEY")}

# Environment variables passed to the embedded Weaviate instance.
embedded_env = {
    "LOG_LEVEL": "error",                # Reduce amount of logs
    "ENABLE_API_BASED_MODULES": "true",  # Enable API-based modules like multi2vec-cohere
}

# Connect to an embedded Weaviate instance pinned to version 1.32.5.
client = weaviate.connect_to_embedded(
    version="1.32.5",
    headers=request_headers,
    environment_variables=embedded_env,
)

In [3]:
# Remove any existing "Pages" collection so the create call below starts from scratch.
client.collections.delete("Pages")

{"action":"load_all_shards","build_git_commit":"08d409a988","build_go_version":"go1.25.0","build_image_tag":"HEAD","build_wv_version":"1.32.5","level":"error","msg":"failed to load all shards: context canceled","time":"2025-09-15T20:52:55+01:00"}


In [4]:
from weaviate.classes.config import Property, DataType, Configure, Tokenization

# Define the "Pages" collection; each object holds one page image of a document.
client.collections.create(
    name="Pages",
    properties=[
        # Title of the source document
        Property(
            name="document_title",
            data_type=DataType.TEXT,
        ),
        # The page image itself, stored as a blob (supplied base64-encoded at import time)
        Property(
            name="page_image",
            data_type=DataType.BLOB,
        ),
        # Image file name; FIELD tokenization indexes the whole value as a single token
        Property(
            name="filename",
            data_type=DataType.TEXT,
            tokenization=Tokenization.FIELD
        ),
    ],
    vector_config=[
        # Add `Configure.Vectors.multi2vec_cohere` vector to the collection with:
        # name: "default", source properties: ["page_image"], and model: "embed-v4.0"
        # BEGIN_SOLUTION
        Configure.Vectors.multi2vec_cohere(
            name="default",
            image_fields=["page_image"],
            model="embed-v4.0"
        )
        # END_SOLUTION
    ]
)

<weaviate.collections.collection.sync.Collection at 0x128a62410>

In [5]:
# Get a handle to the "Pages" collection; used by the import and query cells below.
pages = client.collections.get("Pages")

Load pre-computed vectors & metadata

In [6]:
import numpy as np
import json

# The metadata file is JSON; read it as UTF-8 explicitly instead of relying
# on the platform's default encoding.
with open("data/embeddings/hai_embeddings_metadata.json", "r", encoding="utf-8") as f:
    metadata = json.load(f)

# Pre-computed page-image embeddings (one entry per page image).
embeddings = np.load("data/embeddings/hai_image_embeddings.npy")

# The import step pairs embeddings[i] with metadata["image_paths"][i], so the
# two must line up; fail here rather than importing mismatched vectors.
assert len(embeddings) == len(metadata["image_paths"]), (
    f"{len(embeddings)} embeddings but {len(metadata['image_paths'])} image paths"
)

Import data

In [7]:
from tqdm import tqdm
from pathlib import Path
import base64

# Import one object per pre-computed embedding, sending objects in batches of 100.
with pages.batch.fixed_size(batch_size=100) as batch:
    # Pass `total` explicitly: enumerate() hides the length, so without it
    # tqdm can only show a running counter instead of a progress bar.
    for i, embedding in tqdm(enumerate(embeddings), total=len(embeddings)):
        filepath = Path(metadata["image_paths"][i])
        image = filepath.read_bytes()
        # The blob property is supplied as a base64-encoded string.
        base64_image = base64.b64encode(image).decode('utf-8')
        obj = {
            "document_title": "Stanford HAI Report 2025",
            "page_image": base64_image,
            "filename": filepath.name
        }

        # Add object to batch for import with (batch.add_object())
        # This time, manually provide the vector with `{"default": embedding}`
        # BEGIN_SOLUTION
        batch.add_object(
            properties=obj,
            vector={"default": embedding}
        )
        # END_SOLUTION

# Per-object import errors are collected on the batch rather than raised,
# so report them explicitly instead of letting them pass silently.
failed_objects = pages.batch.failed_objects
if failed_objects:
    print(f"{len(failed_objects)} objects failed to import")
    print(f"First error: {failed_objects[0].message}")

80it [00:00, 1187.07it/s]


In [8]:
from weaviate.classes.generate import GenerativeConfig, GenerativeParameters

# Grouped-task prompt: a single prompt for the set of retrieved objects,
# with the page images supplied from the `page_image` property.
prompt = GenerativeParameters.grouped_task(
    prompt="What advances has there been in autonomous driving in the last few years?",
    image_properties=["page_image"],  # Property containing images in Weaviate
)


response = pages.generate.near_text(
    # Try a RAG query with:
    # query (what to search for): "autonomous driving advances" and
    # limit (how many objects to fetch): 3
    # grouped_task (prompt): prompt defined above
    # BEGIN_SOLUTION
    query="autonomous driving advances",
    limit=3,
    grouped_task=prompt,
    # END_SOLUTION
    # Runtime definition of what generative AI model provider to use
    generative_provider=GenerativeConfig.anthropic(
        model="claude-3-5-haiku-latest"
    )
)

In [9]:
# Show the text generated for the grouped-task prompt.
print(response.generative.text)

Based on the images, here are key advances in autonomous driving over recent years:

1. Vehicle Developments:
- Tesla unveiled the Cybercab, a two-passenger autonomous vehicle without a steering wheel or pedals, set for production in 2026
- Baidu launched its Apollo Go robotaxi, the RT6, across multiple cities in China
- Waymo has expanded operations to four major U.S. cities (Phoenix, San Francisco, Los Angeles, Austin)

2. Technological Improvements:
- New benchmarks for evaluating self-driving capabilities have been introduced, like:
  - OpenAD: First real-world, open-world autonomous driving benchmark for 3D object detection
  - Bench2Drive: A comprehensive benchmark providing more realistic, closed-loop testing simulation

3. Deployment and Scale:
- Commercial robotaxi fleets are now operating in several cities
- Waymo provides 150,000 paid rides per week, covering over a million miles
- Baidu reported 988,000 rides across China in Q3 2024, with a 20% year-over-year increase
- Chi

In [10]:
# List which page images were retrieved for the query.
for retrieved_page in response.objects:
    page_filename = retrieved_page.properties["filename"]
    print(f"Filename: {page_filename}")

Filename: hai_ai_index_report_2025_chapter_2_77_of_80.jpg
Filename: hai_ai_index_report_2025_chapter_2_78_of_80.jpg
Filename: hai_ai_index_report_2025_chapter_2_76_of_80.jpg


In [11]:
# Close the client connection now that the notebook is finished.
client.close()

{"build_git_commit":"08d409a988","build_go_version":"go1.25.0","build_image_tag":"HEAD","build_wv_version":"1.32.5","error":"cannot find peer","level":"error","msg":"transferring leadership","time":"2025-09-15T20:53:10+01:00"}
