# Notebook ① – Data Preparation & Index Build

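**Prerequisites** — install every package this notebook imports (the Chroma and HuggingFace integrations ship as separate LlamaIndex packages):  
```bash
pip install pandas pyarrow llama-index llama-index-vector-stores-chroma llama-index-embeddings-huggingface chromadb sentence-transformers torch tqdm
```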

In [1]:
import glob, json, os, pandas as pd
from pathlib import Path

from llama_index.core import Document, ServiceContext
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core.embeddings.utils import resolve_embed_model


# Root folder of the raw CSV exports; pickup and delivery files live in sibling sub-folders.
DATA_DIR = Path('data')

# glob.glob returns paths in arbitrary, filesystem-dependent order. Sorting makes the
# order in which the CSVs are later concatenated reproducible across machines and runs.
PICKUP_FILES = sorted(glob.glob(str(DATA_DIR/'pickup'/'*.csv')))
DELIVERY_FILES = sorted(glob.glob(str(DATA_DIR/'delivery'/'*.csv')))
print(f'Found {len(PICKUP_FILES)} pickup + {len(DELIVERY_FILES)} delivery CSVs')

Found 5 pickup + 5 delivery CSVs


In [2]:
def load_csvs(files):
    """Read every CSV in ``files`` and stack the rows into one DataFrame.

    Parameters
    ----------
    files : iterable of path-like
        CSV paths to read, in the order their rows should appear in the result.

    Returns
    -------
    pandas.DataFrame
        All rows from all files, re-indexed ``0..n-1``.

    Raises
    ------
    ValueError
        If ``files`` is empty. ``pd.concat`` would raise the same exception type
        with an opaque "No objects to concatenate" message; failing here points
        at the real cause (a glob pattern that matched nothing).
    """
    # Materialise once so generators are supported and the emptiness check is reliable.
    files = list(files)
    if not files:
        raise ValueError(
            'load_csvs() received no files to read; '
            'check that the data directory and glob pattern are correct'
        )
    dfs = [pd.read_csv(f) for f in files]
    return pd.concat(dfs, ignore_index=True)

# Load each source separately, then stack pickup rows followed by delivery rows
# into a single frame with a fresh 0..n-1 index.
pickup_df, delivery_df = load_csvs(PICKUP_FILES), load_csvs(DELIVERY_FILES)
df = pd.concat((pickup_df, delivery_df), ignore_index=True)
print(df.head())

   order_id  region_id       city  courier_id     accept_time  \
0    483671          3  Chongqing        1518  08-14 07:57:00   
1   1746131          3  Chongqing        4706  10-09 07:46:00   
2   2301722          3  Chongqing        4706  10-09 13:57:00   
3   3788723          3  Chongqing        4706  05-19 08:13:00   
4    713435          3  Chongqing        4706  05-22 08:16:00   

  time_window_start time_window_end        lng       lat  aoi_id  ...  \
0    08-14 09:00:00  08-14 11:00:00  106.46877  29.47204     218  ...   
1    10-09 09:00:00  10-09 11:00:00  106.46872  29.47200     218  ...   
2    10-09 13:57:00  10-09 15:57:00  106.46869  29.47191     218  ...   
3    05-19 11:00:00  05-19 13:00:00  106.46878  29.47208     218  ...   
4    05-22 09:00:00  05-22 11:00:00  106.46813  29.47228     218  ...   

   pickup_gps_lng pickup_gps_lat accept_gps_time  accept_gps_lng  \
0             NaN            NaN             NaN             NaN   
1             NaN            NaN  

In [3]:
# Normalise timestamps and courier ids, then order the frame chronologically.

# The raw `accept_time` strings carry no year ('MM-DD HH:MM:SS'), so one is prepended
# before parsing.
# NOTE(review): the same year is assumed for every row — confirm it against the
# dataset's real collection period. A '02-29' row cannot be parsed in a non-leap year
# and would silently become NaT because of errors='coerce'.
ACCEPT_TIME_YEAR = '2023'

# Guard that keeps this cell idempotent: once `accept_time` is already datetime,
# re-running the parse would stringify it, prepend the year a second time and coerce
# every value to NaT. Only raw string timestamps are parsed.
if not pd.api.types.is_datetime64_any_dtype(df['accept_time']):
    # Step 1: Clean the raw string timestamps
    accept_time_raw = df['accept_time'].astype(str).str.strip()

    # Step 2: Add year and parse as UTC-aware datetimes (unparseable values -> NaT)
    df['accept_time'] = pd.to_datetime(
        ACCEPT_TIME_YEAR + '-' + accept_time_raw,
        format='%Y-%m-%d %H:%M:%S',
        errors='coerce',
        utc=True
    )

# Step 3: Optional string version of the parsed timestamp.
# astype(str) renders 'YYYY-MM-DD HH:MM:SS+00:00' (space separator, not ISO 8601's 'T').
df['accept_time_iso'] = df['accept_time'].astype(str)

# Step 4: Normalize courier_id to a zero-padded string of at least 6 characters
df['courier_id'] = df['courier_id'].astype(str).str.zfill(6)

# Step 5: Sort chronologically and rebuild the index
df = df.sort_values('accept_time').reset_index(drop=True)


In [4]:
# Number of rows whose accept_time is missing (NaT) after parsing.
print(df['accept_time'].isnull().sum())

0


In [5]:
# Preview the first ten parsed timestamps as strings.
# Slice before converting: stringifying the whole column just to show ten values is
# wasted work on a frame this large; the printed result is unchanged.
print(df['accept_time'].head(10).astype(str).tolist())

['2023-04-16 12:33:00+00:00', '2023-04-24 08:18:00+00:00', '2023-04-24 13:05:00+00:00', '2023-04-26 08:13:00+00:00', '2023-04-26 08:37:00+00:00', '2023-04-26 08:40:00+00:00', '2023-04-26 08:41:00+00:00', '2023-04-27 08:38:00+00:00', '2023-04-27 08:49:00+00:00', '2023-04-27 10:55:00+00:00']


In [9]:
# Parse report: Series.count() tallies the non-missing values, isnull().sum() the NaT ones.
print("🟢 Parsed rows:", df['accept_time'].count())
print("🔴 Failed rows:", df['accept_time'].isnull().sum())

🟢 Parsed rows: 10650808
🔴 Failed rows: 0


In [7]:
# Build one Document per (courier, calendar day) group.
# Each document's text is that group's rows serialised as a JSON array of records;
# the courier id and the date (as a string) are attached as metadata.
grouped = df.groupby([df['courier_id'], df['accept_time'].dt.date])
docs = [
    Document(
        text=rows.to_json(orient='records'),
        metadata={'courier': courier, 'date': str(date)},
    )
    for (courier, date), rows in grouped
]
print(f'Created {len(docs):,} documents')

Created 594,603 documents


In [8]:
# Sanity check: show the first document's metadata and the first record of its JSON text.
first_doc = docs[0]
print(first_doc.metadata)
first_record = json.loads(first_doc.text)[0]
print(first_record)

{'courier': '000000', 'date': '2023-06-20'}
{'order_id': 4054541, 'region_id': 29, 'city': 'Shanghai', 'courier_id': '000000', 'accept_time': 1687279140000, 'time_window_start': None, 'time_window_end': None, 'lng': 121.41333, 'lat': 31.25387, 'aoi_id': 27388, 'aoi_type': 1, 'pickup_time': None, 'pickup_gps_time': None, 'pickup_gps_lng': None, 'pickup_gps_lat': None, 'accept_gps_time': '06-20 16:39:00', 'accept_gps_lng': 121.40005, 'accept_gps_lat': 31.25108, 'ds': 620, 'delivery_time': '06-20 18:39:00', 'delivery_gps_time': '06-20 18:39:00', 'delivery_gps_lng': 121.39989, 'delivery_gps_lat': 31.25093, 'accept_time_iso': '2023-06-20 16:39:00+00:00'}


In [19]:
import json

# Flatten each Document to a plain dict (text + metadata) so it can be serialised.
json_docs = [dict(text=doc.text, metadata=doc.metadata) for doc in docs]

# Persist as UTF-8 JSON; ensure_ascii=False keeps non-ASCII characters readable on disk.
with open("docs_594k.json", mode="w", encoding="utf-8") as f:
    json.dump(json_docs, f, ensure_ascii=False)

print("✅ Saved documents to docs_594k.json")


✅ Saved documents to docs_594k.json


In [1]:
import json
from llama_index.core import Document

# Read the serialised documents back from disk (open() defaults to read mode).
with open("docs_594k.json", encoding="utf-8") as f:
    json_docs = json.load(f)

# Rebuild a Document from each stored text/metadata pair.
docs = list(
    Document(text=record["text"], metadata=record["metadata"])
    for record in json_docs
)


In [2]:
import torch

# Report which device is available for embedding before the long-running job starts.
if not torch.cuda.is_available():
    print("❌ GPU not available, using CPU")
else:
    print("✅ CUDA GPU available:", torch.cuda.get_device_name(0))


✅ CUDA GPU available: NVIDIA GeForce RTX 5060 Ti


In [3]:
# --- Imports & Config ---
import json
import os
import time
from tqdm import tqdm
from sentence_transformers import SentenceTransformer
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings, VectorStoreIndex, Document
from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb

# --- Load saved documents ---
with open("docs_594k.json", "r", encoding="utf-8") as f:
    json_docs = json.load(f)
docs = [Document(text=d["text"], metadata=d["metadata"]) for d in json_docs]

# --- Set up HuggingFace embedding (GPU when present) + large batch ---
import torch  # local import so this cell does not rely on the GPU-check cell having run

# Hard-coding "cuda" makes the model fail to load on a CPU-only machine; pick the
# device at runtime so the CPU fallback actually works.
embed_device = "cuda" if torch.cuda.is_available() else "cpu"
embed_model = HuggingFaceEmbedding(
    model_name="intfloat/e5-base-v2",
    embed_batch_size=256,
    device=embed_device
)
Settings.embed_model = embed_model
# NOTE(review): e5-base-v2 is documented with a 512-token input limit — confirm that
# 1024-token chunks are not truncated at embedding time.
Settings.chunk_size = 1024

# --- Set up Chroma vector store ---
# PersistentClient stores the collection on disk under ./lade_chroma.
client = chromadb.PersistentClient(path="lade_chroma")
collection = client.get_or_create_collection("lade")
vector_store = ChromaVectorStore(chroma_collection=collection)

# --- Resume tracking ---
# Chunk ids in the checkpoint are positions in units of batch_size, so changing
# batch_size after a partial run makes the saved ids point at different document
# ranges. Delete the checkpoint file whenever batch_size changes.
batch_size = 10000  # Smaller chunk for test speed; raise to 50_000 later
total_chunks = (len(docs) + batch_size - 1) // batch_size  # ceiling division
checkpoint_file = "embedding_progress.json"

# --- Load completed chunks ---
# The checkpoint is a JSON list of chunk ids finished in earlier runs.
if os.path.exists(checkpoint_file):
    with open(checkpoint_file, "r") as f:
        completed = set(json.load(f))
else:
    completed = set()



  from .autonotebook import tqdm as notebook_tqdm


> **Next**: open `SyntheticDataset_Creator.ipynb` to build the gold Q‑A set.

In [4]:
# Imported here so the cell is self-contained.
from llama_index.core import StorageContext

# `VectorStoreIndex.from_documents` has no `vector_store` parameter. Passed that way the
# argument is swallowed by **kwargs and the index falls back to a throwaway in-memory
# store, so nothing is written to Chroma. The store must be supplied through a
# StorageContext instead.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# A checkpoint that lists finished chunks while the collection is empty means those
# chunks were never persisted; resuming would skip them for good.
if completed and collection.count() == 0:
    print(
        f"⚠️ {checkpoint_file} lists {len(completed)} finished chunks but the Chroma "
        "collection is empty; delete the checkpoint and re-run to embed them."
    )

# NOTE(review): a chunk interrupted mid-way is re-embedded from scratch on resume;
# the documents carry no stable ids here, so confirm this cannot leave duplicate
# vectors in the collection.
with tqdm(total=total_chunks, desc="Embedding chunks", leave=True) as pbar:
    pbar.update(len(completed))  # account for chunks finished in earlier runs

    for i in range(0, len(docs), batch_size):
        chunk_id = i // batch_size
        if chunk_id in completed:
            continue

        chunk = docs[i:i + batch_size]

        start = time.time()
        VectorStoreIndex.from_documents(
            chunk,
            storage_context=storage_context,  # routes the embedded nodes into Chroma
            show_progress=True      # 👈 inner progress bar
        )
        elapsed = time.time() - start

        # ✅ Save progress — write to a temp file and swap it in, so an interrupt
        # during the write cannot leave a truncated checkpoint behind.
        completed.add(chunk_id)
        tmp_checkpoint = checkpoint_file + ".tmp"
        with open(tmp_checkpoint, "w") as f:
            json.dump(sorted(completed), f)
        os.replace(tmp_checkpoint, checkpoint_file)

        pbar.update(1)
        print(f"✅ Finished chunk {chunk_id} ({i}–{i+len(chunk)}) in {elapsed/60:.2f} min")


Parsing nodes: 100%|██████████| 10000/10000 [00:44<00:00, 225.25it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.12it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:44<00:00, 45.58it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:44<00:00, 45.57it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.39it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.41it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.39it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.38it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.31it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.31it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.01it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 44.92it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 44.96it/s]
Generating embeddings: 100%|██

✅ Finished chunk 20 (200000–210000) in 20.25 min


Parsing nodes: 100%|██████████| 10000/10000 [00:55<00:00, 181.27it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.32it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.33it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.34it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.30it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.29it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.27it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.26it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.25it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.24it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.26it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.21it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.15it/s]
Generating embeddings: 100%|██

✅ Finished chunk 21 (210000–220000) in 24.84 min


Parsing nodes: 100%|██████████| 10000/10000 [00:49<00:00, 203.83it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.07it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.15it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.16it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.09it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.11it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.28it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.09it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.03it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.06it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.10it/s]
Generating embeddings: 100%|██

✅ Finished chunk 22 (220000–230000) in 22.42 min


Parsing nodes: 100%|██████████| 10000/10000 [00:52<00:00, 191.34it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.07it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.29it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.17it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.16it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.11it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.16it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.15it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.08it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.17it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.07it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.04it/s]
Generating embeddings: 100%|██

✅ Finished chunk 23 (230000–240000) in 23.01 min


Parsing nodes: 100%|██████████| 10000/10000 [00:46<00:00, 213.25it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.07it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.27it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.11it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.21it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.16it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.15it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.17it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.16it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.09it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.09it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.11it/s]
Generating embeddings: 100%|██

✅ Finished chunk 24 (240000–250000) in 21.13 min


Parsing nodes: 100%|██████████| 10000/10000 [00:46<00:00, 214.34it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.22it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.26it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.32it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.17it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.25it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.23it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.20it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.13it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.17it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.14it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.08it/s]
Generating embeddings: 100%|██

✅ Finished chunk 25 (250000–260000) in 21.12 min


Parsing nodes: 100%|██████████| 10000/10000 [00:52<00:00, 190.02it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.13it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.26it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.23it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.13it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.14it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.25it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.14it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.17it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.21it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.16it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.15it/s]
Generating embeddings: 100%|██

✅ Finished chunk 26 (260000–270000) in 23.36 min


Parsing nodes: 100%|██████████| 10000/10000 [00:49<00:00, 200.83it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.01it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.28it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.24it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.24it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.25it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.26it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.26it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.23it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.27it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.13it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.25it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:46<00:00, 44.00it/s]
Generating embeddings: 100%|██

✅ Finished chunk 27 (270000–280000) in 22.70 min


Parsing nodes: 100%|██████████| 10000/10000 [00:44<00:00, 225.15it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.20it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.29it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.28it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.24it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.22it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.22it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.18it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.23it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.22it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.18it/s]
Generating embeddings: 100%|██

✅ Finished chunk 28 (280000–290000) in 20.34 min


Parsing nodes: 100%|██████████| 10000/10000 [00:48<00:00, 206.83it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.25it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.36it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.31it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.24it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.26it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.23it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.23it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.28it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.24it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.29it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.26it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.24it/s]
Generating embeddings: 100%|██

✅ Finished chunk 29 (290000–300000) in 21.86 min


Parsing nodes: 100%|██████████| 10000/10000 [00:49<00:00, 203.56it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.00it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.22it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.24it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.22it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.26it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.22it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.15it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.28it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.25it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.16it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.14it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.15it/s]
Generating embeddings: 100%|██

✅ Finished chunk 30 (300000–310000) in 22.20 min


Parsing nodes: 100%|██████████| 10000/10000 [00:43<00:00, 227.49it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.12it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.28it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.18it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.25it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.29it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.18it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.23it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.15it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.18it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.20it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.13it/s]
Generating embeddings: 100%|██

✅ Finished chunk 31 (310000–320000) in 20.38 min


Parsing nodes: 100%|██████████| 10000/10000 [00:45<00:00, 220.55it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.07it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.22it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.11it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.15it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.08it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.06it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.18it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.17it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.09it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██

✅ Finished chunk 32 (320000–330000) in 20.99 min


Parsing nodes: 100%|██████████| 10000/10000 [00:37<00:00, 267.12it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.16it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.24it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.22it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.27it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.21it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.23it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.21it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.20it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.11it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.16it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.18it/s]
Generating embeddings: 100%|██

✅ Finished chunk 33 (330000–340000) in 17.98 min


Parsing nodes: 100%|██████████| 10000/10000 [00:43<00:00, 230.01it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.14it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.35it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.28it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.24it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.16it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.23it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.29it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.24it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.18it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.07it/s]
Generating embeddings: 100%|██

✅ Finished chunk 34 (340000–350000) in 19.91 min


Parsing nodes: 100%|██████████| 10000/10000 [00:33<00:00, 298.56it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.11it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.33it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.30it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.12it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.18it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.22it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.13it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.11it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.16it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.14it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.10it/s]
Generating embeddings: 100%|██

✅ Finished chunk 35 (350000–360000) in 16.10 min


Parsing nodes: 100%|██████████| 10000/10000 [00:33<00:00, 297.98it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.17it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.42it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.18it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.15it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.18it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.29it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.34it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.23it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.30it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.20it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.16it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.25it/s]
Generating embeddings: 100%|██

✅ Finished chunk 36 (360000–370000) in 15.75 min


Parsing nodes: 100%|██████████| 10000/10000 [00:37<00:00, 267.87it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.22it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.40it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.31it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.26it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.18it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.21it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.15it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.18it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.23it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.17it/s]
Generating embeddings: 100%|██

✅ Finished chunk 37 (370000–380000) in 18.01 min


Parsing nodes: 100%|██████████| 10000/10000 [00:43<00:00, 230.57it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.17it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.36it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.22it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.24it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.25it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.12it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.29it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.25it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.18it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.17it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.21it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.24it/s]
Generating embeddings: 100%|██

✅ Finished chunk 38 (380000–390000) in 20.20 min


Parsing nodes: 100%|██████████| 10000/10000 [00:35<00:00, 284.78it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.13it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.41it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.29it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.17it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.24it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.27it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.26it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.16it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.15it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.13it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.11it/s]
Generating embeddings: 100%|██

✅ Finished chunk 39 (390000–400000) in 17.35 min


Parsing nodes: 100%|██████████| 10000/10000 [00:46<00:00, 214.81it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.33it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.14it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.14it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.23it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.27it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.25it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.24it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:46<00:00, 43.85it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.17it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.16it/s]
Generating embeddings: 100%|██

✅ Finished chunk 40 (400000–410000) in 21.89 min


Parsing nodes: 100%|██████████| 10000/10000 [00:42<00:00, 236.55it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.00it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.35it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 44.99it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.11it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.13it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.14it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 44.98it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 44.94it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.23it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.31it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.20it/s]
Generating embeddings: 100%|██

✅ Finished chunk 41 (410000–420000) in 19.54 min


Parsing nodes: 100%|██████████| 10000/10000 [00:42<00:00, 236.22it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.18it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.26it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.16it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.12it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.21it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.22it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.20it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.12it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.09it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.16it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.09it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.16it/s]
Generating embeddings: 100%|██

✅ Finished chunk 42 (420000–430000) in 19.91 min


Parsing nodes: 100%|██████████| 10000/10000 [00:44<00:00, 225.35it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 44.63it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 44.89it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 44.97it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 44.95it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.02it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.02it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.00it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.13it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 44.93it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 44.95it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.03it/s]
Generating embeddings: 100%|██

✅ Finished chunk 43 (430000–440000) in 18.19 min


Parsing nodes: 100%|██████████| 10000/10000 [00:38<00:00, 258.72it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.24it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.27it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.32it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.20it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.33it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.24it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.30it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.03it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.30it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.26it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.23it/s]
Generating embeddings: 100%|██

✅ Finished chunk 44 (440000–450000) in 17.60 min


Parsing nodes: 100%|██████████| 10000/10000 [00:38<00:00, 257.99it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.20it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.28it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.38it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.32it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.20it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.31it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.24it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.29it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.29it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.22it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.29it/s]
Generating embeddings: 100%|██

✅ Finished chunk 45 (450000–460000) in 17.96 min


Parsing nodes: 100%|██████████| 10000/10000 [00:43<00:00, 230.67it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.09it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.34it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.35it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.39it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.29it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.23it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.29it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.24it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.29it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.20it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.21it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.26it/s]
Generating embeddings: 100%|██

✅ Finished chunk 46 (460000–470000) in 20.28 min


Parsing nodes: 100%|██████████| 10000/10000 [00:39<00:00, 252.81it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.04it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.27it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.22it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.17it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.18it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.25it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.03it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.12it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.18it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.11it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.08it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.15it/s]
Generating embeddings: 100%|██

✅ Finished chunk 47 (470000–480000) in 18.80 min


Parsing nodes: 100%|██████████| 10000/10000 [00:39<00:00, 251.59it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.17it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.30it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.11it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.06it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.16it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.17it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.13it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.20it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.16it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.21it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.17it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.18it/s]
Generating embeddings: 100%|██

✅ Finished chunk 48 (480000–490000) in 18.79 min


Parsing nodes: 100%|██████████| 10000/10000 [00:37<00:00, 264.98it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.08it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.25it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.30it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.11it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.15it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 44.61it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.06it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.24it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.02it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 44.96it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.12it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:45<00:00, 45.05it/s]
Generating embeddings: 100%|██

✅ Finished chunk 49 (490000–500000) in 17.85 min


Parsing nodes: 100%|██████████| 10000/10000 [00:46<00:00, 216.88it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:53<00:00, 38.47it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:56<00:00, 36.21it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:58<00:00, 35.21it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:54<00:00, 37.64it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:59<00:00, 34.29it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:00<00:00, 33.73it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:00<00:00, 34.07it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:00<00:00, 33.74it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:00<00:00, 34.09it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:51<00:00, 39.41it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:50<00:00, 40.19it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:50<00:00, 40.16it/s]
Generating embeddings: 100%|██

✅ Finished chunk 50 (500000–510000) in 21.10 min


Parsing nodes: 100%|██████████| 10000/10000 [00:42<00:00, 233.60it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:48<00:00, 42.57it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:48<00:00, 42.23it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:49<00:00, 41.16it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:47<00:00, 42.76it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:49<00:00, 41.56it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:53<00:00, 38.53it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:58<00:00, 34.98it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:58<00:00, 35.28it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:52<00:00, 38.69it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:51<00:00, 40.06it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:52<00:00, 39.27it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:52<00:00, 38.71it/s]
Generating embeddings: 100%|██

✅ Finished chunk 51 (510000–520000) in 22.02 min


Parsing nodes: 100%|██████████| 10000/10000 [00:39<00:00, 254.03it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:55<00:00, 37.11it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:01<00:00, 33.10it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:29<00:00, 22.83it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:27<00:00, 23.30it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:20<00:00, 25.35it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:32<00:00, 22.21it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:36<00:00, 21.14it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:24<00:00, 24.14it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:29<00:00, 22.92it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:29<00:00, 22.95it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:31<00:00, 22.32it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:24<00:00, 24.34it/s]
Generating embeddings: 100%|██

✅ Finished chunk 52 (520000–530000) in 25.47 min


Parsing nodes: 100%|██████████| 10000/10000 [00:54<00:00, 182.65it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:04<00:00, 31.75it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:05<00:00, 31.45it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:07<00:00, 30.45it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:07<00:00, 30.57it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:03<00:00, 32.15it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:07<00:00, 30.48it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:05<00:00, 31.20it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:04<00:00, 31.57it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:09<00:00, 29.34it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:06<00:00, 30.63it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:06<00:00, 30.60it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:08<00:00, 29.73it/s]
Generating embeddings: 100%|██

✅ Finished chunk 53 (530000–540000) in 27.90 min


Parsing nodes: 100%|██████████| 10000/10000 [00:54<00:00, 183.06it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:07<00:00, 30.38it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:05<00:00, 31.23it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:08<00:00, 29.83it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:07<00:00, 30.51it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:06<00:00, 30.78it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:05<00:00, 31.38it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:05<00:00, 31.20it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:05<00:00, 31.17it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:08<00:00, 29.83it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:06<00:00, 30.81it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:04<00:00, 31.98it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:06<00:00, 30.81it/s]
Generating embeddings: 100%|██

✅ Finished chunk 54 (540000–550000) in 28.41 min


Parsing nodes: 100%|██████████| 10000/10000 [00:52<00:00, 189.40it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:07<00:00, 30.13it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:08<00:00, 29.91it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:07<00:00, 30.24it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:06<00:00, 30.59it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:06<00:00, 30.80it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:08<00:00, 30.04it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:06<00:00, 30.65it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:06<00:00, 30.99it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:07<00:00, 30.42it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:04<00:00, 31.91it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:05<00:00, 31.06it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:03<00:00, 32.35it/s]
Generating embeddings: 100%|██

✅ Finished chunk 55 (550000–560000) in 25.19 min


Parsing nodes: 100%|██████████| 10000/10000 [00:56<00:00, 178.46it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:59<00:00, 34.33it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:06<00:00, 30.74it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:07<00:00, 30.31it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:07<00:00, 30.28it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:07<00:00, 30.42it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:04<00:00, 31.59it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:06<00:00, 30.70it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:05<00:00, 31.12it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:00<00:00, 33.68it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:52<00:00, 39.12it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:51<00:00, 39.95it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:08<00:00, 30.06it/s]
Generating embeddings: 100%|██

✅ Finished chunk 56 (560000–570000) in 25.48 min


Parsing nodes: 100%|██████████| 10000/10000 [00:49<00:00, 200.33it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:08<00:00, 29.69it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:09<00:00, 29.48it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:06<00:00, 30.79it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:05<00:00, 31.15it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:06<00:00, 30.93it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:08<00:00, 29.71it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:07<00:00, 30.29it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:05<00:00, 31.14it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:07<00:00, 30.45it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:04<00:00, 31.84it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:03<00:00, 32.29it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:05<00:00, 31.17it/s]
Generating embeddings: 100%|██

✅ Finished chunk 57 (570000–580000) in 23.38 min


Parsing nodes: 100%|██████████| 10000/10000 [00:49<00:00, 203.23it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:59<00:00, 34.45it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:00<00:00, 33.84it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:00<00:00, 33.83it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:00<00:00, 33.66it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:59<00:00, 34.59it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:59<00:00, 34.65it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:58<00:00, 34.74it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:02<00:00, 32.77it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:02<00:00, 32.60it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:53<00:00, 37.94it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:51<00:00, 39.64it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:58<00:00, 34.73it/s]
Generating embeddings: 100%|██

✅ Finished chunk 58 (580000–590000) in 22.84 min


Parsing nodes: 100%|██████████| 4603/4603 [00:17<00:00, 261.34it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:10<00:00, 28.90it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:07<00:00, 30.26it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:08<00:00, 29.74it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:08<00:00, 29.93it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:08<00:00, 29.92it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:07<00:00, 30.36it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:06<00:00, 30.94it/s]
Generating embeddings: 100%|██████████| 2048/2048 [01:08<00:00, 30.00it/s]
Generating embeddings: 100%|██████████| 85/85 [00:03<00:00, 24.50it/s]
Embedding chunks: 100%|██████████| 60/60 [13:52:02<00:00, 832.04s/it] 

✅ Finished chunk 59 (590000–594603) in 9.52 min



