In [1]:
! pip install -q "dlt[qdrant]" "qdrant-client[fastembed]"

In [20]:
import dlt
import requests # ¡No olvides importar requests!
from dlt.destinations import qdrant

# --- Data extraction function, wrapped with the dlt resource decorator ---
@dlt.resource  # the decorator goes here
def zoomcamp_data():
    """Download the workshop's documents.json and yield its documents one by one.

    The downloaded JSON is iterated as a list of course entries. Each entry
    provides a ``course`` name and a ``documents`` list; every document is
    tagged with the name of the course it belongs to before being yielded.

    Yields:
        dict: a single document with an added ``course`` key.

    Raises:
        requests.exceptions.HTTPError: if the server answers with a 4xx/5xx status.
        requests.exceptions.Timeout: if the server does not respond in time.
    """
    docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
    print(f"Descargando documentos de: {docs_url}")  # progress message so the user knows what is happening

    # A timeout keeps the cell from hanging forever on a stalled connection.
    docs_response = requests.get(docs_url, timeout=60)
    # Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
    docs_response.raise_for_status()
    documents_raw = docs_response.json()

    for course in documents_raw:
        course_name = course['course']

        for doc in course['documents']:
            doc['course'] = course_name
            yield doc  # yield so that dlt processes each document individually

# --- Qdrant destination, pointed at the "db.qdrant" path ---
qdrant_destination = qdrant(qd_path="db.qdrant")

# --- Pipeline definition ---
pipeline = dlt.pipeline(
    destination=qdrant_destination,
    pipeline_name="zoomcamp_pipeline",
    dataset_name="zoomcamp_tagged_data",
)

# Run the pipeline on the decorated resource (note the call: zoomcamp_data()),
# then print the trace of the run that just finished.
load_info = pipeline.run(zoomcamp_data())
print(pipeline.last_trace)

Descargando documentos de: https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json
Run started at 2025-07-06 18:20:36.092319+00:00 and COMPLETED in 2 minutes and 8.85 seconds with 4 steps.
Step extract COMPLETED in 28.09 seconds.

Load package 1751826040.2197907 is EXTRACTED and NOT YET LOADED to the destination and contains no failed jobs

Step normalize COMPLETED in 0.11 seconds.
Normalized data for the following tables:
- zoomcamp_data: 948 row(s)

Load package 1751826040.2197907 is NORMALIZED and NOT YET LOADED to the destination and contains no failed jobs

Step load COMPLETED in 1 minute and 36.54 seconds.
Pipeline zoomcamp_pipeline load step completed in 1 minute and 36.52 seconds
1 load package(s) were loaded to destination qdrant and into dataset zoomcamp_tagged_data
The qdrant destination used /media/villarreal-fx/Proyectos/llm-zoomcamp/02.1-dtl/db.qdrant location to store data
Load package 1751826040.2197907 is LOADED and contains no failed jobs


In [12]:
# Display the version of dlt that is loaded in this kernel.
dlt.__version__

'1.12.3'