# Example Dataset

In [4]:
!mkdir data\paul_graham
!curl -o "data/paul_graham/paul_graham_essay.txt" "https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt"

A subdirectory or file data\paul_graham already exists.
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 75042  100 75042    0     0   389k      0 --:--:-- --:--:-- --:--:--  398k


In [5]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core.llama_dataset.generator import RagDatasetGenerator
from llama_index.llms.openai import OpenAI

# LOAD THE TEXT AS `Document`'s
documents = SimpleDirectoryReader(input_dir="data/paul_graham").load_data()


In [8]:
from llama_index.core.node_parser import SentenceSplitter

splitter = SentenceSplitter(chunk_size=250, chunk_overlap=20)

nodes = splitter.get_nodes_from_documents(documents)


# PG Vector Store

In [11]:
from sqlalchemy import make_url
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.postgres import PGVectorStore

PG_VECTOR_URI = "postgresql://6czv96dl1wgko7658yz5t10uqlm9qfk9:1x23vu2tbp89a1k4x5gbob06nfrzke44@junction.proxy.rlwy.net:55475/railway"

url = make_url(PG_VECTOR_URI)
vector_store = PGVectorStore.from_params(
    database=url.database,
    host=url.host,
    password=url.password,
    port=url.port,
    user=url.username,
    table_name="data_vecstore",
    embed_dim=1536,  # openai embedding dimension
    hnsw_kwargs={
        "hnsw_m": 16,
        "hnsw_ef_construction": 64,
        "hnsw_ef_search": 40,
        "hnsw_dist_method": "vector_cosine_ops",
    },
)

storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(
    nodes, storage_context=storage_context, show_progress=True
)

  from .autonotebook import tqdm as notebook_tqdm
Generating embeddings: 100%|██████████| 384/384 [00:03<00:00, 108.16it/s]


In [1]:
from sqlalchemy import make_url
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.postgres import PGVectorStore

PG_VECTOR_URI = "postgresql://6czv96dl1wgko7658yz5t10uqlm9qfk9:1x23vu2tbp89a1k4x5gbob06nfrzke44@junction.proxy.rlwy.net:55475/railway"

url = make_url(PG_VECTOR_URI)

In [5]:
url.password



'1x23vu2tbp89a1k4x5gbob06nfrzke44'

# PG Docstore


In [12]:
from llama_index.storage.docstore.postgres import PostgresDocumentStore 
from llama_index.core.node_parser import SentenceSplitter

PG_VECTOR_URI = "postgresql://6czv96dl1wgko7658yz5t10uqlm9qfk9:1x23vu2tbp89a1k4x5gbob06nfrzke44@junction.proxy.rlwy.net:55475/railway"

# create (or load) docstore and add nodes (data_docstore)
docstore = PostgresDocumentStore.from_uri(
    uri = PG_VECTOR_URI
)
docstore.add_documents(nodes)

# Load Back

In [8]:
from sqlalchemy import make_url
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.postgres import PGVectorStore


POSTGRES_URI = "postgresql://6czv96dl1wgko7658yz5t10uqlm9qfk9:1x23vu2tbp89a1k4x5gbob06nfrzke44@junction.proxy.rlwy.net:55475/railway"

url = make_url(POSTGRES_URI)
vector_store = PGVectorStore.from_params(
    database=url.database,
    host=url.host,
    password=url.password,
    port=url.port,
    user=url.username,
    table_name="data_vecstore",
    embed_dim=1536,  # openai embedding dimension
    hnsw_kwargs={
        "hnsw_m": 16,
        "hnsw_ef_construction": 64,
        "hnsw_ef_search": 40,
        "hnsw_dist_method": "vector_cosine_ops",
    },
)

vector_index = VectorStoreIndex.from_vector_store(vector_store)

In [12]:
from llama_index.core import VectorStoreIndex
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.embeddings.openai import OpenAIEmbedding

vector_index = VectorStoreIndex.from_vector_store(vector_store)
vector_retriever = VectorIndexRetriever(
    embed_model=OpenAIEmbedding(model="text-embedding-3-small"),
    index=vector_index,
    similarity_top_k=10,
)


In [13]:
vector_retriever.retrieve("What is the meaning of life?")

[NodeWithScore(node=TextNode(id_='50189757-db15-4f64-a444-1267bfabf52d', embedding=None, metadata={'file_path': 'c:\\Users\\l501l\\Desktop\\Projects\\Templates\\Railway-pgvectorscale\\data\\paul_graham\\paul_graham_essay.txt', 'file_name': 'paul_graham_essay.txt', 'file_type': 'text/plain', 'file_size': 75042, 'creation_date': '2024-12-20', 'last_modified_date': '2024-12-20'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='a6048d41-c628-4e8a-bb03-2b63b464f27b', node_type='4', metadata={'file_path': 'c:\\Users\\l501l\\Desktop\\Projects\\Templates\\Railway-pgvectorscale\\data\\paul_graham\\paul_graham_essay.txt', 'file_name': 'paul_graham_essay.txt', 'file_type': 'text/plain', 'file_size': 75042, 'creation_d