In [None]:
from llama_index import SimpleDirectoryReader
from llama_index.node_parser import SimpleNodeParser
import weaviate

In [None]:
# Connect to Weaviate and load the first (small) sample dataset.
import os
from pathlib import Path

from weaviate.embedded import EmbeddedOptions

# Client configured with default embedded options.
client = weaviate.Client(embedded_options=EmbeddedOptions())

# Root directory of the data files. Set the KAISER_DATA_DIR environment
# variable to run this notebook on another machine; the default is the path
# that was originally hard-coded here, so existing runs behave the same.
KAISER_DATA_DIR = Path(
    os.environ.get("KAISER_DATA_DIR", "/Users/vinayak/projects/kaiser/data")
)

## Starting with a small file 9 pages, EMR dump
kaiser_vk_data = SimpleDirectoryReader(str(KAISER_DATA_DIR / "short_sample")).load_data()

In [None]:
# Split the loaded documents (kaiser_vk_data) into nodes using a
# SimpleNodeParser constructed with its default settings.
parser = SimpleNodeParser()
nodes = parser.get_nodes_from_documents(kaiser_vk_data)

In [None]:
from llama_index import VectorStoreIndex
from llama_index.storage.storage_context import StorageContext
from llama_index.vector_stores import WeaviateVectorStore

# Vector store on the existing Weaviate client, using the index name
# "Kaiser_vk_data" and "content" as the text key.
vector_store = WeaviateVectorStore(
    weaviate_client=client,
    index_name="Kaiser_vk_data",
    text_key="content",
)

# Storage context that points at the Weaviate-backed vector store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index over the parsed nodes.
# NOTE(review): presumably this writes the nodes into Weaviate each time the
# cell runs — confirm whether re-running creates duplicate entries.
index = VectorStoreIndex(nodes, storage_context=storage_context)


In [None]:
# Ask a question through the index's default query engine and print the answer.
query_engine = index.as_query_engine()
response = query_engine.query(
    "Give me the encounter provider, reasons for visit, date, diagnosis as a table? Compute a third column ICD-10 given the diagnosis."
)
print(response)

## Now on a slightly larger file, hospital encounters and tests

In [None]:
# Load the larger hospital-encounters dataset with the directory reader.
import os
from pathlib import Path

# The data root can be overridden with the KAISER_DATA_DIR environment
# variable; the default is the path that was originally hard-coded here.
hospital_encounters_dir = (
    Path(os.environ.get("KAISER_DATA_DIR", "/Users/vinayak/projects/kaiser/data"))
    / "hospital_encounters_full"
)
kaiser_vk_hospital_encounters_data = SimpleDirectoryReader(str(hospital_encounters_dir)).load_data()

In [None]:
# Split the hospital-encounters documents into nodes. This rebinds `parser`
# and `nodes`, replacing the values produced for the smaller sample.
parser = SimpleNodeParser()
nodes = parser.get_nodes_from_documents(kaiser_vk_hospital_encounters_data)

In [None]:
from llama_index import VectorStoreIndex
from llama_index.storage.storage_context import StorageContext
from llama_index.vector_stores import WeaviateVectorStore

# Separate vector store for the hospital-encounters data: same client,
# its own index name, "content" as the text key.
vector_store = WeaviateVectorStore(
    weaviate_client=client,
    index_name="Kaiser_vk_hospital_encounters_data",
    text_key="content",
)

# Storage context wrapping the new vector store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Index the hospital-encounters nodes; `index` now refers to this dataset.
index = VectorStoreIndex(nodes, storage_context=storage_context)


In [None]:
# Query the hospital-encounters index with its default query engine.
query_engine = index.as_query_engine()
response = query_engine.query(
    "Give me a table of visit_diagnosis, test performed and date"
)
print(response)

## Making it more complex, since we need the top 'n' results, not only the top 2

In [None]:
from llama_index import VectorStoreIndex, get_response_synthesizer
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.retrievers import VectorIndexRetriever

# Response synthesizer using the "tree_summarize" response mode.
response_synthesizer = get_response_synthesizer(response_mode="tree_summarize")

# Retriever over the current index, asking for the 20 most similar nodes.
retriever = VectorIndexRetriever(index=index, similarity_top_k=20)

# Combine the retriever and the synthesizer into one query engine.
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)

# Run the question and print the answer.
response = query_engine.query(
    "Give me a table of visit_diagnosis, test performed and date"
)
print(response)

In [None]:
# Ask a second question with the same retrieval setup.
#
# The previous cell already assembled `query_engine` (a RetrieverQueryEngine
# with similarity_top_k=20 and the "tree_summarize" response mode) on this
# index. This cell used to re-import and rebuild an identical engine; it now
# reuses the existing one, so only the question differs. Run the previous
# cell first so that `query_engine` refers to that engine.
response = query_engine.query(
    "For the time I complained of chest pain or acid reflux, pleasse give me a two column table of diagnosis and date"
)
print(response)