In [1]:
from llama_index import SimpleDirectoryReader
from llama_index.node_parser import SimpleNodeParser
import weaviate

In [2]:
# connect to your weaviate instance

from weaviate.embedded import EmbeddedOptions

# Build the embedded-mode options first, then hand them to the client.
# Creating the client starts a local embedded Weaviate process (see the startup log in the cell output).
embedded_opts = EmbeddedOptions()
client = weaviate.Client(embedded_options=embedded_opts)


Started /Users/vinayak/.cache/weaviate-embedded: process ID 9835


{"action":"startup","default_vectorizer_module":"none","level":"info","msg":"the default vectorizer modules is set to \"none\", as a result all new schema classes without an explicit vectorizer setting, will use this vectorizer","time":"2023-07-19T17:27:49-07:00"}
{"action":"startup","auto_schema_enabled":true,"level":"info","msg":"auto schema enabled setting is set to \"true\"","time":"2023-07-19T17:27:49-07:00"}
{"action":"hnsw_vector_cache_prefill","count":3000,"index_id":"kaiser_vk_data_XZ11Gl2OGZs4","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2023-07-19T17:27:49-07:00","took":405765}
{"action":"hnsw_vector_cache_prefill","count":3000,"index_id":"kaiser_vk_hospital_encounters_data_dDYcXtmUpR47","level":"info","limit":1000000000000,"msg":"prefilled vector cache","time":"2023-07-19T17:27:49-07:00","took":1794664}
{"action":"hnsw_vector_cache_prefill","count":3000,"index_id":"normal_path_data_44x59J1As7ay","level":"info","limit":1000000000000,"msg":"pre

In [3]:
from llama_index import ServiceContext

## Starting with a small file: a 9-page EMR dump

In [4]:
# Load the contents of the pathology-report directory.
# NOTE(review): absolute, machine-specific path — will not resolve on another machine.
pathology_reader = SimpleDirectoryReader('/Users/vinayak/projects/kaiser/data/normal_pathology_report/')
path_data = pathology_reader.load_data()

In [5]:
# Split the loaded pathology documents into nodes; these nodes are what
# gets passed to VectorStoreIndex further down.
parser = SimpleNodeParser()
nodes = parser.get_nodes_from_documents(path_data)

In [6]:
from llama_index.vector_stores import WeaviateVectorStore
from llama_index import VectorStoreIndex, StorageContext
from llama_index.storage.storage_context import StorageContext


# Weaviate-backed vector store, using the already-connected client and the
# "Normal_path_data" index; text_key presumably names the property holding node text.
vector_store = WeaviateVectorStore(
    weaviate_client=client,
    index_name="Normal_path_data",
    text_key="content",
)

# Storage context that points the index at the Weaviate vector store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the vector index over the parsed nodes.
index = VectorStoreIndex(nodes, storage_context=storage_context)


In [8]:
# Ask the index's default query engine for a tabular summary of the lab tests.
query_engine = index.as_query_engine()
table_prompt = "Give me a table with the test name, the result, unit and normal range."
response = query_engine.query(table_prompt)
print(response)


Test Name | Result | Unit | Normal Range
Dengue |  |  | 
Malaria |  |  | 
Typhoid |  |  | 
Chikungunya |  |  | 
Vitamin D |  |  | 
Vitamin B12 |  |  | 
Vitamin B9 |  |  |


In [None]:
from llama_index.llms import Anthropic

# Non-streaming completion: send a single prompt to the Anthropic LLM wrapper
# and print the full reply once it is available.
# NOTE(review): Anthropic() is constructed with no arguments, so it presumably
# reads the API key from the environment — confirm the key is set before running.
resp = Anthropic().complete("Briefly describe what a pathology lab report contains.")
print(resp)

In [None]:
import os

# Show the names (keys only, not values) of the environment variables visible to this kernel.
env_var_names = os.environ.keys()
print(env_var_names)

In [None]:
# Open question from this experiment: the answer does not contain all the values.
# NOTE(review): likely because the default query engine only retrieves the top 2
# most similar chunks (the later section raises similarity_top_k) — confirm.
hematology_prompt = (
    "This is a pathology lab blood report. Its has various sections about different tests conducted. "
    "For the section on hematology, please give me the test, result, unit and reference interval in a table."
)
query_engine = index.as_query_engine()
response = query_engine.query(hematology_prompt)
print(response)

In [None]:
# Sanity check: ask the default query engine what kind of document was indexed.
query_engine = index.as_query_engine()
report_kind_prompt = "What kind of report is this?"
response = query_engine.query(report_kind_prompt)
print(response)

## Now on a slightly larger file: hospital encounters and tests

In [None]:
# Load the contents of the hospital-encounters directory.
# NOTE(review): absolute, machine-specific path — will not resolve on another machine.
encounters_reader = SimpleDirectoryReader('/Users/vinayak/projects/kaiser/data/hospital_encounters_full/')
kaiser_vk_hospital_encounters_data = encounters_reader.load_data()

In [None]:
# Split the hospital-encounters documents into nodes. This rebinds `parser`
# and `nodes`, replacing the values produced for the pathology report.
parser = SimpleNodeParser()
nodes = parser.get_nodes_from_documents(kaiser_vk_hospital_encounters_data)

In [None]:
from llama_index.vector_stores import WeaviateVectorStore
from llama_index import VectorStoreIndex, StorageContext
from llama_index.storage.storage_context import StorageContext


# Weaviate-backed vector store for the hospital-encounters data, using the
# shared client and the "Kaiser_vk_hospital_encounters_data" index.
vector_store = WeaviateVectorStore(
    weaviate_client=client,
    index_name="Kaiser_vk_hospital_encounters_data",
    text_key="content",
)

# Storage context that points the index at the Weaviate vector store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the vector index over the encounter nodes (rebinds `index`).
index = VectorStoreIndex(nodes, storage_context=storage_context)


In [None]:
# Query the encounters index through the default query engine.
query_engine = index.as_query_engine()
encounters_prompt = "Give me a table of visit_diagnosis, test performed and date"
response = query_engine.query(encounters_prompt)
print(response)

## Making it more complex, since we need the top 'n' results, not only the top 2

In [None]:
from llama_index import (
    VectorStoreIndex,
    get_response_synthesizer,
)
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine


# Retriever: fetch up to 100 of the most similar nodes per query.
retriever = VectorIndexRetriever(index=index, similarity_top_k=100)

# Synthesizer: build the answer using the "tree_summarize" response mode.
response_synthesizer = get_response_synthesizer(response_mode="tree_summarize")

# Combine retriever and synthesizer into a single query engine.
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)

# Run the query and print the answer.
encounters_prompt = "Give me a table of visit_diagnosis, test performed and date"
response = query_engine.query(encounters_prompt)
print(response)

In [None]:
from llama_index import (
    VectorStoreIndex,
    get_response_synthesizer,
)
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine


# Retriever: fetch up to 20 of the most similar nodes per query.
retriever = VectorIndexRetriever(index=index, similarity_top_k=20)

# Synthesizer: build the answer using the "tree_summarize" response mode.
response_synthesizer = get_response_synthesizer(response_mode="tree_summarize")

# Combine retriever and synthesizer into a single query engine.
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)

# Run the query and print the answer.
all_tests_prompt = "Give me a table with all the tests, the result, unit and normal range."
response = query_engine.query(all_tests_prompt)
print(response)

In [None]:
from llama_index import (
    VectorStoreIndex,
    get_response_synthesizer,
)
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine


# Same pipeline as the top-20 run, but retrieving up to 100 nodes per query.
retriever = VectorIndexRetriever(index=index, similarity_top_k=100)

# Synthesizer: build the answer using the "tree_summarize" response mode.
response_synthesizer = get_response_synthesizer(response_mode="tree_summarize")

# Combine retriever and synthesizer into a single query engine.
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)

# Run the query and print the answer.
all_tests_prompt = "Give me a table with all the tests, the result, unit and normal range."
response = query_engine.query(all_tests_prompt)
print(response)

In [None]:
# Reuses whichever `query_engine` was built most recently; no new engine is created here.
findings_prompt = "Please summarize any findings mentioned in the document?"
response = query_engine.query(findings_prompt)
print(response)

In [None]:
# Switch back to the index's default query engine and ask what kind of document this is.
query_engine = index.as_query_engine()
report_kind_prompt = "What kind of report is this?"
response = query_engine.query(report_kind_prompt)
print(response)

In [None]:
# Load the contents of the scanned-image directory.
# NOTE(review): absolute, machine-specific path — will not resolve on another machine.
scanned_reader = SimpleDirectoryReader('/Users/vinayak/projects/kaiser/data/scanned_image/')
scanned_image_data = scanned_reader.load_data()

# Split the loaded documents into nodes (rebinds `parser` and `nodes`).
parser = SimpleNodeParser()
nodes = parser.get_nodes_from_documents(scanned_image_data)

from llama_index.vector_stores import WeaviateVectorStore
from llama_index import VectorStoreIndex, StorageContext
from llama_index.storage.storage_context import StorageContext


# Weaviate-backed vector store for the scanned-image data, using the shared
# client and the "Scaned_image_data" index (name kept exactly as spelled).
vector_store = WeaviateVectorStore(
    weaviate_client=client,
    index_name="Scaned_image_data",
    text_key="content",
)

# Storage context that points the index at the Weaviate vector store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the vector index over the scanned-image nodes (rebinds `index`).
index = VectorStoreIndex(nodes, storage_context=storage_context)



In [None]:
from llama_index import (
    VectorStoreIndex,
    get_response_synthesizer,
)
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine


In [None]:
import llama_index

In [None]:
# Leftover IPython introspection: the trailing `?` shows the help/docstring for
# ResponseMode.GENERATION. This is IPython syntax, not plain Python, and it
# produces no value used by later cells.
llama_index.response_synthesizers.type.ResponseMode.GENERATION?

In [None]:
# Build a retriever straight from the index with the 'default' retriever mode,
# wrap it in a query engine (no explicit response synthesizer), and run the query.
# The answer is stored in `response` and shown by the following cells.
retriever = index.as_retriever(retriever_mode='default')
query_engine = RetrieverQueryEngine(retriever)
all_tests_question = "Give me a table with all the tests, the result, unit and normal range?"
response = query_engine.query(all_tests_question)

In [None]:
# Print the text form of the most recent query response.
print(response)

In [None]:
# Bare expression: the notebook displays the response object itself (its repr) rather than the printed text.
response

In [None]:
!pip install anthropic