In [5]:
from llama_index import download_loader, VectorStoreIndex, ServiceContext, StorageContext, load_index_from_storage, SimpleDirectoryReader
from llama_index.node_parser import SimpleNodeParser
import os

from collections import defaultdict
from realtyagent.secret import API_KEY

import openai
openai.api_key = API_KEY
os.environ['OPENAI_API_KEY'] = API_KEY


In [15]:
years = [2023, 2022, 2021]

index_docs = defaultdict()
all_docs = []
parser = SimpleNodeParser()
dirs = os.listdir('./data/')

for val in dirs:
    docs = SimpleDirectoryReader(f'./data/{val}').load_data()
    index_docs[val] = parser.get_nodes_from_documents(docs)
    all_docs.extend(docs)

index_docs.keys()

<function defaultdict.keys>

In [16]:
service_context = ServiceContext.from_defaults()
index_set = defaultdict()
for val in index_docs.keys():
    storage_context = StorageContext.from_defaults()
    cur_index = VectorStoreIndex.from_documents(index_docs[val], storage_context=storage_context, service_context=service_context)
    index_set[val] = cur_index
    storage_context.persist(persist_dir=f'./storage/{val}')
    

In [19]:
index_docs.keys()

dict_keys(['2022', '2023', '2021', 'domain_knowledge', 'Listing'])

### Composing a Graph to Synthesize Answers Across

In [37]:
from llama_index import ListIndex, LLMPredictor, ServiceContext, load_graph_from_storage
from langchain import OpenAI
from llama_index.indices.composability import ComposableGraph

index_summaries = [f"San Jose houing report for {year} fiscal year" for year in years]
index_summaries.append('This is what we know about some listing preferences')
index_summaries.append('This is a current home for sales')

llm_predictor = LLMPredictor(llm=OpenAI(temperature=0.8))
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)
storage_context = StorageContext.from_defaults()
children_indices = [index_set[val] for val in index_set.keys()]
print(index_set.keys())

graph = ComposableGraph.from_indices(
    ListIndex,
    children_indices,
    index_summaries=index_summaries,
    service_context=service_context,
    storage_context=storage_context,
)

root_id = graph.root_id
storage_context.persist(persist_dir='./storage/root')

dict_keys(['2022', '2023', '2021', 'domain_knowledge', 'Listing'])


In [38]:
root_id

'942915ab-5f32-4d90-a9be-75aae57f605f'

### Setting up the Tools + Langchain Chatbot Agent

In [24]:
from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain.agents import initialize_agent

from llama_index.langchain_helpers.agents import LlamaToolkit, create_llama_chat_agent, IndexToolConfig
# define a decompose transform
from llama_index.indices.query.query_transform.base import DecomposeQueryTransform
decompose_transform = DecomposeQueryTransform(
    llm_predictor, verbose=True
)

# define custom retrievers
from llama_index.query_engine.transform_query_engine import TransformQueryEngine

custom_query_engines = {}
for index in index_set.values():
    query_engine = index.as_query_engine()
    query_engine = TransformQueryEngine(
        query_engine,
        query_transform=decompose_transform,
        transform_extra_info={'index_summary': index.index_struct.summary},
    )
    custom_query_engines[index.index_id] = query_engine
custom_query_engines[graph.root_id] = graph.root_index.as_query_engine(
    response_mode='tree_summarize',
    verbose=True,
)

# construct query engine
graph_query_engine = graph.as_query_engine(custom_query_engines=custom_query_engines)

# tool config
graph_config = IndexToolConfig(
    query_engine=graph_query_engine,
    name=f"Graph Index",
    description="useful for when you want to answer queries that require analyzing San Jose housing.",
    tool_kwargs={"return_direct": True}
)

In [30]:
# define toolkit
index_configs = []
for y in range(2021, 2024):
    query_engine = index_set[str(y)].as_query_engine(
        similarity_top_k=3,
    )
    tool_config = IndexToolConfig(
        query_engine=query_engine, 
        name=f"Vector Index {y}",
        description=f"useful for when you want to answer queries about the {y} San Jose housing",
        tool_kwargs={"return_direct": True}
    )
    index_configs.append(tool_config)

cons = 'domain_knowledge'
query_engine = index_set[cons].as_query_engine()
tool_config = IndexToolConfig(
    query_engine=query_engine, 
    name=f"Vector Index {cons}",
    description=f"useful for when you want to answer queries about the specific listing agent preferences San Jose housing",
    tool_kwargs={"return_direct": True}
)
index_configs.append(tool_config)

cons = 'Listing'
query_engine = index_set[cons].as_query_engine()
tool_config = IndexToolConfig(
    query_engine=query_engine, 
    name=f"Vector Index {cons}",
    description=f"useful for when you want to answer queries about which homes in San Jose are on sale or on listing or available for buying",
    tool_kwargs={"return_direct": True}
)
index_configs.append(tool_config)

toolkit = LlamaToolkit(
    index_configs=index_configs + [graph_config],
)

In [31]:
memory = ConversationBufferMemory(memory_key="chat_history")
llm=OpenAI(temperature=0.5)
agent_chain = create_llama_chat_agent(
    toolkit,
    llm,
    memory=memory,
    verbose=True
)

In [32]:
agent_chain.run(input="My client has single income, which homes he could buy?")



[1m> Entering new  chain...[0m
[32;1m[1;3m
Thought: Do I need to use a tool? Yes
Action: Vector Index 2021
Action Input: Single income[0m
Observation: [36;1m[1;3mhouseholds need what percentage of their income to afford the average rent for a two-bedroom
apartment in San José?

30%[0m
[32;1m[1;3m[0m

[1m> Finished chain.[0m


'households need what percentage of their income to afford the average rent for a two-bedroom\napartment in San José?\n\n30%'

In [35]:
agent_chain.run('Which homes are on sale that a single income family can afford')



[1m> Entering new  chain...[0m
[32;1m[1;3mAI: Based on a single income, the homes on sale that may be affordable are: 
1243 Elm St, San Jose, CA 95126 (Single Family Home, 3 bedrooms, 2.0 bathrooms, 1400 sq ft, $980,000, 4 stories, 7 years old, $85,000 in upgrades)
4689 Willowbrook Dr, San Jose, CA 95129 (Townhouse, 2 bedrooms, 2.5 bathrooms, 1200 sq ft, $1,350,000, 3 stories, 9 years old, $120,000 in upgrades)
8320 Amaryllis Way, San Jose, CA 95135 (Single Family Home, 4 bedrooms, 3.0 bathrooms, 2400 sq ft, $1,250,000, 2 stories, 8 years old, $110,000 in upgrades)
37 S 11th St, San Jose, CA 95112 (Condo, 1 bedroom, 1.0 bathroom, 800 sq ft, $450,000, 6 stories, 6 years old, $75,000 in upgrades)
1550 Technology Dr #2093, San Jose, CA 95110 (Condo, 2 bedrooms, 2.0 bathrooms, 1130 sq ft, $700,000[0m

[1m> Finished chain.[0m


'Based on a single income, the homes on sale that may be affordable are: \n1243 Elm St, San Jose, CA 95126 (Single Family Home, 3 bedrooms, 2.0 bathrooms, 1400 sq ft, $980,000, 4 stories, 7 years old, $85,000 in upgrades)\n4689 Willowbrook Dr, San Jose, CA 95129 (Townhouse, 2 bedrooms, 2.5 bathrooms, 1200 sq ft, $1,350,000, 3 stories, 9 years old, $120,000 in upgrades)\n8320 Amaryllis Way, San Jose, CA 95135 (Single Family Home, 4 bedrooms, 3.0 bathrooms, 2400 sq ft, $1,250,000, 2 stories, 8 years old, $110,000 in upgrades)\n37 S 11th St, San Jose, CA 95112 (Condo, 1 bedroom, 1.0 bathroom, 800 sq ft, $450,000, 6 stories, 6 years old, $75,000 in upgrades)\n1550 Technology Dr #2093, San Jose, CA 95110 (Condo, 2 bedrooms, 2.0 bathrooms, 1130 sq ft, $700,000'

In [36]:
agent_chain.run('Which homes are on sale that a dual income family can afford')



[1m> Entering new  chain...[0m
[32;1m[1;3m
AI: Based on a dual income, the homes on sale that may be affordable are: 
1243 Elm St, San Jose, CA 95126 (Single Family Home, 3 bedrooms, 2.0 bathrooms, 1400 sq ft, $980,000, 4 stories, 7 years old, $85,000 in upgrades)
4689 Willowbrook Dr, San Jose, CA 95129 (Townhouse, 2 bedrooms, 2.5 bathrooms, 1200 sq ft, $1,350,000, 3 stories, 9 years old, $120,000 in upgrades)
8320 Amaryllis Way, San Jose, CA 95135 (Single Family Home, 4 bedrooms, 3.0 bathrooms, 2400 sq ft, $1,250,000, 2 stories, 8 years old, $110,000 in upgrades)
37 S 11th St, San Jose, CA 95112 (Condo, 1 bedroom, 1.0 bathroom, 800 sq ft, $450,000, 6 stories, 6 years old, $75,000 in upgrades)
1550 Technology Dr #2093, San Jose, CA 95110 (Condo, 2 bedrooms, 2.0 bathrooms, 1130 sq ft, $700,[0m

[1m> Finished chain.[0m


'Based on a dual income, the homes on sale that may be affordable are: \n1243 Elm St, San Jose, CA 95126 (Single Family Home, 3 bedrooms, 2.0 bathrooms, 1400 sq ft, $980,000, 4 stories, 7 years old, $85,000 in upgrades)\n4689 Willowbrook Dr, San Jose, CA 95129 (Townhouse, 2 bedrooms, 2.5 bathrooms, 1200 sq ft, $1,350,000, 3 stories, 9 years old, $120,000 in upgrades)\n8320 Amaryllis Way, San Jose, CA 95135 (Single Family Home, 4 bedrooms, 3.0 bathrooms, 2400 sq ft, $1,250,000, 2 stories, 8 years old, $110,000 in upgrades)\n37 S 11th St, San Jose, CA 95112 (Condo, 1 bedroom, 1.0 bathroom, 800 sq ft, $450,000, 6 stories, 6 years old, $75,000 in upgrades)\n1550 Technology Dr #2093, San Jose, CA 95110 (Condo, 2 bedrooms, 2.0 bathrooms, 1130 sq ft, $700,'

In [39]:
sorted(index_set.keys())

['2021', '2022', '2023', 'Listing', 'domain_knowledge']