In [None]:
! pip install llama_index
! pip install llama-parse
! pip install llmsherpa
! pip install llama-index-readers-pdf-marker
! pip install llama-index-readers-llama-parse
! pip install llama-index-readers-smart-pdf-loader
! pip install llama-index-indices-managed-postgresml
! pip install llama-index-storage-index-store-postgres
! pip install llama-index-storage-index-store-mongodb
! pip install llama-index-storage-index-store-postgres
! pip install llama-index-storage-docstore-postgres
! pip install llama-index-storage-docstore-mongodb
! pip install llama-index-vector-stores-postgres
! pip install llama-index-vector-stores-pinecone
! pip install llama-index-vector-stores-chroma
! pip install llama-index-llms-openai
! pip install llama-index-llms-ollama
! pip install llama-index-extractors-entity
! pip install llama-index-extractors-marvin
! pip install unstructured
! pip install lxml

In [None]:
from env import AppConfig
from llama_index.core import Settings
# Build the application configuration (from the project-local `env` module)
# and reuse its logger for the debug output emitted by the cells below.
config = AppConfig()
logger = config.logger
# Optional LLM smoke test, intentionally left disabled:
# resp = Settings.llm.complete("hello")
# config.logger.debug(resp)
# Natural-language question reused by the retrieval cells further down.
# The institution name is spelled correctly so the query that gets embedded
# is not skewed by misspellings.
question = 'who comes from Lawrence Berkeley National Laboratory in this book'

```01.01. load vector index from database ```

In [None]:
from llama_index.vector_stores.postgres import PGVectorStore
from llama_index.core import Settings, VectorStoreIndex
from sqlalchemy import make_url
# Parse the Postgres connection URI from the app config so that its parts can
# be passed to PGVectorStore one by one.
url = make_url(config.pg_uri)

# HNSW index settings forwarded to the vector store as `hnsw_kwargs`.
hnsw_settings = {
    "hnsw_m": 16,
    "hnsw_ef_construction": 64,
    "hnsw_ef_search": 40,
    "hnsw_dist_method": "vector_cosine_ops",
}

# NOTE(review): embed_dim has to equal the output size of the embedding model
# in use. 4096 does not look like an OpenAI embedding size (those are
# 1536 / 3072) -- confirm which model Settings.embed_model actually is.
# NOTE(review): pgvector documents a 2,000-dimension limit for HNSW-indexed
# `vector` columns; verify that an HNSW index can be built at 4096 dimensions.
pg_vec_store = PGVectorStore.from_params(
    host=url.host,
    port=url.port,
    database=url.database,
    user=url.username,
    password=url.password,
    table_name="vec_store",
    embed_dim=4096,
    hnsw_kwargs=hnsw_settings,
)

# Wrap the existing vector store in an index object, using the globally
# configured embedding model.
idx = VectorStoreIndex.from_vector_store(
    vector_store=pg_vec_store,
    embed_model=Settings.embed_model,
)

```01.02 define metadata filters ```

In [None]:
from llama_index.core.vector_stores.types import (FilterOperator, 
                                                  FilterCondition, 
                                                  MetadataFilter, 
                                                  MetadataFilters) 

# Filter on the "department" metadata key. No operator is given, so the
# library's default operator applies.
department_filter = MetadataFilter(key="department", value="Procurement")

# NOTE(review): an empty-string value combined with LTE looks like a
# placeholder -- confirm the intended classification threshold.
classification_filter = MetadataFilter(
    key="security_classification",
    value='',
    operator=FilterOperator.LTE,
)

# Both filters must hold (AND).
# NOTE(review): this object is not handed to the VectorIndexRetriever that is
# constructed later in this notebook.
filters = MetadataFilters(
    filters=[department_filter, classification_filter],
    condition=FilterCondition.AND,
)

```01.03. Define Selectors ```

In [None]:
from llama_index.core.selectors import (LLMMultiSelector, 
                                        PydanticMultiSelector,
                                        LLMSingleSelector)
# Candidate choices handed to the selector; each string describes when the
# corresponding option is the right one.
options = [
    "option 1: this is good for summarization questions",
    "option 2: this is useful for precise definitions",
    "option 3: this is useful for comparing concepts",
]

# Single-choice, LLM-backed selector built with its default settings.
selector = LLMSingleSelector.from_defaults()

# Ask the selector to choose among the options for a sample query and log
# only the type of the returned object.
definition_query = "What's the definition of space?"
selections = selector.select(options, query=definition_query)
logger.debug(type(selections))

```01.04.Retrievers```

In [None]:
from llama_index.core.retrievers import (AutoMergingRetriever, 
                                         BaseRetriever,
                                         BaseImageRetriever, 
                                         EmptyIndexRetriever, 
                                         KeywordTableSimpleRetriever,
                                         KGTableRetriever, 
                                         KnowledgeGraphRAGRetriever,
                                         LLMSynonymRetriever, ListIndexRetriever,
                                         RecursiveRetriever,
                                         RouterRetriever,
                                         TextToCypherRetriever,
                                         TransformRetriever,
                                         TreeRootRetriever,
                                         TreeSelectLeafRetriever,
                                         TreeSelectLeafEmbeddingRetriever,
                                         SummaryIndexEmbeddingRetriever, 
                                         SummaryIndexLLMRetriever,
                                         SummaryIndexRetriever,
                                         VectorIndexRetriever,
                                         VectorContextRetriever,
                                         VectorIndexAutoRetriever,
                                         )
from llama_index.core.vector_stores.types import VectorStoreQueryMode

# Retriever over the Postgres-backed vector index.
# `filters` expects a MetadataFilters object or None; None (rather than an
# empty list) is the type-correct way to request "no metadata filtering".
# NOTE(review): `alpha` is presumably only relevant to hybrid query modes and
# has no effect with VectorStoreQueryMode.DEFAULT -- confirm before relying on it.
retriever = VectorIndexRetriever(
    index=idx,
    vector_store_query_mode=VectorStoreQueryMode.DEFAULT,
    embed_model=Settings.embed_model,
    filters=None,
    callback_manager=None,
    alpha=0.9,
    verbose=True,
)

```01.05.Define Tools ```

In [None]:
from llama_index.core.tools import RetrieverTool
# Expose the vector retriever as a tool.
# NOTE(review): "...." is a placeholder description -- presumably the selector
# uses this text when routing, so replace it before adding more tools.
vector_tool = RetrieverTool.from_defaults(
    retriever=retriever,
    description="....",
)

# Router that lets the selector pick among the registered retriever tools
# (only one is registered here).
router_retriever = RouterRetriever(
    selector=selector,
    retriever_tools=[vector_tool],
    llm=Settings.llm,
)

# Run a sample retrieval through the router and log the result's type.
resp = router_retriever.retrieve("Xiangyang")
logger.debug(type(resp))

```01.06. DecomposeQueryTransform```

In [None]:
from llama_index.core.indices.query.query_transform.base import  DecomposeQueryTransform  
# Query transform driven by the globally configured LLM.
decompose = DecomposeQueryTransform(llm=Settings.llm)

# Run the transform on a two-part question and log the resulting query string.
# The question is spelled and phrased correctly so that neither the transform
# nor the downstream retrieval is skewed by typos.
query_bundle = decompose.run(
    "Who comes from Lawrence Berkeley National Laboratory and when was LBNL established"
)
logger.debug(f'bundle: {query_bundle.query_str}')

# Answer the transformed query with the index's default query engine.
query_engine = idx.as_query_engine()
resp = query_engine.query(query_bundle.query_str)
logger.debug(resp)


```01.07. OpenAIQuestionGenerator ```

```02.00 Postprocessors```
- Node Filtering Postprocessors
- Node Transforming Postprocessors
- Node Re-Ranking Postprocessors

```02.01. Postprocessors```

In [None]:
from llama_index.core.postprocessor import (AutoPrevNextNodePostprocessor, 
                                            EmbeddingRecencyPostprocessor,
                                            FixedRecencyPostprocessor,
                                            KeywordNodePostprocessor,
                                            MetadataReplacementPostProcessor,
                                            NERPIINodePostprocessor,
                                            PrevNextNodePostprocessor, 
                                            PIINodePostprocessor, 
                                            SimilarityPostprocessor,
                                            TimeWeightedPostprocessor,)
# Retrieve candidate nodes for the shared question.
original_nodes = retriever.retrieve(question)

# Post-process the candidates with a SimilarityPostprocessor configured with
# a 0.8 similarity cutoff, then log every node that remains.
similarity_cutoff = 0.8
pp = SimilarityPostprocessor(similarity_cutoff=similarity_cutoff)
remaining_nodes = pp.postprocess_nodes(original_nodes)
for node in remaining_nodes:
    logger.debug(node)