# LOAD

In [23]:
from dotenv import load_dotenv
load_dotenv()
import os
MONGODB_URL = os.getenv("MONGODB_URI")
MONGODB_DBNAME = os.getenv("MONGODB_DBNAME")

import pymongo
MONGODB_CLIENT = pymongo.MongoClient(MONGODB_URL)
from llama_index.core.ingestion import IngestionCache
from llama_index.storage.kvstore.mongodb import MongoDBKVStore as MongoDBCache
MONGODB_CACHE = IngestionCache(cache = MongoDBCache(mongo_client=MONGODB_CLIENT, db_name = MONGODB_DBNAME))
from llama_index.storage.docstore.mongodb import MongoDocumentStore
MONGODB_DOCSTORE = MongoDocumentStore.from_uri(uri=MONGODB_URL, db_name=MONGODB_DBNAME)



## embededing

In [26]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

EMBED_MODEL = 'BAAI/bge-small-en-v1.5'
EMBEDDINGS = HuggingFaceEmbedding(model_name = EMBED_MODEL)

## loading

In [13]:


GUIDE_PDF = './data/pdf/log_Meanings_Explanation.pdf'
LOGS_DIR = './data/log'

def ingest_logs():
    from llama_index.core.node_parser import SentenceSplitter
    splitter = SentenceSplitter(chunk_size=80, chunk_overlap= 20)
    from llama_index.core import SimpleDirectoryReader
    documents = SimpleDirectoryReader(LOGS_DIR, 
                                      filename_as_id=True).load_data()
    
    from llama_index.core.ingestion import IngestionPipeline, DocstoreStrategy
    from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
    pipeline = IngestionPipeline(
        transformations = [splitter, EMBEDDINGS],
        vector_store = MongoDBAtlasVectorSearch(
            mongo_client = MONGODB_CLIENT,
            db_name = MONGODB_DBNAME,
            collection_name = 'logs_collection',
            vector_index_name = 'logs_idx'),
        cache = MONGODB_CACHE,
        docstore = MONGODB_DOCSTORE,
        docstore_strategy = DocstoreStrategy.UPSERTS,
        )
    
    nodes = pipeline.run(documents = documents)
    
    

def ingest_pdf():
    from llama_index.readers.file import PDFReader
    documents = PDFReader().load_data(file=GUIDE_PDF)
    from llama_index.core.ingestion import IngestionPipeline, DocstoreStrategy
    from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
    pipline = IngestionPipeline(
        transformations = [EMBEDDINGS],
        vector_store = MongoDBAtlasVectorSearch(
            mongodb_client = MONGODB_CLIENT,
            db_name = MONGODB_DBNAME,
            collection_name = 'pdf_collection',
            vector_index_name = 'pdf_idx'),
        cache = MONGODB_CACHE,
        docstore = MONGODB_DOCSTORE,
        docstore_strategy = DocstoreStrategy.UPSERTS,
        )
    
    nodes = pipline.run(documents=documents)
    

ingest_logs()
# ingest_pdf()
    

In [32]:
import os
import time

#Data Loaders
from llama_index.core import SimpleDirectoryReader
from llama_index.readers.github import GithubClient,GithubRepositoryReader
from llama_index.readers.file import PDFReader
from llama_index.core.node_parser import SentenceSplitter
#Indices and Storage
import pymongo
from llama_index.storage.kvstore.mongodb import MongoDBKVStore as MongoDBCache
from llama_index.storage.docstore.mongodb import MongoDocumentStore
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
#Pipeline
from llama_index.core.ingestion import IngestionPipeline, IngestionCache, DocstoreStrategy
#Vector Embedding Model
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
LOGS_DIR = './data/log'
SPRINGBOOT_GUIDE_PDF = './data/pdf/spring-boot-reference.pdf'
def ingest_logs():
  print('->Ingest Logs')
  splitter      = SentenceSplitter(chunk_size=180,chunk_overlap=20)
  documents     = SimpleDirectoryReader(LOGS_DIR,
                                        filename_as_id = True).load_data()
  
  pipeline      = IngestionPipeline(
                        transformations   = [splitter,EMBEDDINGS],
                        vector_store      = MongoDBAtlasVectorSearch(
                                              mongodb_client  = MONGODB_CLIENT,
                                              db_name         = MONGODB_DBNAME,
                                              collection_name = 'logs_collection',
                                              index_name      = 'logs_idx'),
                        cache             = MONGODB_CACHE,
                        docstore          = MONGODB_DOCSTORE,
                        docstore_strategy = DocstoreStrategy.UPSERTS,
                  )
  nodes         = pipeline.run(documents = documents)
# ingest_logs()

def ingest_devguide():
  print('->Ingest Dev Guide')
  start         = time.time()
  documents     = PDFReader().load_data(file=SPRINGBOOT_GUIDE_PDF)
  pipeline      = IngestionPipeline(
                        transformations   = [EMBEDDINGS], 
                        vector_store      = MongoDBAtlasVectorSearch(
                                              mongodb_client  = MONGODB_CLIENT,
                                              db_name         = MONGODB_DBNAME,
                                              collection_name = 'devguide_collection',
                                              index_name      = 'devguide_idx'),
                        cache             = MONGODB_CACHE,
                        docstore          = MONGODB_DOCSTORE,
                        docstore_strategy = DocstoreStrategy.UPSERTS,
                  )
  nodes         = pipeline.run(documents = documents)
  end           = time.time()
  print(f'  Total Time = {end-start}', f'Total Documents = {len(documents)}', f'Total Nodes = {len(nodes)}')
ingest_devguide()

->Ingest Dev Guide


index_name is deprecated. Please use vector_index_name
vector_index_name and index_name both specified. Will use vector_index_name


  Total Time = 217.8314368724823 Total Documents = 973 Total Nodes = 973


# LLM

In [3]:
from dotenv import load_dotenv
load_dotenv()
import os
MONGODB_URL = os.getenv("MONGODB_URI")
MONGODB_DBNAME = os.getenv("MONGODB_DBNAME")
import pymongo
MONGODB_CLIENT = pymongo.MongoClient(MONGODB_URL)

from llama_index.core import Settings
from llama_index.core import VectorStoreIndex, get_response_synthesizer
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from llama_index.core.retrievers import VectorIndexRetriever, QueryFusionRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.chat_engine import CondenseQuestionChatEngine

EMBED_MODEL = 'BAAI/bge-small-en-v1.5'
EMBEDDINGS = HuggingFaceEmbedding(model_name = EMBED_MODEL)
Settings.embed_model = EMBEDDINGS

def get_chat_engine(model):
    llm_model = Ollama(base_url="http://localhost:11434", model = model, request_timeout=300)
    
    Settings.llm = llm_model
    collections = ['logs_collection', 'pdf_collection']
    indices = ['logs_idx', 'pdf_idx']
    retrievers = []
    
    for r in range(len(collections)):
        vector_store = MongoDBAtlasVectorSearch(
            mongodb_client=MONGODB_CLIENT,
            db_name = MONGODB_DBNAME,
            collection_name= collections[r],
            index_name = indices[r]
        )
        store_index = VectorStoreIndex.from_vector_store(vector_store=vector_store, embed_model= EMBEDDINGS)
        index_retriever = VectorIndexRetriever(index = store_index, similarity_top_k=4)
        retrievers.append(index_retriever)
        break
    
    fusion_retriever = QueryFusionRetriever(
        retrievers,
        similarity_top_k= 4,
        llm = llm_model,
        num_queries=1,
        verbose = False
    )
    response_synthesizer = get_response_synthesizer(llm=llm_model)
    query_engine = RetrieverQueryEngine(
        retriever = fusion_retriever,
        response_synthesizer= response_synthesizer,
        node_postprocessors= [SimilarityPostprocessor(similarity_cutoff=0.7)]
    )

    chat_engine = CondenseQuestionChatEngine.from_defaults(
        query_engine=query_engine,
        llm = llm_model
    )
    return chat_engine

get_chat_engine('mistral').stream_chat('what is badWeather application')

index_name is deprecated. Please use vector_index_name
vector_index_name and index_name both specified. Will use vector_index_name


RuntimeError: Detected nested async. Please use nest_asyncio.apply() to allow nested event loops.Or, use async entry methods like `aquery()`, `aretriever`, `achat`, etc.

In [1]:
from dotenv import load_dotenv
load_dotenv()
import os
MONGODB_URL = os.getenv("MONGODB_URL")
MONGODB_DBNAME = os.getenv("MONGODB_DBNAME")

import pymongo
MONGODB_CLIENT = pymongo.MongoClient(MONGODB_URL)

from llama_index.embeddings.huggingface import HuggingFaceEmbedding
EMBED_MODEL = 'BAAI/bge-small-en-v1.5'
EMBEDDINGS = HuggingFaceEmbedding(model_name = EMBED_MODEL)

from llama_index.core import Settings
Settings.embed_model = EMBEDDINGS

from llama_index.core import Settings
from llama_index.core import VectorStoreIndex, get_response_synthesizer
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from llama_index.core.retrievers import VectorIndexAutoRetriever, QueryFusionRetriever
from llama_index.core.query_engine import RetryQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.chat_engine import CondenseQuestionChatEngine

  from .autonotebook import tqdm as notebook_tqdm


In [118]:
import nest_asyncio
import asyncio
# nest_asyncio.apply()
loop = asyncio.get_running_loop()
loop.stop()
loop.close()

: 