In [14]:
import json
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import JSONLoader
from langchain.storage import InMemoryStore
from langchain_core.documents import Document

import os
from dotenv import load_dotenv

In [15]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.storage import InMemoryStore
from langchain_core.documents import Document

import os
from dotenv import load_dotenv

# Load environment variables from a .env file before any API client is created
# (make sure to set GOOGLE_API_KEY in your .env file).
load_dotenv()

def load_rag_content_from_text(
    text_file_path,
    chunk_size=1000,
    chunk_overlap=200,
    *,
    k=5,
    embedding_model="models/embedding-001",
    encoding=None,
):
    """Build a Chroma-backed retriever over the contents of a text file.

    The file is loaded with ``TextLoader``, split into overlapping chunks
    with ``RecursiveCharacterTextSplitter``, embedded with
    ``GoogleGenerativeAIEmbeddings`` and indexed in a ``Chroma`` vector store.

    Args:
        text_file_path: Path of the text file to index.
        chunk_size: Maximum chunk length, measured with ``len`` (characters).
        chunk_overlap: Number of characters shared by consecutive chunks.
        k: Number of chunks the retriever returns per query.
        embedding_model: Model name passed to ``GoogleGenerativeAIEmbeddings``.
        encoding: Text encoding forwarded to ``TextLoader``; ``None`` keeps
            the loader's default.

    Returns:
        The retriever produced by ``Chroma.as_retriever`` with
        ``search_kwargs={"k": k}``.

    Raises:
        ValueError: If ``k`` is smaller than 1, or if the file produces no
            chunks to index.

    Errors raised by the loader, splitter, embedding client or vector store
    are not caught here and propagate to the caller.
    """
    # Validate cheap arguments first so a bad call fails before any file
    # access or embedding request is made.
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    documents = TextLoader(text_file_path, encoding=encoding).load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    splits = text_splitter.split_documents(documents)

    # Fail with a clear message rather than handing an empty batch to the
    # embedding client and vector store.
    if not splits:
        raise ValueError(
            f"No text chunks were produced from {text_file_path!r}; "
            "is the file empty?"
        )

    embedding_function = GoogleGenerativeAIEmbeddings(model=embedding_model)
    vectorstore = Chroma.from_documents(
        documents=splits,
        embedding=embedding_function,
    )

    return vectorstore.as_retriever(search_kwargs={"k": k})

# Usage: build a retriever over a local text file (replace the placeholder path).
text_file_path = "path_to_your_text_file.txt"
retriever = load_rag_content_from_text(text_file_path)

# Test the retriever with a sample question.
query = "What is the main topic of the content?"
# Retrievers are Runnables: `invoke` replaces the deprecated
# `get_relevant_documents` and returns the same list of documents.
results = retriever.invoke(query)

# Show a 100-character preview and the metadata of every retrieved chunk.
for doc in results:
    print(f"Content: {doc.page_content[:100]}...")
    print(f"Metadata: {doc.metadata}")
    print("-" * 50)