In [2]:
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
import weaviate
from dotenv import load_dotenv
import os

# Load environment variables before any of them are read below.
load_dotenv()

# Credentials / endpoints; each one is None when its variable is not set.
api_key = os.environ.get("GEMINI_API_KEY")
weaviate_url = os.environ.get("WEAVIATE_URL")
weaviate_api_key = os.environ.get("WEAVIATE_API_KEY")

# Chat model shared by the RAG chain defined later in the notebook.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", api_key=api_key)


In [4]:
import nest_asyncio

# Apply nest_asyncio before the loader cells below run.
# NOTE(review): presumably needed because the notebook kernel already has a
# running event loop — confirm.
nest_asyncio.apply()

### WebBaseLoader

In [5]:
from langchain_community.document_loaders import WebBaseLoader

# Build a WebBaseLoader over a list of page URLs (currently just the landing page).
loader_multiple_pages = WebBaseLoader(web_path=["https://www.xevensolutions.com/"])

In [6]:
# Fetch the configured page(s); as the cell's last expression, the returned
# documents are displayed below.
loader_multiple_pages.load()

[Document(metadata={'source': 'https://www.xevensolutions.com/', 'title': 'Xeven Solutions - AI Development & Solutions Company', 'description': 'Xeven Solutions is a leading AI Development & Solutions Company providing custom AI-based software services to automate workflow and boost innovation.', 'language': 'en-US'}, page_content="  Xeven Solutions - AI Development & Solutions Company                                                     Services  AI Development Services AI Chatbot Development Predictive Modelling Mobile App Development Chat GPT Integrations Custom Software Natural Language Processing Machine Learning DevOps Computer Vision Custom Web Development Staff Augmentation UI UX Design  Salesforce Industries  HealthTech EdTech FinTech GreenTech Internet of Things Retail AI Diagnostics E-Commerce Smart Healthcare HIPAA Compliance  Portfolio Company  About Us Life at Xeven  Resource  Blogs Gallery Careers  Contact Us     X                                     971-56-813-6243     

### RecursiveUrlLoader

In [7]:
from langchain_community.document_loaders import RecursiveUrlLoader

# Recursive loader rooted at the site's home page, using the loader's defaults.
# NOTE(review): the recorded output of this loader is a "403 Forbidden" page, so
# the site appears to reject this request as sent — passing custom `headers=`
# may help (TODO confirm).
loader = RecursiveUrlLoader(url="https://www.xevensolutions.com/")

In [8]:
# Run the recursive loader and display what it returned.
loader.load()

[Document(metadata={'source': 'https://www.xevensolutions.com/', 'content_type': 'text/html', 'title': '403 Forbidden', 'language': None}, page_content='<html>\r\n<head><title>403 Forbidden</title></head>\r\n<body>\r\n<center><h1>403 Forbidden</h1></center>\r\n<hr><center>nginx</center>\r\n</body>\r\n</html>\r\n')]

### SitemapLoader

In [21]:
from langchain_community.document_loaders.sitemap import SitemapLoader

# Point the loader at the site's sitemap. The path is joined to the host with a
# single slash (the previous "//sitemap.xml" form was a malformed URL path).
sitemap_loader = SitemapLoader(web_path="https://www.xevensolutions.com/sitemap.xml")

In [22]:
# Load the pages referenced by the sitemap; `response` is reused by the cell
# that prints each page's text.
response = sitemap_loader.load()

Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.74s/it]
Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.58s/it]
Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.13it/s]
Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.30it/s]
Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.47s/it]
Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.25it/s]
Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.30it/s]
Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.36it/s]
Fetching pages: 100%|##########| 371/371 [03:29<00:00,  1.78it/s]


In [25]:
# Print each loaded page's text with every run of whitespace collapsed to one space.
for page in response:
    words = page.page_content.split()
    print(' '.join(words))

The Best Role of Natural Language Processing in Global Business Services AI Development Services AI Chatbot Development Predictive Modelling​ Mobile App Development Chat GPT Integrations Custom Software Natural Language Processing Machine Learning DevOps Computer Vision​ Custom Web Development Staff Augmentation UI UX Design Salesforce Industries HealthTech EdTech FinTech GreenTech Internet of Things Retail AI Diagnostics E-Commerce Smart Healthcare HIPAA Compliance Portfolio Company About Us Life at Xeven Resource Blogs Gallery Careers Contact Us X 971-56-813-6243 Free AI Consultation Services AI Development Services AI Chatbot Development Predictive Modelling​ Mobile App Development Chat GPT Integrations Custom Software Natural Language Processing Machine Learning DevOps Computer Vision​ Custom Web Development Staff Augmentation UI UX Design Salesforce Industries HealthTech EdTech FinTech GreenTech Internet of Things Retail AI Diagnostics E-Commerce Smart Healthcare HIPAA Compliance 

## Function to load website content

In [44]:
import nest_asyncio

# nest_asyncio.apply() was already called in an earlier cell; it is repeated here
# (presumably so this cell can be run on its own — confirm).
nest_asyncio.apply()


from langchain_community.document_loaders.sitemap import SitemapLoader

def load_website_content(website_url):
    """Load every page listed in a website's ``sitemap.xml``.

    Args:
        website_url: Base URL of the site, e.g. ``"https://www.example.com/"``.
            Surrounding whitespace and any number of trailing slashes are
            ignored.

    Returns:
        Whatever ``SitemapLoader(web_path=<base>/sitemap.xml).load()`` returns
        (the loaded page documents).

    Raises:
        ValueError: If ``website_url`` is empty once whitespace and trailing
            slashes are removed.
    """
    # Normalise the base URL so the sitemap path is always joined with exactly
    # one slash, even for inputs such as "https://site.com//".
    website_url = website_url.strip().rstrip('/')
    if not website_url:
        raise ValueError("website_url must be a non-empty URL")

    sitemap_url = f"{website_url}/sitemap.xml"

    print(f"Attempting to load sitemap from: {sitemap_url}")
    sitemap_loader = SitemapLoader(web_path=sitemap_url)

    documents = sitemap_loader.load()

    print(f"Successfully loaded {len(documents)} pages from {website_url}")
    return documents

In [45]:
# Load all sitemap-listed pages of the Devsinc site; the result feeds split_content below.
website_content = load_website_content("https://www.devsinc.com/")

Attempting to load sitemap from: https://www.devsinc.com/sitemap.xml


  handle = None
Fetching pages: 100%|##########| 127/127 [01:37<00:00,  1.30it/s]


Successfully loaded 127 pages from https://www.devsinc.com


## Function for splitting web content

In [47]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def split_content(documents, chunk_size=1000, chunk_overlap=200):
    """Split documents into chunks with a ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Documents handed to the splitter's ``split_documents``.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        The value returned by ``split_documents(documents)``.
    """
    # Lengths are measured with the built-in ``len``.
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "length_function": len,
    }
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

In [48]:
# Chunk the loaded pages using split_content's defaults (chunk_size=1000, chunk_overlap=200).
docs = split_content(website_content)

In [49]:
# Number of chunks produced by split_content.
len(docs)

1697

## Function to set up the vector database

In [50]:
from langchain_weaviate.vectorstores import WeaviateVectorStore
from weaviate.classes.init import Auth



def setup_vector_database(docs):
    """Embed ``docs`` and store them in a Weaviate Cloud vector store.

    Reads the module-level ``api_key``, ``weaviate_url`` and
    ``weaviate_api_key`` values for credentials.

    Args:
        docs: Documents passed to ``WeaviateVectorStore.from_documents``.

    Returns:
        The vector store returned by ``WeaviateVectorStore.from_documents``.
        The Weaviate client handed to the store is left open on success, since
        the store was built around it.

    Raises:
        Any exception raised while building the store is re-raised after the
        Weaviate client has been closed.
    """
    embeddings = GoogleGenerativeAIEmbeddings(
        google_api_key=api_key,
        model="models/embedding-001",
    )

    client = weaviate.connect_to_weaviate_cloud(
        cluster_url=weaviate_url,
        auth_credentials=Auth.api_key(weaviate_api_key),
        skip_init_checks=True,
    )

    try:
        return WeaviateVectorStore.from_documents(docs, embeddings, client=client)
    except BaseException:
        # Don't leak the connection when embedding/upload fails or the cell is
        # interrupted; the error itself is propagated unchanged.
        client.close()
        raise

In [51]:
# Embed the chunks and store them in Weaviate; `vector_db` backs the retriever used by the RAG chain.
vector_db = setup_vector_database(docs)

In [None]:

from langchain_core.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser


def _format_docs(documents):
    """Render retrieved documents as plain text for the prompt's ``{context}`` slot."""
    sections = []
    for doc in documents:
        source = (getattr(doc, "metadata", None) or {}).get("source")
        text = doc.page_content
        # Keep the source (when present) so answers can be traced to a page.
        sections.append(f"Source: {source}\n{text}" if source else text)
    return "\n\n".join(sections)


def create_rag_chain(vector_db, k=5):
    """Create a RAG (Retrieval-Augmented Generation) chain over ``vector_db``.

    The chain takes a question string, retrieves documents through
    ``vector_db.as_retriever``, formats them as plain text, fills the prompt,
    calls the module-level ``llm`` and parses the result with
    ``StrOutputParser``.

    Args:
        vector_db: Vector store exposing ``as_retriever(search_kwargs=...)``.
        k: Value passed to the retriever as ``search_kwargs["k"]``
            (defaults to 5, the previously hard-coded value).

    Returns:
        The composed runnable chain; call ``.invoke(question)`` on it.
    """
    template = """You are an assistant for question-answering tasks. 
    Use the following pieces of retrieved context to answer the question. 
    If you don't know the answer, just say that you don't know. 
    
    Question: {question} 
    Context: {context} 
    
    Answer:"""

    prompt = ChatPromptTemplate.from_template(template)
    retriever = vector_db.as_retriever(search_kwargs={"k": k})

    rag_chain = (
        {
            # Format the retrieved documents into text; otherwise the raw list
            # of document objects would be stringified into the prompt.
            "context": retriever | _format_docs,
            "question": RunnablePassthrough(),
        }
        | prompt
        | llm
        | StrOutputParser()
    )

    return rag_chain


In [None]:
# Build the RAG chain on top of the Weaviate-backed vector store.
chain = create_rag_chain(vector_db)

In [62]:
# Ask the chain a sample question and print the generated answer.
result = chain.invoke("give me contact details of usman")
print(result)

Usman Asif is the Founder and CEO of Devsinc. You can reach him via LinkedIn:

https://www.linkedin.com/in/usman-asif-15038a22/
