In [14]:
import requests
from lxml import etree

# Replace this list with your actual list of URLs
list_of_urls = [
    'https://infinitalab.com/post-sitemap.xml',
    'https://infinitalab.com/post-sitemap2.xml',
    'https://infinitalab.com/post-sitemap3.xml',
    'https://infinitalab.com/post-sitemap4.xml',
    'https://infinitalab.com/page-sitemap.xml',
    'https://infinitalab.com/category-sitemap.xml',
    # Add more URLs as needed
]

output_file = 'links.txt'

with open(output_file, 'w') as file:
    for url in list_of_urls:
        # Fetch the content of the URL
        response = requests.get(url)
        if response.status_code == 200:
            # Parse the XML content
            root = etree.fromstring(response.content)
            # Find all <loc> tags and write their text content to the file
            for loc in root.findall('.//{http://www.sitemaps.org/schemas/sitemap/0.9}loc'):
                file.write(loc.text + '\n')

print(f'URLs extracted to {output_file}')

URLs extracted to extracted_urls.txt


None


In [2]:
pip install requests beautifulsoup4

Collecting beautifulsoup4
  Downloading beautifulsoup4-4.12.3-py3-none-any.whl (147 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m147.9/147.9 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting soupsieve>1.2
  Downloading soupsieve-2.5-py3-none-any.whl (36 kB)
Installing collected packages: soupsieve, beautifulsoup4
Successfully installed beautifulsoup4-4.12.3 soupsieve-2.5

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
BitsAndBytesConfig,
pipeline
)

import torch

import streamlit as st

import os

from langchain.llms import HuggingFacePipeline
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate, format_document
from langchain_core.runnables import RunnableLambda, RunnablePassthrough, RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from langchain.memory import ConversationBufferMemory
from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string

from operator import itemgetter

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
def load_llm():

    #Loading the Mistral Model
    model_name='mistralai/Mistral-7B-Instruct-v0.2'
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16
    )


    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
    )

    # Building a LLM text-generation pipeline
    text_generation_pipeline = pipeline(
        model=model,
        tokenizer=tokenizer,
        task="text-generation",
        temperature=0.2,
        repetition_penalty=1.1,
        return_full_text=True,
        max_new_tokens=1024,
    )

    llm = HuggingFacePipeline(pipeline= text_generation_pipeline)

    return llm

In [5]:
def embeddings_model():
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2"
    )
    return embeddings

In [6]:
def initialize_vectorstore():

    # Read URLs from the links.txt file and store them in a list
    with open('links.txt', 'r') as file:
        urls_list = [line.strip() for line in file if line.strip()]

    urls_list = list(set(urls_list))
    #Initializing a text_splitter
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=20,
        length_function=len,
        is_separator_regex=False,
    )

    #Loading all the content of the urls in docs format
    loader = WebBaseLoader(urls_list)
    docs = loader.load_and_split(text_splitter=text_splitter)

    vectorstore = FAISS.from_documents(
        docs, embedding=hf_embeddings
    )
    retriever = vectorstore.as_retriever(search_kwargs = {'k':10})
    return retriever

In [7]:
def return_chain_elements():

    #template to get the Standalone question
    _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

        Chat History:
        {chat_history}
        Follow Up Input: {question}
        Standalone question:
    """
    CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

    #Function to create the context from retrieved documents
    DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
    def _combine_documents(
        docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
    ):
        doc_strings = [format_document(doc, document_prompt) for doc in docs]
        return document_separator.join(doc_strings)

    #Creating the template for the final answer
    template = """Answer the question based only on the following context:
        {context}

        Question: {question}
    """
    ANSWER_PROMPT = ChatPromptTemplate.from_template(template)


    # Now we calculate the standalone question
    standalone_question = {
        "standalone_question": {
            "question": lambda x: x["question"],
            "chat_history": lambda x: get_buffer_string(x["chat_history"]),
        }
        | CONDENSE_QUESTION_PROMPT
        | llm
        | StrOutputParser(),
    }

    # Now we retrieve the documents
    retrieved_documents = {
        "docs": itemgetter("standalone_question") | vector_retriever,
        "question": lambda x: x["standalone_question"],
    }

    # Now we construct the inputs for the final prompt
    final_inputs = {
        "context": lambda x: _combine_documents(x["docs"]),
        "question": itemgetter("question"),
    }

    # And finally, we do the part that returns the answers
    answer = {
        "answer": final_inputs | ANSWER_PROMPT | llm,
        "docs": itemgetter("docs"),
    }

    return standalone_question, retrieved_documents, answer


In [8]:
llm = load_llm()

hf_embeddings = embeddings_model()

vector_retriever = initialize_vectorstore()

standalone_question, retrieved_documents, answer = return_chain_elements()

`low_cpu_mem_usage` was None, now set to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 3/3 [00:10<00:00,  3.57s/it]


In [9]:
conversational_memory = ConversationBufferMemory(
        return_messages=True, output_key="answer", input_key="question"
    )

In [10]:
loaded_memory = RunnablePassthrough.assign(
    chat_history=RunnableLambda(conversational_memory.load_memory_variables) | itemgetter("history"),
)

In [11]:
chain = loaded_memory | standalone_question | retrieved_documents | answer

In [12]:
inputs = {"question": "How do I start a GPU node?"}

In [13]:
result = chain.invoke(inputs)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [14]:
result

{'answer': '\n\n\n\nAnswer:\nTo start a GPU node using the E2E Networks Myaccount portal, follow these steps:\n\n1. Log in to your E2E Networks account at https://myaccount.e2enetworks.com/.\n2. Navigate to the "Compute" tab and select "GPU" from the dropdown menu.\n3. Click the "Create" button to initiate the creation of a new GPU node.\n4. Choose the desired GPU instance type and specify any additional settings or configurations.\n5. Once the node has been created, you\'ll be taken to its details page. Here, you can find the public IP address and other relevant information.\n6. To start the node, click the "Start" button located near the top of the page. The node will begin booting up and should be ready to use within a few minutes.\n\nIf you encounter any issues during the process, please refer to the E2E Networks documentation or contact our support team for assistance.',
 'docs': [Document(page_content='How to Launch GPU H100 notebook | E2E Networks  documentation\n\n\n\n\n\n\n\n\

In [39]:
result["docs"]

[Document(page_content='How to Launch GPU H100 notebook | E2E Networks  documentation\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n      Skip to content\n    \n\n\n\nE2E Cloud\n\nDocs\nE2E Networks\n\n\n\n\nToggle navigation menu\n\n\n\n\n\n              Login\n            \n\n\n\n              Sign Up\n            \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nE2E Networks  documentation\n\n\n\n\n\n⌘\n    K\n  \n\n\n\nDocs\nE2E Networks\n\nMyaccount\n\n        Getting Started with Myaccount\nSignUp Process for Indian Customers\nCustomer validation Process for Indian Customers\nSignUp Process for International Customers\nCustomer Validation Process for International Process\nCustomer Validation Process for Contact Persons\nDomestic Customer Validation Process FAQs\nInternational Customer Validation Process FAQs\nSign In Process\n\n\n   Release Notes\n\nCompute\n\nNodes\n Virtual Compute Nodes\n Monitoring\n 1-Click Deployment\n Active Directory\n\n\nGPU\nGPU Cloud\n\n\nEQS\nIntroduction\nHow to Create 

In [50]:
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    doc_strings = [format_document(doc, document_prompt).replace('\n','') for doc in docs]

    return document_separator.join(doc_strings)

In [51]:
_combine_documents(docs[1:4])

'E2E Networks Documentation | E2E Networks  documentation      Skip to content    E2E CloudDocsE2E NetworksToggle navigation menu              Login                          Sign Up            E2E Networks  documentation⌘    K  DocsE2E NetworksMyaccount        Getting Started with MyaccountSignUp Process for Indian CustomersCustomer validation Process for Indian CustomersSignUp Process for International CustomersCustomer Validation Process for International ProcessCustomer Validation Process for Contact PersonsDomestic Customer Validation Process FAQsInternational Customer Validation Process FAQsSign In Process   Release NotesComputeNodes Virtual Compute Nodes Monitoring 1-Click Deployment Active DirectoryGPUGPU CloudEQSIntroductionHow to Create EQS ?ActionsAdd Queue under tabActions for queue service:Using SDKApplianceLoad Balancer ApplianceAuto ScalingIntroductionConceptsDefine Scale GroupsFaaSFunction as a Service (FaaS)How to Create FunctionsFunctions InformationNetworkCDNVPCHow to

In [25]:
final_answer = (
    result["answer"] + "\n\n"
    "For further assistance follow the links below:\n"
    f"1. {result['docs'][0].metadata['source']}\n"
    f"2. {result['docs'][1].metadata['source']}\n"
    f"3. {result['docs'][2].metadata['source']}"
)

In [27]:
from IPython.display import display, Markdown


display(Markdown(final_answer))






Answer:
To start a GPU node using the E2E Networks Myaccount portal, follow these steps:

1. Log in to your E2E Networks account at https://myaccount.e2enetworks.com/.
2. Navigate to the "Compute" tab and select "GPU" from the dropdown menu.
3. Click the "Create" button to initiate the creation of a new GPU node.
4. Choose the desired GPU instance type and specify any additional settings or configurations.
5. Once the node has been created, you'll be taken to its details page. Here, you can find the public IP address and other relevant information.
6. To start the node, click the "Start" button located near the top of the page. The node will begin booting up and should be ready to use within a few minutes.

If you encounter any issues during the process, please refer to the E2E Networks documentation or contact our support team for assistance.

For further assistance follow the links below:
1. https://docs.e2enetworks.com/AI_ML/gpu_notebook_h100.html
2. https://docs.e2enetworks.com/gpucloud/index.html
3. https://docs.e2enetworks.com/gpucloud/index.html