## Libraries

In [None]:
!pip install langchain langchain_core langchain_community langchain-huggingface torch accelerate bitsandbytes docarray unstructured

## Requirements

Install the requirements and import relevant modules

In [67]:
import os
import pandas as pd
from pandas import DataFrame
import json
from langchain_core.prompts import ChatPromptTemplate

## Environment Variables and Constants

Set the API keys and environment variables required for running the app

In [71]:
# Credentials and Hugging Face storage locations. Fill in the values you need
# and leave the others blank. Avoid committing real keys with the notebook.
#
# HF_HUB_CACHE / HF_HOME: the Hugging Face cache is the location where the
# models will be downloaded. It is recommended to set it to a location which
# has sufficient storage space.
ENVIRONMENT_SETTINGS = {
    "HUGGINGFACEHUB_API_TOKEN": "",
    "LANGCHAIN_API_KEY": "",
    "OPENAI_API_KEY": "",
    "HF_HUB_CACHE": "",
    "HF_HOME": "",
}

# Export only the entries that were filled in. Writing an empty placeholder
# would overwrite a value that is already present in the process environment.
for _name, _value in ENVIRONMENT_SETTINGS.items():
    if _value:
        os.environ[_name] = _value

# Prompt text with a single `{question}` placeholder.
TEMPLATE = """
You are an assistant for question-answering tasks.
If you don't know the answer, just say that you don't know.
Use three sentences maximum and keep the answer concise.
Question: {question}
Answer:
"""

## Utility Functions

Utility functions for operations such as loading data and formatting data.

In [77]:
def load_excel_to_dataframe(file_path: str, header=0, index_col=0, reset_index=True) -> DataFrame:
    """
    Loads an excel sheet (e.g. the remediation table) into a pandas dataframe.

    Args:
        * file_path (str): The path to the excel file
        * header (int): the row to set as header row
        * index_col: column ids to use as the index. provide a list if there
          are rows with multi level sub rows
        * reset_index (bool): when True (the default) the index built from
          `index_col` is moved back into regular columns with
          `DataFrame.reset_index`; when False that index is kept as is

    Returns:
        A pandas `DataFrame` object with the loaded data
    """

    df = pd.read_excel(file_path, header=header, index_col=index_col)
    # This flag used to be accepted but ignored (the index was always reset).
    # It is honoured now, and defaults to True so that callers which omit it
    # get the same frame as before.
    if reset_index:
        df = df.reset_index()
    return df

def convert_df_to_json(df: DataFrame) -> str:
    """
    Converts a pandas `DataFrame` to a JSON string

    Args:
        * df (DataFrame): A pandas `DataFrame` object with required data

    Returns:
        str: the JSON text produced by `DataFrame.to_json` with
        `orient="records"` (one object per row) and an indent of 4
    """

    # `to_json` returns a string when no output path is given.
    json_data = df.to_json(orient="records", indent=4)
    return json_data

def set_prompt(template) -> ChatPromptTemplate:
    """
    Build the chat prompt that is used with the model

    Args:
        * template (str): Prompt text to build the chat prompt from

    Returns:
        The `ChatPromptTemplate` created by `ChatPromptTemplate.from_template`
    """

    return ChatPromptTemplate.from_template(template)

In [63]:
# Remediations workbook: header row 2, first five columns passed as index columns.
remediations_df = load_excel_to_dataframe(
    file_path="./data/Remediations.xlsx",
    header=2,
    index_col=[0, 1, 2, 3, 4],
)

# Scenarios workbook: loaded with the loader's default header and index column.
scenarios_df = load_excel_to_dataframe(file_path="./data/Scenarios.xlsx")

In [64]:
# Serialize both frames to JSON text, scenarios first and then remediations.
scenarios_json, remediations_json = map(
    convert_df_to_json,
    (scenarios_df, remediations_df),
)