## 1. Install Required Libraries and Tools

In [19]:
!pip install ollama langchain chromadb gradio discord.py PyPDF2 langchain_community gradio



In [20]:
# import packages
import os 
import PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
import ollama

## 2. Set Up Ollama and Download the LLaMA 3.1 Model

In [21]:
# Download the LLaMA 3.1 model (8B parameters)
# Run the following commands in a terminal
# ollama pull llama3.1


# Download the text embedding model
# ollama pull nomic-embed-text

## 3. Load and Split Data from PDF Documents

In [22]:
# Function to read PDF documents
def load_pdf_text(pdf_path):
    """Extract and concatenate the text of every page in a PDF.

    Args:
        pdf_path: Location of the PDF; passed straight to ``PyPDF2.PdfReader``.

    Returns:
        str: The text of all pages in page order, with no separator inserted
        between pages. Pages that yield no text contribute nothing.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_path)
    # A page without an extractable text layer may come back empty (or None,
    # depending on the PyPDF2 version); `or ""` keeps such pages from raising
    # TypeError. Joining once also avoids repeated string concatenation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


# Specify the directory containing the PDF files
pdf_directory = "./input_data"

# Get a list of all PDF files in the directory.
# - The extension check is case-insensitive so files such as "REPORT.PDF" are included.
# - os.listdir() returns entries in arbitrary order; sorting keeps the combined
#   text (and therefore the chunks below) reproducible between runs.
pdf_paths = [
    os.path.join(pdf_directory, file)
    for file in sorted(os.listdir(pdf_directory))
    if file.lower().endswith(".pdf")
]


# Extract the text of each PDF
docs = [load_pdf_text(pdf) for pdf in pdf_paths]

# Combine all text into a single document
combined_text = "\n".join(docs)


# Split the combined text into overlapping chunks for better retrieval
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_text(combined_text)

## 4. Create Embeddings and Vector Store

In [23]:
# from langchain.vectorstores import Chroma
# from langchain.embeddings import OllamaEmbeddings

from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings

# Create Ollama embeddings using the dedicated text-embedding model that the
# setup step pulls (`ollama pull nomic-embed-text`) instead of the chat model
embeddings = OllamaEmbeddings(model="nomic-embed-text")

# Create a vector store from the text chunks and their embeddings
vectorstore = Chroma.from_texts(texts=splits, embedding=embeddings)

## 5. Define Functions for Retrieval-Augmented Generation (RAG)

In [27]:
# Ask the llama3.1 chat model (through Ollama) a question with supporting context
def ollama_llm(question, context):
    """Send ``question`` and ``context`` to the chat model and return its reply text.

    The conversation is primed with a system message and one example
    question/answer exchange; the actual prompt is appended as the final
    user message.
    """
    conversation = [
        {'role': 'system', 'content': 'You are a helpful ai assistant that answers SQL queries'},
        {'role': 'user', 'content': 'What is SQL in full?'},
        {'role': 'assistant', 'content': ' Structured Query Language'},
    ]
    conversation.append(
        {'role': 'user', 'content': f"Question: {question}\n\nContext: {context}"}
    )
    reply = ollama.chat(model='llama3.1', messages=conversation)
    return reply['message']['content']

# Merge the retrieved documents into one context string
def combine_docs(docs):
    """Join every document's ``page_content``, separated by a blank line."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

# End-to-end RAG pipeline: retrieve, build the context, generate the answer
def rag_chain(question):
    """Answer ``question`` using documents retrieved from the vector store as context."""
    # Fetch the documents the vector store's retriever returns for the question
    matches = vectorstore.as_retriever().invoke(question)

    # Collapse them into one context string and let the model answer
    return ollama_llm(question, combine_docs(matches))

# Smoke-test the RAG pipeline with a sample question
sample_question = "What SQL"
result = rag_chain(sample_question)
print(result)

Based on the context, it appears that there is no SQL query being asked.

However, I can tell you that "What us SQL in full?" is a question asking for the full form of Structured Query Language. 

The answer is: **S**tructured **Q**uery **L**anguage.


## Optional:  Build a chatbot UI

In [7]:
import gradio as gr
# Wrap rag_chain in a simple text-in / text-out Gradio web UI.
# NOTE(review): the title/description mention Harry Maguire, while the system
# prompt in ollama_llm is about SQL -- confirm which subject the bot should cover.
iface = gr.Interface(
    title="Harry Maguire Bot",
    description="A bot that answers questions about Harry Maguire as per his Wikipedia.",
    fn=rag_chain,
    inputs=["text"],
    outputs="text",
)

# Start the web server that hosts the UI
iface.launch()

  from .autonotebook import tqdm as notebook_tqdm


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




## Deploying the Discord Bot

In [None]:
# After deploying to AWS EC2, follow these steps

# # Run the Bot:
# python3 your_bot_script.py


# # Keep the Bot Running:
# sudo apt install tmux -y
# tmux new -s discordbot
# python3 your_bot_script.py


# # You can detach from the tmux session by pressing Ctrl + B, then D. To reattach, use:
# tmux attach -t discordbot


# # Create a Systemd Service File:
# sudo nano /etc/systemd/system/discordbot.service


# # Add the following content:
# [Unit]
# Description=Discord Bot

# [Service]
# WorkingDirectory=/home/ubuntu/yourbotrepo
# ExecStart=/usr/bin/python3 /home/ubuntu/yourbotrepo/your_bot_script.py
# Restart=always
# User=ubuntu

# [Install]
# WantedBy=multi-user.target


# # Enable and Start the Service:
# sudo systemctl enable discordbot.service
# sudo systemctl start discordbot.service
