### AI Chatbot


In [None]:
!nvidia-smi

Mon May  5 19:19:18 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   55C    P8             11W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
! pip install langchain openai faiss-cpu pypdf



In [None]:
# Load the Extracted Raw Text
import os

# Read the AWS Textract output into memory as a single string
with open("COMBINE_3.txt", mode="r", encoding="utf-8") as source_file:
    raw_text = source_file.read()

# Sanity check: report the whitespace-delimited word count of what was read
word_count = len(raw_text.split())
print("Loaded document with", word_count, "words.")

Loaded document with 318250 words.


In [None]:
# Chunk the Data for Processing
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings: chunk size in characters, plus the number of characters
# shared between neighbouring chunks so context carries across boundaries.
splitter_settings = {"chunk_size": 2500, "chunk_overlap": 300}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)

# Break the raw text into smaller, retrievable pieces
docs = text_splitter.split_text(raw_text)

chunk_total = len(docs)
print(f"Total chunks created: {chunk_total}")

Total chunks created: 1152


In [None]:
#Create Embeddings and Store in FAISS
! pip install langchain_community
! pip install tiktoken
from langchain_community.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Load OpenAI embeddings (or use any other embedding model)
embedding_model = OpenAIEmbeddings(openai_api_key= "OpenAI_API_Key")

# Store document chunks in FAISS for fast retrieval
vectorstore = FAISS.from_texts(docs, embedding_model)

# Save the FAISS index for future use
vectorstore.save_local("faiss_cba_index")

In [None]:
#Build the Chatbot Using LangChain
import os
from getpass import getpass

from langchain.chains import RetrievalQA
# ChatOpenAI is imported from langchain_community rather than the deprecated
# `langchain.chat_models` re-export.
from langchain_community.chat_models import ChatOpenAI

# Never hardcode the API key in the notebook: read it from the environment and,
# if it is not set, prompt for it without echoing or storing it in the file.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Load FAISS index
# NOTE(review): allow_dangerous_deserialization=True permits pickle-based loading
# of the saved index. Keep it only for an index this notebook wrote itself
# ("faiss_cba_index" is saved by the embedding cell); never point it at an
# index obtained from an untrusted source.
vectorstore = FAISS.load_local("faiss_cba_index", embedding_model, allow_dangerous_deserialization=True)

# Use GPT-4 model for high accuracy responses
llm = ChatOpenAI(model_name="gpt-4", openai_api_key=os.environ["OPENAI_API_KEY"])

# Create a retrieval-based QA system
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),  # Retrieve top 3 relevant chunks
    chain_type="stuff"
)

# Function to interact with the chatbot
def chatbot(question):
    """Answer a question with the retrieval QA chain.

    Passes ``question`` to ``qa_chain.invoke`` and returns the value stored
    under the ``"result"`` key of the mapping it gives back.
    """
    return qa_chain.invoke(question)["result"]

In [None]:
# Chatbot accuracy

from sentence_transformers import SentenceTransformer, util
import numpy as np

# Load a sentence transformer model for semantic similarity
# (used by the evaluation function to embed expected and predicted answers)
sbert_model = SentenceTransformer('all-MiniLM-L6-v2')

# Test cases with ground-truth answers
# Each entry is a dict with a "query" (the question sent to the chatbot) and an
# "expected_answer" (the reference text the reply is compared against).
# NOTE(review): the first reference is a bare number while the second is a full
# sentence; a similarity-based score may treat these very differently - confirm
# the metric is suitable for short numeric answers.
test_set = [
    {"query": "What is the blended rate for Boeing B756 Captain Pay Rates, Aircraft 764, in year 1?", "expected_answer": "486.69"},
    {"query": "My arrival time at FLL was delayed 4 hours. I was supposed to have a 15 hour layover and now it's 11 hours. What hotel should they provide?", "expected_answer": "a pilot scheduled for a layover should be furnished suitable single occupancy lodging in accordance with Section 4-C and the Hotel Guidelines. The hotel should be within 15 minutes normal driving time from the airport."}
]

# Evaluation function
def evaluate_chatbot_accuracy(test_data, chatbot_function, threshold=0.85):
    """Score a chatbot against reference answers using SBERT cosine similarity.

    Args:
        test_data: Sized collection of dicts, each with a ``"query"`` and an
            ``"expected_answer"`` entry.
        chatbot_function: Callable that takes the query string and returns the
            chatbot's answer as text.
        threshold: Minimum cosine similarity (inclusive) between the expected
            and predicted answer embeddings for a reply to count as correct.

    Returns:
        A ``(accuracy_percent, detailed_results)`` tuple. ``accuracy_percent``
        is the share of correct replies, in percent, rounded to two decimals.
        ``detailed_results`` holds one dict per test case with the keys
        ``"question"``, ``"expected"``, ``"predicted"``, ``"similarity"``
        (rounded to three decimals) and ``"is_correct"``. For an empty
        ``test_data`` the result is ``(0.0, [])``.

    NOTE(review): a terse reference (e.g. a bare number) is compared with the
    chatbot's whole reply, so a correct but wordy reply may score below the
    threshold - verify that this metric suits such cases.
    """
    total = len(test_data)
    if total == 0:
        # Nothing to score; returning early avoids dividing by zero below.
        return 0.0, []

    correct_count = 0
    detailed_results = []

    for item in test_data:
        question = item["query"]
        expected = item["expected_answer"]
        predicted = chatbot_function(question)

        # Semantic similarity using SBERT
        expected_emb = sbert_model.encode(expected, convert_to_tensor=True)
        predicted_emb = sbert_model.encode(predicted, convert_to_tensor=True)
        similarity_score = util.cos_sim(expected_emb, predicted_emb).item()

        is_correct = similarity_score >= threshold
        if is_correct:
            correct_count += 1

        detailed_results.append({
            "question": question,
            "expected": expected,
            "predicted": predicted,
            "similarity": round(similarity_score, 3),
            "is_correct": is_correct
        })

    accuracy_percent = (correct_count / total) * 100
    return round(accuracy_percent, 2), detailed_results

# Run the evaluation over the test set with the chatbot as the answer source
accuracy, results = evaluate_chatbot_accuracy(test_set, chatbot)
print(f"\n✅ Chatbot Accuracy: {accuracy}%\n")

# Optional: per-question breakdown (question, reference, reply, score, verdict)
for entry in results:
    verdict = '✔️' if entry['is_correct'] else '❌'
    print(
        f"Q: {entry['question']}\n"
        f"✓ Expected: {entry['expected']}\n"
        f"✎ Predicted: {entry['predicted']}\n"
        f"→ Similarity: {entry['similarity']} → {verdict}\n"
    )


In [None]:
import warnings
import random

warnings.filterwarnings("ignore", category=UserWarning)

# Define greeting inputs and responses
GREETING_INPUTS = ("hello", "hi", "greetings", "sup", "what's up", "hey")
GREETING_RESPONSES = ["hi", "hey", "hi there", "hello", "I am glad! You are talking to me"]

# Punctuation trimmed from the edges of each word before matching, so that
# "Hello!" or "hey," still count as greetings. Only the edges are trimmed, so
# the apostrophe inside "what's" is preserved.
_EDGE_PUNCTUATION = ".,!?;:\"'()[]{}"

def greeting(sentence):
    """Return a random greeting reply if the input contains a greeting.

    The input is lower-cased, split on whitespace and stripped of surrounding
    punctuation, then every entry of ``GREETING_INPUTS`` is looked for as a
    whole word or whole phrase. Matching on whole words keeps "this" from
    matching "hi", while phrase matching lets the multi-word entry
    "what's up" be recognised.

    Args:
        sentence: The raw user input.

    Returns:
        A random element of ``GREETING_RESPONSES`` when a greeting is present,
        otherwise ``None``.
    """
    words = (raw.strip(_EDGE_PUNCTUATION).lower() for raw in sentence.split())
    normalized = " ".join(word for word in words if word)
    if not normalized:
        return None

    # Pad with spaces so a substring test only matches on word boundaries.
    padded = f" {normalized} "
    for phrase in GREETING_INPUTS:
        if f" {phrase} " in padded:
            return random.choice(GREETING_RESPONSES)
    return None

'''def response(user_input):
    """Generate a response based on user input."""
    answer = chatbot(user_input)
    return(answer)'''

In [None]:
#Start the Chatbot

import ipywidgets as widgets
from IPython.display import display, HTML
import random
import warnings
import sys

warnings.filterwarnings("ignore", category=UserWarning)

def on_submit(_):
    """Handle user input and generate chatbot response.

    Reads and clears the text box, then:
      * ignores an empty submission,
      * shows a goodbye message for 'bye', 'exit' or 'stop',
      * otherwise answers with a canned greeting when ``greeting`` recognises
        one, or with the ``chatbot`` reply.

    The question and the answer are HTML-escaped before display so that
    characters such as ``<`` or ``&`` in either text are shown literally
    instead of being interpreted as markup.

    Args:
        _: The widget passed by the submit callback (unused).
    """
    from html import escape  # local import keeps this cell self-contained

    query = input_box.value.strip()
    input_box.value = ""

    if not query:
        # Nothing was typed; do not send an empty question to the QA chain.
        return

    if query.lower() in ['bye', 'exit', 'stop']:
        display(HTML("<b><font color='red' style='font-size: 18px;'>Pilot_buddy:</font></b> <span style='font-size: 18px;'>Thank you for using Pilot_buddy. Have a great day!</span>"))
        return

    greeting_response = greeting(query)
    if greeting_response:
        answer = greeting_response
    else:
        answer = chatbot(query)

    display(HTML(f"<b style='font-size: 18px;'>User:</b> <span style='font-size: 18px;'>{escape(query)}</span>"))
    display(HTML(f"<b><font color='blue' style='font-size: 18px;'>Pilot_buddy:</font></b> <span style='font-size: 18px;'>{escape(str(answer))}</span>"))

# Display Welcome Message with Larger Font
display(HTML("<h3 style='font-size: 22px;'>🚀 Welcome to Pilot_buddy! Your AI assistant for CBAs. Type 'exit' to stop.</h3>"))

# Input Box for User Queries
input_box = widgets.Text(placeholder="Please enter your question...")
# Register the handler that runs when the user submits the text box.
# NOTE(review): newer ipywidgets releases mark Text.on_submit as deprecated in
# favour of observing 'value' with continuous_update=False - confirm against
# the installed version before relying on it.
input_box.on_submit(on_submit)

# Render the text box below the welcome message.
display(input_box)

### GUI of Chatbot

In [None]:
! pip install streamlit

In [None]:
!wget -q -O - ipv4.icanhazip.com

In [None]:
! streamlit run app.py & npx localtunnel --port 8501