In [None]:
!pip install langchain langchain_groq langchain_community langchain-huggingface langchain-pinecone

Collecting langchain
  Downloading langchain-0.3.4-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.4.0,>=0.3.12 (from langchain)
  Downloading langchain_core-0.3.13-py3-none-any.whl.metadata (6.3 kB)
Collecting langchain-text-splitters<0.4.0,>=0.3.0 (from langchain)
  Downloading langchain_text_splitters-0.3.0-py3-none-any.whl.metadata (2.3 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.137-py3-none-any.whl.metadata (13 kB)
Collecting pydantic<3.0.0,>=2.7.4 (from langchain)
  Downloading pydantic-2.9.2-py3-none-any.whl.metadata (149 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m149.4/149.4 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.4.0,>=0.3.12->langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting httpx<1,>=0.23.0 (from langsmith<0.2.0,>=0.1.17->langchain)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
C

In [None]:
!pip install -qU \
    openai==0.27.7 \
    pinecone-client==3.1.0 \
    pinecone-datasets==0.7.0 \
    tqdm \
    pinecone-notebooks==0.1.1

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m152.2/152.2 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.0/211.0 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.3/78.3 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m169.0/169.0 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.9/34.9 MB[0m [31m47.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m73.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import json
from datetime import datetime
from typing import Dict, List

from langchain.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain.schema import Document



def format_datetime(date_str: str):
    """Render an ISO-8601 timestamp string as ``YYYY-MM-DD HH:MM``.

    Every ``Z`` in the input is rewritten to ``+00:00`` before parsing, so
    timestamps with a trailing ``Z`` are accepted. The parsed wall-clock fields are
    formatted as-is; no timezone conversion is applied.

    Raises:
        ValueError: If the rewritten string is not valid ISO-8601 for
            ``datetime.fromisoformat``.
    """
    iso_text = date_str.replace('Z', '+00:00')
    parsed = datetime.fromisoformat(iso_text)
    return parsed.strftime("%Y-%m-%d %H:%M")

def create_documents(booking_data: Dict):
    """Convert a booking payload into a flat list of ``Document`` objects.

    For each entry in ``booking_data['user']['flights']`` one 'flight'
    document summarising the booking is emitted, followed by one 'segment'
    document per entry of that flight's ``segments`` list. Segment documents
    also list every passenger's seat and baggage allowances.

    Args:
        booking_data: Parsed booking JSON. Must provide ``user.id`` and
            ``user.flights`` with the keys read below.

    Returns:
        Documents in flight-then-its-segments order; an empty list when the
        user has no flights.

    Raises:
        KeyError: If an expected key is missing from the payload.
    """
    docs = []
    owner_id = booking_data['user']['id']

    for booking in booking_data['user']['flights']:
        # Booking-level summary document
        summary = (
            f"Flight Booking Details:\n"
            f"PNR: {booking['pnr']}\n"
            f"Class: {booking['class']}\n"
            f"From: {booking['source']}\n"
            f"To: {booking['destination']}\n"
            f"Departure: {format_datetime(booking['departure_date'])}\n"
            f"Arrival: {format_datetime(booking['arrival_date'])}\n"
            f"Layover Duration: {booking['layover_duration']}"
        )
        summary_meta = {
            'type': 'flight',
            'user_id': owner_id,
            'ticket_id': booking['ticket_id'],
            'pnr': booking['pnr'],
        }
        docs.append(Document(page_content=summary, metadata=summary_meta))

        # One document per leg; `position` is 1-based for display only
        for position, leg in enumerate(booking['segments'], start=1):
            pieces = [
                f"Flight Segment {position}:\n"
                f"Flight Number: {leg['flight_number']}\n"
                f"From: {leg['departure']['airport']} ({leg['departure']['iata']})\n"
                f"To: {leg['arrival']['airport']} ({leg['arrival']['iata']})\n"
                f"Departure: {format_datetime(leg['departure']['date'])}\n"
                f"Arrival: {format_datetime(leg['arrival']['date'])}\n\n"
                f"Passenger Details for this segment:"
            ]

            # Append a four-line entry for every traveller on this leg
            for traveller in leg['passengers']:
                pieces.append(f"\n- {traveller['first_name']} {traveller['last_name']}:")
                pieces.append(f"\n  Seat: {traveller['seat_number']}")
                pieces.append(f"\n  Cabin Baggage: {traveller['cabin_baggage']}")
                pieces.append(f"\n  Check-in Baggage: {traveller['check_in_baggage']}")

            leg_meta = {
                'type': 'segment',
                'user_id': owner_id,
                'ticket_id': booking['ticket_id'],
                'pnr': booking['pnr'],
                'flight_number': leg['flight_number'],
                'segment_number': position - 1,  # stored 0-based
            }
            docs.append(Document(page_content="".join(pieces), metadata=leg_meta))

    return docs


In [None]:
import os

# Only launch the Colab authentication helper when no (non-empty) key is set.
# NOTE(review): Authenticate() presumably stores the key in PINECONE_API_KEY,
# which a later cell reads — confirm against pinecone_notebooks.
if not os.getenv("PINECONE_API_KEY"):
    from pinecone_notebooks.colab import Authenticate

    Authenticate()

In [None]:
# Embedding model shared by the vector store (indexing) and the queries below.
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
)



In [None]:
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore


# Key supplied by the environment or by the authentication cell above.
api_key = os.environ.get("PINECONE_API_KEY")

# Configure the Pinecone client. The client only manages indexes and does not
# take an embedding model; embeddings are wired in via PineconeVectorStore.
pc = Pinecone(api_key=api_key)

In [None]:
from pinecone import ServerlessSpec

# Serverless deployment target. Unset *or empty* environment variables fall
# back to the defaults (hence `or` rather than a getenv default).
cloud = os.getenv('PINECONE_CLOUD') or 'aws'
region = os.getenv('PINECONE_REGION') or 'us-east-1'

spec = ServerlessSpec(region=region, cloud=cloud)

In [None]:
# Name of the Pinecone index that is created/opened below and that the
# LangChain vector store attaches to.
index_name = 'havahavai1'

In [None]:
import time

# Create the index on first run; later runs reuse the existing one.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        index_name,
        dimension=384,  # output size of sentence-transformers/all-MiniLM-L6-v2
        metric='cosine',
        spec=spec
    )
    # Creation is asynchronous: wait until the index reports ready before
    # connecting, otherwise the first requests can fail.
    while not pc.describe_index(index_name).status['ready']:
        time.sleep(1)

# connect to index
index = pc.Index(index_name)
# view index stats
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [None]:
# Load the raw booking JSON (Colab upload path) and turn it into documents.
with open("/content/Journey_Details.json") as f:
    booking_data = json.load(f)

documents = create_documents(booking_data)

In [None]:
# Dump the generated documents for inspection.
# NOTE(review): the output includes passenger names, seats and PNRs — clear
# this cell's output before sharing the notebook.
print(documents)

[Document(metadata={'type': 'flight', 'user_id': 227, 'ticket_id': 3183, 'pnr': 'HZAVJJ'}, page_content='Flight Booking Details:\nPNR: HZAVJJ\nClass: ECONOMY\nFrom: Cape Town International Airport (CPT)\nTo: Indira Gandhi International Airport (DEL)\nDeparture: 2024-07-11 14:35\nArrival: 2024-07-12 08:10\nLayover Duration: 55m'), Document(metadata={'type': 'segment', 'user_id': 227, 'ticket_id': 3183, 'pnr': 'HZAVJJ', 'flight_number': 'ET846', 'segment_number': 0}, page_content='Flight Segment 1:\nFlight Number: ET846\nFrom: Cape Town International Airport (CPT)\nTo: Addis Ababa Bole International Airport (ADD)\nDeparture: 2024-07-11 14:35\nArrival: 2024-07-11 22:00\n\nPassenger Details for this segment:\n- surendra singh:\n  Seat: 21a\n  Cabin Baggage: 7kg\n  Check-in Baggage: 23kg\n- narinder kaur:\n  Seat: 21b\n  Cabin Baggage: 7kg\n  Check-in Baggage: 23kg\n- samik singh:\n  Seat: 21c\n  Cabin Baggage: 7kg\n  Check-in Baggage: 23kg'), Document(metadata={'type': 'segment', 'user_id'

In [None]:
# LangChain wrapper around the Pinecone index, using `embeddings` for texts
# and queries.
# NOTE(review): no API key is passed here — presumably it is read from
# PINECONE_API_KEY in the environment; confirm against langchain_pinecone.
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)

In [None]:
import hashlib

# Derive a stable id from each document's content + metadata so re-running
# this cell overwrites the same vectors instead of inserting duplicates
# (the default is a fresh random UUID per call).
document_ids = [
    hashlib.sha256(
        json.dumps(
            {"page_content": doc.page_content, "metadata": doc.metadata},
            sort_keys=True,
            default=str,
        ).encode("utf-8")
    ).hexdigest()
    for doc in documents
]
vectorstore.add_documents(documents, ids=document_ids)

['f2d5832a-5a44-419e-a112-fd2f477949e2',
 'db355bcb-4de4-4f4b-ba2a-bc5b402e9a2d',
 '0bad9075-85df-4f4d-8bb1-d97d50a71a56']

## Retrieval

In [None]:
from langchain_groq import ChatGroq

from getpass import getpass

# Create a Groq API key at console.groq.com and expose it as GROQ_API_KEY.
# Secrets must never be hard-coded in the notebook: fall back to an
# interactive prompt when the variable is unset or empty.
# NOTE: the key that used to be embedded here is compromised and must be revoked.
groq_api_key = os.getenv('GROQ_API_KEY') or getpass('Groq API key: ')



In [None]:
# Smoke-test retrieval with a question the indexed documents can answer.
query = "What time is my flight from Cape Town to Addis Ababa, and what's the arrival time? "

# Raw query embedding (not used further in this cell).
res = embeddings.embed_query(query)
# Nearest documents as returned by the vector store with its default settings.
sim = vectorstore.similarity_search(query)

print(sim)

[Document(id='db355bcb-4de4-4f4b-ba2a-bc5b402e9a2d', metadata={'flight_number': 'ET846', 'pnr': 'HZAVJJ', 'segment_number': 0.0, 'ticket_id': 3183.0, 'type': 'segment', 'user_id': 227.0}, page_content='Flight Segment 1:\nFlight Number: ET846\nFrom: Cape Town International Airport (CPT)\nTo: Addis Ababa Bole International Airport (ADD)\nDeparture: 2024-07-11 14:35\nArrival: 2024-07-11 22:00\n\nPassenger Details for this segment:\n- surendra singh:\n  Seat: 21a\n  Cabin Baggage: 7kg\n  Check-in Baggage: 23kg\n- narinder kaur:\n  Seat: 21b\n  Cabin Baggage: 7kg\n  Check-in Baggage: 23kg\n- samik singh:\n  Seat: 21c\n  Cabin Baggage: 7kg\n  Check-in Baggage: 23kg'), Document(id='f2d5832a-5a44-419e-a112-fd2f477949e2', metadata={'pnr': 'HZAVJJ', 'ticket_id': 3183.0, 'type': 'flight', 'user_id': 227.0}, page_content='Flight Booking Details:\nPNR: HZAVJJ\nClass: ECONOMY\nFrom: Cape Town International Airport (CPT)\nTo: Indira Gandhi International Airport (DEL)\nDeparture: 2024-07-11 14:35\nArr

We write some functions to handle the retrieval and completion steps:

In [None]:
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.memory import ConversationBufferMemory

from langchain.prompts import ChatPromptTemplate

In [None]:

def create_qa_chain(vectorstore: "PineconeVectorStore", groq_api_key: str) -> "ConversationalRetrievalChain":
    """Create the conversational QA chain with a custom prompt.

    Args:
        vectorstore: Vector store whose retriever supplies the context
            documents (top-1 similarity search).
        groq_api_key: API key used to authenticate the Groq chat model.

    Returns:
        A ``ConversationalRetrievalChain`` that carries its own
        ``ConversationBufferMemory`` (key ``chat_history``), so follow-up
        questions asked on the same chain see the earlier exchanges.
    """
    # Imported locally because no earlier cell imports this class (the import
    # cell only brings in RetrievalQAWithSourcesChain), which made this
    # function fail with NameError on a fresh kernel.
    from langchain.chains import ConversationalRetrievalChain

    # Initialize language model; temperature=0 keeps answers deterministic-leaning
    llm = ChatGroq(
        api_key=groq_api_key,
        temperature=0,
        model_name="llama-3.1-8b-instant"
    )

    # Create conversation memory
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True
    )

    # Define custom prompt template
    prompt_template = """You are a helpful flight booking assistant. Use the following pieces of context to answer the user's question.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    Always maintain a professional and friendly tone.
    Format dates and times in a readable way.
    If multiple passengers are involved, list their details clearly.
    For questions about baggage, specify both cabin and check-in allowances.
    For layover questions, include both the location and duration.

    Context: {context}

    Chat History: {chat_history}

    Question: {question}

    Assistant: """

    PROMPT = ChatPromptTemplate.from_template(prompt_template)

    # Use the *argument* as the retrieval source (previously a typo in the
    # signature made the body fall back to the notebook-global `vectorstore`).
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 1}
        ),
        memory=memory,
        combine_docs_chain_kwargs={"prompt": PROMPT}
    )

In [None]:
# Build the chain once; the conversation memory created inside
# create_qa_chain lives on this object and is shared by all later questions.
qa_chain = create_qa_chain(vectorstore, groq_api_key)

In [None]:
def get_chatbot_response(
    question: str,
    qa_chain: "ConversationalRetrievalChain",
    debug_vectorstore=None,
) -> str:
    """Get response from the chatbot for a given question.

    Args:
        question: The user's question, passed to the chain under the
            ``"question"`` key.
        qa_chain: Chain (or any object with a compatible ``invoke``) whose
            result mapping contains an ``"answer"`` entry.
        debug_vectorstore: Optional store to query for debugging. When given,
            the result of its ``similarity_search(question)`` is printed after
            the chain has answered. Defaults to ``None`` (no extra query, no
            output) so the function no longer depends on a notebook global or
            dumps passenger data by default.

    Returns:
        The ``"answer"`` value of the chain's response.
    """
    response = qa_chain.invoke({"question": question})
    if debug_vectorstore is not None:
        # Opt-in inspection of what a plain similarity search returns
        print(debug_vectorstore.similarity_search(question))
    return response['answer']

In [None]:
# Sample questions asked in order against the same chain, so each one is
# answered with the chain's accumulated chat history.
questions = [
    "What time do I arrive in Delhi?",
    "Where is my layover?",
    "What’s my seat for the first flight?",
    "Do I have checked baggage?"
]

# Print every question together with the chatbot's answer.
for question in questions:
    response = get_chatbot_response(question, qa_chain)
    print(f"\nQ: {question}")
    print(f"A: {response}\n")

[Document(id='f2d5832a-5a44-419e-a112-fd2f477949e2', metadata={'pnr': 'HZAVJJ', 'ticket_id': 3183.0, 'type': 'flight', 'user_id': 227.0}, page_content='Flight Booking Details:\nPNR: HZAVJJ\nClass: ECONOMY\nFrom: Cape Town International Airport (CPT)\nTo: Indira Gandhi International Airport (DEL)\nDeparture: 2024-07-11 14:35\nArrival: 2024-07-12 08:10\nLayover Duration: 55m'), Document(id='0bad9075-85df-4f4d-8bb1-d97d50a71a56', metadata={'flight_number': 'ET686', 'pnr': 'HZAVJJ', 'segment_number': 1.0, 'ticket_id': 3183.0, 'type': 'segment', 'user_id': 227.0}, page_content='Flight Segment 2:\nFlight Number: ET686\nFrom: Addis Ababa Bole International Airport (ADD)\nTo: Indira Gandhi International Airport (DEL)\nDeparture: 2024-07-11 22:55\nArrival: 2024-07-12 08:10\n\nPassenger Details for this segment:\n- surendra singh:\n  Seat: 21a\n  Cabin Baggage: 7kg\n  Check-in Baggage: 23kg\n- narinder kaur:\n  Seat: 21b\n  Cabin Baggage: 7kg\n  Check-in Baggage: 23kg\n- samik singh:\n  Seat: 21

---