In [6]:
# Peek at the first lines of the raw FAQ dataset to see its JSON layout
# (a top-level "questions" list of question/answer objects, per the output below).
!head Ecommerce_FAQ_Chatbot_dataset.json

{
  "questions": [
  {
  "question": "How can I create an account?",
  "answer": "To create an account, click on the 'Sign Up' button on the top right corner of our website and follow the instructions to complete the registration process."
  },
  {
  "question": "What payment methods do you accept?",
  "answer": "We accept major credit cards, debit cards, and PayPal as payment methods for online orders."
  },


## Generating unique IDs

In [2]:
# Re-load the document since it was not defined in this new code execution.
import json
import hashlib

# Load the raw FAQ dataset. The encoding is pinned to UTF-8 (the standard JSON
# encoding) so decoding does not depend on the platform's locale default.
with open('Ecommerce_FAQ_Chatbot_dataset.json', 'rt', encoding='utf-8') as f_in:
    documents = json.load(f_in)

def generate_document_id(doc):
    """Derive a short, deterministic identifier for a question/answer record.

    The id is the first 8 hex characters of the MD5 digest of
    "<question>-<first 10 characters of answer>".

    Parameters
    ----------
    doc : mapping with 'question' and 'answer' keys.

    Returns
    -------
    str
        An 8-character lowercase hexadecimal string.
    """
    answer_prefix = doc['answer'][:10]
    fingerprint = "{}-{}".format(doc['question'], answer_prefix)
    # MD5 is used only as a stable fingerprint here, not for security.
    digest = hashlib.md5(fingerprint.encode()).hexdigest()
    return digest[:8]

# Stamp every question/answer record with its generated id (mutates the records in place).
for entry in documents['questions']:
    entry['id'] = generate_document_id(entry)


In [6]:
# Despite the name, `hashes` holds the full question/answer records (now carrying
# an 'id' key), not just the hash strings. It is a shallow copy of the list.
hashes = list(documents['questions'])

In [27]:
# Display the first record to confirm the generated 'id' key is present.
hashes[0]

{'question': 'How can I create an account?',
 'answer': "To create an account, click on the 'Sign Up' button on the top right corner of our website and follow the instructions to complete the registration process.",
 'id': 'a5825c73'}

In [24]:
# Check that the number of distinct generated ids equals the number of questions,
# i.e. that the truncated 8-character hashes did not collide.
# (The previous version referenced `hash_list`, which is never defined in this notebook.)
len({doc['id'] for doc in documents['questions']}) == len(documents['questions'])

True

In [30]:
# Persist the id-stamped records as a pretty-printed JSON array for the later sections.
with open('documents-with-ids.json', 'wt') as out_file:
    json.dump(hashes, out_file, indent=2)

In [31]:
# Peek at the written file to confirm each record now carries an "id" field.
!head documents-with-ids.json

[
  {
    "question": "How can I create an account?",
    "answer": "To create an account, click on the 'Sign Up' button on the top right corner of our website and follow the instructions to complete the registration process.",
    "id": "a5825c73"
  },
  {
    "question": "What payment methods do you accept?",
    "answer": "We accept major credit cards, debit cards, and PayPal as payment methods for online orders.",
    "id": "db025979"


## Chunking

In [203]:
from openai import OpenAI
from dotenv import load_dotenv

In [204]:
# Load environment variables via python-dotenv before creating the OpenAI client.
# NOTE(review): presumably this supplies the API key that OpenAI() reads from the
# environment — confirm the expected variable name in the local .env file.
load_dotenv()

True

In [205]:
import json
# Read the id-stamped records back from disk. From here on `documents` is a
# list of records, not the original {"questions": [...]} dict.
with open("documents-with-ids.json", 'rt') as f_in:
    documents = json.load(f_in)

In [206]:
# Sanity check: show the first record loaded back from documents-with-ids.json.
documents[0]

{'question': 'How can I create an account?',
 'answer': "To create an account, click on the 'Sign Up' button on the top right corner of our website and follow the instructions to complete the registration process.",
 'id': 'a5825c73'}

In [207]:
# Serialise the records as real JSON before interpolating them. Interpolating the
# list directly embeds Python's repr (mixed single/double-quoted strings), which is
# not valid JSON even though the model is asked to return JSON that is later parsed
# with json.loads.
documents_json = json.dumps(documents, ensure_ascii=False)

# Categorisation prompt: asks the model to return every record with an added
# "topic" key chosen from the listed high-level topics, inside a ```json fence.
prompt=f"""Review {documents_json} and create output a new .json file that contains a key which categorises the type of question
            based on the topics they belong to, some high level topics are:
                1. Account & Registration
                2. Payments & Pricing
                3. Shipping & Delivery
                4. Returns & Refunds
                5. Order Management
                6. Customer Support & Services
                7. Product Information
                
            The topics above must be introduced as a new key "topic" for each question, answer pair.
            
            return only the contents found in between ```json``` nothing else.
"""

In [208]:

# Create the API client and send the categorisation prompt as a single user
# message (no system message is sent).
client = OpenAI()
model = "gpt-4o-mini"
response = client.chat.completions.create(
    model=model,
    messages=[{"role": "user", "content": prompt}],
)

In [209]:
def format_output(response):
    """Extract the JSON text from a chat-completion response.

    The prompt asks the model to wrap its answer in a ```json fenced block.
    If a complete fenced block is present, only its contents are returned, so
    any prose the model adds before or after the fence is dropped. Otherwise
    the function falls back to removing stray fence markers from the whole
    reply. Surrounding whitespace is stripped in both cases.

    Parameters
    ----------
    response : object exposing ``choices[0].message.content`` as a string,
        such as the value returned by ``client.chat.completions.create``.

    Returns
    -------
    str
        The text intended to be passed to ``json.loads``.
    """
    import re  # local import: no earlier cell in this notebook imports ``re``

    content = response.choices[0].message.content
    fenced = re.search(
        r"```(?:json)?\s*(.*?)\s*```", content, flags=re.DOTALL | re.IGNORECASE
    )
    if fenced:
        return fenced.group(1)
    # No complete fenced block (e.g. the closing fence is missing): keep the
    # previous behaviour of simply deleting the fence markers.
    return content.replace("```json", '').replace('```', '').strip()

# Clean the model reply of its Markdown code-fence markers so it can be parsed as JSON.
response_string = format_output(response)

In [213]:
# Parse the cleaned reply into Python objects. json.loads raises
# json.JSONDecodeError if the model returned anything other than valid JSON.
dat = json.loads(response_string)

In [221]:
# pandas is not imported in any earlier visible cell, so `pd` would be undefined
# on a fresh kernel; import it here so this cell runs under Restart & Run All.
import pandas as pd

# One row per categorised FAQ record returned by the model.
dat_df = pd.DataFrame(dat)

dat_df.head()

Unnamed: 0,question,answer,id,topic
0,How can I create an account?,"To create an account, click on the 'Sign Up' b...",a5825c73,Account & Registration
1,What payment methods do you accept?,"We accept major credit cards, debit cards, and...",db025979,Payments & Pricing
2,How can I track my order?,You can track your order by logging into your ...,f936d8f6,Order Management
3,What is your return policy?,Our return policy allows you to return product...,ae7b5c39,Returns & Refunds
4,Can I cancel my order?,You can cancel your order if it has not been s...,0ea444c1,Order Management


## Indexing

In [227]:
# List the column names available on the categorised records.
dat_df.columns

Index(['question', 'answer', 'id', 'topic'], dtype='object')

In [229]:
import minsearch

In [234]:
# Create the search index: all four record fields are registered as text fields
# and no keyword fields are configured.
index = minsearch.Index(text_fields=['question', 'answer', 'id', 'topic'], keyword_fields=[])

In [244]:
# Sample user question used to try out the index.
query = "How can I track an order"

In [239]:
# Convert the DataFrame into a list of per-row dicts (one dict per FAQ record) for indexing.
dco = dat_df.to_dict(orient='records')


In [240]:
# Fit the index on the FAQ records; fit() returns the index object (see output below).
index.fit(dco)

<minsearch.Index at 0x13bd6a1d0>

In [245]:
# Retrieve the three top-ranked FAQ records for the sample query.
index.search(query, num_results=3)

[{'question': 'How can I track my order?',
  'answer': "You can track your order by logging into your account and navigating to the 'Order History' section. There, you will find the tracking information for your shipment.",
  'id': 'f936d8f6',
  'topic': 'Order Management'},
 {'question': 'Can I request an invoice for my order?',
  'answer': 'Yes, an invoice is usually included with your order. If you require a separate invoice, please contact our customer support team with your order details.',
  'id': 'f26b624d',
  'topic': 'Order Management'},
 {'question': 'Can I change or cancel an item in my order?',
  'answer': 'If you need to change or cancel an item in your order, please contact our customer support team as soon as possible. We will assist you with the necessary steps.',
  'id': '39134f05',
  'topic': 'Order Management'}]

## RAG Flow

In [253]:
# Layout of a single retrieved FAQ entry inside the prompt's CONTEXT section.
# The placeholders are filled from the keys of each search result.
entry_template= """
question:{question}
answer:{answer}
topic:{topic}
""".strip()

# Full prompt sent to the model: instructions, then the user's QUESTION, then the
# CONTEXT built from the retrieved FAQ entries.
prompt_template = """You are a customer service representative for an e-commerce website. Answer the QUESTION based on the CONTEXT from our frequently asked queries database.
Use only facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT: {context}
""".strip()

In [252]:
def search(query):
    """Return up to 10 FAQ records matching ``query`` from the module-level ``index``.

    The ``question`` field is boosted by 3.0 and the ``topic`` field by 0.5.
    """
    field_weights = {'question': 3.0, "topic": 0.5}
    return index.search(query=query, boost_dict=field_weights, num_results=10)

def build_prompt(query, search_results):
    """Render the final LLM prompt for ``query`` from the retrieved records.

    Each record in ``search_results`` is formatted with ``entry_template`` and
    followed by a blank line. The joined entries become the CONTEXT of
    ``prompt_template``, and the result is stripped of surrounding whitespace.
    """
    entries = [entry_template.format(**hit) + "\n\n" for hit in search_results]
    filled = prompt_template.format(question=query, context="".join(entries))
    return filled.strip()

def llm(prompt):
    """Send ``prompt`` as a single user message to gpt-4o-mini and return the reply text.

    Uses the module-level ``client``.
    """
    messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(model='gpt-4o-mini', messages=messages)
    first_choice = completion.choices[0]
    return first_choice.message.content

def rag(query):
    """Answer ``query`` end to end: retrieve FAQ records, build the prompt, ask the LLM."""
    hits = search(query)
    return llm(prompt=build_prompt(query=query, search_results=hits))

In [258]:
# End-to-end check of the RAG flow using a paraphrase of a known FAQ question.
query= "I don't understand how to track my order"
answer= rag(query)
print(answer)

You can track your order by logging into your account on our e-commerce website. Once you're logged in, navigate to the 'Order History' section. In that section, you can find the tracking information for your shipment.
