Full Pipeline of the NLP Project Titled:
# Emotionally Aware Chatbot

In [None]:
!pip install --upgrade transformers accelerate einops

Collecting transformers
  Downloading transformers-4.34.1-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m20.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.24.1-py3-none-any.whl (261 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.4/261.4 kB[0m [31m33.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting einops
  Downloading einops-0.7.0-py3-none-any.whl (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m32.6 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (

In [None]:
import numpy as np
import tensorflow as tf

# for falcon
import transformers
from transformers import AutoTokenizer,AutoModelForCausalLM
import torch

In [None]:
# Hugging Face Hub id of the Falcon-7B-Instruct checkpoint used for generation.
falcon_model = "tiiuae/falcon-7b-instruct"

# Tokenizer for that checkpoint; handed to the generation pipeline and used by
# ask_chatbot for its EOS token id.
falcon_tokenizer = AutoTokenizer.from_pretrained(falcon_model)

Downloading (…)okenizer_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

In [None]:
# Build the text-generation pipeline around the Falcon checkpoint and tokenizer
# defined above. ask_chatbot calls this object through the global name `pipeline`.
pipeline=transformers.pipeline(
    "text-generation",
    model=falcon_model,
    tokenizer=falcon_tokenizer,
    torch_dtype=torch.bfloat16,  # weights are loaded as bfloat16
    # trust_remote_code=True,
    device_map="auto",  # presumably lets accelerate decide device placement — TODO confirm
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/16.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00002.bin:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00002.bin:   0%|          | 0.00/4.48G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

In [None]:
# For bert
from transformers import BertTokenizer, TFBertForSequenceClassification

In [None]:
# Drive Mount
# The fine-tuned BERT weights and the questions spreadsheet are read from paths
# under /content/drive later in the notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Initialize BERT tokenizer and model architecture
# (7 output labels, one per entry of number_to_sentiment).
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
loaded_bert = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=7)

# Load saved weights
# TODO - CHANGE THIS PATH
# The base checkpoint ships without a trained classifier head (see the warning
# printed on load), so predictions are meaningless until these weights are loaded.
weights_path = '/content/drive/MyDrive/NLP Project/models/bert_weights_priyanshu_3epoch'
loaded_bert.load_weights(weights_path)

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7fac89f87fa0>

In [None]:
# For inverse encoding
number_to_sentiment = {
    0: 'sadness',
    1: 'anger',
    2: 'love',
    3: 'surprise',
    4: 'fear',
    5: 'joy',
    6: 'neutral'
}

def tokenize_text(text, max_length=64):
    """Encode one piece of text into the inputs the BERT classifier consumes.

    Args:
        text: Raw input string.
        max_length: Fixed sequence length. Shorter inputs are padded and longer
            ones truncated to this many tokens. Defaults to 64.

    Returns:
        dict with the 'input_ids' and 'attention_mask' entries of the
        tokenizer output (requested as TensorFlow tensors).
    """
    encoded_text = bert_tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=max_length,
        padding='max_length',  # replaces the deprecated pad_to_max_length=True
        truncation=True,  # explicit, instead of the warned-about implicit fallback
        return_attention_mask=True,
        return_tensors='tf',  # Use TensorFlow tensors
    )
    return {'input_ids': encoded_text['input_ids'], 'attention_mask': encoded_text['attention_mask']}


def get_emotion(text):
  '''Classify `text` with the fine-tuned BERT model and return the emotion name as a string.'''
  model_inputs = tokenize_text(text)
  logits = loaded_bert.predict(model_inputs).logits
  # Highest-scoring class for the single input row, as a plain Python int
  class_index = np.argmax(logits, axis=1).item()
  # Translate the numeric class back to its emotion category
  return number_to_sentiment[class_index]

In [None]:
def ask_chatbot(prompt:str):
  '''Generate one sampled reply to `prompt` with the local Falcon-7B pipeline.

  Args:
    prompt: Text passed to the model as-is.

  Returns:
    Only the generated continuation (the prompt is not included), with
    newlines replaced by spaces.
  '''
  sequences = pipeline(
      prompt,
      max_length=200,
      do_sample=True,
      top_k=10,
      num_return_sequences=1,
      eos_token_id=falcon_tokenizer.eos_token_id,
      # Let the pipeline drop the prompt. Removing it afterwards with
      # str.replace ran after newlines were flattened, so a multi-line prompt
      # never matched, and any echo of the prompt inside the reply was deleted.
      return_full_text=False,
      )

  return sequences[0]['generated_text'].replace("\n", " ")

In [None]:
def get_ans(prompt: str):
  '''Answer `prompt` with Falcon-7B after prefixing it with the emotion BERT detects in it.

  Prints the predicted emotion as a side effect.

  Args:
    prompt: The user's original message.

  Returns:
    Whatever ask_chatbot returns for the emotion-prefixed prompt.
  '''

  # Get emotion in string using Bert
  emotion = get_emotion(prompt)

  print(f"Predicted emotion: {emotion}\n")

  # The labels are nouns (sadness, anger, surprise, ...), so the prefix is
  # "I feel ..." rather than "I am ...", as in the evaluation prompts.
  modified_prompt = f"I feel {emotion}. Address my feelings and then reply to the following question: {prompt}"

  return ask_chatbot(modified_prompt)

In [None]:
# End-to-end check: a complaint plus a question goes through emotion detection
# and then Falcon; the cell displays the returned reply.
prompt = "Your customer service is so pathetic. Just tell me what is your return policy."
get_ans(prompt)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.




Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Predicted emotion: neutral



The current implementation of Falcon calls `torch.scaled_dot_product_attention` directly, this will be deprecated in the future in favor of the `BetterTransformer` API. Please install the latest optimum library with `pip install -U optimum` and call `model.to_bettertransformer()` to benefit from `torch.scaled_dot_product_attention` and future performance optimizations.


' I apologize for the poor quality of your customer service, as I understand that it may have negatively impacted your shopping experience. To address your concerns, our return policy allows customers to return or exchange items within 30 days of purchase, provided they are in their original condition and packaging.'

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the evaluation questions. The displayed frame has a raw `user_prompt`
# column and an emotion-prefixed `modified_prompt` column.
df = pd.read_excel('/content/drive/MyDrive/NLP Project/questions_for_nlp_project.xlsx')
df

Unnamed: 0,user_prompt,modified_prompt
0,"I wasn't expecting to get a refund, especially...",I feel surprise. Address my feelings and then ...
1,"Wait, my order was upgraded to express shippin...",I feel surprise. Address my feelings and then ...
2,I'm scared that my account has been compromise...,I feel fear. Address my feelings and then answ...
3,I'm concerned about my privacy. How do you pro...,I feel fear. Address my feelings and then answ...
4,You've charged me twice for the same product! ...,I feel anger. Address my feelings and then ans...
5,"Every time I use your service, there's a probl...",I feel anger. Address my feelings and then ans...
6,I've been on hold for ages and still no resolu...,I feel anger. Address my feelings and then ans...
7,Your pricing is outrageous! This is highway ro...,I feel anger. Address my feelings and then ans...
8,Wow! My issue was resolved so quickly! I'm thr...,I feel joy. Address my feelings and then answe...
9,Every interaction with your support team has b...,I feel joy. Address my feelings and then answe...


In [None]:
# Apply the ask_chatbot function to each row and store the results in new columns
# (one generation per row for the raw prompt, one for the emotion-prefixed prompt).
# Note: this adds columns to `df` in place.
df['original_response'] = df['user_prompt'].apply(ask_chatbot)
df['modified_response'] = df['modified_prompt'].apply(ask_chatbot)

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_tok

In [None]:
# Inspect the frame now that both response columns have been added.
df

Unnamed: 0,user_prompt,modified_prompt,original_response,modified_response
0,"I wasn't expecting to get a refund, especially...",I feel surprise. Address my feelings and then ...,It was great. I'm a long time shopper and cus...,It's understandable to feel surprised and exc...
1,"Wait, my order was upgraded to express shippin...",I feel surprise. Address my feelings and then ...,How does that work? It depends on the shippin...,I'm delighted to hear that your order was upg...
2,I'm scared that my account has been compromise...,I feel fear. Address my feelings and then answ...,I'm sorry to hear that you're concerned. Ther...,It's normal to feel scared when you suspect a...
3,I'm concerned about my privacy. How do you pro...,I feel fear. Address my feelings and then answ...,"As an AI language model, I am not capable of ...",It's important to protect your personal infor...
4,You've charged me twice for the same product! ...,I feel anger. Address my feelings and then ans...,"I am not sure about your specific situation, ...",! I understand that you are dissatisfied with ...
5,"Every time I use your service, there's a probl...",I feel anger. Address my feelings and then ans...,We're sorry to hear that there's been an issu...,"I understand your frustration, and it's not a..."
6,I've been on hold for ages and still no resolu...,I feel anger. Address my feelings and then ans...,I'm sorry to hear that. Can you provide me wi...,It's completely reasonable to feel angry in t...
7,Your pricing is outrageous! This is highway ro...,I feel anger. Address my feelings and then ans...,Sorry to hear that. Can you please share the ...,I'm sorry to hear that you feel this way. It'...
8,Wow! My issue was resolved so quickly! I'm thr...,I feel joy. Address my feelings and then answe...,We're glad to hear that we were able to resol...,I'm glad that your issue was resolved swiftly...
9,Every interaction with your support team has b...,I feel joy. Address my feelings and then answe...,Thank you! I have a question about how to use...,I am thrilled to hear that every interaction ...


In [None]:
# Average character length of the replies in the 'original_response' column
# (Falcon's answers to the unmodified user prompts)
average_length = df['original_response'].str.len().mean()

# Display the average length
print("Average Length:", average_length)

Average Length: 329.42105263157896


In [None]:
# Average character length of the replies in the 'modified_response' column
# (Falcon's answers to the emotion-prefixed prompts)
average_length = df['modified_response'].str.len().mean()

# Display the average length
print("Average Length:", average_length)

Average Length: 323.2105263157895


## Same Pipeline Using HuggingFace API for Falcon.

In [None]:
!pip install --upgrade transformers accelerate einops

Collecting transformers
  Downloading transformers-4.34.1-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m21.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.24.1-py3-none-any.whl (261 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.4/261.4 kB[0m [31m33.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting einops
  Downloading einops-0.7.0-py3-none-any.whl (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m38.3 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (

In [None]:
# Drive Mount
# Needed only if the BERT weights stored under /content/drive are loaded in this section.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
from getpass import getpass

import numpy as np
import tensorflow as tf
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from transformers import BertTokenizer, TFBertForSequenceClassification
from google.colab import drive
import requests

# Define Falcon-7B API URL and headers
API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-7b-instruct"
# The access token is read from the HF_API_TOKEN environment variable, or asked
# for interactively, so it is never stored in the notebook.
# NOTE(review): the token that used to be hard-coded here has been exposed and
# should be revoked.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN") or getpass("Hugging Face API token: ")
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}

# Define Falcon-7B model
# Only the tokenizer is loaded locally here; generation in this section goes
# through API_URL. The API-based ask_chatbot in this section does not reference
# falcon_tokenizer.
falcon_model = "tiiuae/falcon-7b-instruct"
falcon_tokenizer = AutoTokenizer.from_pretrained(falcon_model)

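# NOTE: BERT loading is disabled in this section. tokenize_text and get_emotion
# below still reference `bert_tokenizer` and `loaded_bert`, so they only work if
# those objects already exist in the session (they are created in the first part
# of this notebook). Uncomment the lines below to run this section on its own.
# # Define BERT model
# bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# loaded_bert = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=7)

# # Load saved BERT weights
# weights_path = '/content/drive/MyDrive/NLP Project/models/bert_weights_priyanshu_3epoch'
# loaded_bert.load_weights(weights_path)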

# Map numerical labels to sentiment.
# The position of each name in the tuple is its class index.
number_to_sentiment = dict(enumerate((
    'sadness',   # 0
    'anger',     # 1
    'love',      # 2
    'surprise',  # 3
    'fear',      # 4
    'joy',       # 5
    'neutral',   # 6
)))

# Function to tokenize text for BERT model
def tokenize_text(text, max_length=64):
    """Encode one piece of text into the inputs the BERT classifier consumes.

    Args:
        text: Raw input string.
        max_length: Fixed sequence length. Shorter inputs are padded and longer
            ones truncated to this many tokens. Defaults to 64.

    Returns:
        dict with the 'input_ids' and 'attention_mask' entries of the
        tokenizer output (requested as TensorFlow tensors).
    """
    encoded_text = bert_tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=max_length,
        padding='max_length',  # replaces the deprecated pad_to_max_length=True
        truncation=True,  # explicit, instead of relying on the implicit fallback
        return_attention_mask=True,
        return_tensors='tf',
    )
    return {'input_ids': encoded_text['input_ids'], 'attention_mask': encoded_text['attention_mask']}

# Function to get emotion using the BERT model
def get_emotion(text):
    """Classify `text` with the BERT model and return the emotion name as a string."""
    model_inputs = tokenize_text(text)
    logits = loaded_bert.predict(model_inputs).logits
    # Highest-scoring class for the single input row, as a plain Python int
    class_index = np.argmax(logits, axis=1).item()
    return number_to_sentiment[class_index]

Downloading (…)okenizer_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

In [None]:
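# Earlier draft of ask_chatbot, kept for reference only; the active version is
# defined in the next cell. This draft sent max_time / max_new_tokens as
# top-level string fields and stripped the prompt from the reply by hand.
# # Function to query Falcon-7B model with a modified prompt
# def ask_chatbot(prompt):
#     payload = {"inputs": prompt,
#                "max_time" : "119",
#                "max_new_tokens": "249"}
#     response = requests.post(API_URL, headers=headers, json=payload)
#     return response.json()[0]['generated_text'].replace("\n", " ").replace(prompt, "")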

# Function to get Falcon-7B's response based on emotion
def get_ans(prompt: str):
    """Answer `prompt` with Falcon-7B after prefixing it with the emotion BERT detects in it.

    Prints the predicted emotion as a side effect.

    Args:
        prompt: The user's original message.

    Returns:
        Whatever ask_chatbot returns for the emotion-prefixed prompt.
    """
    emotion = get_emotion(prompt)
    print(f"Predicted emotion: {emotion}\n")
    # The labels are nouns (sadness, anger, surprise, ...), so the prefix is
    # "I feel ..." rather than "I am ...", as in the evaluation prompts.
    modified_prompt = f"I feel {emotion}. Address my feelings and then reply to the following question: {prompt}"
    return ask_chatbot(modified_prompt)

In [None]:
# Function to query Falcon-7B model with a modified prompt
def ask_chatbot(prompt):
    """Send `prompt` to the hosted Falcon-7B-Instruct endpoint and return its reply.

    Args:
        prompt: Text to complete.

    Returns:
        The first generation's text (prompt excluded via return_full_text=False),
        with newlines replaced by spaces.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status, instead of
            failing later with an opaque KeyError on the error body.
    """
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_time": 119,
            "max_new_tokens": 249,
            "return_full_text": False,
        },
        # wait_for_model is a request option, not a generation parameter, so it
        # belongs under "options" rather than "parameters".
        "options": {"wait_for_model": True},
    }
    response = requests.post(API_URL, headers=headers, json=payload)
    response.raise_for_status()
    return response.json()[0]['generated_text'].replace("\n", " ")

## Testing Falcon API

In [None]:
# Baseline: a factual question preceded by a remark about repeated contact,
# sent without any emotion instruction.
prompt = "This is the third time I am contacting you. What is the engine capacity of a Lamborghini Gallardo?"
result = ask_chatbot(prompt)

# Print Falcon-7B's response
print(result)

 The engine capacity of a Lamborghini Gallardo is 5.2 liters.


In [None]:
# Same question, now prefixed with an explicit emotion statement and an
# instruction to address the user's feelings first.
prompt = "I am upset. Address my feelings and then answer the following question. This is the third time I am contacting you. What is the engine capacity of a Lamborghini Gallardo?"
result = ask_chatbot(prompt)

# Print Falcon-7B's response
print(result)

 I'm sorry to hear that you're upset. As for the engine capacity of a Lamborghini Gallardo, it has a 4.2-liter V10 engine that produces 552 horsepower.


In [None]:
# A plain statement (no question, no emotion instruction) to see how the model continues it.
prompt = "I wasn't expecting to get a refund, especially so quickly."
result = ask_chatbot(prompt)

# Print Falcon-7B's response
print(result)

 I'm very impressed with the service and will definitely be using it again in the future. I was very pleased with the service I received. I was able to get a refund for a product I purchased from a website. I will definitely use this service again in the future.


In [None]:
# The same statement prefixed with the "I feel <emotion>" instruction.
prompt = "I feel surprise. Address my feelings and then answer this: I wasn't expecting to get a refund, especially so quickly."
result = ask_chatbot(prompt)

# Print Falcon-7B's response
print(result)

 I understand that you feel surprised and delighted about receiving a refund so quickly. It's important to acknowledge and appreciate the unexpected positive outcome. You could express your gratitude and happiness for the situation, and then consider how you might use the refund to your advantage.


In [None]:
# A complaint followed by a direct question, sent without any emotion instruction.
prompt = "Your customer service is so pathetic. Just tell me what is your return policy."
result = ask_chatbot(prompt)

# Print Falcon-7B's response
print(result)

 I'm sorry to hear that you had a poor experience with our customer service. Our return policy is that you can return any product within 30 days of purchase, provided that it is in its original condition and packaging.


In [None]:
# Ask Falcon itself to name the sentiment of the complaint (free-form answer).
prompt = "What is the sentiment of the following text: Your customer service is so pathetic. Just tell me what is your return policy."
result = ask_chatbot(prompt)

# Print Falcon-7B's response
print(result)

 The sentiment of the text is negative.


In [None]:
# The same sentiment question, restricted to a fixed list of labels to choose from.
prompt = "What is the sentiment of the following text (Choose 1 of anger, fear, surprise, joy, neutral, sad, upset): Your customer service is so pathetic. Just tell me what is your return policy."
result = ask_chatbot(prompt)

# Print Falcon-7B's response
print(result)

 sad


In [None]:
# Repeat of the raw complaint prompt (identical to an earlier test cell).
prompt = "Your customer service is so pathetic. Just tell me what is your return policy."
result = ask_chatbot(prompt)

# Print Falcon-7B's response
print(result)

 I'm sorry to hear that you had a poor experience with our customer service. Our return policy is that you can return any product within 30 days of purchase, provided that it is in its original condition and packaging.


In [None]:
# A product-failure complaint followed by a return-policy question.
prompt = "The product you sold to me just broke after 1 day usage. What is your return policy?"
result = ask_chatbot(prompt)

# Print Falcon-7B's response
print(result)

 I'm sorry to hear that. Our return policy is 30 days from the date of purchase. If you've used the product less than 30 days, we'll be happy to provide a refund or exchange. If you've used the product more than 30 days, we'll still provide a refund or exchange if you return the product in its original condition.


In [None]:
# A strongly worded complaint followed by a refund question.
prompt = "I am fed up with your company. How do I get a refund?"
result = ask_chatbot(prompt)

# Print Falcon-7B's response
print(result)

 I'm sorry to hear that. To get a refund, you can contact our customer service team and request a refund. They will guide you through the process and provide you with the necessary information.


In [None]:
# Repeat of the Lamborghini question without an emotion instruction
# (identical to an earlier test cell).
prompt = "This is the third time I am contacting you. What is the engine capacity of a Lamborghini Gallardo?"
result = ask_chatbot(prompt)

# Print Falcon-7B's response
print(result)

 The engine capacity of a Lamborghini Gallardo is 5.2 liters.


In [None]:
# Repeat of the Lamborghini question with the "I am upset" instruction
# (identical to an earlier test cell).
prompt = "I am upset. Address my feelings and then answer the following question. This is the third time I am contacting you. What is the engine capacity of a Lamborghini Gallardo?"
result = ask_chatbot(prompt)

# Print Falcon-7B's response
print(result)

 I'm sorry to hear that you're upset. As for the engine capacity of a Lamborghini Gallardo, it has a 4.2-liter V10 engine that produces 552 horsepower.


# Saving Falcon

In [None]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.34.1-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m37.9 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m54.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m55.3 MB/s[0m eta [36m0:00:00[0m
Col

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
# import torch

# falcon_model_name = "tiiuae/falcon-7b-instruct"

# Download and instantiate the Falcon-7B-Instruct causal LM with default settings.
# NOTE(review): earlier cells bind `falcon_model` to the model id string; from
# here on the name refers to the model object instead.
falcon_model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-7b-instruct")

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/16.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00002.bin:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00002.bin:   0%|          | 0.00/4.48G [00:00<?, ?B/s]