# **Libraries**

In [None]:
import numpy as  np
import tensorflow as tf
import re
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import spacy
import pandas as pd

# **Read Data From Drive**

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
path = '/content/drive/MyDrive/Datasets/Customer_data.csv'
data = pd.read_csv(path)

In [None]:
data.head(5)

Unnamed: 0,flags,instruction,category,intent,response
0,B,question about cancelling order {{Order Number}},ORDER,cancel_order,I've understood you have a question regarding ...
1,BQZ,i have a question about cancelling oorder {{Or...,ORDER,cancel_order,I've been informed that you have a question ab...
2,BLQZ,i need help cancelling puchase {{Order Number}},ORDER,cancel_order,I can sense that you're seeking assistance wit...
3,BL,I need to cancel purchase {{Order Number}},ORDER,cancel_order,I understood that you need assistance with can...
4,BCELN,"I cannot afford this order, cancel purchase {{...",ORDER,cancel_order,I'm sensitive to the fact that you're facing f...


# **Preprocessing Data**

In [None]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
data['intent'] = le.fit_transform(data['intent'])

In [None]:
import nltk
import spacy
from nltk.stem import PorterStemmer
stemmer = PorterStemmer()
nltk.download('stopwords')
from nltk.corpus import stopwords
STOPWORDS = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
# Step 1: Load spaCy model for lemmatization
import spacy
nlp = spacy.load('en_core_web_sm')  # Load small English model

# Step 2: Define preprocessing function
def preprocess_text(text):
    # Tokenization and lowercasing using spaCy
    doc = nlp(text.lower())

    # Lemmatization and stopword removal
    tokens = [token.lemma_ for token in doc if token.text not in STOPWORDS and token.is_alpha]

    # Apply stemming (optional, if you want stemming along with lemmatization)
    tokens = [stemmer.stem(token) for token in tokens]

    return ' '.join(tokens)

# Step 3: Apply preprocessing to customer queries
data['processed_instruction'] = data['instruction'].apply(preprocess_text)

# Step 4: Check the resul
data[['instruction', 'processed_instruction']].head()


Unnamed: 0,instruction,processed_instruction
0,question about cancelling order {{Order Number}},question cancel order order number
1,i have a question about cancelling oorder {{Or...,question cancel oorder order number
2,i need help cancelling puchase {{Order Number}},need help cancel puchas order number
3,I need to cancel purchase {{Order Number}},need cancel purchas order number
4,"I cannot afford this order, cancel purchase {{...",afford order cancel purchas order number


# **Save Preparocessing Data**

In [None]:
def save(path,data):
  data.to_csv(path,index=False)

In [None]:
def saveEncoder(path,encoder):
  with open(path, 'wb') as file:
    pickle.dump(encoder, file)

In [None]:
saveEncoder("le.pkl",le)

In [None]:
save("preprocessing_data.csv",data)

# **Split Train and Test**

In [None]:
X = data["processed_instruction"].values
y = data["intent"].values

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# **Vectorizer**

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf_vectorizer = TfidfVectorizer(max_features=500)  # Limiting to 500 features for simplicity
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)


In [None]:
def saveVector(path , vector):
  with open(path, 'wb') as file:
    pickle.dump(tfidf_vectorizer, file)

In [None]:
saveVector("tfidf_vectorizer.pkl",tfidf_vectorizer)

# **Create Model and Train**

In [None]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Initialize and train a Multinomial Naive Bayes classifier
nb_classifier = MultinomialNB()
nb_classifier.fit(X_train_tfidf, y_train)

In [None]:
nb_classifier.score(X_test_tfidf,y_test)

0.986046511627907

In [None]:
y_pred = nb_classifier.predict(X_test_tfidf)

In [None]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.95      1.00      0.98       187
           1       0.97      0.98      0.98       187
           2       0.99      0.99      0.99       216
           3       1.00      1.00      1.00       199
           4       0.98      0.98      0.98       192
           5       1.00      1.00      1.00       206
           6       0.94      0.99      0.97       200
           7       1.00      1.00      1.00       203
           8       1.00      0.99      0.99       208
           9       0.99      0.99      0.99       201
          10       0.99      0.98      0.99       217
          11       0.98      0.99      0.98       178
          12       0.99      1.00      1.00       218
          13       0.99      1.00      1.00       171
          14       0.99      0.99      0.99       186
          15       0.99      0.99      0.99       215
          16       1.00      0.87      0.93       196
          17       0.99    

In [None]:
print(accuracy_score(y_test,y_pred))

0.986046511627907


# **Testing**

In [None]:
def predict(text):
  text = preprocess_text(text)
  text = tfidf_vectorizer.transform([text])
  pred = nb_classifier.predict(text)
  return le.inverse_transform(pred)


In [None]:
text = input("Ask : ")
predict(text)

Ask : now i oreder product but it not delivery so i want cancelling my order


array(['cancel_order'], dtype=object)

# **Save Model Pickle**

In [None]:
import pickle

with open ('model.pkl','wb') as f:
  pickle.dump(nb_classifier,f)

# **Load Vectorizer and LabelEncoder**

In [None]:
def loadEncoder(path):
  with open(path, 'rb') as file:
    return pickle.load(file)

In [None]:
def loadVector(path):
  with open(path, 'rb') as file:
    return pickle.load(file)

In [None]:
labelEncoder = loadEncoder("le.pkl")

In [None]:
labelEncoder.transform(["order"])

array(['cancel_order'], dtype=object)

In [None]:
vector = loadVector("tfidf_vectorizer.pkl")

# **Create Model intent to Responce**

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
path = '/content/drive/MyDrive/Datasets/Customer_data.csv'
data = pd.read_csv(path)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Assuming you have a DataFrame 'data' with 'intent' and 'response' columns.

# Encode the 'intent' column (input)
label_encoder = LabelEncoder()
data['intent_encoded'] = label_encoder.fit_transform(data['intent'])

# Tokenize the 'intent' column (user input)
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data['intent'])
sequences = tokenizer.texts_to_sequences(data['intent'])
intent_padded = pad_sequences(sequences, padding='post')

# Tokenizer vocabulary size
vocab_size = len(tokenizer.word_index) + 1

# Define the model
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=64, input_length=intent_padded.shape[1]))
model.add(LSTM(128, return_sequences=False))
model.add(Dense(64, activation='relu'))
model.add(Dense(len(label_encoder.classes_), activation='softmax'))

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Print model summary
model.summary()

# Train the model
X = intent_padded
y = data['intent_encoded']
model.fit(X, y, epochs=10, batch_size=64, validation_split=0.2)




Epoch 1/10
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - accuracy: 0.6801 - loss: 1.3740 - val_accuracy: 0.0722 - val_loss: 15.4632
Epoch 2/10
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 19ms/step - accuracy: 1.0000 - loss: 8.2107e-04 - val_accuracy: 0.0722 - val_loss: 15.9231
Epoch 3/10
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 12ms/step - accuracy: 1.0000 - loss: 2.0048e-04 - val_accuracy: 0.0722 - val_loss: 16.2165
Epoch 4/10
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 1.0000 - loss: 8.2506e-05 - val_accuracy: 0.0722 - val_loss: 16.4763
Epoch 5/10
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - accuracy: 1.0000 - loss: 3.9627e-05 - val_accuracy: 0.0722 - val_loss: 16.7169
Epoch 6/10
[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 22ms/step - accuracy: 1.0000 - loss: 2.3330e-05 - val_accuracy: 0.0722 - val_loss: 16.

<keras.src.callbacks.history.History at 0x78b790c92560>

In [None]:
# To get the response after predicting an intent:
def get_response(user_input):
    sequence = tokenizer.texts_to_sequences([user_input])
    padded = pad_sequences(sequence, padding='post', maxlen=intent_padded.shape[1])
    predicted_intent = model.predict(padded)
    intent_label = label_encoder.inverse_transform([predicted_intent.argmax()])[0]
    response = data[data['intent'] == intent_label]['response'].values[0]
    return response


In [None]:
get_response("cancel")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 186ms/step


"I've understood you have a question regarding canceling order {{Order Number}}, and I'm here to provide you with the information you need. Please go ahead and ask your question, and I'll do my best to assist you."

In [None]:
model.evaluate(X,y)

[1m840/840[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9812 - loss: 0.3667


[3.9895856380462646, 0.8144164681434631]

In [None]:
#save tokenizer and labelEncoder and intent_padded

import pickle

with open ('intent_tokenizer.pkl','wb') as f:
  pickle.dump(tokenizer,f)

with open ('intent_label_encoder.pkl','wb') as f:
  pickle.dump(label_encoder,f)

with open ('intent_padded.pkl','wb') as f:
  pickle.dump(intent_padded,f)

In [None]:
#save model

model.save("intent_model.h5")



# **Build Application**

In [153]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [154]:
path = "/content/drive/MyDrive/Datasets/Customer_data.csv"
data = pd.read_csv(path)

In [155]:
intent_model_path="/content/drive/MyDrive/Models/Intent_Models/intent_model.h5"
intent_encoder_path="/content/drive/MyDrive/Models/Intent_Models/intent_label_encoder.pkl"
intent_token_path="/content/drive/MyDrive/Models/Intent_Models/intent_tokenizer.pkl"
intent_padded_path="/content/drive/MyDrive/Models/Intent_Models/intent_padded.pkl"

In [156]:
text_model_path="/content/drive/MyDrive/Models/Text_Models/classification_model.pkl"
text_encoder_path="/content/drive/MyDrive/Models/Text_Models/le.pkl"
text_token_path="/content/drive/MyDrive/Models/Text_Models/tfidf_vectorizer.pkl"

In [157]:
import pickle
import numpy as np
import joblib
import spacy
import nltk
import spacy
from nltk.stem import PorterStemmer
stemmer = PorterStemmer()
nltk.download('stopwords')
from nltk.corpus import stopwords
STOPWORDS = set(stopwords.words('english'))


class MyModel:
    nlp= spacy.load('en_core_web_sm')

    def __init__(self,path):
        with open(path, 'rb') as file:
            self.model = pickle.load(file)
        print(self.model)

    def loadVector(self,path):
        with open(path, 'rb') as file:
            self.vector = pickle.load(file)
        print(self.vector.transform(["hello"]))

    def loadEncoder(self,path):
        with open(path, 'rb') as file:
            self.encoder = pickle.load(file)

    def preprocess_text(self,text):
        doc = self.nlp(text.lower())
        tokens = [token.lemma_ for token in doc if token.text not in STOPWORDS and token.is_alpha]
        tokens = [stemmer.stem(token) for token in tokens]
        return ' '.join(tokens)

    def predict(self,text):
        self.loadEncoder(text_encoder_path)
        self.loadVector(text_token_path)
        text = self.preprocess_text(text)
        text = self.vector.transform([text])
        pred = self.model.predict(text)
        pred =self.encoder.inverse_transform(pred)[0]
        return pred


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [158]:
#intent Methods

class IntentModel:
  def __init__(self,path):
    self.model = tf.keras.models.load_model(path)

  def intentLoadEncoder(self,path):
    with open(path, 'rb') as file:
      self.encoder =  pickle.load(file)


  def intentPadded(self,path):
    with open(path, 'rb') as file:
      self.intent_padded = pickle.load(file)

  def intentTokenizer(self,path):
    with open(path, 'rb') as file:
      self.tokenizer = pickle.load(file)

  def get_response(self,user_input):
    self.intentLoadEncoder(intent_encoder_path)
    self.intentPadded(intent_padded_path)
    self.intentTokenizer(intent_token_path)
    sequence = self.tokenizer.texts_to_sequences([user_input])
    padded = pad_sequences(sequence, padding='post', maxlen=self.intent_padded.shape[1])
    predicted_intent = self.model.predict(padded)
    intent_label = self.encoder.inverse_transform([predicted_intent.argmax()])[0]
    response = data[data['intent'] == intent_label]['response'].values[0]
    return response


# **Testing a Application**

In [159]:
model = MyModel(text_model_path)
intentModel = IntentModel(intent_model_path)



MultinomialNB()


In [160]:
def predict():

  text = input("Ask : ")
  res = model.predict(text)
  print(res)
  res = intentModel.get_response(res)
  print(res)

In [170]:
predict()

Ask : check my order

track_order
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
I've understood you have a question regarding canceling order {{Order Number}}, and I'm here to provide you with the information you need. Please go ahead and ask your question, and I'll do my best to assist you.
