# Resume Chatbot
This notebook uses Natural Language Processing (NLP) and a neural network to build a chatbot.

In [None]:
# import library
import os
import json
import string
import random 
import nltk
import numpy as np
from nltk.stem import WordNetLemmatizer 
import tensorflow as tf 
from tensorflow.keras import Sequential 
from tensorflow.keras.layers import Dense, Dropout
from utils import *

### Inputs
Input parameters

In [None]:
# Path to the JSON file that holds the resume data (read in the extract step)
DATA_FN = 'data/resume_data.json'

# Hyper-parameters consumed by the model-training cell
EPOCH_NUM = 200  # number of epochs passed to model.fit
LEARNING_RATE = 0.01  # learning rate passed to the Adam optimizer
LOSS_TYPE = 'categorical_crossentropy'  # loss passed to model.compile
S_METRICS = 'accuracy'  # metric passed to model.compile
DECAY = 1e-6  # decay argument passed to the Adam optimizer

### Extract data
- Extract resume data from json file
- Download the tokenizer and WordNet resources from NLTK

In [None]:
# Download the NLTK resources needed by nltk.word_tokenize and
# WordNetLemmatizer. "punkt_tab" is fetched in addition to "punkt" because
# newer NLTK releases load the tokenizer data from "punkt_tab"; requesting
# both keeps the notebook working across NLTK versions.
for resource in ("punkt", "punkt_tab", "wordnet", "omw-1.4"):
    nltk.download(resource)

In [None]:
# Read the resume data from the JSON file.
# The encoding is stated explicitly: JSON text is UTF-8, while open() would
# otherwise fall back to the platform's default encoding and could fail on
# non-ASCII characters.
with open(DATA_FN, encoding='utf-8') as json_file:
    data = json.load(json_file)

# Keep only the top-level 'resume' section; the intents are read from it.
data = data['resume']

In [None]:
# Lemmatizer used to reduce every token to its base form
lemmatizer = WordNetLemmatizer()

# Containers filled while walking the intents:
#   words   - every token seen in any pattern (later reduced to the vocab)
#   classes - the distinct intent tags
#   doc_X   - the raw pattern sentences
#   doc_y   - the tag belonging to the pattern at the same index in doc_X
words = []
classes = []
doc_X = []
doc_y = []

for entry in data['intents']:
    tag = entry['tag']

    for sentence in entry['patterns']:
        # collect the tokens of this pattern and remember pattern + tag
        words += nltk.word_tokenize(sentence)
        doc_X.append(sentence)
        doc_y.append(tag)

    # register each tag only once
    if tag not in classes:
        classes.append(tag)

# Build the vocabulary: skip tokens that occur inside the punctuation
# string, lowercase and lemmatize the rest; the set removes duplicates.
vocab = set()
for token in words:
    if token not in string.punctuation:
        vocab.add(lemmatizer.lemmatize(token.lower()))

# Alphabetically ordered, duplicate-free vocabulary and class list
words = sorted(vocab)
classes = sorted(set(classes))

### Set up training data
Convert each pattern into a bag-of-words vector and a one-hot class label for training.

In [None]:
# Build the training set: one bag-of-words (BoW) vector and one one-hot
# label per pattern.
training = []
out_empty = [0] * len(classes)

for doc, tag in zip(doc_X, doc_y):
    # Tokenize, lowercase and lemmatize the pattern token by token, the same
    # way the vocabulary was built. Membership is tested against this token
    # set; testing `word in doc` on the raw string would be a substring
    # match (e.g. the vocab word "i" would match inside "hi").
    doc_tokens = {
        lemmatizer.lemmatize(token.lower())
        for token in nltk.word_tokenize(doc)
    }
    bow = [1 if word in doc_tokens else 0 for word in words]

    # one-hot encode the class this pattern belongs to
    output_row = list(out_empty)
    output_row[classes.index(tag)] = 1

    # keep the BoW vector together with its label so they shuffle as a pair
    training.append([bow, output_row])

# shuffle the (features, label) pairs in place
random.shuffle(training)

# split the features and target labels into two numeric arrays
train_x = np.array([features for features, _ in training])
train_y = np.array([label for _, label in training])

### Model training
Train the model using a neural network. The model looks at the features and predicts the tag associated with them; the chatbot then selects a response for the predicted tag.

In [None]:
# Location of the weight checkpoints written during training
checkpoint_path = "training/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Callback that saves only the model's weights to checkpoint_path
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    verbose=1,
)

# Layer sizes follow the training arrays: one input per vocabulary word,
# one output per class
input_shape = (train_x.shape[1],)
output_shape = train_y.shape[1]

# Feed-forward network: two ReLU hidden layers, each followed by dropout,
# and a softmax output layer
model = Sequential([
    Dense(128, input_shape=input_shape, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(output_shape, activation='softmax'),
])

# Compile with the Adam optimizer and the configured loss / metric
adam_opt = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE, decay=DECAY)
model.compile(loss=LOSS_TYPE, optimizer=adam_opt, metrics=[S_METRICS])

# Train the model, saving weights through the checkpoint callback
model.fit(
    x=train_x,
    y=train_y,
    epochs=EPOCH_NUM,
    verbose=1,
    callbacks=[cp_callback],
)

In [None]:
# Evaluate the model.
# NOTE: this scores the model on the same arrays it was trained on, so the
# reported loss/accuracy does not measure performance on unseen input.
loss, acc = model.evaluate(train_x, train_y, verbose=1)

### Save the model
Save the model so it can be used in other scripts.

In [None]:
# Write the trained model to the 'model/resume_model' path
model.save('model/resume_model')

### Test chatbot

In [None]:
# Optional: uncomment to reload the saved model instead of reusing the
# in-memory one from the training cell
#model = tf.keras.models.load_model('model/resume_model')

# Show the model architecture
model.summary()

In [None]:
# Chat loop: read a message, predict its intent(s) and print a response.
# The loop runs until input ends (EOF) or the user interrupts it.
try:
    while True:
        user_text = input('')
        predicted = pred_class(user_text, words, classes, model)
        print(get_response(predicted, data))
except (KeyboardInterrupt, EOFError):
    print("Chat Ended")