In [1]:
import numpy as np
import tensorflow as tf
import pickle
import re
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Test Data

In [14]:
# Sample ticket descriptions used as inputs to make_inference() at the end of
# the notebook. Each is a single-line string built with backslash line
# continuations, so the trailing space before every "\" is significant.
# NOTE(review): the text looks already pre-processed (punctuation and digits
# missing, some words fused such as "buttonUser") — presumably copied from a
# cleaned dataset; TODO confirm.

# Sample 1: booking-flow currency report.
desc1 = "Steps reproduce Launch url https//qavirginvoyagescom/booking User done voyage selection User \
clicked Choose Cabin User entered access key details Access Key First Name Last Name Email Address click \
continue buttonUser summary pageuser changed currency USD GBP url stringObserve currency Summary page navigate \
confirmation page Expected Result Currency Should not Display GBP currency user not able complete Booking Actual \
Result Currency Displaying GBP currency user able complete booking details reflected saleforce refer attached \
recording"

# Sample 2: forwarded App Store review message.
desc2 = "We app rejected Apple Please take look summary issues forwarded Apple They also available Apple \
Store Connect They still running issues logging Please let us know running point We like request keep us \
updated resolution progress Thank youWe discovered one bugs app reviewed iPad running iOS WiFiSpecifically \
app returns network error attempt register new account using Sign AppleWe noticed issue app still contributes \
lower quality user experience Apple users expect Your app uses Sign Apple login option not use Sign Apple \
button design branding and/or user interface elements appropriately described Sign With Apple Human \
Interface GuidelinesPS Image provided Appple Regards Begin forwarded messageFrom App Store \
Connect <no_replyemailapplecom>Subject New Message App Store Review Regarding Virgin VoyagesDate \
April AM EDTTo luisgonzalezvirginvoyagescom Dear Luis We've sent new message app Virgin Voyages \
app Apple ID To view reply message go Resolution Center App Store ConnectBest regards App Store \
Review Picture pngthumbnail"

# Utils

In [15]:
# Lower-case English stopwords dropped by clean_description(). Matching is
# case-sensitive, so capitalised forms such as "The" or "We" are kept.
stopwords = ["a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "as", "at",
              "be", "because", "been", "before", "being", "below", "between", "both", "but", "by", "could", "did", "do",
              "does", "doing", "down", "during", "each", "few", "for", "from", "further", "had", "has", "have", "having",
              "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself", "him", "himself", "his", "how", "how's",
              "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "it", "it's", "its", "itself", "let's", "me", "more", "most", "my",
              "myself", "nor", "of", "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", "out", "over", "own", "same",
              "she", "she'd", "she'll", "she's", "should", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs", "them",
              "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "through",
              "to", "too", "under", "until", "up", "very", "was", "we", "we'd", "we'll", "we're", "we've", "were", "what", "what's", "when",
              "when's", "where", "where's", "which", "while", "who", "who's", "whom", "why", "why's", "with", "would", "you", "you'd", "you'll",
              "you're", "you've", "your", "yours", "yourself", "yourselves"]


def clean_description(x):
    """Strip noise characters and stopwords from a description string.

    Processing steps, in order:
      1. Delete ``! @ # $ . : -``, digits, tabs, carriage returns and
         newlines. Nothing is inserted in their place, so the text on either
         side of a removed character is fused ("a\\nb" becomes "ab").
      2. Replace ``*`` and ``,`` with a space; delete ``[``, ``]`` and ``|``.
      3. Split on whitespace and drop every token found in ``stopwords``
         (exact, case-sensitive match).

    Args:
        x: Raw description text.

    Returns:
        The remaining tokens joined by single spaces ("" if none remain).
    """
    # NOTE(review): step 1 fuses words across line breaks; this is kept as-is
    # because the saved tokenizers/models were presumably fitted on text
    # cleaned the same way — confirm before changing.
    x = re.sub("[!@#$\r\n.:0123456789\t-]", '', x)
    x = x.replace("*", ' ').replace(",", ' ').replace("[", '').replace("]", '').replace("|", '')

    # Build the lookup set once per call: constant-time membership per token
    # instead of scanning the list, while still honouring later edits to
    # ``stopwords``.
    excluded = frozenset(stopwords)
    return ' '.join(word for word in x.split() if word not in excluded)

# Sequence length the defect classifier input is padded/truncated to.
max_len_desc = 1126
# Word cap applied by get_first_n_words_desc(); also the padded length fed to
# the reason generator.
max_len_desc_reson = 100


def get_first_n_words_desc(x):
    """Return at most the first ``max_len_desc_reson`` words of ``x``.

    The text is split on whitespace and re-joined with single spaces, so runs
    of whitespace are collapsed even when nothing is cut off.
    """
    # Slicing past the end of a list is harmless, so one expression covers
    # both the short and the long input.
    leading_words = x.split()[:max_len_desc_reson]
    return ' '.join(leading_words)

# Load Models

In [16]:
# Artifact locations. Relative paths are resolved against the kernel's
# current working directory.
DEFECT_CLASSIFIER_PATH = 'defect_classifier.h5'
DESCRIPTION_TOKENIZER_PATH = 'tokenizer.pickle'
REASON_GENERATOR_PATH = 'reason_generator.h5'
CANCELLED_TOKENIZER_PATH = 'tokenizer_cancelled.pickle'


def _load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    SECURITY: ``pickle.load`` can execute arbitrary code embedded in the
    file. Only point this at tokenizer files produced by a trusted source.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Defect classifier: Keras model plus the tokenizer used on its input text.
defect_classifier = tf.keras.models.load_model(DEFECT_CLASSIFIER_PATH)
tokenizer_description = _load_pickle(DESCRIPTION_TOKENIZER_PATH)

# Reason generator: Keras model plus its own tokenizer.
reason_generator = tf.keras.models.load_model(REASON_GENERATOR_PATH)
tokenizer_cancelled = _load_pickle(CANCELLED_TOKENIZER_PATH)

In [20]:
def _score_defect(cleaned_description):
    """Return the defect classifier's score for an already-cleaned description."""
    sequences = tokenizer_description.texts_to_sequences([cleaned_description])
    padded = pad_sequences(sequences, maxlen=max_len_desc, padding='pre', truncating='pre')
    return defect_classifier.predict(padded)[0][0]


def _generate_reason(cleaned_description, next_words):
    """Greedily generate up to ``next_words`` words with the reason generator.

    The seed is the first ``max_len_desc_reson`` words of the cleaned
    description; each predicted word is appended to the seed before the next
    prediction. Returns the generated words joined by single spaces.
    """
    index_to_word = {index: word for word, index in tokenizer_cancelled.word_index.items()}
    seed_text = get_first_n_words_desc(cleaned_description)
    generated = []
    for _ in range(next_words):
        seed_sequence = tokenizer_cancelled.texts_to_sequences([seed_text])
        # truncating='pre' keeps only the trailing max_len_desc_reson tokens,
        # so no manual slicing is needed. (The earlier manual slice was guarded
        # by a comparison against max_len_desc, the classifier's length, which
        # was the wrong constant and redundant with this call.)
        padded_seed_sequence = pad_sequences(seed_sequence, truncating='pre',
                                             padding='pre', maxlen=max_len_desc_reson)
        predicted_index = int(np.argmax(reason_generator.predict(padded_seed_sequence)[0]))
        word = index_to_word.get(predicted_index)
        if word is None:
            # The predicted index has no entry in word_index (e.g. index 0,
            # which pad_sequences uses as filler). Stop instead of raising
            # KeyError.
            break
        generated.append(word)
        seed_text = seed_text + " " + word
    return ' '.join(generated)


def make_inference(description, next_words=20):
    """Classify a description and, if it is not a valid defect, print a generated reason.

    The description is cleaned with ``clean_description`` and scored by
    ``defect_classifier``. A score below 0.5 is reported as "Not a Valid
    Defect" with probability ``1 - score``, followed by a reason produced by
    ``reason_generator``; otherwise the description is reported as a valid
    defect with probability ``score``.

    Relies on the notebook-level objects ``defect_classifier``,
    ``tokenizer_description``, ``reason_generator`` and
    ``tokenizer_cancelled`` being loaded.

    Args:
        description: Raw description text.
        next_words: Maximum number of words to generate for the reason.

    Returns:
        None. Results are printed.
    """
    # Clean once and share the result between the classifier and the generator.
    cleaned = clean_description(description)
    probab = _score_defect(cleaned)

    if probab < 0.5:
        print("Not a Valid Defect")
        print("Probability: ", 1-probab)
        print("Reason: ", _generate_reason(cleaned, next_words))
    else:
        print("It's a Valid Defect")
        print("Probability: ", probab)

In [21]:
# Run the classifier (and, if needed, the reason generator) on the first sample.
make_inference(desc1)

Not a Valid Defect
Probability:  0.9975750744342804
Reason:  we have a duplicate asap ticket please find the correct notes and blocked to be the issue of metas can


In [22]:
# Run the classifier (and, if needed, the reason generator) on the second sample.
make_inference(desc2)

It's a Valid Defect
Probability:  0.9020759
