In [31]:
# things we need for NLP
import nltk
from nltk.stem.lancaster import LancasterStemmer
# single shared stemmer instance; clean_up_sentence() uses it to stem every token
stemmer = LancasterStemmer()

# things we need for Tensorflow
import numpy as np
import tflearn
import tensorflow as tf
import random
# initialize tf
# NOTE(review): reset_default_graph is presumably called so re-running the
# notebook does not stack a second copy of the network on the default graph;
# it is a TensorFlow 1.x top-level API - confirm the pinned TF version.
tf.reset_default_graph()

In [32]:
# Rx and biz lookup functions
# this would normally be in separate Class or web-services

# using fonefinder.net for cell location lookups
#       yellowpages.com for biz lookups
#       GoodRx.com for Rx pricing

from bs4 import BeautifulSoup
import json
import httplib2
http = httplib2.Http()

# reverse lookup on cellnumber
def _split_us_number(cellnum):
    """Split a US phone number string into (npa, nxx, thoublock), or return None."""
    if not cellnum:
        return None
    # handle +1xxxxxxxxxx pattern
    if cellnum[0] == '+' and len(cellnum) == 12:
        digits = cellnum[2:]
    # handle 1xxxxxxxxxx pattern
    elif cellnum[0] == '1' and len(cellnum) == 11:
        digits = cellnum[1:]
    # handle 10-digit pattern
    elif len(cellnum) == 10:
        digits = cellnum
    else:
        return None
    # the pieces are interpolated into a URL, so refuse anything but digits
    if not digits.isdigit():
        return None
    return digits[0:3], digits[3:6], digits[6:10]


def lookup_number(cellnum):
    """Scrape fonefinder.net for the city/state of a US phone number.

    Accepted formats are ``+1xxxxxxxxxx`` (12 characters), ``1xxxxxxxxxx``
    (11 characters) and a plain 10-digit number.

    Args:
        cellnum: the phone number as a string.

    Returns:
        ``{"city": ..., "state": ...}`` with the values exactly as they appear
        in the ``findcity.php`` link of the result page, or ``None`` when
        ``cellnum`` is empty, matches none of the formats, or contains
        non-digit characters.

    Raises:
        ValueError: the result page does not contain the expected
            ``findcity.php`` link (raised by ``str.index``).
    """
    parts = _split_us_number(cellnum)
    if parts is None:
        return None

    status, resp = http.request("http://www.fonefinder.net/findome.php?npa=%s&nxx=%s&thoublock=%s&usaquerytype=Search+by+Number" % parts)
    # convert bytes to string
    resp = resp.decode('utf8').strip()

    # the city is the first query-string value of the findcity.php link
    city_begin = resp.index("findcity.php")
    city_begin = resp.index("=", city_begin) + 1
    city_end = resp.index("&", city_begin)

    # the state is the "&state=" value, terminated by the closing quote of the href
    state_begin = resp.index("&state", city_begin)
    state_begin = resp.index("=", state_begin) + 1
    state_end = resp.index("'>", state_begin)

    return {"city": resp[city_begin:city_end], "state": resp[state_begin:state_end]}

# scrape YP for business address information
def find_biz(term, location, debug=False):
    """Scrape the first yellowpages.com listing for ``term`` near ``location``.

    Args:
        term: business name to search for; it must also appear verbatim in the
            page body for a result to be extracted.
        location: free-text location; spaces are replaced with ``+``.
        debug: when true, print the request URL and each extracted field.

    Returns:
        A list holding one ``{"address", "city", "state", "zip"}`` dict, or an
        empty list when ``term`` is absent from the page body or the page
        contains the text "No results".

    Raises:
        ValueError: the page has no ``<body`` tag or lacks one of the expected
            address labels after the first occurrence of ``term``.
    """
    query_term = term.replace(' ', '+')
    query_location = location.replace(' ', '+')

    url = "http://www.yellowpages.com/search?search_terms=%s&geo_location_terms=%s&s=distance" % (query_term, query_location)
    if debug:
        print (url)

    _, payload = http.request(url)
    page = payload.decode("utf-8")

    body_at = page.index('<body')
    if term not in page[body_at:]:
        return []

    # every field is searched for from the first mention of the term onwards
    anchor = page.index(term, body_at)

    # handle no results
    if "No results" in page:
        return []

    def text_after(label):
        # text between the end of `label` and the next '<'
        start = page.index(label, anchor) + len(label)
        return page[start:page.index('<', start)]

    street = text_after('class="street-address">')
    if debug:
        print (street)

    city = text_after('class="locality">').replace(",&nbsp;", "")
    if debug:
        print (city)

    region = text_after('itemprop="addressRegion">')
    if debug:
        print (region)

    postal_code = text_after('itemprop="postalCode">')
    if debug:
        print (postal_code)

    return [{"address": street, "city": city, "state": region, "zip": postal_code}]

# look for drug name suggestions
def look_for_suggestions(resp):
    """Extract the first suggested drug name from a GoodRx "not found" page.

    The suggestion is the text that follows the first ``">`` after the
    "Any of these sound right?" paragraph, up to the next ``<``.

    Args:
        resp: decoded HTML of the page (str).

    Returns:
        The first space-separated word of the suggestion, or ``""`` (after
        printing a diagnostic) when the marker is missing or ``resp`` is not
        a string.
    """
    try:
        # look for list of possible matches
        marker = resp.index("<p>Any of these sound right?")
        start = resp.index('">', marker) + 2
        suggestion = resp[start:resp.index('<', start)]
    except (ValueError, TypeError, AttributeError):
        # marker not found (ValueError) or resp is bytes/None; a bare except
        # here would also have swallowed KeyboardInterrupt and SystemExit
        print ("error in look_for_suggestions()")
        return ""

    # if multiple words, return first; a single word is returned unchanged
    return suggestion.split(" ")[0]

# find prices for Rx in the soup content
def find_prices(soup):
    """Collect one price entry per pharmacy row found in a parsed GoodRx page.

    Args:
        soup: parsed page exposing ``find_all`` (a BeautifulSoup document).

    Returns:
        A list of dicts with keys ``store``, ``qualifier``, ``price``,
        ``cash-price`` and ``href``. Rows without a ``store-name`` div are
        skipped, as are rows whose qualifier mentions "Membership" or whose
        price is the bare string ``"$"``. ``"0"`` stands in for a missing
        price and ``''`` for a missing qualifier or coupon link.
    """
    price_list = []

    for row in soup.find_all("div", {"event-id": "priceRow"}):
        store_tag = row.find("div", {"class": "store-name"})
        if store_tag is None:
            continue

        line = {'store': store_tag.string}

        qualifier_tag = row.find("div", {"class": "drug-qualifier"})
        # .string can be None; normalise so the Membership filter below cannot crash
        line['qualifier'] = (qualifier_tag.string or '') if qualifier_tag is not None else ''

        # the 1st price is cash, 2nd (if applicable) is coupon
        price_spans = row.findAll("span", {"class": "drug-price"})
        if price_spans:
            line['price'] = price_spans[-1].find("span", {"class": "font-weight-medium"}).string
        else:
            line['price'] = "0"

        # NOTE(review): this tests the number of children of the FIRST price
        # span, not the number of price spans; kept as-is because the live
        # markup is not visible here - confirm the intent against GoodRx HTML.
        # The guard fixes the TypeError raised when a row has no price span.
        if price_spans and len(price_spans[0]) == 2:
            line['cash-price'] = price_spans[0].find("span", {"class": "font-weight-medium"}).string
        else:
            line['cash-price'] = "0"

        # rows without a button simply have no coupon link
        coupon_section = row.find("button")
        if coupon_section is not None and coupon_section.has_attr('data-href'):
            line['href'] = "https://www.goodrx.com" + coupon_section['data-href']
        else:
            line['href'] = ''

        # ignore membership types
        if 'Membership' not in line['qualifier'] and line['price'] != '$':
            price_list.append(line)

    return price_list

# scrape GoodRx for name of Rx
def lookup_GoodRx_name(drug, r=None):
    """Return the drug name GoodRx recognises for ``drug``.

    Args:
        drug: name as typed by the user.
        r: already-fetched, decoded GoodRx page for ``drug``; when falsy the
            page is requested from goodrx.com (spaces sent as ``+``).

    Returns:
        ``drug`` unchanged when the page does not contain the word
        "everywhere" (the source's marker for a failed lookup); otherwise
        whatever ``look_for_suggestions`` extracts from the page.
    """
    page = r
    if not page:
        _, raw = http.request("https://www.goodrx.com/%s" % drug.replace(' ', '+'))
        page = raw.decode("utf-8")

    # check for failed lookup
    return look_for_suggestions(page) if "everywhere" in page else drug

# scrape GoodRx for drug prices
def lookup_GoodRx_prices(drug, dosage=None):
    """Scrape GoodRx for pharmacy prices of ``drug``, cheapest first.

    Args:
        drug: drug name as GoodRx spells it (see ``lookup_GoodRx_name``).
        dosage: optional value for the page's ``dosage`` query parameter.

    Returns:
        A ``(price_range, prices)`` pair. ``price_range`` is a string such as
        ``"Price range: $9.99 - $12.50"`` and ``prices`` is a list of dicts
        with keys ``store``, ``price``, ``cash-price``, ``qualifier`` and
        ``href``, sorted by ascending ``price``. When GoodRx does not
        recognise the name (a "do you mean" hint is printed) or the page has
        no price rows, ``(None, [])`` is returned so callers can always
        unpack two values.

    Raises:
        ValueError / TypeError: a scraped ``price`` cannot be converted to float.
    """
    # encode spaces the same way lookup_GoodRx_name does
    url = "https://www.goodrx.com/%s" % drug.replace(' ', '+')
    if dosage:
        url += "?dosage=%s" % dosage
    status, response = http.request(url)

    soup = BeautifulSoup(response, 'html.parser')
    resp = response.decode("utf-8")

    # check for failed lookup (resolve the name once; it may trigger a request)
    suggestion = lookup_GoodRx_name(drug, resp)
    if suggestion != drug:
        print ("do you mean '%s'?" % suggestion)
        return None, []

    price_list = sorted(find_prices(soup), key=lambda k: float(k['price']))
    if not price_list:
        # nothing to build a range from
        return None, []

    price_range = "Price range: $%s - $%s" % (price_list[0]['price'], price_list[-1]['price'])
    prices = [
        {"store": p['store'], "price": p['price'], "cash-price": p['cash-price'],
         "qualifier": p['qualifier'], "href": p['href']}
        for p in price_list
    ]
    return price_range, prices

# lookup location for business near city/state on a price list
def lookup_biz_list(price_list, city, state, debug=False):
    """Find the first store in ``price_list`` that has a branch near city/state.

    Stores are tried in list order and the search stops at the first one
    ``find_biz`` can locate, so the caller decides the priority by sorting
    ``price_list`` beforehand.

    Args:
        price_list: dicts with ``store``, ``price``, ``cash-price`` and ``href``.
        city, state: location passed to ``find_biz`` as ``"<city>+<state>"``.
        debug: forwarded to ``find_biz``; also prints each store tried.

    Returns:
        ``{"best-coupon": entry, "best-cash": entry, "list": [entry, ...]}``
        where each entry has ``store``, ``price``, ``cash-price``, ``city``,
        ``address``, ``state`` and ``coupon``; or ``{}`` when ``price_list``
        is empty or no store could be located.
    """
    if not price_list:
        return {}

    PRICE_RANGE = 1.3   # within 30% of lowest price
    result_list = []
    for p in price_list:
        # clean up store labels so they match yellowpages listings
        store = (p['store']
                 .replace('(CVS)', '')
                 .replace('Pharmacy', '')
                 .replace('Sams', "Sam's")
                 .replace('Rite-Aid', 'Rite Aid'))

        if debug: print (store, city, state)
        biz = find_biz(term=store, location=city + '+' + state, debug=debug)

        if biz:
            result_list.append({"store": p['store'], "price": p['price'], "cash-price": p['cash-price'],
                                "city": biz[0]['city'], "address": biz[0]['address'],
                                "state": biz[0]['state'], "coupon": p['href']})

            # once we have first biz, break (list provided needs to be sorted by price)
            break

    if not result_list:
        # same "nothing found" value as for an empty input, instead of None
        return {}

    # inclusive bound: a strict "<" dropped the reference entry itself when its
    # price was "0", leaving nothing to index below
    ceiling = float(result_list[0]['price']) * PRICE_RANGE
    in_range = [r for r in result_list if float(r['price']) <= ceiling]
    cheapest_coupon = sorted(in_range, key=lambda k: float(k['price']))
    cheapest_cash = sorted(in_range, key=lambda k: float(k['cash-price']))

    return {"best-coupon": cheapest_coupon[0], "best-cash": cheapest_cash[0], "list": result_list}
    
# figure out best price
def best_price(price_list, city, state, debug=False):
    """Pick the cheapest locally available offer, comparing coupon and cash prices.

    Args:
        price_list: dicts with ``store``, ``price``, ``cash-price`` and ``href``.
        city, state: location forwarded to ``lookup_biz_list``.
        debug: print progress and forward the flag to ``lookup_biz_list``.

    Returns:
        The winning entry from ``lookup_biz_list`` with a ``type`` key added:
        ``'cash'`` when the best cash price is less than or equal to the best
        coupon price (``price`` is then the cash price and ``coupon`` is
        removed), otherwise ``'coupon'``. ``cash-price`` is removed in both
        cases. ``None`` when no store could be located.
    """
    # first sorted by price
    by_coupon = sorted(price_list, key=lambda k: float(k['price']))
    if debug: print ("searching for best coupon price")
    best_coupon = (lookup_biz_list(by_coupon, city, state, debug=debug) or {}).get('best-coupon')

    # then sorted by cash-price
    by_cash = sorted(by_coupon, key=lambda k: float(k['cash-price']))
    if debug: print ("searching for best cash price")
    best_cash = (lookup_biz_list(by_cash, city, state, debug=debug) or {}).get('best-cash')

    # nothing located nearby: report "no result" instead of raising
    if best_coupon is None or best_cash is None:
        return None

    # try comparing prices, if the values are unusable fall back to the coupon price
    try:
        cash_wins = float(best_cash['cash-price']) <= float(best_coupon['price'])
    except (KeyError, TypeError, ValueError):
        cash_wins = False

    if cash_wins:
        best_cash['price'] = best_cash['cash-price']
        best_cash['type'] = 'cash'
        best_cash.pop("cash-price", None)
        best_cash.pop("coupon", None)
        return best_cash

    best_coupon['type'] = 'coupon'
    best_coupon.pop("cash-price", None)
    return best_coupon



In [33]:
# restore all of our data structures
import pickle
# NOTE: pickle.load can execute arbitrary code; only load a training file you
# produced yourself. The `with` block closes the file handle once loaded.
with open("Rx_training_data", "rb") as training_file:
    data = pickle.load(training_file)
words = data['words']        # vocabulary used by bow()
classes = data['classes']    # intent tags, indexed by model output position
train_x = data['train_x']    # only the row width is used below (network input size)
train_y = data['train_y']    # only the row width is used below (network output size)

# import our chat-bot intents file
import json
with open('intents_Rx.json') as json_data:
    intents = json.load(json_data)

In [34]:
# Build neural network structure
# NOTE: cell 36 loads saved weights into this graph, so the layers must be
# created in exactly this order and with these sizes.
# input width = length of one training row
net = tflearn.input_data(shape=[None, len(train_x[0])])
# two fully connected hidden layers of 8 units each
net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, 8)
# output layer: one softmax unit per entry of a train_y row
net = tflearn.fully_connected(net, len(train_y[0]), activation='softmax')
# regression layer with tflearn's default settings
net = tflearn.regression(net)

# Define model and setup tensorboard
# tensorboard_dir is the directory handed to tflearn for its logs
model = tflearn.DNN(net, tensorboard_dir='tflearn_logs')

In [35]:
# clean up a sentence with stemming and lower-case
def clean_up_sentence(sentence):
    """Tokenize ``sentence`` with NLTK and return its lower-cased, stemmed tokens as a list."""
    return [stemmer.stem(token.lower()) for token in nltk.word_tokenize(sentence)]

# return bag of words array: 0 or 1 for each word in the bag that exists in the sentence
def bow(sentence, words, show_details=False):
    """Encode ``sentence`` as a bag-of-words vector over the vocabulary ``words``.

    Args:
        sentence: raw text; it is tokenized and stemmed by ``clean_up_sentence``.
        words: vocabulary list; position ``i`` of the result corresponds to ``words[i]``.
        show_details: print a line for every (token, vocabulary word) match.

    Returns:
        A numpy array with 1 where the vocabulary word occurs in the sentence
        and 0 elsewhere.
    """
    tokens = clean_up_sentence(sentence)
    flags = [0] * len(words)

    for token in tokens:
        for position, vocab_word in enumerate(words):
            if vocab_word != token:
                continue
            flags[position] = 1
            if show_details:
                print ("found in bag: %s" % vocab_word)

    return np.array(flags)

In [36]:
# load our saved model
# restores the checkpoint at ./Rxmodel.tflearn into `model`
# NOTE(review): presumably written by a separate training notebook - confirm;
# the network built above has to match the one that was saved.
model.load('./Rxmodel.tflearn')

In [37]:
# create a dictionary to hold user context
# keyed by userID; response() keeps the pending 'context' tag, the answers it
# captured (e.g. the drug name) and the last coupon URL in each user's entry
context = {}

# ignore classifications with a probability BELOW a set threshold
# classify() keeps only predictions strictly greater than this value
ERROR_THRESHOLD = 0.25

# generate an intent classification for a sentence
def classify(sentence):
    """Rank the intents the model predicts for ``sentence``.

    Returns:
        A list of ``(intent_tag, probability)`` tuples for every class whose
        probability exceeds ``ERROR_THRESHOLD``, strongest first.
    """
    # generate probabilities from the model
    probabilities = model.predict([bow(sentence, words)])[0]

    # keep confident predictions only, ordered by descending probability
    ranked = sorted(
        ([index, score] for index, score in enumerate(probabilities) if score > ERROR_THRESHOLD),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # translate class indices into intent tags
    return [(classes[index], score) for index, score in ranked]

# process contextual response, userID is unique cell # (eg. SMS conversation)
def _best_price_reply(userID, label, show_details=False):
    """Build the "best price" reply for the answer stored under ``label``; None if unavailable."""
    query = context[userID].get(label)
    if not query:
        return None

    location = lookup_number(userID)
    if show_details:
        print ('    location', userID, location)

    # resolve the spelling once; every lookup_GoodRx_name call may hit the network
    drug_name = lookup_GoodRx_name(query)
    # get GoodRx store/price list
    lookup = lookup_GoodRx_prices(drug_name)
    if show_details:
        print ('    looking up', query)

    # the lookup may yield nothing at all when the name is not recognised
    price_list = lookup[1] if lookup else None

    best = None
    if price_list and location:
        # determine the best price for our location
        best = best_price(price_list, location['city'], location['state'])

    if show_details:
        print (best)
    if not best:
        return None

    # formulate response
    reply = "best price on %s is $%s (%s) at %s, %s %s" % \
            (drug_name, best['price'], best['type'],
             best['store'], best['address'], best['city'])
    if 'coupon' in best:
        context[userID]['coupon'] = best['coupon']
    # the pending question has been answered
    context[userID].pop('context', None)
    return reply


def response(sentence, userID='5612779900', show_details=False):
    """Produce the bot's reply to ``sentence`` within the conversation of ``userID``.

    Conversation state lives in the module-level ``context`` dict. An intent
    whose ``context`` contains ``get`` (e.g. ``getRx``) makes the NEXT
    sentence be stored under the label after ``get`` and routed to the
    ``handle<label>`` intent; ``handle`` contexts trigger the price lookup and
    ``coupon`` contexts hand out the coupon saved by the last lookup.

    Args:
        sentence: the user's message.
        userID: unique cell # of the user (eg. SMS conversation); also used
            for the location lookup.
        show_details: print diagnostic information while processing.

    Returns:
        The reply text: a price or coupon message, or a random entry of the
        matched intent's ``responses``. ``None`` when no classified intent
        produced a reply.
    """
    # establish a context for each unique userID (cell #)
    user_context = context.setdefault(userID, {})

    # check if the most recent context is a get... if so we take the response
    pending = user_context.get('context', '')
    if 'get' in pending:
        # the label is what's after the 'get' (partition leaves any later
        # "get" inside the label intact, unlike replace)
        label = pending.partition('get')[2]
        user_context[label] = sentence
        # transfer control to the corresponding intent to handle the response
        results = [("handle%s" % label, 1.0)]
    else:
        results = classify(sentence)

    # loop as long as there are matches to process
    while results:
        for i in intents['intents']:
            # find a tag matching the first result
            if i['tag'] != results[0][0]:
                continue

            # set context for this intent if necessary
            if 'context' in i:
                if show_details: print ('context:', i['context'])

                # handle an informational context
                if 'handle' in i['context']:
                    label = i['context'].partition('handle')[2]
                    reply = _best_price_reply(userID, label, show_details)
                    if reply:
                        return reply

                # handle a coupon request
                elif 'coupon' in i['context']:
                    if 'coupon' in user_context:
                        return "use this coupon: %s" % user_context.pop('coupon')

                else:
                    user_context['context'] = i['context']

            if show_details: print ('tag:', i['tag'])
            # a random response from the intent, returned (not printed) so the
            # caller receives it like every other reply
            if i['responses']:
                return random.choice(i['responses'])

        results.pop(0)

In [38]:
lookup_number('5612779900')

{'city': 'JUPITER', 'state': 'FL'}

In [39]:
find_biz('Walmart', 'Jupiter FL')

[{'address': '2144 W Indiantown Rd',
  'city': 'Jupiter',
  'state': 'FL',
  'zip': '33458'}]

In [40]:
lookup_GoodRx_name('lipator')

'Lipitor'

In [41]:
lookup_GoodRx_prices('adderall')

('Price range: $40.56 - $61.65',
 [{'cash-price': '40.56',
   'href': 'https://www.goodrx.com/coupon/dispatch/AWMziVOCiFlqvtP7z3odkXZpVeSXa9C2wtY8CmilXWPxrpiVwf3djSPezii8utNNDfwN7ybmovzaZNofZw_PtwPL2FmknVMrFXimLMk6-xjTtQts55hHLg%3D%3D',
   'price': '40.56',
   'qualifier': 'with free Coupon',
   'store': 'Costco'},
  {'cash-price': '110',
   'href': 'https://www.goodrx.com/coupon/dispatch/qaErjeUjT4CpPyTHK_XuwAenlnHUw0N2lmNuQQn_lXIKplfLwtV1E-XUmXsC00co-qXancbo8BkGdkwaI3UOJGvkq5UxQi6WfqH-8pyQ3B8k3epVl4KeCA%3D%3D',
   'price': '40.56',
   'qualifier': 'with free Coupon',
   'store': 'Safeway'},
  {'cash-price': '103',
   'href': 'https://www.goodrx.com/coupon/dispatch/9Tw7GeJkdYKYHN5fl4qG1gycslG9l85MwkMdFJ61kpvJEn8YPH_s18I0j7N6ks_KhcwJ-8O6L2MBFAWtVYRkPwcuDJW4lbTbdMyQkk9X5dfisrB6',
   'price': '40.91',
   'qualifier': 'with free Coupon',
   'store': 'Kroger Pharmacy'},
  {'cash-price': '41.00',
   'href': 'https://www.goodrx.com/coupon/dispatch/kZquF9cmQgjTz53Jb7cEbWCrATlMdII1fajhmP6teinn

In [42]:
name = "adderall"
# the lookup may yield nothing when GoodRx does not recognise the name, so
# unpack defensively and make sure `best` is always bound before displaying it
lookup = lookup_GoodRx_prices(lookup_GoodRx_name(name))
price_list = lookup[1] if lookup else []
best = best_price(price_list, "Jupiter", "FL") if price_list else None

best

{'address': '3250 Northlake Blvd',
 'city': 'Palm Beach Gardens',
 'price': '40.56',
 'state': 'FL',
 'store': 'Costco',
 'type': 'cash'}

In [43]:
classify('hi there, anyone home?')

[('greeting', 0.9999843835830688)]

In [44]:
response('hi there, anyone home?')

Hi there


In [45]:
response('looking for inexpensive meds')

What's the name of the medication?


In [46]:
response('lipitor', show_details=True)

context: handleRx
    location 5612779900 {'state': 'FL', 'city': 'JUPITER'}
    looking up lipitor
{'store': 'Safeway', 'city': 'Jupiter', 'price': '10.55', 'coupon': 'https://www.goodrx.com/coupon/dispatch/yGeOpPGn-I_h93JIaT2CzDwiQiKzZog2fh4rR7svd2brqqRdqFEBWyoHXq6q4lFHnThY_dRpgFdawV3LVbwc1uxKyZYDBPrWU4vveERmFXyUyA%3D%3D', 'address': '709 Commerce Way W', 'type': 'coupon', 'state': 'FL'}


'best price on lipitor is $10.55 (coupon) at Safeway, 709 Commerce Way W Jupiter'

In [47]:
context

{'5612779900': {'Rx': 'lipitor',
  'coupon': 'https://www.goodrx.com/coupon/dispatch/yGeOpPGn-I_h93JIaT2CzDwiQiKzZog2fh4rR7svd2brqqRdqFEBWyoHXq6q4lFHnThY_dRpgFdawV3LVbwc1uxKyZYDBPrWU4vveERmFXyUyA%3D%3D'}}

In [48]:
response('can I have the coupon?')

'use this coupon: https://www.goodrx.com/coupon/dispatch/yGeOpPGn-I_h93JIaT2CzDwiQiKzZog2fh4rR7svd2brqqRdqFEBWyoHXq6q4lFHnThY_dRpgFdawV3LVbwc1uxKyZYDBPrWU4vveERmFXyUyA%3D%3D'

In [49]:
context

{'5612779900': {'Rx': 'lipitor'}}

In [50]:
response('looking for inexpensive meds')

What's the name of the medication?


In [51]:
response('Aderal')

'best price on Adderall is $40.56 (cash) at Costco, 3250 Northlake Blvd Palm Beach Gardens'

In [52]:
context

{'5612779900': {'Rx': 'Aderal'}}

In [53]:
response('may I have the coupon?')

Sorry, no coupon


In [54]:
response('thanks for your help')

My pleasure
