https://www.analyticsvidhya.com/blog/2018/01/faq-chatbots-the-future-of-information-searching/
http://blog.christianperone.com/2013/09/machine-learning-cosine-similarity-for-vector-space-models-part-iii/
http://nlp.town/blog/sentence-similarity/
https://spacy.io/usage/training

In [1]:
import string
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction import stop_words
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import KDTree
from nltk.stem import WordNetLemmatizer

wordnet_lemmatizer = WordNetLemmatizer()

# Uncomment to see full cell text
pd.set_option('display.max_colwidth', 0)

In [2]:
faq = pd.read_csv('../data/interim/faq-text-separated.csv', keep_default_na=False)
test = pd.read_csv('../data/interim/test-questions.csv')

In [4]:
test.head()

Unnamed: 0,test_question,match_question
0,"I live on the corner of Bear Mountain Drive and Scrub Oak Circle, and would like the City enforce the speed limit on Bear Mountain Drive. Despite a limit of 25 mph, I routinely observe vehicles traveling well above that speed in both directions (especially in the mornings and early evening). There are a ton of kids in this neighborhood, as well as a blind curve in the road between several crosswalks. It baffles me to see so much enforcement on Lehigh (with the regular presence of a photo van) and only the very occasional patrol car on Bear Mountain Drive. It would be great if the photo van or other officers could regularly make an appearance on Bear Mountain.",Speeding on Residential Streets
1,Can you please mow the grass in the park. It is becoming difficult to find the dog poop and dog owners are just leaving it in the grass.,Park Maintenance Issues
2,Are there grizzlies in Boulder?,Do we have grizzly bears in Colorado?
3,Where do I report being hit by a bicycle?,"Have you had a close call with a bicycle, pedestrian or motorist? For example: Were you in a crosswalk (on foot, bike, skateboard) and a car almost hit you? Were you riding your bike on the right side of the road and a car almost hit you? Did you bike through a red light and a car almost hit you? Were you walking on the sidewalk and a bike almost hit you?"
4,How much time do I have to wait for my income certification for affordable housing?,How long does it take to become income-certified?


# Text Processing

In [3]:
def lem(words):
    lem_sentence=[]
    for word in words:
        lem_sentence.append(wordnet_lemmatizer.lemmatize(word))
    return lem_sentence

def text_process(mess):
    """
    Takes in a string of text, then performs the following:
    1. Remove all punctuation
    2. Remove all stopwords
    3. Lemmatizes all words
    4. Returns a list of the cleaned text
    """
    # Check characters to see if they are in punctuation
    clean = [char if char not in string.punctuation else ' ' for char in mess]

    # Join the characters again to form the string.
    clean = ''.join(clean)

    # Now just remove any stopwords
    clean = [word.lower() for word in clean.split() if word.lower() not in stop_words.ENGLISH_STOP_WORDS]
    
    # Lemmatize
    clean = lem(clean)
    
    return clean

In [4]:
# Create corpus by joining questions and answers
corpus = faq.question + ' ' + faq.answer

# Create BOW tranformer based on faq.question
bow_transformer = CountVectorizer(analyzer=text_process).fit(corpus)
# Tranform faq.question itself into BOW
q_bow = bow_transformer.transform(corpus)

# Create TFIDF transformer based on faq.question's BOW
tfidf_transformer = TfidfTransformer().fit(q_bow)
# Transform faq.question's BOW into TFIDF
q_tfidf = tfidf_transformer.transform(q_bow)

# QnA Maker
(The data used by QnA Maker at this time is a slightly less clean version.)<br>
6 successes

# Semantic Similarity with spaCy
1 success

In [11]:
# This is not the full code, so isn't operational in this notebook.
import spacy
nlp = spacy.load('en_core_web_sm')

def max_sim_spacy(q, docs):
    """
    Take user query in form of spaCy document, find question that has the highest similarity,
    and return the associated answer from FAQ if the similarity is above threshold value.
    docs must be a set of spaCy documents
    """
    q = nlp(q)
    max_i = 0
    max_s = 0
    ms = []
    for i, d in enumerate(docs):
        if d.similarity(q) > max_s:
            max_s = d.similarity(q)
            max_i = i
    
    return max_i, max_s    
 
q_docs = [nlp(entry) for entry in faq.question]
a_docs = [nlp(entry) for entry in faq.answer]      

# Cosine Similarity
7 successes<br>

In [7]:
def max_sim_skl(tq):
    # Transform test question into BOW using BOW transformer (based on faq.question) 
    tq_bow = bow_transformer.transform([tq])
    # Transform test question's BOW into TFIDF
    tq_tfidf = tfidf_transformer.transform(tq_bow)
    
    sims = np.transpose(cosine_similarity(tq_tfidf, q_tfidf))

    max_s = sims.max()
    max_i = np.argmax(sims)
    
    return max_i, max_s 

In [7]:
# from sklearn.feature_extraction.text import TfidfVectorizer
# tfidf_vectorizer = TfidfVectorizer()
# tfidf_matrix = tfidf_vectorizer.fit_transform(faq.question)
# print(tfidf_matrix.shape)

In [8]:
def respond(row):
    query = row.test_question.strip()

    index, sim = max_sim_skl(query)

    row['sim_question'] = faq.question.iloc[index]
#     row['info'] = faq.answer.iloc[index]
    row['max_similarity'] = round(sim, 2)
    row['success'] = row.sim_question == row.match_question
    return row   

In [38]:
test.apply(respond, axis=1) 

Unnamed: 0,test_question,match_question,sim_question,max_similarity,success
0,"I live on the corner of Bear Mountain Drive and Scrub Oak Circle, and would like the City enforce the speed limit on Bear Mountain Drive. Despite a limit of 25 mph, I routinely observe vehicles traveling well above that speed in both directions (especially in the mornings and early evening). There are a ton of kids in this neighborhood, as well as a blind curve in the road between several crosswalks. It baffles me to see so much enforcement on Lehigh (with the regular presence of a photo van) and only the very occasional patrol car on Bear Mountain Drive. It would be great if the photo van or other officers could regularly make an appearance on Bear Mountain.",Speeding on Residential Streets,What does CPW do about bears in town? When are they relocated or killed?,0.27,False
1,Can you please mow the grass in the park. It is becoming difficult to find the dog poop and dog owners are just leaving it in the grass.,Park Maintenance Issues,Dog Parks and Dog Swimming,0.4,False
2,Are there grizzlies in Boulder?,Do we have grizzly bears in Colorado?,Do we have grizzly bears in Colorado?,0.39,True
3,Where do I report being hit by a bicycle?,"Have you had a close call with a bicycle, pedestrian or motorist? For example: Were you in a crosswalk (on foot, bike, skateboard) and a car almost hit you? Were you riding your bike on the right side of the road and a car almost hit you? Did you bike through a red light and a car almost hit you? Were you walking on the sidewalk and a bike almost hit you?","Have you had a close call with a bicycle, pedestrian or motorist? For example: Were you in a crosswalk (on foot, bike, skateboard) and a car almost hit you? Were you riding your bike on the right side of the road and a car almost hit you? Did you bike through a red light and a car almost hit you? Were you walking on the sidewalk and a bike almost hit you?",0.5,True
4,How much time do I have to wait for my income certification for affordable housing?,How long does it take to become income-certified?,Housing Fund for Affordable Housing Providers,0.33,False
5,my water pipes froze and now they are leaking. how do i turn off the water??,How can I prevent and thaw frozen water pipes?,How can I prevent and thaw frozen water pipes?,0.52,True
6,"Hello, There are a group of Gambel Oak Trees with Tree ID #'s 38820-38825 that I would like to be pruned up in order to keep them away from the Rec Center wall and to keep them in good, trimmed health. I also would like Tree ID 38825 to be pruned away from the American Flag so it will not come in contact with the tree, from the nearby flag pole. Thank you very much and if you have any questions or concerns, feel free to give me a call.",Public Tree Issues,Public Tree Issues,0.47,True
7,There are constantly dogs off leash in the children playgrounds of columbine school. Even though the playgrounds are fenced and have a sign stating dogs should not go in. You can see some dog owners do not even pick their dog shit.,Dogs on Open Space and Mountain Parks,Dog Parks and Dog Swimming,0.56,False
8,"I parked in the garage on 11th and Walnut on Sunday 12/23 starting at 5 PM and left at 12:35 AM that Monday 12/24. I was charged $1.25 but it should have still been free since charged parking doesnâ€™t start till 7 AM on Mondays , I park here all the time and I am confused as to why it says I owed $1.25. Is there a glitch in the system ?",Contact Parking Services,"Parking Information - Hours, Rates and Holidays",0.23,False
9,Where do I apply for building permits?,How do I get a building permit?,Do I need a building permit?,0.65,False


In [41]:
print('Successes: ', sum(test.apply(respond, axis=1).success))

Successes:  7


# KD Trees Nearest Neighbor
6 successes with euclidean distance<br>
3 successes with cosine_similarity matrix (if I've implemented it right)<br>
Other results below:

In [31]:
dist_metric_success = {
    'euclidean': 7,
    'l2': 7,
    'minkowski': 7,
    'p': 7,
    'manhattan': 0,
    'cityblock': 0,
    'l1': 0,
    'chebyshev': 5,
    'infinity': 5}

In [74]:
tree = KDTree(q_tfidf.toarray(), metric='minkowski')
# tree = KDTree(cosine_similarity(q_tfidf, q_tfidf))

In [25]:
def kd_sim(query):
    # Transform test question into BOW using BOW transformer (based on faq.question) 
    tq_bow = bow_transformer.transform([query])
    # Transform test question's BOW into TFIDF
    tq_tfidf = tfidf_transformer.transform(tq_bow)

    nearest_dist, nearest_ind = tree.query(tq_tfidf.toarray(), k=2)  # k=2 nearest neighbors where k1 = identity
#     nearest_dist, nearest_ind = tree.query(cosine_similarity(tq_tfidf, q_tfidf), k=2)  # k=2 nearest neighbors where k1 = identity
        
    return nearest_ind[0][0], nearest_dist[0][0]

In [12]:
def kd_respond(row):
    query = row.test_question.strip()

    index, dis = kd_sim(query)

    row['near_question'] = faq.question.iloc[index]
#     row['info'] = faq.answer.iloc[index]
    row['nearest_distance'] = round(dis, 2)
    row['success'] = row.near_question == row.match_question
    return row   

In [75]:
test.apply(kd_respond, axis=1)

Unnamed: 0,test_question,match_question,near_question,nearest_distance,success
0,"I live on the corner of Bear Mountain Drive and Scrub Oak Circle, and would like the City enforce the speed limit on Bear Mountain Drive. Despite a limit of 25 mph, I routinely observe vehicles traveling well above that speed in both directions (especially in the mornings and early evening). There are a ton of kids in this neighborhood, as well as a blind curve in the road between several crosswalks. It baffles me to see so much enforcement on Lehigh (with the regular presence of a photo van) and only the very occasional patrol car on Bear Mountain Drive. It would be great if the photo van or other officers could regularly make an appearance on Bear Mountain.",Speeding on Residential Streets,What does CPW do about bears in town? When are they relocated or killed?,1.21,False
1,Can you please mow the grass in the park. It is becoming difficult to find the dog poop and dog owners are just leaving it in the grass.,Park Maintenance Issues,Dog Parks and Dog Swimming,1.1,False
2,Are there grizzlies in Boulder?,Do we have grizzly bears in Colorado?,Do we have grizzly bears in Colorado?,1.1,True
3,Where do I report being hit by a bicycle?,"Have you had a close call with a bicycle, pedestrian or motorist? For example: Were you in a crosswalk (on foot, bike, skateboard) and a car almost hit you? Were you riding your bike on the right side of the road and a car almost hit you? Did you bike through a red light and a car almost hit you? Were you walking on the sidewalk and a bike almost hit you?","Have you had a close call with a bicycle, pedestrian or motorist? For example: Were you in a crosswalk (on foot, bike, skateboard) and a car almost hit you? Were you riding your bike on the right side of the road and a car almost hit you? Did you bike through a red light and a car almost hit you? Were you walking on the sidewalk and a bike almost hit you?",1.0,True
4,How much time do I have to wait for my income certification for affordable housing?,How long does it take to become income-certified?,Housing Fund for Affordable Housing Providers,1.16,False
5,my water pipes froze and now they are leaking. how do i turn off the water??,How can I prevent and thaw frozen water pipes?,How can I prevent and thaw frozen water pipes?,0.98,True
6,"Hello, There are a group of Gambel Oak Trees with Tree ID #'s 38820-38825 that I would like to be pruned up in order to keep them away from the Rec Center wall and to keep them in good, trimmed health. I also would like Tree ID 38825 to be pruned away from the American Flag so it will not come in contact with the tree, from the nearby flag pole. Thank you very much and if you have any questions or concerns, feel free to give me a call.",Public Tree Issues,Public Tree Issues,1.03,True
7,There are constantly dogs off leash in the children playgrounds of columbine school. Even though the playgrounds are fenced and have a sign stating dogs should not go in. You can see some dog owners do not even pick their dog shit.,Dogs on Open Space and Mountain Parks,Dog Parks and Dog Swimming,0.94,False
8,"I parked in the garage on 11th and Walnut on Sunday 12/23 starting at 5 PM and left at 12:35 AM that Monday 12/24. I was charged $1.25 but it should have still been free since charged parking doesnâ€™t start till 7 AM on Mondays , I park here all the time and I am confused as to why it says I owed $1.25. Is there a glitch in the system ?",Contact Parking Services,"Parking Information - Hours, Rates and Holidays",1.24,False
9,Where do I apply for building permits?,How do I get a building permit?,Do I need a building permit?,0.83,False


In [5]:
print('Successes: ', sum(test.apply(kd_respond, axis=1).success))

# Soft Cosine Similarity... 
https://www.machinelearningplus.com/nlp/cosine-similarity/

# Doc2Vec with GenSim

In [6]:
import gensim
from gensim.models import Doc2Vec
from gensim.models.doc2vec import TaggedDocument
from nltk import word_tokenize
from nltk.stem.porter import *

In [7]:
def default_clean(mess):
    '''
    Removes default bad characters
    '''
    # Check characters to see if they are in punctuation
    clean = [char if char not in string.punctuation else ' ' for char in mess]
    # Join the characters again to form the string.
    clean = ''.join(clean)
    # Now just remove any stopwords
    clean = [word.lower() for word in clean.split() if word.lower() not in stop_words.ENGLISH_STOP_WORDS]
    return ' '.join(clean)
 
def stop_and_stem(text, stem=True, stemmer = PorterStemmer()):
    '''
    Removes stopwords and does stemming
    '''
    stoplist = stop_words.ENGLISH_STOP_WORDS
    if stem:
        text_stemmed = [stemmer.stem(word) for word in word_tokenize(text) if word not in stoplist and len(word) > 3]
    else:
        text_stemmed = [word for word in word_tokenize(text) if word not in stoplist and len(word) > 3]
    text = ' '.join(text_stemmed)
    return text

In [8]:
# Add Dept, Category, Topic as LABELS???
class TaggedDocumentIterator(object):
    def __init__(self, doc_list, labels_list):
        self.labels_list = labels_list
        self.doc_list = doc_list
    def __iter__(self):
        for idx, doc in enumerate(self.doc_list):
            yield TaggedDocument(words=doc.split(), tags=[self.labels_list[idx]])

In [9]:
def test_d2v(test_sample):
    """Passes parameter test_sample into Doc2Vec model and returns the most similar entry in training set."""
    #Clean the document using the utility functions used in train phase
    test_sample = default_clean(test_sample)
    test_sample = stop_and_stem(test_sample, stem=False)

    #Convert the sample document into a list and use the infer_vector method to get a vector representation for it
    new_doc_words = test_sample.split()
    new_doc_vec = model.infer_vector(new_doc_words)

    #use the most_similar utility to find the most similar documents.
    return model.docvecs.most_similar(positive=[new_doc_vec])

In [10]:
def respond_d2v(row):
    query = row.test_question.strip()

    sims = test_d2v(query)
    
    best = sims[0][0]

    row['sim_question'] = best
    row['similarity'] = round(sims[0][1], 2)
    row['success'] = (best == row.match_question)
    return row   

## Train on Answer, test with Question

In [273]:
sample = faq[['question', 'answer']]
sample = sample.sample(frac=1).reset_index(drop=True)
print ('The shape of the input data frame: {}'.format(sample.shape))

The shape of the input data frame: (688, 2)


In [274]:
sample['answer'] = sample['answer'].apply(default_clean)
sample['answer'] = sample['answer'].apply(stop_and_stem, stem=False)

In [275]:
docLabels = list(sample['question'])
data = list(sample['answer'])
sentences = TaggedDocumentIterator(data, docLabels)

In [304]:
model = Doc2Vec(vector_size=100, min_count=1, epochs=100)
model.build_vocab(sentences)
model.train(sentences,total_examples=model.corpus_count, epochs=model.epochs)

In [305]:
# model = gensim.models.Doc2Vec(vector_size=300, min_count=0, alpha=0.025, min_alpha=0.025)
# model.build_vocab(sentences)
# #training of model
# for epoch in range(100):
#     print('iteration ' +str(epoch+1))
#     model.train(sentences, total_examples=model.corpus_count, epochs=model.epochs)
#     model.alpha -= 0.002
#     model.min_alpha = model.alpha

In [316]:
t = test.apply(respond_d2v, axis=1)
print('Successes: ', sum(t.success))
t

Successes:  5


Unnamed: 0,test_question,match_question,sim_question,similarity,success
0,"I live on the corner of Bear Mountain Drive and Scrub Oak Circle, and would like the City enforce the speed limit on Bear Mountain Drive. Despite a limit of 25 mph, I routinely observe vehicles traveling well above that speed in both directions (especially in the mornings and early evening). There are a ton of kids in this neighborhood, as well as a blind curve in the road between several crosswalks. It baffles me to see so much enforcement on Lehigh (with the regular presence of a photo van) and only the very occasional patrol car on Bear Mountain Drive. It would be great if the photo van or other officers could regularly make an appearance on Bear Mountain.",Speeding on Residential Streets,Why can't I park a few minutes at a time throughout the day in a residential zone?,-0.0,False
1,Can you please mow the grass in the park. It is becoming difficult to find the dog poop and dog owners are just leaving it in the grass.,Park Maintenance Issues,Can I bring my dog to the Boulder Reservoir?,0.24,False
2,Are there grizzlies in Boulder?,Do we have grizzly bears in Colorado?,Prairie Dogs,0.76,False
3,Where do I report being hit by a bicycle?,"Have you had a close call with a bicycle, pedestrian or motorist? For example: Were you in a crosswalk (on foot, bike, skateboard) and a car almost hit you? Were you riding your bike on the right side of the road and a car almost hit you? Did you bike through a red light and a car almost hit you? Were you walking on the sidewalk and a bike almost hit you?",How do I contact Code Enforcement to ask questions or report a code violation?,0.37,False
4,How much time do I have to wait for my income certification for affordable housing?,How long does it take to become income-certified?,How long does it take to become income-certified?,0.7,True
5,my water pipes froze and now they are leaking. how do i turn off the water??,How can I prevent and thaw frozen water pipes?,Zoning Code Violations,0.45,False
6,"Hello, There are a group of Gambel Oak Trees with Tree ID #'s 38820-38825 that I would like to be pruned up in order to keep them away from the Rec Center wall and to keep them in good, trimmed health. I also would like Tree ID 38825 to be pruned away from the American Flag so it will not come in contact with the tree, from the nearby flag pole. Thank you very much and if you have any questions or concerns, feel free to give me a call.",Public Tree Issues,Public Tree Issues,0.08,True
7,There are constantly dogs off leash in the children playgrounds of columbine school. Even though the playgrounds are fenced and have a sign stating dogs should not go in. You can see some dog owners do not even pick their dog shit.,Dogs on Open Space and Mountain Parks,Leash Law and Tethering,0.06,False
8,"I parked in the garage on 11th and Walnut on Sunday 12/23 starting at 5 PM and left at 12:35 AM that Monday 12/24. I was charged $1.25 but it should have still been free since charged parking doesnâ€™t start till 7 AM on Mondays , I park here all the time and I am confused as to why it says I owed $1.25. Is there a glitch in the system ?",Contact Parking Services,How do I Replace a Lost or Stolen Pass?,0.36,False
9,Where do I apply for building permits?,How do I get a building permit?,"If I already have a sales and use tax license for a different business name, can I just change it?",0.68,False


In [None]:
0-5 model = Doc2Vec(vector_size=100, min_count=1, epochs=100)


In [24]:
# Store the model to mmap-able files
model.save('../models/model_answer.doc2vec')
# Load the model
model = Doc2Vec.load('../models/model_answer.doc2vec')

## Train on Question+Answer, test with Question

In [27]:
sample = pd.DataFrame(corpus, columns=['qna']) 
sample['question'] = faq['question']
sample['answer'] = faq['answer']
sample = sample.sample(frac=1).reset_index(drop=True)
print ('The shape of the input data frame: {}'.format(sample.shape))

The shape of the input data frame: (688, 3)


In [28]:
sample

Unnamed: 0,qna,question,answer
0,"Injured and Orphaned Small Wildlife To report injured or orphaned wildlife, call Animal Protection at 303-441-3333. Boulder Police Animal Protection responds to reports of injured and orphaned small wildlife, including baby birds, raccoons, skunks, hawks, squirrels, foxes and hawks. For additional information about small wildlife, please contact the Greenwood Rehabilitation Center at 303-545-5849. Injured and orphaned wildlife resources Colorado Parks and Wildlife - 303-297-1192 Greenwood Rehabilitation Center - 303-545-5849 Birds of Prey Foundation - 303-460-0674",Injured and Orphaned Small Wildlife,"To report injured or orphaned wildlife, call Animal Protection at 303-441-3333. Boulder Police Animal Protection responds to reports of injured and orphaned small wildlife, including baby birds, raccoons, skunks, hawks, squirrels, foxes and hawks. For additional information about small wildlife, please contact the Greenwood Rehabilitation Center at 303-545-5849. Injured and orphaned wildlife resources Colorado Parks and Wildlife - 303-297-1192 Greenwood Rehabilitation Center - 303-545-5849 Birds of Prey Foundation - 303-460-0674"
1,"How do I change or reset my court date? If your court date is currently set for your initial appearance, or arraignment, you may call the court at 303-441-1841 to speak with a clerk to assist you in changing the date. Cases are generally continued for one to two weeks, however, if circumstances warrant, a continuance of up to 30 days may be granted. If your case is set for any other type of court appearance, you must appear on your given court date unless you have previously submitted a written request to continue, which has been granted by the court.",How do I change or reset my court date?,"If your court date is currently set for your initial appearance, or arraignment, you may call the court at 303-441-1841 to speak with a clerk to assist you in changing the date. Cases are generally continued for one to two weeks, however, if circumstances warrant, a continuance of up to 30 days may be granted. If your case is set for any other type of court appearance, you must appear on your given court date unless you have previously submitted a written request to continue, which has been granted by the court."
2,"Graffiti The City of Boulder has a strict graffiti ordinance. Commercial or rental property owners are responsible for taking measures to prevent and remove graffiti within three days of noticing it. If the graffiti is not removed, the property owner will receive a warning notice that a summons will be issued for continued non-compliance. If the graffiti remains after a warning is given, the property owner will receive a summons. A first-time summons is $250, which can be mailed in. The maximum violation penalty is $1,000 and 90 days in jail. For graffiti removal tips, visit the city's Graffiti Removal Program. If you witness a vandal in the act of graffiti tagging, please call 911. To report a graffiti crime on your property, call the Boulder Police at 303-441-3333. A reward of up to $1,000 is available for information leading to the arrest of a vandal. To request graffiti removal, to report graffiti on city or private property, or to request more information, please fill out the form below. In the description, please indicate if you know whether it's public or private property. LOCATION: Please use the nearest intersection or address so the mapping application can track your request (for example: Arapahoe & Broadway). Once you type in the location and click outside of the field, the map should appear with your location. You can move the red dot around to get closer to the actual location if needed. DESCRIPTION: Use this field to indicate more specifics about the location and request.",Graffiti,"The City of Boulder has a strict graffiti ordinance. Commercial or rental property owners are responsible for taking measures to prevent and remove graffiti within three days of noticing it. If the graffiti is not removed, the property owner will receive a warning notice that a summons will be issued for continued non-compliance. If the graffiti remains after a warning is given, the property owner will receive a summons. A first-time summons is $250, which can be mailed in. The maximum violation penalty is $1,000 and 90 days in jail. For graffiti removal tips, visit the city's Graffiti Removal Program. If you witness a vandal in the act of graffiti tagging, please call 911. To report a graffiti crime on your property, call the Boulder Police at 303-441-3333. A reward of up to $1,000 is available for information leading to the arrest of a vandal. To request graffiti removal, to report graffiti on city or private property, or to request more information, please fill out the form below. In the description, please indicate if you know whether it's public or private property. LOCATION: Please use the nearest intersection or address so the mapping application can track your request (for example: Arapahoe & Broadway). Once you type in the location and click outside of the field, the map should appear with your location. You can move the red dot around to get closer to the actual location if needed. DESCRIPTION: Use this field to indicate more specifics about the location and request."
3,"Pay Your Utility Bill and MyBUB (MyBoulderUtilityBill Website) Pay Your Boulder Utility Bill Online MyBUB is the City of Boulder's utility account information and bill payment website. Short for ""My Boulder Utility Bill,"" MyBUB allows customers to: pay utility bills online using American Express, Discover, MasterCard or Visa; sign up to receive electronic bills instead of paper copies in the mail; view water usage history; view water budget information; and access convenient links to water conservation information and city news. To access your account information online, you will need your account and customer numbers. These numbers are listed at the top of your water utility bill. You do not have to register with MyBUB to pay your water utility bills online. Visit the MyBUB website at myboulderutilitybill.bouldercolorado.gov.",Pay Your Utility Bill and MyBUB (MyBoulderUtilityBill Website),"Pay Your Boulder Utility Bill Online MyBUB is the City of Boulder's utility account information and bill payment website. Short for ""My Boulder Utility Bill,"" MyBUB allows customers to: pay utility bills online using American Express, Discover, MasterCard or Visa; sign up to receive electronic bills instead of paper copies in the mail; view water usage history; view water budget information; and access convenient links to water conservation information and city news. To access your account information online, you will need your account and customer numbers. These numbers are listed at the top of your water utility bill. You do not have to register with MyBUB to pay your water utility bills online. Visit the MyBUB website at myboulderutilitybill.bouldercolorado.gov."
4,"How can I access this network? Once the network is live, accessing it should be easy. Go to your device's wireless configuration settings. Select the wireless network named ""ConnectBoulder."" Review the user agreement and click ""Accept."" Browse to your heart's content.",How can I access this network?,"Once the network is live, accessing it should be easy. Go to your device's wireless configuration settings. Select the wireless network named ""ConnectBoulder."" Review the user agreement and click ""Accept."" Browse to your heart's content."
5,"Building Safety Code Violations Planning and Development Services (P&DS) investigates code violations and concerns related to building safety, such as interior and exterior structural integrity; handrails and guardrails; doors and windows; interior sanitation (accumulation of rubbish and garbage); plumbing systems and hot water requirements; heating requirements; and outlet and lighting requirements. To report a building safety code violation, make a service request below or call 303-441-1880. P&DS also investigates code violations and concerns related to right of ways, rental housing licensing, and zoning. The Code Enforcement Unit, part of the Boulder Police Department, responds to all other property maintenance and nuisance code violations that affect public safety and quality of life in the City of Boulder. Please call Code Enforcement at 303-441-3333 to ask questions or report a code violation to dispatch.",Building Safety Code Violations,"Planning and Development Services (P&DS) investigates code violations and concerns related to building safety, such as interior and exterior structural integrity; handrails and guardrails; doors and windows; interior sanitation (accumulation of rubbish and garbage); plumbing systems and hot water requirements; heating requirements; and outlet and lighting requirements. To report a building safety code violation, make a service request below or call 303-441-1880. P&DS also investigates code violations and concerns related to right of ways, rental housing licensing, and zoning. The Code Enforcement Unit, part of the Boulder Police Department, responds to all other property maintenance and nuisance code violations that affect public safety and quality of life in the City of Boulder. Please call Code Enforcement at 303-441-3333 to ask questions or report a code violation to dispatch."
6,"Have you had a close call with a bicycle, pedestrian or motorist? For example: Were you in a crosswalk (on foot, bike, skateboard) and a car almost hit you? Were you riding your bike on the right side of the road and a car almost hit you? Did you bike through a red light and a car almost hit you? Were you walking on the sidewalk and a bike almost hit you? If you actually had an accident where physical contact was made in one form or another, please report that to the Police Department, it's the law to report all transportation accidents and also helps the City identify countermeasures to reduce future collisions and improve transportation safety. For emergencies, Dial 911. For non-emergencies, call 303-441-3333. Please complete the form below and provide your contact information. The city's bicycle and pedestrian coordinator may contact you for more information.","Have you had a close call with a bicycle, pedestrian or motorist? For example: Were you in a crosswalk (on foot, bike, skateboard) and a car almost hit you? Were you riding your bike on the right side of the road and a car almost hit you? Did you bike through a red light and a car almost hit you? Were you walking on the sidewalk and a bike almost hit you?","If you actually had an accident where physical contact was made in one form or another, please report that to the Police Department, it's the law to report all transportation accidents and also helps the City identify countermeasures to reduce future collisions and improve transportation safety. For emergencies, Dial 911. For non-emergencies, call 303-441-3333. Please complete the form below and provide your contact information. The city's bicycle and pedestrian coordinator may contact you for more information."
7,Train Noise and Quiet Zones For the Latest Information: You can find the latest information about the city's quiet zones project and how to stay updated by visiting the city's Railroad Quiet Zones web page.,Train Noise and Quiet Zones,For the Latest Information: You can find the latest information about the city's quiet zones project and how to stay updated by visiting the city's Railroad Quiet Zones web page.
8,"Employment Definitions Temporary A temporary position with a Management or BMEA counterpart. No collective bargaining. Eligible for overtime pay per FLSA provisions (elected officials are not eligible). Eligible for PERA retirement benefits. Not eligible for leave or insurance benefits, except for Workers' Compensation and life insurance offered to PERA members. Seasonal A seasonal hourly position with the Parks and Recreation Department or Open Space and Mountain Parks Department. No collective bargaining. Eligible for overtime pay per FLSA provisions. Eligible for PERA retirement benefits. Not eligible for insurance and leave benefits, except for Workers' Compensation and life insurance offered to PERA members. Fixed Term May or may not have collective bargaining depending on the employee group. Eligible for overtime pay per FLSA provisions. Eligible for PERA retirement benefits. Eligible for insurance and leave benefitsPosition has a pre-determined end date.",Employment Definitions,"Temporary A temporary position with a Management or BMEA counterpart. No collective bargaining. Eligible for overtime pay per FLSA provisions (elected officials are not eligible). Eligible for PERA retirement benefits. Not eligible for leave or insurance benefits, except for Workers' Compensation and life insurance offered to PERA members. Seasonal A seasonal hourly position with the Parks and Recreation Department or Open Space and Mountain Parks Department. No collective bargaining. Eligible for overtime pay per FLSA provisions. Eligible for PERA retirement benefits. Not eligible for insurance and leave benefits, except for Workers' Compensation and life insurance offered to PERA members. Fixed Term May or may not have collective bargaining depending on the employee group. Eligible for overtime pay per FLSA provisions. Eligible for PERA retirement benefits. Eligible for insurance and leave benefitsPosition has a pre-determined end date."
9,"Who needs to be on the application? Everyone who lives in the household or is part of the household, in addition to all people who will be listed on the property title. Household members should include all individuals: legal spouse, domestic partner, or common-law spouse; children (under 18 who reside with the applicant at least 50 percent of the time); and/or a significant other whom the applicant chooses to include as a member of his/her household, who will be occupying the house. The applicant's spouse must be included unless they are legally divorced or separated. Persons not counted include foster children, unborn children, and children under 18 who reside with the applicant less then 50 percent of the time.",Who needs to be on the application?,"Everyone who lives in the household or is part of the household, in addition to all people who will be listed on the property title. Household members should include all individuals: legal spouse, domestic partner, or common-law spouse; children (under 18 who reside with the applicant at least 50 percent of the time); and/or a significant other whom the applicant chooses to include as a member of his/her household, who will be occupying the house. The applicant's spouse must be included unless they are legally divorced or separated. Persons not counted include foster children, unborn children, and children under 18 who reside with the applicant less then 50 percent of the time."


In [29]:
sample['qna'] = sample['qna'].apply(default_clean)
sample['qna'] = sample['qna'].apply(stop_and_stem, stem=False)

In [30]:
docLabels = list(sample['question'])
data = list(sample['qna'])
sentences = TaggedDocumentIterator(data, docLabels)

In [35]:
model = Doc2Vec(vector_size=100, min_count=0, epochs=100)
model.build_vocab(sentences)
model.train(sentences,total_examples=model.corpus_count, epochs=model.epochs)

In [56]:
t = test.apply(respond_d2v, axis=1)
print('Successes: ', sum(t.success))
t

Successes:  4


Unnamed: 0,test_question,match_question,sim_question,similarity,success
0,"I live on the corner of Bear Mountain Drive and Scrub Oak Circle, and would like the City enforce the speed limit on Bear Mountain Drive. Despite a limit of 25 mph, I routinely observe vehicles traveling well above that speed in both directions (especially in the mornings and early evening). There are a ton of kids in this neighborhood, as well as a blind curve in the road between several crosswalks. It baffles me to see so much enforcement on Lehigh (with the regular presence of a photo van) and only the very occasional patrol car on Bear Mountain Drive. It would be great if the photo van or other officers could regularly make an appearance on Bear Mountain.",Speeding on Residential Streets,What does CPW do about bears in town? When are they relocated or killed?,0.0,False
1,Can you please mow the grass in the park. It is becoming difficult to find the dog poop and dog owners are just leaving it in the grass.,Park Maintenance Issues,What information do I need to reserve a park?,0.29,False
2,Are there grizzlies in Boulder?,Do we have grizzly bears in Colorado?,Can I meet with the City Attorney before my court date?,0.29,False
3,Where do I report being hit by a bicycle?,"Have you had a close call with a bicycle, pedestrian or motorist? For example: Were you in a crosswalk (on foot, bike, skateboard) and a car almost hit you? Were you riding your bike on the right side of the road and a car almost hit you? Did you bike through a red light and a car almost hit you? Were you walking on the sidewalk and a bike almost hit you?","Have you had a close call with a bicycle, pedestrian or motorist? For example: Were you in a crosswalk (on foot, bike, skateboard) and a car almost hit you? Were you riding your bike on the right side of the road and a car almost hit you? Did you bike through a red light and a car almost hit you? Were you walking on the sidewalk and a bike almost hit you?",0.29,True
4,How much time do I have to wait for my income certification for affordable housing?,How long does it take to become income-certified?,Why has train noise increased in recent years?,0.64,False
5,my water pipes froze and now they are leaking. how do i turn off the water??,How can I prevent and thaw frozen water pipes?,What kinds of payments do you take?,0.53,False
6,"Hello, There are a group of Gambel Oak Trees with Tree ID #'s 38820-38825 that I would like to be pruned up in order to keep them away from the Rec Center wall and to keep them in good, trimmed health. I also would like Tree ID 38825 to be pruned away from the American Flag so it will not come in contact with the tree, from the nearby flag pole. Thank you very much and if you have any questions or concerns, feel free to give me a call.",Public Tree Issues,Public Tree Issues,0.09,True
7,There are constantly dogs off leash in the children playgrounds of columbine school. Even though the playgrounds are fenced and have a sign stating dogs should not go in. You can see some dog owners do not even pick their dog shit.,Dogs on Open Space and Mountain Parks,How long is my permit valid?,0.18,False
8,"I parked in the garage on 11th and Walnut on Sunday 12/23 starting at 5 PM and left at 12:35 AM that Monday 12/24. I was charged $1.25 but it should have still been free since charged parking doesnâ€™t start till 7 AM on Mondays , I park here all the time and I am confused as to why it says I owed $1.25. Is there a glitch in the system ?",Contact Parking Services,Where can I make my payment if I do not want to contest my ticket?,0.32,False
9,Where do I apply for building permits?,How do I get a building permit?,Internships and Work-Study Positions,0.51,False


In [53]:
2-6 model = Doc2Vec(vector_size=100, min_count=0, epochs=100)


SyntaxError: invalid syntax (<ipython-input-53-6171f14dabb6>, line 1)

In [312]:
# Store the model to mmap-able files
model.save('../models/model_qna.doc2vec')
# Load the model
model = Doc2Vec.load('../models/model_qna.doc2vec')