The app.py code includes 4 recommender systems based on similarity model to provide 4 different type of recommendations including:
- Providing three similar recommendations to previously visited place by the user.
- Providing the top two recommendations for the best time chosen by the user.
- Providing the top two recommendations for the province chosen by the user.
- Providing the top two recommendations for attraction type chosen by the user. Also, it includes a flask set up to provide fulfillment routes from/to chatbot to receive the user queries and return the corresponding recommendations.


In [2]:
from math import sqrt
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [3]:
df_attractions = pd.read_csv('C:/Users/yusri/OneDrive - just.edu.jo/Desktop/RS_Project/Chatbot/attraction_new.csv')

df_attractions.head(2)

Unnamed: 0,location_id,location_name,location_type,type_id,ratings,no. of rating,provience,cost,user_id,best_time_to_visit,visiting_hours,Address,Website_link,image_link
0,0,Ripley's Aquarium of Canada,Aquariums,4,4.5,20326,ontario,15$,1,summer,Sun - Sat 10:00 AM - 8:00 PM,"288 Bremner Boulevard, Toronto, Ontario M5V 3L...",http://www.ripleyaquariums.com/canada,https://dynamic-media-cdn.tripadvisor.com/medi...
1,1,Mount Royal Park,Parks,2,3.0,10493,quebec,5$,1,fall,Sun - Sat 6:00 AM - 12:00 AM,"1260 Remembrance Road, Montreal, Quebec H3H 1A...",http://www.lemontroyal.qc.ca/en,https://dynamic-media-cdn.tripadvisor.com/medi...


# Recommender System on the basis of location type

In [4]:
# Recommender System on the basis of location type
tfidf_attraction_type = TfidfVectorizer()
tfidf_attraction_type_matrix = tfidf_attraction_type.fit_transform(df_attractions['location_type'])
cosine_sim_attractions = linear_kernel(tfidf_attraction_type_matrix, tfidf_attraction_type_matrix)

In [5]:
def get_recommendations_based_on_type(location_name, cosine_sim_attractions=cosine_sim_attractions):
    """
    Calculates top 2 attraction to recommend based on given location type. 
    """
    idx_attraction = df_attractions.loc[df_attractions['location_name'].isin([location_name])]
    idx_attraction = idx_attraction.index
    sim_scores_attractions = list(enumerate(cosine_sim_attractions[idx_attraction][0]))
    sim_scores_attractions = sorted(sim_scores_attractions, key=lambda x: x[1], reverse=True)
    sim_scores_attractions = sim_scores_attractions[1:3]
    attraction_indices = [i[0] for i in sim_scores_attractions]
    return df_attractions['location_name'].iloc[attraction_indices]

# Recommender System on the basis of provience

In [7]:
tfidf_attraction_provience = TfidfVectorizer()
tfidf_attraction_provience_matrix = tfidf_attraction_provience.fit_transform(df_attractions['provience'])
cosine_sim_attractions_pro = linear_kernel(tfidf_attraction_provience_matrix, tfidf_attraction_provience_matrix)

In [8]:
tfidf_attraction_provience = TfidfVectorizer()
tfidf_attraction_provience_matrix = tfidf_attraction_provience.fit_transform(df_attractions['provience'])
cosine_sim_attractions_pro = linear_kernel(tfidf_attraction_provience_matrix, tfidf_attraction_provience_matrix)

def get_recommendations_based_on_provience(location_name, cosine_sim_attractions_pro=cosine_sim_attractions_pro):
    """
    Calculates top 2 attraction to recommend based on given location type. 
    """
    idx_attraction_pro = df_attractions.loc[df_attractions['location_name'].isin([location_name])]
    idx_attraction_pro = idx_attraction_pro.index
    sim_scores_attractions_pro = list(enumerate(cosine_sim_attractions_pro[idx_attraction_pro][0]))
    sim_scores_attractions_pro = sorted(sim_scores_attractions_pro, key=lambda x: x[1], reverse=True)
    sim_scores_attractions_pro = sim_scores_attractions_pro[1:3]
    attraction_indices_pro = [i[0] for i in sim_scores_attractions_pro]
    return df_attractions['location_name'].iloc[attraction_indices_pro]


In [9]:
#def get_province(province_name):
  #loc = df_attractions.loc[df_attractions['provience'].isin([province_name])]
  #provience_fetch = loc.iloc[0]['location_name']
  #re=list(get_recommendations_based_on_provience(provience_fetch).values)
  #return re

# Recommender System based on best time to visit

In [11]:
tfidf_attraction_best_time = TfidfVectorizer()
tfidf_attraction_best_time_matrix = tfidf_attraction_best_time.fit_transform(df_attractions['best_time_to_visit'])
cosine_sim_attractions_best_time = linear_kernel(tfidf_attraction_best_time_matrix, tfidf_attraction_best_time_matrix)

In [12]:
def get_recommendations_based_on_best_time(location_name, cosine_sim_attractions_best_time=cosine_sim_attractions_best_time):
    """
    Calculates top 2 attraction to recommend based on given best time to visit. 
    """
    idx_attraction_best_time = df_attractions.loc[df_attractions['location_name'].isin([location_name])]
    idx_attraction_best_time = idx_attraction_best_time.index
    # Get the pairwsie similarity scores of all attraction with that name
    sim_scores_attractions_best_time = list(enumerate(cosine_sim_attractions_best_time[idx_attraction_best_time][0]))
    # Sort the attractions based on the similarity scores
    sim_scores_attractions_best_time = sorted(sim_scores_attractions_best_time, key=lambda x: x[1], reverse=True)
    # Get the scores of the 2 most similar attractions
    sim_scores_attractions_best_time = sim_scores_attractions_best_time[1:3]
    # Get the attraction indices
    attraction_indices_best_time = [i[0] for i in sim_scores_attractions_best_time]
    # Return the top 2 most similar attractions
    return df_attractions['location_name'].iloc[attraction_indices_best_time]

---------------------------------------------

# Content-Based Recommender System model considering multiple features together such as type of location, provience, best season, cost, ratings.

In [13]:
features = ['location_type', 'provience', 'cost','best_time_to_visit','ratings']
def combine_features(row):
    return row['location_type']+' '+row['provience']+' '+row['cost']+' '+row['best_time_to_visit']+' '+str(row['ratings'])

In [14]:
for feature in features:
    df_attractions[feature] = df_attractions[feature].fillna('')
    df_attractions['combined_features'] = df_attractions.apply(combine_features, axis = 1)

In [15]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer()
count_matrix = cv.fit_transform(df_attractions['combined_features'])

In [16]:
from sklearn.metrics.pairwise import cosine_similarity

cosine_sim = cosine_similarity(count_matrix)
def get_location_from_locationId(location_id):
    return df_attractions[df_attractions.location_id == location_id]["location_name"].values[0]
def get_locationId_from_location(location_name):
    return df_attractions[df_attractions.location_name == location_name]["location_id"].values[0]
def get_ratings_from_locationId(location_id):
  return df_attractions[df_attractions.location_id == location_id]["ratings"].values[0]
def get_provience_from_locationId(location_id):
    return df_attractions[df_attractions.location_id == location_id]["provience"].values[0]
def get_best_time_from_locationId(location_id):
    return df_attractions[df_attractions.location_id == location_id]["best_time_to_visit"].values[0]
def get_visiting_hours_from_locationId(location_id):
    return df_attractions[df_attractions.location_id == location_id]["visiting_hours"].values[0]
def get_address_from_locationId(location_id):
    return df_attractions[df_attractions.location_id == location_id]["Address"].values[0]
def get_website_from_locationId(location_id):
    return df_attractions[df_attractions.location_id == location_id]["Website_link"].values[0]
def get_image_from_locationId(location_id):
    return df_attractions[df_attractions.location_id == location_id]["image_link"].values[0]

In [17]:
import numpy as np

In [18]:
from itertools import islice

list1=[]

def contect_based_combined_list(location_user_likes):
  location_locationId= get_locationId_from_location(location_user_likes)
  similar_locations = list(enumerate(cosine_sim[location_locationId])) #accessing the row corresponding to given location to find all the similarity scores for that location and then enumerating over it
  sorted_similar_locations = sorted(similar_locations,key=lambda x:x[1],reverse=True)[1:]
  
  i=0
  for element in sorted_similar_locations:
      list1.append((get_location_from_locationId(element[0])))
      list1.append((get_provience_from_locationId(element[0])))
      list1.append((get_ratings_from_locationId(element[0])))
      list1.append((get_address_from_locationId(element[0])))
      list1.append((get_best_time_from_locationId(element[0])))
      list1.append((get_visiting_hours_from_locationId(element[0])))

      i=i+1
      if i>2:
          break
  
  list_final = [list1[i:i + 6] for i in range(0, len(list1), 6)]
  return list_final


In [25]:
try:
    import urllib
    import json
    import os
    from flask import (Flask,request, make_response)

except Exception as e:

    print("Some modules are missing {}".format(e))
# Flask app should start in global layout
app = Flask(__name__)


# whenever you make request /webhook
# Following calls are made
# webhook ->
# -----------> Process requests
# ---------------------------->get_data()


@app.route('/webhook', methods=['POST'])
def webhook():

    if request.method == "POST":
        req = request.get_json(silent=True, force=True)
        res = processRequest(req)

        res = json.dumps(res, indent=4)
        r = make_response(res)
        r.headers['Content-Type'] = 'application/json'
        return r


def processRequest(req):

    # Get all the Query Parameter
    query_response = req["queryResult"]
    print(query_response)
    #text = query_response.get('queryText', None)
    #parameters = query_response.get('parameters', None)
    sessionID = req.get('responseId')
    query_response = req.get("queryResult")
    intent = query_response.get("intent").get('displayName')
    query_text = query_response.get("queryText")
    parameters = query_response.get("parameters")
    province_name = parameters.get("geo-state")
    print(province_name)
    attraction = parameters.get("attraction_type")
    besttime = parameters.get("best_time")
    previous_attarction = parameters.get("attraction_typee")
    #db = configureDataBase()
    #res = get_data()
    #res= get_province_rec(province_name)
    if intent == 'Province':
        res= get_province(province_name)
        #fulfillmentText = get_province_rec(province_name)
        return res
    elif intent =='Attraction':
        res= loc_type(attraction)
        #fulfillmentText = get_province_rec(province_name)
        return res
    elif intent =='BestTime':
        res= loc_type(attraction)
        return res
        
    elif intent =='Previous_Visit':
        res= get_from_u(previous_attarction)
        return res
    else:
        return {
            "fulfillmentText": "Something went wrong,Lets start from the begning, Say Hi",
        }



def get_from_u(previous_attarction):
    model=contect_based_combined_list(previous_attarction)
    return {
        "fulfillmentText":'Here are three similar recommendations to '+ str(location_user_likes)+': \n'+ '\n'+ str(model)
        #"fulfillmentText":'Here are the top two recommendations for'

    }



def get_best_time(besttime):
    loc = df_attractions.loc[df_attractions['best_time_to_visit'].isin([besttime])]
    time_fetch = loc.iloc[0]['location_name']
    re=list(get_recommendations_based_on_best_time(time_fetch).values)
    return {
        "fulfillmentText":'Here are the top two recommendations for '+ str(besttime)+': \n'+ '\n'+ str(re)
        #"fulfillmentText":'Here are the top two recommendations for'
    }




def get_province(province_name):
    loc = df_attractions.loc[df_attractions['provience'].isin([province_name.lower()])]
    provience_fetch = loc.iloc[0]['location_name']
    re=list(get_recommendations_based_on_provience(provience_fetch).values)
    return {
        "fulfillmentText":'Here are the top two recommendations for '+ str(province_name)+': \n'+ '\n'+ str(re)
    }


def loc_type(attraction):
    loc = df_attractions.loc[df_attractions['location_type'].isin([attraction])]
    location_fetch = loc.iloc[0]['location_name']
    re=list(get_recommendations_based_on_type(location_fetch).values)
    return {
        "fulfillmentText":'Here are the top two recommendations for '+ str(attraction)+': \n'+ '\n'+ str(re)
        #"fulfillmentText":'Here are the top two recommendations for'
    }





if __name__ == '__main__':
    port = int(os.getenv('PORT', 5000))
    print ("Starting app on port %d" %(port))

    #app.run(debug=True, port=port, host='0.0.0.0')
    app.run(debug=True, use_reloader=False)
    #export FLASK_APP=my_app.py
    #export FLASK_DEBUG=1
    #flask run


'''if __name__ == '__main__':
    port = int(os.getenv('PORT',5000))
    print("Starting app on port %d" % port)
    #app.run(debug=True, port=port, host='0.0.0.0')'''
'''if __name__ == "__main__":
    app.run(port=5000, debug=True)''' # running the app on the local machine on port 8000

Starting app on port 5000
 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [12/Apr/2021 09:34:53] "[37mPOST /webhook HTTP/1.1[0m" 200 -


{'queryText': 'on', 'parameters': {'geo-state': 'Ontario'}, 'allRequiredParamsPresent': True, 'outputContexts': [{'name': 'projects/recommenderbot-ybiv/agent/sessions/60545ab0-0ed5-03bc-743a-06d99ed29518/contexts/__system_counters__', 'parameters': {'no-input': 0.0, 'no-match': 0.0, 'geo-state': 'Ontario', 'geo-state.original': 'on'}}], 'intent': {'name': 'projects/recommenderbot-ybiv/agent/intents/830a84cb-8d4d-471e-b139-0b184a5313f3', 'displayName': 'Province'}, 'intentDetectionConfidence': 1.0, 'languageCode': 'en'}
Ontario


127.0.0.1 - - [12/Apr/2021 09:36:58] "[37mPOST /webhook HTTP/1.1[0m" 200 -


{'queryText': 'alberta', 'parameters': {'geo-state': 'Alberta'}, 'allRequiredParamsPresent': True, 'outputContexts': [{'name': 'projects/recommenderbot-ybiv/agent/sessions/webdemo-0dbbc350-b3fd-97b9-b89e-33423fde882c/contexts/__system_counters__', 'parameters': {'no-input': 0.0, 'no-match': 0.0, 'geo-state': 'Alberta', 'geo-state.original': 'alberta'}}], 'intent': {'name': 'projects/recommenderbot-ybiv/agent/intents/830a84cb-8d4d-471e-b139-0b184a5313f3', 'displayName': 'Province'}, 'intentDetectionConfidence': 1.0, 'languageCode': 'en'}
Alberta


127.0.0.1 - - [12/Apr/2021 09:37:50] "[37mPOST /webhook HTTP/1.1[0m" 200 -


{'queryText': 'qc', 'parameters': {'geo-state': 'Quebec'}, 'allRequiredParamsPresent': True, 'outputContexts': [{'name': 'projects/recommenderbot-ybiv/agent/sessions/webdemo-0dbbc350-b3fd-97b9-b89e-33423fde882c/contexts/__system_counters__', 'lifespanCount': 1, 'parameters': {'no-input': 0.0, 'no-match': 0.0, 'geo-state': 'Quebec', 'geo-state.original': 'qc'}}], 'intent': {'name': 'projects/recommenderbot-ybiv/agent/intents/830a84cb-8d4d-471e-b139-0b184a5313f3', 'displayName': 'Province'}, 'intentDetectionConfidence': 1.0, 'languageCode': 'en'}
Quebec


127.0.0.1 - - [12/Apr/2021 09:38:00] "[37mPOST /webhook HTTP/1.1[0m" 200 -


{'queryText': 'on', 'parameters': {'geo-state': 'Ontario'}, 'allRequiredParamsPresent': True, 'outputContexts': [{'name': 'projects/recommenderbot-ybiv/agent/sessions/webdemo-0dbbc350-b3fd-97b9-b89e-33423fde882c/contexts/__system_counters__', 'parameters': {'no-input': 0.0, 'no-match': 0.0, 'geo-state': 'Ontario', 'geo-state.original': 'on'}}], 'intent': {'name': 'projects/recommenderbot-ybiv/agent/intents/830a84cb-8d4d-471e-b139-0b184a5313f3', 'displayName': 'Province'}, 'intentDetectionConfidence': 1.0, 'languageCode': 'en'}
Ontario


127.0.0.1 - - [12/Apr/2021 09:38:09] "[37mPOST /webhook HTTP/1.1[0m" 200 -


{'queryText': 'manitoba', 'parameters': {'geo-state': 'Manitoba'}, 'allRequiredParamsPresent': True, 'outputContexts': [{'name': 'projects/recommenderbot-ybiv/agent/sessions/webdemo-0dbbc350-b3fd-97b9-b89e-33423fde882c/contexts/__system_counters__', 'parameters': {'no-input': 0.0, 'no-match': 0.0, 'geo-state': 'Manitoba', 'geo-state.original': 'manitoba'}}], 'intent': {'name': 'projects/recommenderbot-ybiv/agent/intents/830a84cb-8d4d-471e-b139-0b184a5313f3', 'displayName': 'Province'}, 'intentDetectionConfidence': 1.0, 'languageCode': 'en'}
Manitoba
{'queryText': 'fall', 'parameters': {'best_time': 'fall'}, 'allRequiredParamsPresent': True, 'outputContexts': [{'name': 'projects/recommenderbot-ybiv/agent/sessions/webdemo-0dbbc350-b3fd-97b9-b89e-33423fde882c/contexts/__system_counters__', 'parameters': {'no-input': 0.0, 'no-match': 0.0, 'best_time': 'fall', 'best_time.original': 'fall'}}], 'intent': {'name': 'projects/recommenderbot-ybiv/agent/intents/5fa66014-250e-4459-97ee-5862e8134846

127.0.0.1 - - [12/Apr/2021 09:38:46] "[35m[1mPOST /webhook HTTP/1.1[0m" 500 -
Traceback (most recent call last):
  File "F:\ANACONDA\Lib\site-packages\flask\app.py", line 2464, in __call__
    return self.wsgi_app(environ, start_response)
  File "F:\ANACONDA\Lib\site-packages\flask\app.py", line 2450, in wsgi_app
    response = self.handle_exception(e)
  File "F:\ANACONDA\Lib\site-packages\flask\app.py", line 1867, in handle_exception
    reraise(exc_type, exc_value, tb)
  File "F:\ANACONDA\Lib\site-packages\flask\_compat.py", line 39, in reraise
    raise value
  File "F:\ANACONDA\Lib\site-packages\flask\app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "F:\ANACONDA\Lib\site-packages\flask\app.py", line 1952, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "F:\ANACONDA\Lib\site-packages\flask\app.py", line 1821, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "F:\ANACONDA\Lib\site-packages\flask\_compat.py

{'queryText': 'Parks', 'parameters': {'attraction_type': 'Parks'}, 'allRequiredParamsPresent': True, 'fulfillmentMessages': [{'text': {'text': ['']}}], 'outputContexts': [{'name': 'projects/recommenderbot-ybiv/agent/sessions/webdemo-0dbbc350-b3fd-97b9-b89e-33423fde882c/contexts/__system_counters__', 'parameters': {'no-input': 0.0, 'no-match': 0.0, 'attraction_type': 'Parks', 'attraction_type.original': 'Parks'}}], 'intent': {'name': 'projects/recommenderbot-ybiv/agent/intents/9fea50c7-5779-4681-acf7-d5b5e5aecab3', 'displayName': 'Attraction'}, 'intentDetectionConfidence': 1.0, 'languageCode': 'en'}
None


127.0.0.1 - - [12/Apr/2021 09:45:46] "[37mPOST /webhook HTTP/1.1[0m" 200 -


{'queryText': 'Trams', 'parameters': {'attraction_type': 'Trams'}, 'allRequiredParamsPresent': True, 'fulfillmentMessages': [{'text': {'text': ['']}}], 'outputContexts': [{'name': 'projects/recommenderbot-ybiv/agent/sessions/webdemo-0dbbc350-b3fd-97b9-b89e-33423fde882c/contexts/__system_counters__', 'parameters': {'no-input': 0.0, 'no-match': 0.0, 'attraction_type': 'Trams', 'attraction_type.original': 'Trams'}}], 'intent': {'name': 'projects/recommenderbot-ybiv/agent/intents/9fea50c7-5779-4681-acf7-d5b5e5aecab3', 'displayName': 'Attraction'}, 'intentDetectionConfidence': 1.0, 'languageCode': 'en'}
None


127.0.0.1 - - [12/Apr/2021 09:47:18] "[37mPOST /webhook HTTP/1.1[0m" 200 -


{'queryText': 'Gardens', 'parameters': {'attraction_type': 'Gardens'}, 'allRequiredParamsPresent': True, 'fulfillmentMessages': [{'text': {'text': ['']}}], 'outputContexts': [{'name': 'projects/recommenderbot-ybiv/agent/sessions/webdemo-0dbbc350-b3fd-97b9-b89e-33423fde882c/contexts/__system_counters__', 'lifespanCount': 1, 'parameters': {'no-input': 0.0, 'no-match': 0.0, 'attraction_type': 'Gardens', 'attraction_type.original': 'Gardens'}}], 'intent': {'name': 'projects/recommenderbot-ybiv/agent/intents/9fea50c7-5779-4681-acf7-d5b5e5aecab3', 'displayName': 'Attraction'}, 'intentDetectionConfidence': 1.0, 'languageCode': 'en'}
None


'if __name__ == "__main__":\n    app.run(port=5000, debug=True)'

In [21]:
get_from_u('Stanley Park')

NameError: name 'location_user_likes' is not defined

In [131]:
loc_type('Parks')

{'fulfillmentText': "Here are the top two recommendations for Parks: \n\n['Stanley Park', 'Kingston Waterfront']"}

In [132]:
get_best_time('fall')

{'fulfillmentText': "Here are the top two recommendations for fall: \n\n['Heritage Park Historical Village', 'Toronto Island Park']"}

{'fulfillmentText': 'Here are the top two recommendations for'}