In [None]:
import time, os, json, re, requests
import datetime as dt

import getnoms_utils as nom

import numpy as np
import pandas as pd

from ipywidgets import HTML, embed
import ipyleaflet as ipyl

In [None]:
# Show every column when a DataFrame is rendered (no column truncation).
pd.options.display.max_columns = None

In [None]:
# Shell command: enable the ipyleaflet notebook extension for the current
# environment (--sys-prefix) so the map widgets below can render.
!jupyter nbextension enable --py --sys-prefix ipyleaflet

In [None]:
# dtypes treated as numeric when checking columns for missing values.
num_type = [
    "int64",
    "float64",
]

In [None]:
curr_path = os.getcwd()

# API keys are kept outside the repository. Set the CREDS_PATH environment
# variable to point somewhere else; the original location stays the default.
creds_path = os.environ.get("CREDS_PATH", r"D:\personal\creds")


def _load_apikey(service):
    """Return the parsed JSON stored in <creds_path>/<service>/apikey.json.

    The rest of the notebook reads the secret from the "key" entry of the
    returned object.
    """
    # The context manager closes the file; no explicit close() is needed.
    with open(os.path.join(creds_path, service, "apikey.json")) as f:
        return json.load(f)


gmaps_apikey = _load_apikey("gmaps")
yelp_apikey = _load_apikey("yelp")
ipstack_apikey = _load_apikey("ipstack")

In [None]:
# Address to search around.
# NOTE(review): presumably an empty string makes get_origin fall back to
# IP-based geolocation through ipstack -- confirm in getnoms_utils.
my_address = ""

# Resolve the search origin as latitude, longitude and a combined coords value.
lat, lng, coords = nom.get_origin(
    address=my_address,
    gmaps_key=gmaps_apikey["key"],
    ipstack_key=ipstack_apikey["key"],
)

In [None]:
# What to look for, and how far from the origin (radius is given in `unit`).
search_query = "ramen"

search_google = nom.GooglePlaceSearch(
    query=search_query,
    origin_coords=coords,
    radius=7,
    unit="miles",
    gmaps_key=gmaps_apikey["key"],
)

# Search results; the "place_id" entry is used as the join key further down.
dict_gquery = search_google.google_searchquery()

In [None]:
# Look up details for every place returned by the Google search.
gbi = nom.GoogleBusinessInfo(
    place_ids=dict_gquery["place_id"],
    origin_coords=coords,
    gmaps_key=gmaps_apikey["key"],
)

# Opening hours are not requested.
dict_getinfo = gbi.google_businfo(return_hours=False)

In [None]:
# Yelp helper from getnoms_utils, authenticated with the Yelp API key.
yelp = nom.YelpMatch(yelp_key=yelp_apikey["key"])

In [None]:
# Match the Google results to Yelp businesses.
# NOTE(review): "US" is presumably the country code used for matching -- confirm in getnoms_utils.
match_list = yelp.google_yelp_match(dict_gquery, dict_getinfo, "US")

In [None]:
# Query Yelp for the matched businesses; the result is turned into a DataFrame below.
dict_yquery = yelp.yelp_searchquery(match_list)

In [None]:
# Sentiment data for the matched Yelp businesses; merged on place_id/yelp_id below.
yelp_sent = yelp.yelp_sentiment(match_list)

In [None]:
# One DataFrame per API result, built in the same order as the results were fetched.
df_gquery, df_getinfo, df_yquery, df_yelpsent = (
    pd.DataFrame(records)
    for records in (dict_gquery, dict_getinfo, dict_yquery, yelp_sent)
)

In [None]:
# Left-join everything onto the Google search results so that every place
# found by the search is kept even when Yelp data is missing.
data = (
    df_gquery
    .merge(df_getinfo, how="left", on="place_id")
    .merge(df_yquery, how="left", on="place_id")
    .merge(df_yelpsent, how="left", on=["place_id", "yelp_id"])
)

In [None]:
# Names of the numeric columns that contain at least one missing value.
numeric_has_null = data.select_dtypes(num_type).isnull().any()
numeric_has_null[numeric_has_null].index

In [None]:
# Number of places in the merged data set.
data.shape[0]

In [None]:
# Preview the first rows of the merged data set.
data.head()

In [None]:
# Pairwise correlations between the numeric features, in long format
# (one row per ordered feature pair).
# Non-numeric columns are excluded explicitly: recent pandas versions raise in
# corr() when the frame still contains string columns.
corr_matrix = (
    data.drop(columns=["lat", "lng", "trip_time"])
        .select_dtypes(include=["number", "bool"])
        .corr()
)
correlations = corr_matrix.stack().reset_index(drop=False)
correlations.columns = ["feat1", "feat2", "corr"]

# Remove the self-pairs by name rather than by value: filtering on corr == 1
# would also discard distinct features that are perfectly correlated, and it
# misses self-pairs whose coefficient comes out as 0.999... in floating point.
correlations = correlations[correlations["feat1"] != correlations["feat2"]]

In [None]:
# Strongest positive correlations first.
correlations.sort_values(by="corr", ascending=False)

In [None]:
from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA
import scipy.cluster.hierarchy as sch
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Two NaN imputers: one fills with the most frequent value, one with the median.
simpimputer_freq, simpimputer_median = (
    SimpleImputer(missing_values=np.nan, strategy=strategy)
    for strategy in ("most_frequent", "median")
)

In [None]:
# Fill missing price levels with the most frequent level of the same source.
for price_level in ("goog_price_level", "yelp_price_level"):
    # The imputer expects a 2-D input, hence the single-column reshape.
    price_column = data[price_level].to_numpy().reshape(-1, 1)
    data[price_level] = simpimputer_freq.fit_transform(price_column)

In [None]:
# Discard places that have no Yelp sentiment score (rows are removed in place).
data.dropna(subset=["yelp_sent_comp"], inplace=True)

In [None]:
# Re-check which numeric columns still contain missing values after cleaning.
numeric_has_null = data.select_dtypes(num_type).isnull().any()
numeric_has_null[numeric_has_null].index

In [None]:
# Columns fed to the power transform: distance plus the Google/Yelp pairs for
# rating, price level and sentiment.
features = [
    "distance",
    "goog_rating",
    "yelp_rating",
    "goog_price_level",
    "yelp_price_level",
    "goog_sent_comp",
    "yelp_sent_comp",
]

In [None]:
"""
The Yeo-Johnson method is used because the vaderSentiment compound score ranges from -1 to 1,
whereas all other features extracted from the APIs are strictly positive.
"""
# Power transformer applied to the columns listed in `features`.
pt = PowerTransformer(method="yeo-johnson")

In [None]:
# Power-transformed copy of the model features.
# Rows were dropped from `data` earlier, so its index has gaps. Reusing that
# index keeps every transformed row addressable by the same label as its source
# row, instead of silently renumbering them 0..n-1.
data_ptfitted = pd.DataFrame(
    pt.fit_transform(X=data[features]),
    columns=features,
    index=data.index,
)

In [None]:
# Three independent one-component PCAs: ratings, price levels and sentiment.
pca_rating, pca_price, pca_sent = (PCA(n_components=1) for _ in range(3))

In [None]:
# Collapse each Google/Yelp column pair into a single composite column
# (the first principal component of the pair).
for comp_name, pca_model, source_cols in (
    ("rating_comp", pca_rating, ["goog_rating", "yelp_rating"]),
    ("sent_comp", pca_sent, ["goog_sent_comp", "yelp_sent_comp"]),
    ("price_comp", pca_price, ["goog_price_level", "yelp_price_level"]),
):
    data_ptfitted[comp_name] = pca_model.fit_transform(data_ptfitted[source_cols])

In [None]:
# Diagnostics for the rating PCA; the precision matrix is shown as the cell output.
print(
    f"Scaled composite rating explained variance: {round(pca_rating.explained_variance_ratio_[0], 3)}",
    f"Scaled composite rating noise covariance: {round(pca_rating.noise_variance_, 3)}",
    sep="\n",
)
pca_rating.get_precision()

In [None]:
# Diagnostics for the sentiment PCA; the precision matrix is shown as the cell output.
print(
    f"Scaled composite sentiment score explained variance: {round(pca_sent.explained_variance_ratio_[0], 3)}",
    f"Scaled composite sentiment score noise covariance: {round(pca_sent.noise_variance_, 3)}",
    sep="\n",
)
pca_sent.get_precision()

In [None]:
# Diagnostics for the price-level PCA; the precision matrix is shown as the cell output.
print(
    f"Scaled composite price level explained variance: {round(pca_price.explained_variance_ratio_[0], 3)}",
    f"Scaled composite price level noise covariance: {round(pca_price.noise_variance_, 3)}",
    sep="\n",
)
pca_price.get_precision()

In [None]:
# Preview the transformed features together with the PCA composite columns.
data_ptfitted.head()

In [None]:
# Inputs to the clustering: distance plus the three PCA composites.
features_for_model = [
    "distance",
    "rating_comp",
    "sent_comp",
    "price_comp",
]

In [None]:
# Hierarchical (Ward, Euclidean) linkage over the model features, used for the dendrogram.
model_matrix = data_ptfitted[features_for_model]
link = sch.linkage(model_matrix, "ward", "euclidean")

In [None]:
# Dendrogram of the linkage, drawn on an explicit 16x9 axes with a grid.
fig, ax = plt.subplots(figsize=(16, 9))
dendo = sch.dendrogram(link, ax=ax)
ax.grid(True)

In [None]:
from sklearn.cluster import AgglomerativeClustering

In [None]:
# Number of clusters to form. This was previously written as link.shape[1],
# which is always 4 because a SciPy linkage matrix has four columns, not because
# of anything in the data. Adjust after inspecting the dendrogram.
N_CLUSTERS = 4

# Ward linkage only supports Euclidean distances, which is the estimator's
# default. The explicit `affinity` argument is omitted because recent
# scikit-learn releases no longer accept it.
model = AgglomerativeClustering(n_clusters=N_CLUSTERS, linkage="ward")

In [None]:
# Fit the clustering on the model features and store each place's cluster label in data["pred"].
data["pred"] = model.fit_predict(data_ptfitted[features_for_model])

In [None]:
# Histogram of the cluster labels, i.e. how many places fall into each cluster.
data.hist(column="pred")

In [None]:
# Columns shown when inspecting the members of a cluster.
info_cols = [
    "name_x",
    "address",
    "city",
    "distance",
    "goog_rating",
    "yelp_rating",
    "goog_price_level",
    "yelp_price_level",
    "goog_sent_comp",
    "yelp_sent_comp",
]

In [None]:
# Members of cluster 0.
data.loc[data["pred"] == 0, info_cols]

In [None]:
# Members of cluster 1.
data.loc[data["pred"] == 1, info_cols]

In [None]:
# Members of cluster 2.
data.loc[data["pred"] == 2, info_cols]

In [None]:
# Members of cluster 3, shown without the address and city columns.
data.loc[
    data["pred"] == 3,
    [col for col in info_cols if col not in ("address", "city")],
]

In [None]:
# Renumber the rows 0..n-1 (the old index is discarded, not kept as a column).
data.index = pd.RangeIndex(len(data))

In [None]:
# Sentiment scores below the lower bound are negative, above the upper bound
# positive, and anything in between (bounds included) neutral.
sent_bounds = [-0.25, 0.8]


def label_sentiment(scores, bounds=sent_bounds):
    """Map a Series of sentiment scores to the popup phrases shown on the map.

    Parameters
    ----------
    scores : pandas.Series
        Numeric sentiment scores.
    bounds : sequence of two numbers
        (lower, upper) thresholds; both are part of the neutral band.

    Returns
    -------
    pandas.Series
        Object-dtype Series on the same index as `scores`. Missing scores stay
        missing in the result.
    """
    lower, upper = bounds
    labels = pd.Series(np.nan, index=scores.index, dtype="object")
    labels[scores > upper] = "Yay!"
    # Series.between includes both end points by default. The boolean form
    # inclusive=True is rejected by recent pandas, so it is not passed.
    labels[scores.between(lower, upper)] = "Meh."
    labels[scores < lower] = "This place sucks."
    return labels


# Same rule for both review sources.
for source in ("goog", "yelp"):
    data[f"{source}_peoplesay"] = label_sentiment(data[f"{source}_sent_comp"])

In [None]:
# --- Base map centred on the search origin --------------------------------
map_base = ipyl.Map(center=(lat, lng),
                    zoom=13,
                    close_popup_on_click=False
                   )

map_base.add_control(ipyl.FullScreenControl())

# --- Origin: marker, black circle and a "Your location" popup --------------
map_origin_marker = ipyl.Marker(location=map_base.center)
map_origin_circle = ipyl.CircleMarker()
map_origin_circle.location = (lat, lng)
map_origin_circle.radius = 10
map_origin_circle.color = "black"

map_origin_message = HTML(value="Your location")
popup_origin = ipyl.Popup(location=(lat, lng),
                          child=map_origin_message,
                          close_button=True
                         )

map_origin_marker.popup = map_origin_message

map_base.add_layer(map_origin_marker)
map_base.add_layer(map_origin_circle)
map_base.add_layer(popup_origin)

# --- One toggleable layer group per cluster ---------------------------------
map_marker_cmap = {0: "red",
                   1: "orange",
                   2: "yellow",
                   3: "green",
                   4: "blue",
                   5: "purple"
                  }
# Colour used when a cluster id has no entry in map_marker_cmap.
map_marker_default_color = "gray"

# Layer groups are kept in a dict keyed by cluster id instead of being
# injected into globals() under generated names.
cluster_layers = {}
for clust in sorted(data["pred"].unique()):
    cluster_layers[clust] = ipyl.LayerGroup(name=f"cluster {clust}")
    map_base.add_layer(cluster_layers[clust])

# --- One marker + coloured circle per place ---------------------------------
# Rows are iterated directly, so the loop does not depend on the frame having
# a 0..n-1 index.
for _, row in data.iterrows():
    # Deliberately NOT named `coords`: that name holds the search origin used
    # by the API cells above and must survive this loop.
    loc_coords = (row["lat"], row["lng"])
    clust_label = row["pred"]

    map_loc_marker = ipyl.Marker(location=loc_coords)
    map_loc_circle = ipyl.CircleMarker()
    map_loc_circle.location = loc_coords
    map_loc_circle.radius = 5
    map_loc_circle.color = map_marker_cmap.get(clust_label, map_marker_default_color)

    # Skip missing address parts and stringify the rest, so that a NaN or a
    # numeric zip code cannot break str.join.
    address_parts = [row["address"], row["city"], row["state"], row["zip"]]
    address_text = ", ".join(str(part) for part in address_parts if pd.notna(part))

    # The popup shows the higher of the two price levels as a run of "$".
    price_level = pd.to_numeric(row[["goog_price_level", "yelp_price_level"]]).max()
    price_text = int(price_level) * "$" if pd.notna(price_level) else "n/a"

    map_marker_message = HTML()
    map_marker_message.value = (
        f"Name: {row['name_x']}<br>"
        f"Address: {address_text}<br>"
        f"Phone: {row['phone_formatted']}<br>"
        f"Google rating: {row['goog_rating']}; Yelp rating: {row['yelp_rating']}<br>"
        f"Google says: {row['goog_peoplesay']}<br>"
        f"Yelp says: {row['yelp_peoplesay']}<br>"
        f"Price level: {price_text}"
    )
    map_loc_marker.popup_max_width = 425
    map_loc_marker.popup = map_marker_message

    cluster_layers[clust_label].add_layer(map_loc_marker)
    cluster_layers[clust_label].add_layer(map_loc_circle)

# Layer switcher so clusters can be shown or hidden individually.
map_base.add_control(ipyl.LayersControl())

In [None]:
# Render the interactive map in the notebook.
display(map_base)

In [None]:
# Write the map widget to a standalone HTML file.
embed.embed_minimal_html("export.html", views=[map_base], requirejs=True)

In [None]:
# Output name: the search query (spaces -> underscores) plus a timestamp, e.g. ramen_20200101T120000.
timestamp = dt.datetime.now().strftime("%Y%m%dT%H%M%S")
filename = f"{search_query.replace(' ', '_')}_{timestamp}"

In [None]:
# Save the final data set, without the index column, next to the notebook.
data.to_csv(f"{filename}.csv", index=False)

In [None]:
# Places that both Google and Yelp reviewers rate as "Yay!".
# Two vectorized column comparisons replace the row-wise apply(tuple) == tuple
# test, which depended on pandas treating the right-hand tuple as a scalar.
data[(data["goog_peoplesay"] == "Yay!") & (data["yelp_peoplesay"] == "Yay!")]