In [1]:
import os
# Switch the working directory two levels up from where the kernel started.
# NOTE(review): presumably this lands on the repository root, so that the
# `scripts.python...` import and the `data/` paths built from os.getcwd()
# later in the notebook resolve — confirm.
# The path is relative, so re-running this cell moves up two MORE levels;
# restart the kernel before re-running it.
os.chdir("../..")
from itertools import product
import json
import matplotlib.pyplot as plt
import pandas as pd
import requests
# !pip install google-api-python-client
from googleapiclient.discovery import build

# local import
# from scripts.python.config import GoogleAPIkey
from scripts.python.GoogleTrends import GT

In [2]:
# Never commit credentials: the API key is read from the environment instead
# of being hardcoded in the notebook.
# NOTE(review): the key that used to be written on this line is in the
# notebook's history and should be revoked and replaced.
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable before running this notebook."
    )

# Client object used by every request cell below.
gt = GT(api_key)

In [10]:
# Fetch the raw graph response for the term "CORRUPTION" with the geo
# restriction "SB", and show it as the cell output.
corruption_graph = gt.get_graph("CORRUPTION", restrictions_geo="SB")
corruption_graph

{'lines': [{'term': 'CORRUPTION',
   'points': [{'value': 0, 'date': '2004-01-01'},
    {'value': 0, 'date': '2004-02-01'},
    {'value': 0, 'date': '2004-03-01'},
    {'value': 84, 'date': '2004-04-01'},
    {'value': 0, 'date': '2004-05-01'},
    {'value': 100, 'date': '2004-06-01'},
    {'value': 0, 'date': '2004-07-01'},
    {'value': 0, 'date': '2004-08-01'},
    {'value': 0, 'date': '2004-09-01'},
    {'value': 0, 'date': '2004-10-01'},
    {'value': 0, 'date': '2004-11-01'},
    {'value': 0, 'date': '2004-12-01'},
    {'value': 0, 'date': '2005-01-01'},
    {'value': 0, 'date': '2005-02-01'},
    {'value': 0, 'date': '2005-03-01'},
    {'value': 0, 'date': '2005-04-01'},
    {'value': 0, 'date': '2005-05-01'},
    {'value': 0, 'date': '2005-06-01'},
    {'value': 0, 'date': '2005-07-01'},
    {'value': 28, 'date': '2005-08-01'},
    {'value': 0, 'date': '2005-09-01'},
    {'value': 44, 'date': '2005-10-01'},
    {'value': 0, 'date': '2005-11-01'},
    {'value': 52, 'date': '2005

## Case-sensitivity testing

In [3]:
# Query parameters for the original (mixed-case) spelling of the term.
# NOTE(review): 'restrictions_startDate' is defined here but is never passed
# to get_graph below — confirm whether it should be sent or dropped.
upper_parms = {
    'terms': "Papua New Guinea",
    'restrictions_geo': "AU",
    'restrictions_startDate': "2004-01"
}

# Same parameters, with only the search term lower-cased.
lower_parms = upper_parms.copy()
lower_parms["terms"] = lower_parms["terms"].lower()

# Pass the geo restriction by keyword, matching every other get_graph call in
# this notebook, so the value cannot land on the wrong positional parameter.
upper = gt.get_graph(upper_parms["terms"],
                     restrictions_geo=upper_parms["restrictions_geo"])
lower = gt.get_graph(lower_parms["terms"],
                     restrictions_geo=lower_parms["restrictions_geo"])

# The check passes when both spellings return identical data points, i.e. the
# keyword is NOT case-sensitive; it fails with the message below otherwise.
assert upper["lines"][0]["points"] == lower["lines"][0]["points"], "Keyword is sensitive."

In [4]:
# Convert the graph response for the mixed-case query into a DataFrame and
# preview its first five rows (head() defaults to 5).
kw_png = gt.to_df(upper)
kw_png.head()

Unnamed: 0,value,date,term
0,44,2004-01-01,Papua New Guinea
1,91,2004-02-01,Papua New Guinea
2,68,2004-03-01,Papua New Guinea
3,56,2004-04-01,Papua New Guinea
4,63,2004-05-01,Papua New Guinea


## Downloading files

In [5]:
# Download Google Trends series for every country and write one CSV per
# country to <cwd>/data/tourism/trends/trends_<country>.csv.
#
# For each country the search terms are "<country> <secondary word>".
# Each CSV holds a monthly `date` column, one column per term and geo
# ("<term>-<geo>") from get_graph, and one column per term from
# get_health_trends.
countries = ["fiji", "vanuatu", "palau", "tonga", "samoa", "solomon islands"]
secondary = ["travel", "hotel", "flights"]
geos = ["AU", "NZ", "US", "JP", "HK", "NC"]

for country in countries:

    filename = os.path.join(
        os.getcwd(), "data", "tourism", "trends",
        "trends_" + str(country).replace(" ", "_") + ".csv")

    # Monthly date spine that every series is left-joined onto.
    df = pd.DataFrame()
    df["date"] = pd.date_range(
        start="2004-01-01", end="2022-10-01", freq="MS")

    # The terms depend only on the country, so build them once per country.
    search_term = [str(country) + " " + str(i) for i in secondary]

    # get_health_trends is called without a geo, so one request per country
    # is enough (previously it was repeated for every geo and only the last
    # response was used).
    trend_response = gt.get_health_trends(search_term)

    for geo in geos:
        graph_response = gt.get_graph(search_term, restrictions_geo=geo)

        # Best effort per geo: a response that cannot be converted or merged
        # is reported and skipped so the remaining geos are still collected.
        try:
            graph = gt.to_df(graph_response)
            # Suffix the term with the geo so columns from different geos
            # do not collide after pivoting.
            graph["term"] = graph["term"] + "-" + geo
            graph = pd.pivot(graph, index='date',
                             columns='term', values='value')
            df = pd.merge(df, graph, how="left", on="date")

        except Exception as err:
            print(f"{country}, {search_term}, geo={geo} raises an Error: {err!r}")

    # Handle the health-trends series on its own so that a failure here can
    # never fall back to a `trend` frame left over from a previous country.
    try:
        trend = gt.to_df(trend_response)
        trend = pd.pivot(trend, index='date',
                         columns='term', values='value')
        df = pd.merge(df, trend, how="left", on="date")

    except Exception as err:
        print(f"{country}, {search_term}, health trends raises an Error: {err!r}")

    df.to_csv(filename, encoding="utf-8")

In [16]:
from bokeh.plotting import figure, show, output_file
from bokeh.models import ColumnDataSource, Legend
from bokeh.palettes import Category20

# Interactive line chart of the last three columns of `df` (the frame left
# over from the download loop) against its `date` column.

# output_file('fiji_month_by_country.html')
p = figure(height=600, width=1000,
           title="Google Trend Index By Month",
           x_axis_type="datetime",
           x_axis_label="Date",
           y_axis_label="Google Trend Index",
           # "@date" reads the `date` column of the data source; "@$name"
           # reads the column whose name equals the hovered glyph's `name`.
           tooltips=[("Date", "@date{%F}"),
                     ("Search Index", "@$name")])
# Render the date field as a calendar date instead of a raw timestamp.
p.hover.formatters = {"@date": "datetime"}

# NOTE(review): assumes the last three columns are the series to plot — confirm.
cols = df.columns[-3:].tolist()
p.add_layout(Legend(), 'right')
source = ColumnDataSource(df)

for col, color in zip(cols, Category20[3]):
    # Each line's `name` is its column name so the "@$name" tooltip can look
    # up the matching column; all lines share the single data source.
    p.line('date', col, source=source, name=col, legend_label=col, color=color)

p.legend.label_text_font_size = '9pt'
p.legend.click_policy = "mute"
p.legend.location = "top_left"

show(p)