In [1]:
# Notebook setup: imports, working directory, environment variables and
# Bokeh output mode.
import os
# Move the working directory two levels up so that `src` is importable and the
# relative "outputs/..." path built in a later cell resolves (presumably the
# repository root). NOTE: the path is relative, so re-running this cell moves
# up two more levels; restart the kernel before re-running.
os.chdir("../../")
import pandas as pd
import matplotlib.pyplot as plt

from src.google_trends import GT
from dotenv import load_dotenv
# Load variables from a .env file into the process environment; the API key is
# read from the environment with os.getenv in a later cell.
load_dotenv()

from bokeh.plotting import figure, show, output_notebook, output_file
from bokeh.models import CustomJS, Select, ColumnDataSource, HoverTool, BoxZoomTool, ResetTool, Legend, WheelZoomTool
from bokeh.palettes import Category20
# Render Bokeh figures inline in the notebook.
output_notebook()

import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation notices from pandas/bokeh.
warnings.filterwarnings("ignore")

In [2]:
# Directory that receives the CSV and HTML files written by later cells.
saved_path = os.getcwd() + "/outputs/text/solomon_islands/trends/"
# os.makedirs also creates any missing parent folders (os.mkdir would raise
# FileNotFoundError on a fresh checkout) and exist_ok=True makes the cell safe
# to re-run when the directory is already there.
os.makedirs(saved_path, exist_ok=True)

In [3]:
# API key for the Google Trends client, read from the process environment
# (None when the "GoogleAPIkey" variable is not set).
GoogleAPIkey = os.environ.get("GoogleAPIkey")

In [4]:
# Build the project's Google Trends client (GT from src.google_trends) with the
# API key read from the environment.
gt = GT(GoogleAPIkey)

In [5]:
# Query one series per economic term and combine them into a single wide table
# keyed on "date", with one column per term.
trends_df = pd.DataFrame()
for term in ("economy", "inflation", "unemployment"):
    response = gt.get_health_trends(f"solomon islands {term}")
    # Drop the constant "term" column and name the value column after the term.
    term_df = (
        gt.to_df(response)
        .drop(columns="term")
        .rename(columns={"value": term})
    )
    # The first non-empty result seeds the table; later results are left-joined
    # onto it, so only dates present in the seed are kept.
    if not trends_df.empty:
        trends_df = trends_df.merge(term_df, how="left", on="date")
    else:
        trends_df = term_df

In [6]:
# Broad economic / business keywords. Not referenced by the other cells visible
# in this notebook excerpt.
economic_and_business_keywords = [
    "GDP",
    "Inflation",
    "Unemployment",
    "Job",
    "Development Project",
    "Tax",
    "Trade",
    "CPI",
    "Food",
    "Investment",
]

# Job- and labour-market search terms; each one is queried individually and
# becomes one column of the local trends table.
job_lst = [
    "Job market trends",
    "Job opportunities",
    "Unemployment rate",
    "Workforce",
    "Labor market",
    "Job vacancies",
    "Employment statistics",
    "Hiring trends",
    "Career prospects",
    "Workforce development",
    "Job seekers",
    "Job search",
    "Salary trends",
    "Economic growth",
    "Youth employment",
    "Job creation",
    "Remote work",
    "Part-time jobs",
    "Full-time jobs",
    "Job applications",
    "Recruitment agencies",
    "Skills in demand",
    "Job fairs",
    "Job postings",
    "Labor force participation",
    "Employment opportunities",
    "Job satisfaction",
    "Job market analysis",
    "Economic development",
    "Wages and salaries",
    "Income levels",
    "Workforce training",
    "Job stability",
    "Economic indicators",
    "Job losses",
    "Job retention",
    "Job interviews",
    "Employment trends",
    "Job growth",
    "Labor force",
    "Occupational outlook",
    "Economy",
    "Workplace dynamics",
    "Remote job opportunities",
    "Labor laws",
    "Job market research",
    "Labor market conditions",
    "Employment data",
    "Job outlook",
    "Job security",
]

In [7]:
# Fetch one series per job-related term through gt.get_graph and join them on
# "date" into a wide table with one column per term.
# NOTE(review): "SB" is presumably the region code for Solomon Islands and
# "2011-01" the start month; confirm against GT.get_graph.
local_trends_df = pd.DataFrame()
for term in job_lst:
    graph_response = gt.get_graph(term, "SB", "2011-01")
    # Drop the constant "term" column and name the value column after the term.
    term_series_df = (
        gt.to_df(graph_response)
        .drop(columns="term")
        .rename(columns={"value": term})
    )
    # The first non-empty result seeds the table; later results are left-joined
    # onto it, so only dates present in the seed are kept.
    if not local_trends_df.empty:
        local_trends_df = local_trends_df.merge(term_series_df, how="left", on="date")
    else:
        local_trends_df = term_series_df

In [11]:
# Normalise the column names to lower snake_case, put "date" first and persist
# the table as CSV in the output directory.
local_trends_df.columns = [
    name.lower().replace(" ", "_") for name in local_trends_df.columns
]
# Every column except "date" holds the series for one search term.
words_cols = [name for name in local_trends_df.columns if name != "date"]
cols = ["date", *words_cols]
local_trends_df = local_trends_df.loc[:, cols]
# to_csv keeps its default of writing the frame's index as the first column.
local_trends_df.to_csv(saved_path + "job_trends.csv", encoding="utf-8")

In [24]:
from sklearn.decomposition import PCA

# Reduce the per-term trend columns to four principal components and display
# the projected coordinates, one row per row of the trends table.
term_matrix = local_trends_df[words_cols]
pca = PCA(n_components=4).fit(term_matrix)
pd.DataFrame(pca.transform(term_matrix))

Unnamed: 0,0,1,2,3
0,-25.975439,96.837368,87.629039,-1.465718
1,-11.857347,18.170182,-48.573158,24.988605
2,-25.276424,-2.773079,-10.953575,-11.813908
3,-29.615741,18.286160,35.182447,-27.756538
4,-31.165497,25.807317,51.659597,-33.450334
...,...,...,...,...
148,27.301444,14.476098,8.752807,0.783754
149,31.678415,-5.236736,13.129452,-1.659055
150,10.375251,-0.446975,-2.275374,-11.927344
151,12.045646,-1.902645,-0.438088,-1.104123


In [25]:
# Share of the total variance captured by each of the four fitted components.
pca.explained_variance_ratio_

array([0.384866  , 0.17667849, 0.1346697 , 0.12007433])

In [7]:
# Interactive line chart of the job-related search terms (searches made in
# Solomon Islands), written to a standalone HTML file and shown in the notebook.
output_file(filename=saved_path + "solomon_islands_local_trends.html",
            title="Solomon Islands Google Trends")

# '@$name' resolves to the column named by the hovered line's `name`, so a
# single tooltip definition serves every series.
hover = HoverTool(tooltips=[('Date', '@date{%Y-%m-%d}'), ('Number', '@$name')],
                  formatters={'@date': 'datetime'})
p = figure(
    height=400,
    width=700,
    title="Google Trend Index By Month (search conducted in Solomon Islands)",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="Google Trend Index",
    tools=[hover, BoxZoomTool(),
           ResetTool(), WheelZoomTool()])

cols = [col for col in local_trends_df.columns if col != "date"]
p.add_layout(Legend(), 'right')
# Build the shared data source from the frame that is actually plotted. The
# original built it from `trends_df` (the other table) and never used it.
source = ColumnDataSource(local_trends_df)

# Cycle through the 20-colour palette so every term gets a line. Zipping
# against a 3-colour palette silently dropped all but the first three columns.
palette = Category20[20]
for idx, col in enumerate(cols):
    # have to use different colnames for y-coords so tooltip can refer to @$name
    p.line('date',
           col,
           source=source,
           name=col,
           legend_label=col,
           color=palette[idx % len(palette)])

p.legend.label_text_font_size = '9pt'
# Clicking a legend entry mutes that line instead of hiding it.
p.legend.click_policy = "mute"
p.legend.location = "top_left"

show(p)

In [8]:
# Interactive line chart of the economy / inflation / unemployment series,
# written to a standalone HTML file and shown in the notebook.
output_file(filename=saved_path + "solomon_islands_econ_trends.html",
            title="Solomon Islands Google Trends")

# '@$name' resolves to the column named by the hovered line's `name`, so a
# single tooltip definition serves every series.
hover = HoverTool(tooltips=[('Date', '@date{%Y-%m-%d}'),
                            ('Number', '@$name{.3f}')],
                  formatters={'@date': 'datetime'})
p = figure(height=400,
           width=700,
           title="Google Trend Index By Month",
           x_axis_type="datetime",
           x_axis_label="Date",
           y_axis_label="Google Trend Index",
           tools=[hover, BoxZoomTool(), ResetTool()])

cols = [col for col in trends_df.columns if col != "date"]
p.add_layout(Legend(), 'right')
# One shared data source for all lines. The original built this object but
# then passed the raw DataFrame to every p.line call, leaving it unused.
source = ColumnDataSource(trends_df)

# Index into the 20-colour palette with wrap-around so no series is silently
# dropped if the list of terms grows beyond three.
palette = Category20[20]
for idx, col in enumerate(cols):
    # have to use different colnames for y-coords so tooltip can refer to @$name
    p.line('date',
           col,
           source=source,
           name=col,
           legend_label=col,
           color=palette[idx % len(palette)])

p.legend.label_text_font_size = '9pt'
# Clicking a legend entry mutes that line instead of hiding it.
p.legend.click_policy = "mute"
p.legend.location = "top_left"

show(p)