In [14]:
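# Make sure the 'keywords.csv' file is in the same directory as this notebook before running it.
# It must contain the columns: Keywords, Start_date, End_date, Country and Province.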

In [15]:
!pip install pytrends 



In [16]:
import pandas as pd
from pytrends.request import TrendReq
from pytrends import dailydata
import re
import warnings

# Silence every warning category for the rest of the session (keeps cell output compact).
warnings.filterwarnings(action="ignore")

# Shared Google Trends client used by the scraping cells below:
# tz is passed as 300 and hl (host language) as US English.
pytrends = TrendReq(tz=300, hl='en-US')

In [17]:
# Load the run configuration from the input file (first line is the header).
kw_df = pd.read_csv('keywords.csv', header=0)

# Every row contributes one search keyword.
kw_list = kw_df['Keywords'].to_list()

# The date range and geography are taken from the first row only.
# Timeframe is passed to pytrends as "<start> <end>".
tf = ' '.join([kw_df.loc[0, 'Start_date'], kw_df.loc[0, 'End_date']])
country = kw_df.loc[0, 'Country']
province = kw_df.loc[0, 'Province']

In [18]:
# Country-level interest over time, one request per keyword.
# Keywords are queried one at a time because a payload with more than 5 keywords returns an error.
# As a consequence every column is scaled to its own 0-100 range, so values are
# comparable within a keyword but not across keywords.
dataset = []

for kw in kw_list:
    pytrends.build_payload([kw], geo=country, timeframe=tf)
    data = pytrends.interest_over_time() # Scrape the data
    if data.empty:
        # pytrends hands back an empty frame (without an 'isPartial' column) when there is
        # no data for the keyword; skip it instead of failing with a KeyError below.
        # The keyword will then be missing from the resulting table.
        print(f"No data returned for '{kw}'; skipping it")
        continue
    # 'isPartial' is a bool column in recent pytrends releases and the strings 'True'/'False'
    # in older ones; comparing the string form handles both.
    is_partial = data['isPartial'].astype(str) == 'True'
    # Keep only complete rows and drop the helper column, leaving just the keyword column.
    dataset.append(data.loc[~is_partial].drop(columns='isPartial'))

# Align the per-keyword frames on their shared date index, one column per keyword.
CA_time_trends = pd.concat(dataset, axis=1)
CA_time_trends

Unnamed: 0_level_0,covid,coronavirus,cough,headache,tired,covid19
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-10-01,72,71,71,68,54,60
2020-10-02,100,100,63,85,69,100
2020-10-03,78,88,90,68,88,83
2020-10-04,69,83,82,77,73,62
2020-10-05,89,81,100,100,41,57
2020-10-06,80,76,86,75,79,69
2020-10-07,71,82,77,81,67,48
2020-10-08,71,70,90,73,68,54
2020-10-09,85,78,72,71,58,61
2020-10-10,70,78,66,76,70,56


In [19]:
# Write the country-level interest-over-time table to 'CA_time_trends.csv' in the current
# working directory; the date index is written as the first column (to_csv default).
CA_time_trends.to_csv('CA_time_trends.csv') # Export the data

In [20]:
# Interest broken down by region inside `country`, one request per keyword
# (a single payload with more than 5 keywords returns an error).
dataset = []

for kw in kw_list:
    pytrends.build_payload(kw_list=[kw], timeframe=tf, geo=country)
    # Region-level breakdown, including low-volume regions, without geo codes.
    dataset.append(
        pytrends.interest_by_region(resolution='REGION', inc_low_vol=True, inc_geo_code=False)
    )

# One row per region, one column per keyword.
province_trends = pd.concat(dataset, axis='columns')
province_trends

Unnamed: 0_level_0,covid,coronavirus,cough,headache,tired,covid19
geoName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Alberta,48,49,43,33,30,47
British Columbia,43,53,51,31,32,50
Manitoba,100,98,45,33,32,96
New Brunswick,77,100,41,30,33,100
Newfoundland and Labrador,36,40,36,27,42,40
Northwest Territories,41,37,90,100,100,0
Nova Scotia,46,58,55,31,30,55
Nunavut,34,14,100,55,80,0
Ontario,55,68,49,31,29,69
Prince Edward Island,46,46,95,40,31,40


In [21]:
# Write the per-region table to 'province_trends.csv' in the current working directory;
# the region-name index is written as the first column (to_csv default).
province_trends.to_csv('province_trends.csv') # Export the data

In [22]:
# Province-level interest over time, one request per keyword.
# Keywords are queried one at a time because a payload with more than 5 keywords returns an error,
# so every column is scaled to its own 0-100 range.
# NOTE: the result is named ON_time_trends, but the province actually queried is whatever
# the 'Province' column of keywords.csv contains.
dataset = []
province_geo = country + '-' + province  # "<country>-<province>" geo string, built once for all requests

for kw in kw_list:
    pytrends.build_payload([kw], geo=province_geo, timeframe=tf)
    data = pytrends.interest_over_time() # Scrape the data
    if data.empty:
        # pytrends hands back an empty frame (without an 'isPartial' column) when there is
        # no data for the keyword; skip it instead of failing with a KeyError below.
        # The keyword will then be missing from the resulting table.
        print(f"No data returned for '{kw}' in {province_geo}; skipping it")
        continue
    # 'isPartial' is a bool column in recent pytrends releases and the strings 'True'/'False'
    # in older ones; comparing the string form handles both.
    is_partial = data['isPartial'].astype(str) == 'True'
    # Keep only complete rows and drop the helper column, leaving just the keyword column.
    dataset.append(data.loc[~is_partial].drop(columns='isPartial'))

# Align the per-keyword frames on their shared date index, one column per keyword.
ON_time_trends = pd.concat(dataset, axis=1)
ON_time_trends

Unnamed: 0_level_0,covid,coronavirus,cough,headache,tired,covid19
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-10-01,73,71,81,82,56,60
2020-10-02,100,100,68,84,69,90
2020-10-03,81,91,100,76,76,81
2020-10-04,72,82,88,69,61,77
2020-10-05,84,76,83,86,47,71
2020-10-06,77,76,85,100,75,82
2020-10-07,69,74,68,93,69,65
2020-10-08,67,60,60,71,63,53
2020-10-09,90,81,87,73,54,100
2020-10-10,70,79,52,63,57,79


In [23]:
# Write the province-level interest-over-time table to 'ON_time_trends.csv' in the current
# working directory; the date index is written as the first column (to_csv default).
ON_time_trends.to_csv('ON_time_trends.csv') # Export the data

In [24]:
# pytrends has a bug in scraping city-level data that has not been fixed yet, so I suggest we download this
# part of the data manually unless there is a better way to automate it.

In [25]:
# Scrape country-level keyword search trends at daily resolution.
dataset = []
# Split the 'YYYY-MM-DD'-style dates on non-word characters into integer parts: [year, month, day].
start = list(map(int, re.split(r'\W', kw_df['Start_date'][0])))
end = list(map(int, re.split(r'\W', kw_df['End_date'][0])))

for kw in kw_list:
    # Only the year and month are passed on, so whole calendar months are fetched
    # regardless of the day part of Start_date / End_date.
    data = dailydata.get_daily_data(kw, start_year=start[0], start_mon=start[1], stop_year=end[0], stop_mon=end[1], geo=country, verbose=True, wait_time=5.0)
    # 'isPartial' may hold bools, the strings 'True'/'False' or NaN depending on the pytrends
    # release; comparing the string form flags partial rows in every case.
    is_partial = data['isPartial'].astype(str) == 'True'
    # Keep only the column named after the keyword, restricted to complete rows.
    dataset.append(data.loc[~is_partial, kw])

# One column per keyword, aligned on the daily date index.
CA_daily_trends = pd.concat(dataset, axis=1)
CA_daily_trends

covid:2020-10-01 2020-10-31
covid:2020-11-01 2020-11-30
coronavirus:2020-10-01 2020-10-31
coronavirus:2020-11-01 2020-11-30
cough:2020-10-01 2020-10-31
cough:2020-11-01 2020-11-30
headache:2020-10-01 2020-10-31
headache:2020-11-01 2020-11-30
tired:2020-10-01 2020-10-31
tired:2020-11-01 2020-11-30
covid19:2020-10-01 2020-10-31
covid19:2020-11-01 2020-11-30


Unnamed: 0_level_0,covid,coronavirus,cough,headache,tired,covid19
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-10-01,51.83,47.6,48.0,67.76,37.63,40.26
2020-10-02,100.0,100.0,80.51,75.69,43.52,100.0
2020-10-03,60.84,79.17,60.72,66.99,66.22,60.68
2020-10-04,50.41,68.06,99.0,90.09,37.8,54.6
2020-10-05,81.0,61.56,89.0,65.12,40.96,42.7
2020-10-06,68.06,61.62,83.0,73.8,61.5,47.88
2020-10-07,54.02,60.83,60.72,82.0,81.78,35.4
2020-10-08,51.84,48.3,71.44,85.0,65.25,40.71
2020-10-09,73.96,60.83,62.37,67.08,48.64,53.6
2020-10-10,48.3,61.62,61.62,69.7,53.9,38.43


In [27]:
# Write the daily country-level table to 'CA_daily_trends.csv' in the current working
# directory; the date index is written as the first column (to_csv default).
CA_daily_trends.to_csv('CA_daily_trends.csv') # export the data