In [1]:
# Fetch data from Google Analytics API
from datetime import date
from google_analytics_module.services.google_analytics_service import GoogleAnalyticsService

# Reporting window: 1 July 2023 through 30 June 2024.
start_date = date(year=2023, month=7, day=1)
end_date = date(year=2024, month=6, day=30)

# Google Analytics dataset id for Burnside.
dataset_id = "0QK91R12"

# Fetch the number of sessions per landing page for that dataset and date range.
google_analytics_service = GoogleAnalyticsService()
landing_page_df = google_analytics_service.get_sessions_by_landing_page_as_df(
    dataset_id,
    start_date,
    end_date,
)
landing_page_df

Unnamed: 0,customEvent:DatasetID,landingPage,sessions
0,0QK91R12,/org/202703-Drug_and_Alcohol_Services_SA_-_Wit...,483
1,0QK91R12,/org/236722-Justices_of_the_Peace_-_Burnside,411
2,0QK91R12,/org/201829-Cats_Assistance_To_Sterilise_Inc.,305
3,0QK91R12,/,267
4,0QK91R12,/org/201612-Dementia_Australia,261
...,...,...,...
1205,0QK91R12,/thesaurus/15708-Youth_Advisory_Committees,1
1206,0QK91R12,/user/21035/edit,1
1207,0QK91R12,/user/login,1
1208,0QK91R12,/volunteers%2C%20volunteering,1


In [2]:
# Only landing pages whose path starts with "/org/" are valid organisation pages;
# everything else (home page, thesaurus, user pages, ...) is dropped.
# na=False makes a missing landingPage count as "not an /org/ page"; without it the
# mask would contain NaN and the boolean row selection would raise.
landing_page_df = landing_page_df[
    landing_page_df['landingPage'].str.startswith("/org/", na=False)
]
landing_page_df

Unnamed: 0,customEvent:DatasetID,landingPage,sessions
0,0QK91R12,/org/202703-Drug_and_Alcohol_Services_SA_-_Wit...,483
1,0QK91R12,/org/236722-Justices_of_the_Peace_-_Burnside,411
2,0QK91R12,/org/201829-Cats_Assistance_To_Sterilise_Inc.,305
4,0QK91R12,/org/201612-Dementia_Australia,261
5,0QK91R12,/org/196321-South_Australia_Police_Pistol_Club,244
...,...,...,...
1109,0QK91R12,/org/238298-Women's_Information_Service_-_Goolwa,1
1110,0QK91R12,/org/238313-Glenunga_Cricket_Club,1
1111,0QK91R12,/org/238562-The_Lights_Community_and_Sports_Ce...,1
1112,0QK91R12,/org/238596-Cowandilla_Community_Room,1


In [3]:
# Sanity check: list the distinct dataset ids present in the fetched rows.
landing_page_df.loc[:, 'customEvent:DatasetID'].unique()

array(['0QK91R12'], dtype=object)

In [4]:
# Save the landing-page rows to CSV; index=False keeps the row index out of the file.
landing_page_df.to_csv(
    path_or_buf="./data/burnside_landing_page.csv",
    index=False,
)

In [5]:
# Check whether any row is tagged with dataset id "08CNPP17"
# (presumably another council's dataset - confirm); an empty frame means none are.
landing_page_df.loc[
    landing_page_df['customEvent:DatasetID'].eq("08CNPP17")
]

Unnamed: 0,customEvent:DatasetID,landingPage,sessions


In [6]:
import pandas as pd
# Load the export CSV (presumably the SAcommunity council export - confirm).
# Change the path below if your export file has a different name.
cu_export_df = pd.read_csv(filepath_or_buffer="./data/cu_export.csv")

In [7]:
# Preview the first five rows of the export to check its columns.
cu_export_df.head(n=5)

Unnamed: 0,ID_19,Org_name,Street_Address_Line_1,Street_Address_Line_2,Suburb,State,Postal_Code,Country,Postal_Address_Line_1,Postal_Address_Line_2,...,Organisation_Last_updated,IM_Screen_Name_1,IM_Screen_Name_2,IM_Screen_Name_3,IM_Screen_Name_4,IM_Screen_Name_5,IM_Screen_Name_6,Organisati_Council,Organisati_Electorate_State_,Organisati_Electorate_Federal_
0,194023,Motorcycling South Australia Inc.,"Motorcycling South Australia Inc., 251 The Pde",,Beulah Park,South Australia,5067.0,Australia,,,...,2022-11-16 11:54:08,https://www.facebook.com/motorcyclingsa/ 7,,,,,,City of Burnside,Dunstan,Sturt
1,194025,Orienteering SA,,,Glenside,South Australia,5065.0,Australia,c/o 5/355 Angas St,,...,2022-07-24 15:14:31,https://www.facebook.com/OrienteeringSA 7,https://www.youtube.com/channel/UCSrZVpB1et3Jo...,,,,,City of Burnside,Bragg,Sturt
2,194813,Burnside Library & Information Service,401 Greenhill Rd,,Tusmore,South Australia,5065.0,Australia,PO Box 9,,...,2022-12-07 10:38:10,BurnsideLibrary 7,BurnsideLibrary 4,burnsidelibrary 1,,,,City of Burnside,Bragg,Sturt
3,196167,Burnside Family Church,88 Lockwood Rd,,Burnside,South Australia,5066.0,Australia,,,...,2023-01-25 13:53:04,burnsidefamilychurch 7,BFamilyChurch 4,,,,,City of Burnside,Bragg,Sturt
4,196171,Linden Park Primary School and OSHC/VAC,"Linden Park Primary School, 14 Hay Rd",,Linden Park,South Australia,5065.0,Australia,,,...,2023-01-25 14:06:43,https://www.facebook.com/groups/3219860600/abo...,,,,,,City of Burnside,Bragg,Sturt


In [8]:
from data_transform.clean_landing_page import CleanLandingPage

# Clean the landing-page data using the export frame as the second input.
# The result carries an `is_record_available_in_sacommunity_db` flag per row,
# which the later cells filter on.
processed_data_df = CleanLandingPage().process_data(
    landing_page_df,
    cu_export_df,
)
processed_data_df

Unnamed: 0,org_id,landing_page,sessions_count,organization_name_sa_community,organization_name_google,is_record_available_in_sacommunity_db,customEvent:DatasetID,landingPage,sessions,organization_id_name,organization_id,organization_name
0,202703,https://sacommunity.org/org/202703-Drug_and_Al...,483,Drug and Alcohol Services SA - Withdrawal Serv...,Drug and Alcohol Services SA - Withdrawal Serv...,True,0QK91R12,/org/202703-Drug_and_Alcohol_Services_SA_-_Wit...,483,202703-Drug and Alcohol Services SA - Withdraw...,202703,Drug and Alcohol Services SA - Withdrawal Serv...
1,236722,https://sacommunity.org/org/236722-Justices_of...,411,Justices of the Peace - Burnside,Justices of the Peace - Burnside,True,0QK91R12,/org/236722-Justices_of_the_Peace_-_Burnside,411,236722-Justices of the Peace - Burnside,236722,Justices of the Peace - Burnside
2,201829,https://sacommunity.org/org/201829-Cats_Assist...,305,Cats Assistance To Sterilise Inc.,Cats Assistance To Sterilise Inc.,True,0QK91R12,/org/201829-Cats_Assistance_To_Sterilise_Inc.,305,201829-Cats Assistance To Sterilise Inc.,201829,Cats Assistance To Sterilise Inc.
3,201612,https://sacommunity.org/org/201612-Dementia_Au...,261,Dementia Australia,Dementia Australia,True,0QK91R12,/org/201612-Dementia_Australia,261,201612-Dementia Australia,201612,Dementia Australia
4,196321,https://sacommunity.org/org/196321-South_Austr...,244,South Australia Police Pistol Club,South Australia Police Pistol Club,True,0QK91R12,/org/196321-South_Australia_Police_Pistol_Club,244,196321-South Australia Police Pistol Club,196321,South Australia Police Pistol Club
...,...,...,...,...,...,...,...,...,...,...,...,...
875,238298,https://sacommunity.org/org/238298-Women's_Inf...,1,,Women's Information Service - Goolwa,False,0QK91R12,/org/238298-Women's_Information_Service_-_Goolwa,1,238298-Women's Information Service - Goolwa,238298,Women's Information Service - Goolwa
876,238313,https://sacommunity.org/org/238313-Glenunga_Cr...,1,Glenunga Cricket Club,Glenunga Cricket Club,True,0QK91R12,/org/238313-Glenunga_Cricket_Club,1,238313-Glenunga Cricket Club,238313,Glenunga Cricket Club
877,238562,https://sacommunity.org/org/238562-The_Lights_...,1,,The Lights Community and Sports Centre,False,0QK91R12,/org/238562-The_Lights_Community_and_Sports_Ce...,1,238562-The Lights Community and Sports Centre,238562,The Lights Community and Sports Centre
878,238596,https://sacommunity.org/org/238596-Cowandilla_...,1,,Cowandilla Community Room,False,0QK91R12,/org/238596-Cowandilla_Community_Room,1,238596-Cowandilla Community Room,238596,Cowandilla Community Room


In [9]:
# CleanLandingPage is already imported by the earlier "Clean data" cell, so the
# duplicate import that used to sit here has been dropped.
# The returned frame keeps the landing-page columns and adds
# organization_id_name / organization_id / organization_name.
df = CleanLandingPage().get_sessions_by_organization(landing_page_df)
df

Unnamed: 0,customEvent:DatasetID,landingPage,sessions,organization_id_name,organization_id,organization_name
0,0QK91R12,/org/202703-Drug_and_Alcohol_Services_SA_-_Wit...,483,202703-Drug and Alcohol Services SA - Withdraw...,202703,Drug and Alcohol Services SA - Withdrawal Serv...
1,0QK91R12,/org/236722-Justices_of_the_Peace_-_Burnside,411,236722-Justices of the Peace - Burnside,236722,Justices of the Peace - Burnside
2,0QK91R12,/org/201829-Cats_Assistance_To_Sterilise_Inc.,305,201829-Cats Assistance To Sterilise Inc.,201829,Cats Assistance To Sterilise Inc.
4,0QK91R12,/org/201612-Dementia_Australia,261,201612-Dementia Australia,201612,Dementia Australia
5,0QK91R12,/org/196321-South_Australia_Police_Pistol_Club,244,196321-South Australia Police Pistol Club,196321,South Australia Police Pistol Club
...,...,...,...,...,...,...
1109,0QK91R12,/org/238298-Women's_Information_Service_-_Goolwa,1,238298-Women's Information Service - Goolwa,238298,Women's Information Service - Goolwa
1110,0QK91R12,/org/238313-Glenunga_Cricket_Club,1,238313-Glenunga Cricket Club,238313,Glenunga Cricket Club
1111,0QK91R12,/org/238562-The_Lights_Community_and_Sports_Ce...,1,238562-The Lights Community and Sports Centre,238562,The Lights Community and Sports Centre
1112,0QK91R12,/org/238596-Cowandilla_Community_Room,1,238596-Cowandilla Community Room,238596,Cowandilla Community Room


In [10]:
# Number of distinct organisation ids among the landing pages.
# dropna=False counts a missing id as its own value, matching len(unique()).
df['organization_id'].nunique(dropna=False)

835

In [11]:
# Problem records: present in Google Analytics but missing from the SAcommunity
# council-based export.
# Check these manually to find out why they are not in the SAcommunity DB.
# One possible reason is that the SAcommunity record is invalid, e.g. its council
# name is wrong.
# TODO: automate looking up the exact council name (planned via Selenium).
data_available_in_google_analytics_but_not_in_sacommunity_df = processed_data_df.loc[
    processed_data_df["is_record_available_in_sacommunity_db"].eq(False)
]
data_available_in_google_analytics_but_not_in_sacommunity_df

Unnamed: 0,org_id,landing_page,sessions_count,organization_name_sa_community,organization_name_google,is_record_available_in_sacommunity_db,customEvent:DatasetID,landingPage,sessions,organization_id_name,organization_id,organization_name
55,208832,https://sacommunity.org/org/208832-Burnside_Yo...,38,,Burnside Youth Club,False,0QK91R12,/org/208832-Burnside_Youth_Club,38,208832-Burnside Youth Club,208832,Burnside Youth Club
77,196519,https://sacommunity.org/org/196519-Sturt_Badmi...,27,,Sturt Badminton Club Inc.,False,0QK91R12,/org/196519-Sturt_Badminton_Club_Inc.,27,196519-Sturt Badminton Club Inc.,196519,Sturt Badminton Club Inc.
89,196343,https://sacommunity.org/org/196343-Gospel_Pres...,24,,Gospel Presbyterian Church,False,0QK91R12,/org/196343-Gospel_Presbyterian_Church,24,196343-Gospel Presbyterian Church,196343,Gospel Presbyterian Church
90,202477,https://sacommunity.org/org/202477-Toastmaster...,24,,Toastmasters International - SA,False,0QK91R12,/org/202477-Toastmasters_International_-_SA,24,202477-Toastmasters International - SA,202477,Toastmasters International - SA
111,214815,https://sacommunity.org/org/214815-Sikh_Societ...,19,,Sikh Society of SA,False,0QK91R12,/org/214815-Sikh_Society_of_SA,19,214815-Sikh Society of SA,214815,Sikh Society of SA
...,...,...,...,...,...,...,...,...,...,...,...,...
874,238293,https://sacommunity.org/org/238293-North_Adela...,1,,North Adelaide Baroque Hall,False,0QK91R12,/org/238293-North_Adelaide_Baroque_Hall,1,238293-North Adelaide Baroque Hall,238293,North Adelaide Baroque Hall
875,238298,https://sacommunity.org/org/238298-Women's_Inf...,1,,Women's Information Service - Goolwa,False,0QK91R12,/org/238298-Women's_Information_Service_-_Goolwa,1,238298-Women's Information Service - Goolwa,238298,Women's Information Service - Goolwa
877,238562,https://sacommunity.org/org/238562-The_Lights_...,1,,The Lights Community and Sports Centre,False,0QK91R12,/org/238562-The_Lights_Community_and_Sports_Ce...,1,238562-The Lights Community and Sports Centre,238562,The Lights Community and Sports Centre
878,238596,https://sacommunity.org/org/238596-Cowandilla_...,1,,Cowandilla Community Room,False,0QK91R12,/org/238596-Cowandilla_Community_Room,1,238596-Cowandilla Community Room,238596,Cowandilla Community Room


In [12]:
# (rows, columns) of the frame holding records not found in the SAcommunity export.
data_available_in_google_analytics_but_not_in_sacommunity_df.shape

(633, 12)

In [13]:
# Number of landing pages that need a manual check. len() works directly on the
# Series, so no intermediate Python list is built.
len(data_available_in_google_analytics_but_not_in_sacommunity_df['landing_page'])

633

In [14]:
# Full list of landing-page URLs to review by hand.
data_available_in_google_analytics_but_not_in_sacommunity_df.loc[:, 'landing_page'].to_list()

['https://sacommunity.org/org/208832-Burnside_Youth_Club',
 'https://sacommunity.org/org/196519-Sturt_Badminton_Club_Inc.',
 'https://sacommunity.org/org/196343-Gospel_Presbyterian_Church',
 'https://sacommunity.org/org/202477-Toastmasters_International_-_SA',
 'https://sacommunity.org/org/214815-Sikh_Society_of_SA',
 'https://sacommunity.org/org/202004-Northern_Adelaide_Palliative_Care_Service',
 'https://sacommunity.org/org/201493-Drug_%26_Alcohol_Services_SA',
 'https://sacommunity.org/org/202693-Eastern_Community_Mental_Health_Service',
 'https://sacommunity.org/org/231857-Adelaide_Chinese_School',
 'https://sacommunity.org/org/197981-Probus_Club_-_Unley',
 'https://sacommunity.org/org/203167-SYP_Community_Housing_Association_Inc.',
 'https://sacommunity.org/org/236750-Justices_of_the_Peace_-_Adelaide',
 'https://sacommunity.org/org/206194-Southern_Mental_Health_Services',
 'https://sacommunity.org/org/231068-South_Australian_Tamil_School',
 'https://sacommunity.org/org/197187-Adul