In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
import time

# Set up the Selenium WebDriver
driver = webdriver.Chrome()

# Navigate to the URL
url = "https://www.canada.ca/en/revenue-agency/services/charities-giving/other-organizations-that-issue-donation-receipts-qualified-donees/other-qualified-donees-listings/list-municipal-public-bodies-performing-a-function-government-canada-registered-qualified-donees.html#wb-auto-4"
driver.get(url)

# Prepare to store data
data = []

while True:
    # Locate the table using the preferred method
    table = driver.find_element(By.XPATH, "//table[@id='wb-auto-4']")

    # Extract table rows
    rows = table.find_elements(By.TAG_NAME, "tr")

    # Iterate through the rows and collect data
    for row in rows:
        cols = row.find_elements(By.TAG_NAME, "td")
        cols = [col.text.strip() for col in cols]
        if cols:  # Ensure that the row has data
            data.append(cols)

    # Check if there's a "Next" button and if it is enabled, then click it
    try:
        next_button = driver.find_element(By.ID, "wb-auto-4_next")
        # Check if the next button is not disabled
        if "disabled" not in next_button.get_attribute("class"):
            time.sleep(3)  # Wait for 3 seconds before clicking "Next"
            next_button.click()
        else:
            break  # "Next" button is disabled, exit the loop
    except NoSuchElementException:
        break  # No more "Next" button, exit loop

# Convert to a DataFrame
df = pd.DataFrame(data, columns=["Name", "Status", "Effective date", "City", "Province/Territory", "Notes"])

# Display the DataFrame
print(df)

# Optional: Save the DataFrame to a CSV file
df.to_csv("municipal_public_bodies.csv", index=False)

# Close the WebDriver
driver.quit()


                                  Name      Status Effective date  \
0              ?Akisq'nuk First Nation  Registered     2018-04-06   
1               ?Esdilagh First Nation  Registered     2017-11-17   
2             Aamjiwnaang First Nation  Registered     2012-01-01   
3                Abegweit First Nation  Registered     2012-01-01   
4                               Acadia  Registered     2012-12-18   
..                                 ...         ...            ...   
587              Yekooche First Nation  Registered     2015-12-19   
588                       Yellow Quill  Registered     2016-01-28   
589     Yellowknives Dene First Nation  Registered     2014-08-13   
590          York Factory First Nation  Registered     2016-02-02   
591  York Lake Regional Park Authority  Registered     2013-09-30   

              City     Province/Territory Notes  
0       Windermere       British Columbia        
1          Quesnel       British Columbia        
2           Sarnia   

In [23]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd
import time

# Set up the Selenium WebDriver
driver = webdriver.Chrome()

# Read the data from the previously saved DataFrame or CSV
df = pd.read_csv("municipal_public_bodies.csv")

# Prepare to store data
websites = []

for index, row in df.iterrows():
    name = row['Name']
    city = row['City']
    province = row['Province/Territory']
    
    # Create a more specific search term
    search_term = f'{name}'
    
    # Print the search term being used (Debug)
    print(f"Searching for: {search_term}")
    
    # Navigate to Google
    driver.get("https://www.google.com")
    
    # Find the search bar, enter the search term, and submit the search
    search_bar = driver.find_element(By.NAME, "q")
    search_bar.send_keys(search_term)
    search_bar.submit()
    
    # Wait for results to load
    time.sleep(3)
    
    # Try to find the first result's link
    try:
        # Locate the first search result link by searching for any <a> tag within the first result block
        first_result = driver.find_element(By.CSS_SELECTOR, 'div#search a')
        website = first_result.get_attribute("href")
        # Print the found website (Debug)
        print(f"Found URL: {website}")
    except Exception as e:
        website = "No website found"
        # Print the exception message (Debug)
        print(f"Error finding URL: {e}")
    
    websites.append(website)
    time.sleep(2)  # Add some delay to avoid being blocked by Google

# Add the websites to the DataFrame
df['Website'] = websites

# Save the updated DataFrame to a new CSV
df.to_csv("municipal_public_bodies_with_websites.csv", index=False)

# Close the WebDriver
driver.quit()


Searching for: ?Akisq'nuk First Nation
Found URL: https://akisqnuk.org/
Searching for: ?Esdilagh First Nation
Found URL: https://www.esdilagh.com/
Searching for: Aamjiwnaang First Nation
Found URL: https://www.aamjiwnaang.ca/
Searching for: Abegweit First Nation
Found URL: https://abegweit.ca/
Searching for: Acadia
Found URL: https://www2.acadiau.ca/
Searching for: Acho Dene Koe First Nation
Found URL: http://www.adkfirstnation.ca/
Searching for: Adams Lake
Found URL: https://bcparks.ca/adams-lake-park-bush-creek-site/
Searching for: Ahousaht
Found URL: https://www.ahousaht.ca/
Searching for: Albany
Found URL: https://en.wikipedia.org/wiki/Albany,_New_York
Searching for: Alderville First Nation
Found URL: https://alderville.ca/
Searching for: Alexander First Nation
Found URL: https://alexanderfn.com/
Searching for: Alexis Creek
Found URL: https://landwithoutlimits.com/places/chilcotin/alexis-creek/
Searching for: Alexis Nakota Sioux Nation
Found URL: https://www.ansn.ca/
Searching for:

In [1]:
import pandas as pd

# Load the CSV files
contact_info = pd.read_csv("contact_info_extracted.csv")
municipal_info = pd.read_csv("municipal_public_bodies_with_websites.csv")

# Perform a left merge to keep all data from contact_info and add corresponding city and province
merged_data = pd.merge(contact_info, municipal_info[['Name', 'City', 'Province/Territory']], on='Name', how='left')

# Save the updated DataFrame to the original CSV file
merged_data.to_csv("contact_info_extracted.csv", index=False)

# Display a preview of the updated DataFrame
merged_data.head()


Unnamed: 0,Name,Website,Email,Phone,City,Province/Territory
0,?Akisq'nuk First Nation,https://akisqnuk.org/,"info@akisqnuk.org, info@akisqnuk.org.",250-342-6301,Windermere,British Columbia
1,?Esdilagh First Nation,https://www.esdilagh.com/,"temp@esdilagh.ca, reception@esdilagh.ca","(250) 747-2928, (250) 747-2002, (250) 991-0589...",Quesnel,British Columbia
2,Aamjiwnaang First Nation,https://www.aamjiwnaang.ca/,,"519-491-0912, 519-336-8410, 519-491-1374, 519-...",Sarnia,Ontario
3,Abegweit First Nation,https://abegweit.ca/,"info@abegweit.ca, info@ABEGWEIT.CA","(902) 676-3206, (902) 676-2353",Scotchfort,Prince Edward Island
4,Acadia,https://www2.acadiau.ca/,"acadia4u@acadiau.ca, financial.aid@acadiau.ca,...","902-585-4636, 800-565-6568, 902-585-1103, 877-...",Yarmouth,Nova Scotia


In [3]:
import pandas as pd

# Load the CSV file
df = pd.read_csv('First nation GICBcontact_info_extracted.csv')

# Initialize an empty set to store unique emails
unique_emails = set()

# Iterate over each email cell in the 'Email' column
for emails in df['Email']:
    # Split the emails by commas and strip any extra whitespace
    email_list = [email.strip().lower() for email in str(emails).split(',')]
    # Add each email to the set of unique emails
    unique_emails.update(email_list)

# Convert the set to a sorted list
unique_emails_list = sorted(unique_emails)

# Create a DataFrame to store the unique emails
unique_emails_df = pd.DataFrame(unique_emails_list, columns=['Email'])

# Save the unique emails to a new CSV file
unique_emails_df.to_csv('unique_emails_extracted.csv', index=False)

# Display the first few unique emails
unique_emails_df.head()


Unnamed: 0,Email
0,1-604-858-4631info@soowahlie.ca
1,1-800-409-3978contact@wfl128.ca
2,1.2507546068info@cranberryfiredept.ca1555
3,211nb@findhelp.ca
4,311@toronto.ca


In [6]:
import pandas as pd

# Load the CSV files
subscriber_activity = pd.read_csv('gicb-subscriber_activity.csv')
contact_info = pd.read_csv('First nation GICBcontact_info_extracted.csv')

# Normalize email addresses to ensure consistent matching (case-insensitive)
subscriber_activity['Email'] = subscriber_activity['Email'].str.strip().str.lower()
contact_info['Email'] = contact_info['Email'].str.strip().str.lower()

# Split the email addresses in contact_info into individual emails
contact_info['Email'] = contact_info['Email'].str.split(',')

# Explode the list into individual rows for matching
contact_info = contact_info.explode('Email')
contact_info['Email'] = contact_info['Email'].str.strip()

# Match the email addresses
matching_emails = pd.merge(subscriber_activity, contact_info, on='Email', how='inner')

# Display the matched rows
print(matching_emails)

# Save the matched rows to a new CSV file if needed
matching_emails.to_csv('matched_emails.csv', index=False)


                                      Email  Opens  \
0                          brrp@sasktel.net      5   
1                  communications@qalipu.ca      3   
2             chn.skidegate@haidanation.com      3   
3                  hello@neaultmarketing.ca      2   
4                            311@toronto.ca      2   
5                  caretaker@vaucroftid.com      2   
6          bengoughregionalpark@sasktel.net      2   
7        booking@suffernlakeregionalpark.ca      2   
8                         211nb@findhelp.ca      2   
9               communications@membertou.ca      2   
10                              info@krg.ca      2   
11  georgina.improvement.district@gmail.com      2   
12            joseph.waswa@eabametoongfn.ca      1   
13             kay.ostamas@eabametoongfn.ca      1   
14                 kindersleyrp@sasktel.net      1   
15                macklinlakerp@sasktel.net      1   
16             mclarenlake.office@gmail.com      1   
17                      nain