In [3]:
# Import selenium and its dependencies
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
from bs4 import BeautifulSoup

import time
# import pandas 
import pandas as pd

In [14]:
# Create an empty DataFrame, bound to the module-level name `df`, to hold the scraped data
df = pd.DataFrame()

In [25]:
def get_cushmanwakefield_data(driver, page_delay=1):
    """Scrape the Cushman & Wakefield lease search (Retail / Canada / ON filter).

    Loads the filtered search URL, reads every result card on the current
    page, then clicks the "Next" link and repeats until locating or clicking
    that link raises.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session used to load and page through the results.
    page_delay : float, optional
        Seconds to sleep before reading each results page. Defaults to 1,
        the value that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per listing with columns ``Name``, ``Address`` (first line
        of the card text), ``Reference`` (the link's ``href``),
        ``Postal Code`` and ``City``. ``Postal Code`` / ``City`` are missing
        when they cannot be derived from the card text. The frame is empty
        (but has all columns) when no listing was found.
    """
    driver.get("https://www.cushmanwakefield.com/en/canada/properties/lease/lease-property-search#sort=%40propertylastupdateddate%20descending&f:PropertyType=%5BRetail%5D&f:Country=%5BCanada%5D&f:StateProvince=%5BON%5D")

    # Collect plain dicts and build the frame once at the end. The previous
    # `df = pd.concat([df, ...])` made `df` a local that was read before
    # assignment (UnboundLocalError) and was quadratic in the row count.
    records = []
    while True:
        # Pause before reading the result list (presumably so the page can
        # finish rendering -- TODO confirm; an explicit wait may be better).
        time.sleep(page_delay)
        result_list = driver.find_element(By.CLASS_NAME, 'CoveoResultList')
        for listing in result_list.find_elements(By.CLASS_NAME, 'CoveoResult'):
            # Look the link up once; it supplies both the name and the href.
            link = listing.find_element(By.CLASS_NAME, 'CoveoResultLink')
            name = link.text
            reference = link.get_dom_attribute('href')
            print(f"Scrapping: {name}")
            address = listing.find_element(By.CLASS_NAME, 'card-text').text
            records.append({'Name': name, 'Address': address, 'Reference': reference})

        # click on the next page
        try:
            print("Click the next button")
            next_link = driver.find_element(By.XPATH, "//a[@title='Next']")
            driver.execute_script("arguments[0].click();", next_link)
        except Exception as e:
            # Any failure to find/click "Next" is treated as the last page.
            print(e)
            break

    # Fixed column list keeps the frame well-formed even with zero records.
    df = pd.DataFrame(records, columns=['Name', 'Address', 'Reference'])

    def second_line_city(address):
        # The city is the text before the first comma on the second line;
        # a single-line address has no city instead of raising IndexError.
        lines = address.split('\n')
        return lines[1].split(',')[0] if len(lines) > 1 else None

    # expand=False yields a Series rather than a one-column DataFrame.
    df['Postal Code'] = df['Address'].str.extract(r'([A-Za-z]\d[A-Za-z] \d[A-Za-z]\d)', expand=False)
    df['City'] = df['Address'].map(second_line_city)
    # Must come last: the two columns above are derived from the raw text.
    df['Address'] = df['Address'].map(lambda text: text.split('\n')[0])
    return df

def get_riocan_data(driver, page_delay=1.1):
    """Scrape RioCan leasing listings for Ontario.

    Opens the leasing page, selects 'Ontario' in the ``stateSelect``
    dropdown, submits the search, keeps clicking the
    ``pager_button--next`` element until finding or clicking it raises, and
    then reads every ``search-result_content`` card. Cards whose text
    contains 'N/A' are skipped. Each remaining property's profile page is
    then visited to read its address.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session used for the search and the profile pages.
    page_delay : float, optional
        Seconds to sleep after opening each profile page before reading the
        address. Defaults to 1.1, the value that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per kept listing with columns ``Name``, ``Address`` (text
        before the first comma), ``Reference`` (absolute profile URL),
        ``Postal Code`` and ``City``. ``City`` is only recognised when it
        is a single word between two commas; otherwise it is missing.
    """
    driver.get("https://www.riocan.com/English/our-properties/leasing/default.aspx")

    # Select the province and click search
    Select(driver.find_element(By.ID, 'stateSelect')).select_by_visible_text('Ontario')
    driver.find_element(By.ID, 'property-search-submit').click()

    # Keep clicking the next/load-more button until it can no longer be
    # found or clicked. `except Exception` (not a bare except) so that
    # KeyboardInterrupt / SystemExit still stop the notebook.
    while True:
        try:
            driver.find_element(By.CLASS_NAME, 'pager_button--next').click()
        except Exception:
            break

    # Read everything needed from the result cards BEFORE navigating away;
    # the card elements belong to the search page.
    records = []
    for listing in driver.find_elements(By.CLASS_NAME, 'search-result_content'):
        link = listing.find_element(By.CLASS_NAME, 'larger').find_element(By.TAG_NAME, 'a')
        name = link.text
        reference = "https://www.riocan.com" + link.get_dom_attribute('href')
        print(f"Scrapping: {name}")

        # if there are no available spaces then skip
        if 'N/A' in listing.text.strip():
            continue
        records.append({'Name': name, 'Address': "", 'Reference': reference})

    # Go into each profile page for the property and get the address
    for record in records:
        driver.get(record['Reference'])
        time.sleep(page_delay)
        record['Address'] = driver.find_element(By.XPATH, "//div[@data-content='address']").text

    # Built once from the records: the previous `df = pd.concat([df, ...])`
    # read a local `df` before assignment (UnboundLocalError).
    df = pd.DataFrame(records, columns=['Name', 'Address', 'Reference'])

    # Extract the road, city and postal code
    raw_address = df['Address'].str.replace('Address: ', ' ', regex=False)
    df['Postal Code'] = raw_address.str.extract(r'([A-Za-z]\d[A-Za-z] \d[A-Za-z]\d)', expand=False)
    df['City'] = raw_address.str.extract(r', (\w+),', expand=False)
    # Per-row street. The old `.str.split(',')[0][0].strip()` produced a
    # single scalar (row 0's street) and broadcast it to every row.
    df['Address'] = raw_address.map(lambda text: text.split(',')[0].strip())

    return df

def get_avison_data(path='avison.txt', encoding=None):
    """Parse saved Avison listing HTML into a DataFrame.

    Reads the HTML stored at ``path`` and walks every ``div.col-12``. A div
    is treated as a listing only if it contains the title ``h5``, a link
    and a ``list-item-attribute`` div; other ``col-12`` divs are skipped
    instead of raising ``AttributeError``.

    Parameters
    ----------
    path : str, optional
        File holding the saved page source. Defaults to ``'avison.txt'``.
    encoding : str or None, optional
        Passed to ``open``; ``None`` keeps the platform default, as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``Name`` (title text), ``Address`` (copy of ``Name``),
        ``Reference`` (link ``href``), ``Postal Code`` (extracted from the
        attribute text, missing if absent) and ``City`` (attribute text
        before the first comma).
    """
    # Context manager so the file handle is closed deterministically.
    with open(path, encoding=encoding) as handle:
        soup = BeautifulSoup(handle.read(), 'html.parser')

    records = []
    for card in soup.find_all('div', {'class': 'col-12'}):
        title = card.find('h5', {'class': 'ellipsis plugin-primary-color list-item-title'})
        link = card.find('a')
        location = card.find('div', {'class': 'list-item-attribute'})
        if title is None or link is None or location is None:
            # Not a complete listing card.
            continue
        records.append({
            'Name': title.text,
            'Address': location.text.strip(),
            'Reference': link.get('href'),
        })

    # Built once from the records: the previous `df = pd.concat([df, ...])`
    # read a local `df` before assignment (UnboundLocalError).
    df = pd.DataFrame(records, columns=['Name', 'Address', 'Reference'])

    # At this point 'Address' still holds the attribute text, from which the
    # postal code and city are derived.
    df['Postal Code'] = df['Address'].str.extract(r'([A-Za-z]\d[A-Za-z]\s?\d[A-Za-z]\d)', expand=False)
    df['City'] = df['Address'].map(lambda text: text.split(',')[0])
    # The listing title is used as the address in the returned frame.
    df['Address'] = df['Name']
    return df

def get_clearstream_data(path='clearstreamcre.txt', encoding=None):
    """Parse saved Clearstream listing HTML into a DataFrame.

    Reads the HTML stored at ``path`` and walks every ``div.Zc7IjY``. A card
    is skipped when its availability text is ``'0'`` or blank (a lone
    zero-width space), or when the availability span, heading or link is
    missing.

    Parameters
    ----------
    path : str, optional
        File holding the saved page source. Defaults to
        ``'clearstreamcre.txt'``.
    encoding : str or None, optional
        Passed to ``open``; ``None`` keeps the platform default, as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``Name`` (heading text), ``Address`` (same as ``Name``),
        ``Reference`` (link ``href``) and ``City`` (second-to-last
        comma-separated piece of the heading, stripped; missing when the
        heading has no comma).
    """
    # Context manager so the file handle is closed deterministically.
    with open(path, encoding=encoding) as handle:
        soup = BeautifulSoup(handle.read(), 'html.parser')

    # A blank availability cell holds only U+200B (zero-width space), which
    # str.strip() does not remove. The original code compared against the
    # three characters its UTF-8 bytes become when decoded as Windows-1252;
    # accept both spellings so the check works whatever decoding was used.
    unavailable = {'0', '\u200b', '\u00e2\u20ac\u2039'}

    records = []
    for card in soup.find_all('div', {'class': 'Zc7IjY'}):
        count = card.find('span', {'class': 'color_15 wixui-rich-text__text'})
        heading = card.find('h2', {'class': 'wixui-rich-text__text'})
        link = card.find('a')
        if count is None or heading is None or link is None:
            # Not a complete listing card.
            continue
        if count.text.strip() in unavailable:
            continue
        name = heading.text
        records.append({'Name': name, 'Address': name, 'Reference': link.get('href')})

    # Built once from the records: the previous `df = pd.concat([df, ...])`
    # read a local `df` before assignment (UnboundLocalError).
    df = pd.DataFrame(records, columns=['Name', 'Address', 'Reference'])

    def city_of(address):
        # Second-to-last comma-separated piece; no comma means no city
        # (the bare [-2] index raised IndexError in that case).
        pieces = address.split(',')
        return pieces[-2].strip() if len(pieces) >= 2 else None

    df['City'] = df['Address'].map(city_of)
    return df

def get_metrus_data(path='metrus.txt', encoding=None):
    """Parse saved Metrus listing HTML into a DataFrame.

    Reads the HTML stored at ``path`` and, for every ``div.property-info``,
    takes the first line of its stripped text as both the name and the
    address. Every row gets the same fixed Metrus leasing URL as reference.

    Parameters
    ----------
    path : str, optional
        File holding the saved page source. Defaults to ``'metrus.txt'``.
    encoding : str or None, optional
        Passed to ``open``; ``None`` keeps the platform default, as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``Name``, ``Address`` and ``Reference``; empty (but with
        those columns) when no ``property-info`` div is found.
    """
    # Context manager so the file handle is closed deterministically.
    with open(path, encoding=encoding) as handle:
        soup = BeautifulSoup(handle.read(), 'html.parser')

    records = []
    for block in soup.find_all('div', {'class': 'property-info'}):
        first_line = block.text.strip().split('\n')[0]
        records.append({
            'Name': first_line,
            'Address': first_line,
            'Reference': 'https://www.metrusproperties.com/index.php?o=leasing&mode=Retail',
        })

    # Built once from the records: the previous `df = pd.concat([df, ...])`
    # read a local `df` before assignment (UnboundLocalError), as did
    # `return df` when there were no listings at all.
    return pd.DataFrame(records, columns=['Name', 'Address', 'Reference'])

In [79]:
# Rebind the module-level name `df` to a fresh, empty DataFrame
df = pd.DataFrame()



In [80]:
# Bare expression: the notebook renders the current contents of `df` as this cell's output
df

Unnamed: 0,Name,Address,Reference
0,16635 Yonge St.,16635 Yonge St.,https://www.metrusproperties.com/index.php?o=l...
1,1331 Yonge,1331 Yonge,https://www.metrusproperties.com/index.php?o=l...
2,230 Adelaide St. E.,230 Adelaide St. E.,https://www.metrusproperties.com/index.php?o=l...
3,5010 Steeles Ave. W,5010 Steeles Ave. W,https://www.metrusproperties.com/index.php?o=l...
4,8500 Leslie Street,8500 Leslie Street,https://www.metrusproperties.com/index.php?o=l...
5,3115 Glen Erin Dr.,3115 Glen Erin Dr.,https://www.metrusproperties.com/index.php?o=l...
6,10 West Pearce St.,10 West Pearce St.,https://www.metrusproperties.com/index.php?o=l...
7,45 Vogell Road,45 Vogell Road,https://www.metrusproperties.com/index.php?o=l...
8,6 Monogram Place,6 Monogram Place,https://www.metrusproperties.com/index.php?o=l...
9,400 Applewood Crescent,400 Applewood Crescent,https://www.metrusproperties.com/index.php?o=l...
