In [5]:
import urllib
import urllib.parse
import urllib.request

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

In [1]:
def create_driver(headless=False):
    """Start a Chrome WebDriver session and return it.

    Args:
        headless: When true, Chrome is launched with the ``--headless``
            command-line switch so that no browser window is shown.

    Returns:
        The ``webdriver.Chrome`` instance, configured with a 40-second
        implicit wait. The caller is responsible for calling ``quit()``.
    """
    chrome_options = Options()
    if headless:
        # Pass the switch explicitly instead of assigning `Options.headless`:
        # that setter was deprecated in Selenium 4.8 and removed in later
        # releases, where the assignment silently has no effect.
        chrome_options.add_argument("--headless")

    # The Service is given whatever ChromeDriverManager().install() returns
    # (presumably the path of a matching chromedriver binary).
    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=chrome_options,
    )

    # Upper bound, in seconds, that find_element() keeps polling for an
    # element before giving up.
    driver.implicitly_wait(40)
    return driver

def getAddressfromSearch(name):
    """Scrape an address for ``name`` from a Google web-search result page.

    A fresh Chrome session is opened for the lookup and always closed again
    before returning.

    Args:
        name: Text to search for (a company name in this notebook).

    Returns:
        The text of the first element carrying the CSS class ``LrzXr``, or
        ``''`` when loading the page, clicking the first button, or locating
        that element fails.
    """
    # The search term is the name prefixed with " ' ", percent-encoded.
    query = urllib.parse.quote(' \' ' + name)
    url = 'https://www.google.com/search?q=' + query
    driver = create_driver()
    try:
        driver.get(url)
        # Presumably dismisses Google's consent dialog -- the absolute XPath
        # is brittle; TODO confirm it still matches the live page.
        driver.find_element(By.XPATH, '/html/body/div[3]/div[3]/span/div/div/div/div[3]/div[1]/button[2]').click()
        address = driver.find_element(By.XPATH, '//*[contains(concat( " ", @class, " " ), concat( " ", "LrzXr", " " ))]').text
    except Exception:
        # Best-effort lookup: any scraping failure yields an empty address.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit through, so a long-running loop can still be stopped.
        address = ''
    finally:
        # Close the browser even if the lookup is interrupted.
        driver.quit()
    return address

def getAddressfromMaps(link):
    """Scrape an address for ``link`` from a Google Maps search page.

    A fresh Chrome session is opened for the lookup and always closed again,
    including when navigation or the first button click raises.

    Args:
        link: URL string; any "http://" / "https://" occurrences are removed
            and the remainder is used as the Maps search term.

    Returns:
        The text of the element matched by the address XPath, or ``''`` when
        that element cannot be read.

    Raises:
        Whatever ``driver.get`` or the first ``find_element(...).click()``
        raises; only the address read itself is best-effort.
    """
    link = link.replace("http://", "")
    link = link.replace("https://", "")
    query = urllib.parse.quote(link)
    url = 'https://www.google.com/maps/search/' + query
    driver = create_driver()
    try:
        driver.get(url)
        # Presumably submits Google's consent form -- the XPath is brittle;
        # TODO confirm it still matches the live page.
        driver.find_element(By.XPATH, '//*[@id="yDmH0d"]/c-wiz/div/div/div/div[2]/div[1]/div[3]/div[1]/div[1]/form[2]/div/div/button').click()
        try:
            address = driver.find_element(By.XPATH, '//*[@id="QA0Szd"]/div/div/div[1]/div[2]/div/div[1]/div/div/div[7]/div[3]/button/div[1]/div[2]/div[1]').text
        except Exception:
            # Catching Exception (not a bare except) keeps the run
            # interruptible with KeyboardInterrupt.
            address = ''
    finally:
        # Previously a failure before the inner try left Chrome running.
        driver.quit()
    return address

In [6]:
def getData(url):
    """Download ``url`` and tabulate every element that has a ``data-region``.

    For each element carrying a ``data-region`` attribute, the first ``<a>``
    inside it supplies the company name (its text) and link (its ``href``).
    Elements without an ``<a>`` are skipped.

    Args:
        url: Address of the HTML page to fetch.

    Returns:
        A ``pandas.DataFrame`` with the columns ``'Company Name'``,
        ``'Link'`` and ``'Region'``, one row per matching element, in
        document order. ``'Link'`` is ``None`` when the anchor has no
        ``href``.
    """
    # Use the response as a context manager so the connection is closed as
    # soon as the document has been parsed.
    with urllib.request.urlopen(url) as html_page:
        # Name the parser explicitly: without it BeautifulSoup picks
        # whichever parser is installed (and warns), so results can differ
        # between machines.
        soup = BeautifulSoup(html_page, 'html.parser')

    records = []
    for row in soup.find_all(attrs={"data-region": True}):
        anchor = row.find('a')
        if anchor is None:
            # No link inside this element, so there is no name or URL.
            continue
        records.append((anchor.get_text(), anchor.get('href'), row['data-region']))

    return pd.DataFrame(records, columns=['Company Name', 'Link', 'Region'])

# Page whose data-region elements getData turns into the company table.
AI_LANDSCAPE_URL = 'https://www.ai4belgium.be/ai-landscape/'
ai_df = getData(AI_LANDSCAPE_URL)

In [7]:
# Run one Google search per company and collect the scraped address strings,
# echoing (index, name, address) after each lookup as a progress trace.
addresses = []
for idx, row in ai_df.iterrows():
    company = row['Company Name']
    address = getAddressfromSearch(company)
    addresses.append(address)
    print((idx, company, address))

(0, 'AE Projects', '')
(1, 'Agilytic', 'Rue F. Dubois 2, 1310 La Hulpe')
(2, 'Aptus', 'Meensesteenweg 449, 8501 Kortrijk')
(3, 'Arinti', '')
(4, 'Around Media', 'Adres: Kortrijksesteenweg 1127/0002, 9051, 9000 Gent')
(5, 'B12 Consulting', 'Boucle Odon Godart, 2, 1348, Ottignies-Louvain-la-Neuve')
(6, 'Blendr.io', '2017')
(7, 'BOBUP', 'Rue de Ransbeek 230, 1120 Bruxelles')
(8, 'Boltzmann', '20 februari 1844, Wenen, Oostenrijk')
(9, 'Brainjar', 'Gaston Geenslaan 11, 3001 Leuven')
(10, 'Brainstorm Consulting', '')
(11, 'Calculus', '')
(12, 'Chiveo', '')
(13, 'Comexis Partners', 'Avenue Pasteur 6, Building H, 1300 Wavre')
(14, 'CooperLink', 'Rue du Bois Saint-Jean 15/1, 4102 Seraing')
(15, 'Creax', 'Walle 113G, 8500 Kortrijk')
(16, 'CrossLang', 'Kerkstraat 106, 9050 Gent')
(17, 'D-Sight ', '2010')
(18, 'DART Consulting', 'Congresstraat 13, 1000 Brussel')
(19, 'Data Factory ', 'Sinter-Goedeleplein 5, 1000 Brussel')
(20, 'Data Minded', 'Vismarkt 17, 3000 Leuven')
(21, 'Dataa', 'Dieudonné Lef