# Web Scraping Data from Kolabtree

## Overview
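This notebook uses Selenium to scrape expert listings from Kolabtree's "Find an Expert" page. It follows the ">" pagination link to collect up to 200 result pages in a single run and saves the records to `kolabtree data.csv`.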

## XPath Expressions

| **Data Field**   | **XPath Expression** |
|------------------|----------------------|
| **Name**        | `//div[contains(@class, 'update-name')]` |
| **Title**       | `//div[contains(@class, 'headline-text')]` |
| **Location**    | `//div[contains(@class, 'locationc')]` |
| **Profile Link** | `//a[contains(@class, 'text-btn')]` |
| **Rating**      | `//div[contains(@class, 'fill-ratings')]/span` |
| **Price**       | `//div[contains(@class, 'price-box')]/strong[2]` |
| **Degree**      | `//span[contains(text(), 'Doctor of Science - Epidemiology')]` |
| **Next Page Button** | `//a[contains(text(), '>')]` |


In [5]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
import pandas as pd
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoSuchElementException, TimeoutException

import time
import csv


In [12]:
# Configure Chrome to run without a visible window, then start the browser session.
options = Options()
options.add_argument("--headless")
# The options object must be handed to the driver; otherwise the headless flag
# above is never applied and a regular browser window is opened.
driver = webdriver.Chrome(options=options)




In [17]:
# Open the Kolabtree expert listing in the browser session; the scraping loop
# further down starts from whatever page the driver currently shows.
url = "https://www.kolabtree.com/find-an-expert"
driver.get(url)

In [None]:
# Accumulator for the scraped records: the scraping loop appends one dict per
# expert tile. Re-running this cell discards anything collected so far.
data = []

In [None]:

# Scrape every expert tile on each results page and follow the ">" pagination
# link until it is missing, a page fails to load in time, or max_pages is hit.
# Records are appended to the notebook-level `data` list and written to CSV.

page_count = 0   # number of times the "next page" link has been clicked
max_pages = 200  # upper bound on pagination clicks for one run

_MISSING = "N/A"                    # placeholder for fields absent from a tile
_TILE_SELECTOR = ".box.expert-tiles"  # elements carrying both CSS classes
_PAGE_LOAD_DELAY = 10               # seconds to let the next page render
# The leading "." scopes the search to the current tile. A bare "//" is
# evaluated from the document root, which would give every row the first
# match on the page instead of the tile's own value.
# NOTE(review): the literal degree text only matches this one degree; confirm
# whether a class-based selector for the degree element should be used instead.
_DEGREE_XPATH = ".//span[contains(text(), 'Doctor of Science - Epidemiology')]"
# NOTE(review): "contains '>'" would also match a ">>" link if the pager has
# one; verify against the live page.
_NEXT_XPATH = "//a[contains(text(), '>')]"


def _child_text(parent, by, value):
    """Return the stripped text of the first match under *parent*, or "N/A"."""
    try:
        return parent.find_element(by, value).text.strip()
    except NoSuchElementException:
        return _MISSING


def _profile_link(tile):
    """Return the href of the tile's profile button, or "N/A" if it is absent."""
    try:
        return tile.find_element(By.CLASS_NAME, "text-btn").get_attribute("href")
    except NoSuchElementException:
        return _MISSING


def _rating(tile):
    """Return the text of the span inside the rating widget, or "N/A"."""
    try:
        stars = tile.find_element(By.CLASS_NAME, "fill-ratings")
    except NoSuchElementException:
        return _MISSING
    return _child_text(stars, By.TAG_NAME, "span")


def _price(tile):
    """Return the hourly price as "<currency> <amount>/hr", or "N/A"."""
    try:
        price_box = tile.find_element(By.CLASS_NAME, "price-box")
        currency = price_box.find_element(By.CLASS_NAME, "currncy").text.strip()
        # The amount is read from the second <strong>; find_elements returns a
        # plain list, so a tile with fewer than two raises IndexError rather
        # than NoSuchElementException.
        amount = price_box.find_elements(By.TAG_NAME, "strong")[1].text.strip()
    except (NoSuchElementException, IndexError):
        return _MISSING
    return f"{currency} {amount}/hr"


def _extract_expert(tile):
    """Build one record (dict of column name -> value) from an expert tile."""
    return {
        "Name": _child_text(tile, By.CLASS_NAME, "update-name"),
        "Title": _child_text(tile, By.CLASS_NAME, "headline-text"),
        "Location": _child_text(tile, By.CLASS_NAME, "locationc"),
        "Profile Link": _profile_link(tile),
        "Rating": _rating(tile),
        "Price": _price(tile),
        "Degree": _child_text(tile, By.XPATH, _DEGREE_XPATH),
    }


try:
    while page_count < max_pages:
        # Stop when no expert tiles show up within 10 seconds.
        try:
            WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, _TILE_SELECTOR))
            )
        except TimeoutException:
            break

        for tile in driver.find_elements(By.CSS_SELECTOR, _TILE_SELECTOR):
            data.append(_extract_expert(tile))

        # Stop when there is no "next page" link left to follow.
        try:
            next_button = driver.find_element(By.XPATH, _NEXT_XPATH)
        except NoSuchElementException:
            break

        # Click through JavaScript so an overlapping element cannot intercept it.
        driver.execute_script("arguments[0].click();", next_button)
        page_count += 1
        time.sleep(_PAGE_LOAD_DELAY)
finally:
    # Write whatever was collected even if the loop was interrupted or failed,
    # so a long run is not lost to a single unexpected error.
    df = pd.DataFrame(data)
    df.to_csv("kolabtree data.csv", index=False, encoding="utf-8")




In [23]:
# NOTE(review): the scraping loop appends one dict (with 7 keys) per expert, so
# len(data) is already the number of records; dividing by 7 looks like a
# leftover from a flat-list version of the data — confirm before relying on it.
len(data)/7

341.0

In [44]:
# End the WebDriver session and close the browser once scraping is finished.
driver.quit()