In [11]:
import csv
import re
from datetime import datetime, timedelta
from urllib.parse import urljoin

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


In [13]:
# ChromeDriver 
chrome_driver_path = "D:/chromedriver/chromedriver.exe"
chrome_options = Options()
chrome_options.page_load_strategy = 'eager'  # Load pages faster
service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)


In [14]:


def convert_relative_time_to_date(relative_time):
    current_time = datetime.now()
    if "minute" in relative_time:
        minutes = int(relative_time.split()[0])
        return current_time - timedelta(minutes=minutes)
    elif "hour" in relative_time:
        hours = int(relative_time.split()[0])
        return current_time - timedelta(hours=hours)
    elif "day" in relative_time:
        days = int(relative_time.split()[0])
        return current_time - timedelta(days=days)
    elif "week" in relative_time:
        weeks = int(relative_time.split()[0])
        return current_time - timedelta(weeks=weeks)
    else:
        return current_time  # Default to now if parsing fails


with open("zameen_data.csv", mode="w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)                                                 
    # Write the header                                                        
    writer.writerow(["Title", "Price", "Location", "Beds", "Area", "Baths", "Agent", "Date", "Company"])
    
    # Loop through multiple pages (example: limit to 4 pages)
    for page_number in range(1, 4):  

        titles, prices, locations, beds, area, baths, agents, dates, companies = [], [], [], [], [], [], [], [], []


        url = f"https://www.zameen.com/Flats_Apartments/Peshawar-17-{page_number}.html"
        print(f"Scraping page: {url}")
        

        driver.get(url)


        WebDriverWait(driver, 7).until(EC.presence_of_element_located((By.CLASS_NAME, "a37d52f0")))

        # Get the page source and parse it with BeautifulSoup
        page_source = driver.page_source
        soup = BeautifulSoup(page_source, "html.parser")

        
        listings = soup.find_all("li", role="article", class_="a37d52f0")

     
        for i in range(0, len(listings), 4):
            tabs_to_open = []

            for j in range(4):
                if i + j >= len(listings):
                    break
                listing = listings[i + j]

                # Extract details directly from the main page
                try:
                    title = listing.find("h2", class_="_36dfb99f").text.strip()
                    property_link = listing.find("a", href=True)["href"]  # Extract the property link
                    titles.append(title)
                except:
                    titles.append(" ")

              
                try:
                    price = listing.find("span", class_="dc381b54").text.strip()
                except:
                    price = " "
                prices.append(price)

                
                try:
                    location = listing.find("div", class_="db1aca2f").text.strip()
                except:
                    location = " "
                locations.append(location)

               
                try:
                    bed = listing.find("span", class_="_6d9b9b83", attrs={"aria-label": "Beds"}).text.strip()
                except:
                    bed = " "
                beds.append(bed)

                
                try:
                    area_value = listing.find("span", class_="_6d9b9b83", attrs={"aria-label": "Area"}).text.strip()
                except:
                    area_value = " "
                area.append(area_value)

              
                try:
                    bath = listing.find("span", class_="_6d9b9b83", attrs={"aria-label": "Baths"}).text.strip()
                except:
                    bath = " "
                baths.append(bath)

                
                if property_link:
                    full_url = 'https://www.zameen.com' + property_link
                    driver.execute_script("window.open('" + full_url + "', '_blank');")
                    tabs_to_open.append(driver.window_handles[-1])

           
            for tab_handle in tabs_to_open:
                driver.switch_to.window(tab_handle)
                try:

                    WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.CLASS_NAME, "d10ba6ac")))
                    agent_name = driver.find_element(By.CLASS_NAME, "d10ba6ac").text.strip()
                except:
                    agent_name = "N/A"
                agents.append(agent_name)

            
                try:
                    creation_relative_time = WebDriverWait(driver, 5).until(
                        EC.presence_of_element_located((By.XPATH, "//span[@aria-label='Creation date']"))
                    ).text.strip()

                    creation_date = convert_relative_time_to_date(creation_relative_time)
                    dates.append(creation_date.strftime('%Y-%m-%d %H:%M:%S'))  # Format the date
                except:
                    dates.append("N/A")

               
                try:
                    WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.CLASS_NAME, "_0a8efec2")))
                    company_name = driver.find_element(By.CLASS_NAME, "_0a8efec2").text.strip()
                except:
                    company_name = "N/A"
                companies.append(company_name)

          
            for tab_handle in tabs_to_open:
                driver.switch_to.window(tab_handle)       #   *Close the 4 tabs after data extraction *
                driver.close()

          
            driver.switch_to.window(driver.window_handles[0])        # Close tabs after data extraction
 
        
        for i in range(len(titles)):    # Write the data for this page to the CSV file
            writer.writerow([titles[i], prices[i], locations[i], beds[i], area[i], baths[i], agents[i], dates[i], companies[i]])

        print(f"page# {page_number} completed..,.")


driver.quit()

print("Scraping completed .......")


Scraping page: https://www.zameen.com/Flats_Apartments/Peshawar-17-1.html
page# 1 completed..,.
Scraping completed .......
