In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [2]:
def get_locality(soup):
    """Extract the locality name from a property page's address line.

    The address is read from the ``span`` with class
    ``component__pdPropAddress`` and split on whitespace. The locality is
    taken to be the fourth token from the end, with any trailing comma
    removed.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The locality string, or ``"N/A"`` when the address element is
        missing or the address has fewer than four tokens.
    """
    try:
        # Whitespace-separated tokens of the address text
        address_tokens = soup.find("span", attrs={"class":'component__pdPropAddress'}).text.strip().split()

        # Fourth token from the end; strip only a trailing comma so that a
        # token without one is not truncated by a character
        title_string = address_tokens[-4].rstrip(",").strip()

    except (AttributeError, IndexError):
        # AttributeError: address element not found on the page.
        # IndexError: address has fewer than four tokens.
        title_string = "N/A"

    return title_string

def get_region(soup):
    """Extract the region name from a property page's address line.

    The address is read from the ``span`` with class
    ``component__pdPropAddress`` and split on whitespace. The region is the
    third- and second-to-last tokens joined by a space, with commas removed.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The region string, or ``"N/A"`` when the address element is missing
        or the address has fewer than three tokens.
    """
    try:
        address_tokens = soup.find("span", attrs={"class":'component__pdPropAddress'}).text.strip().split()

        # Join the two tokens that make up the region and drop the commas
        # that separated the address parts
        new_region = (address_tokens[-3] + ' ' + address_tokens[-2]).replace(',', '')

    except (AttributeError, IndexError):
        # AttributeError: address element not found on the page.
        # IndexError: address has fewer than three tokens.
        new_region = "N/A"

    return new_region

def get_price(soup):
    """Return the price text shown on a property page.

    The value is the whitespace-stripped text of the ``span`` with class
    ``component__pdPropValue``; ``"N/A"`` is returned when looking it up
    raises ``AttributeError`` (for example, the element is absent).
    """
    price_selector = {"class": 'component__pdPropValue'}

    try:
        return soup.find("span", attrs=price_selector).text.strip()
    except AttributeError:
        return "N/A"

def get_type(soup):
    """Return the first four characters of the property-type heading.

    The heading is the whitespace-stripped text of the ``span`` with class
    ``component__pdPropDetail2Heading``; ``"N/A"`` is returned when looking
    it up raises ``AttributeError`` (for example, the element is absent).
    """
    heading_selector = {"class": 'component__pdPropDetail2Heading'}

    try:
        # Keep only the leading four characters of the heading text
        return soup.find("span", attrs=heading_selector).text.strip()[:4]
    except AttributeError:
        return "N/A"


def get_area(soup):
    """Return the raw text of the carpet-area element on a property page.

    The value is the unmodified text of the ``span`` whose id is
    ``carpetArea_span``; ``"N/A"`` is returned when looking it up raises
    ``AttributeError`` (for example, the element is absent).
    """
    area_selector = {"id": 'carpetArea_span'}

    try:
        return soup.find("span", attrs=area_selector).text
    except AttributeError:
        return "N/A"



In [3]:

if __name__ == '__main__':

    # Browser-like headers sent with every request
    HEADERS = ({'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36', 'Accept-Language': 'en-US, en;q=0.5'})

    # Seconds to wait for a response; without a timeout a stalled
    # connection would block the run indefinitely
    REQUEST_TIMEOUT = 30

    # Search-results page for rental listings in Navi Mumbai
    url='https://www.99acres.com/search/property/rent/navi-mumbai?city=15&preference=R&area_unit=1&res_com=R'

    # Fetch the search page and fail loudly on an HTTP error, rather than
    # parsing an error page and silently writing an empty CSV
    webpage = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    webpage.raise_for_status()

    # Soup object for the search-results page
    soup = BeautifulSoup(webpage.content, "html.parser")

    # Listing anchors, matched explicitly on their class attribute
    links = soup.find_all("a", attrs={"class": 'body_med srpTuple__propertyName'})

    # Listing URLs; anchors without an href are skipped because
    # requests cannot fetch a missing URL
    links_list = [link.get('href') for link in links if link.get('href')]

    d = {"Locality":[], "Region":[],"price":[],"Apartment Type":[],"Area":[]}

    # Visit every listing page and collect one value per column
    for link in links_list:
        new_webpage = requests.get(link, headers=HEADERS, timeout=REQUEST_TIMEOUT)

        new_soup = BeautifulSoup(new_webpage.content, "html.parser")

        d['Locality'].append(get_locality(new_soup))
        d['Region'].append(get_region(new_soup))
        d['price'].append(get_price(new_soup))
        d['Apartment Type'].append(get_type(new_soup))
        d['Area'].append(get_area(new_soup))

    acre_df = pd.DataFrame.from_dict(d)

    # Assign the result back to the column: an in-place replace on a
    # selected column is a chained operation that does not reliably
    # update the parent frame
    acre_df['Locality'] = acre_df['Locality'].replace('', np.nan)

    # Drop listings whose locality came back empty, then persist the rest
    acre_df = acre_df.dropna(subset=['Locality'])
    acre_df.to_csv("acre_data.csv", header=True, index=False)


In [4]:
# Display up to the first 50 rows of acre_df
acre_df.head(50)

Unnamed: 0,Locality,Region,price,Apartment Type,Area
0,Kharghar,Navi Mumbai,16000,1BHK,
1,Kharghar,Navi Mumbai,20000,1BHK,342.0
2,Vashi,Navi Mumbai,61000,3BHK,1100.0
3,Kharghar,Navi Mumbai,56500,3BHK,1250.0
4,Vashi,Navi Mumbai,80000,3BHK,1000.0
5,Koperkhairane,Navi Mumbai,40000,2BHK,
6,Seawoods,Navi Mumbai,65000,2BHK,900.0
7,Sanpada,Navi Mumbai,55000,2BHK,1200.0
8,Ghansoli,Navi Mumbai,52222,2BHK,638.0
9,Panvel,Navi Mumbai,15000,2BHK,453.0


{'Locality': ['Kharghar',
  'Kharghar',
  'Vashi',
  'Kharghar',
  'Vashi',
  'Koperkhairane',
  'Seawoods',
  'Sanpada',
  'Ghansoli',
  'Panvel',
  'Nevali',
  'Panvel',
  'Panvel',
  'Vashi',
  'Kamothe',
  'Panvel',
  'Panvel',
  'Ghansoli',
  'Vashi',
  'Vashi',
  'Koperkhairane',
  'Khairane',
  'Ghansoli',
  'Ghansoli',
  'Kharghar',
  'Airoli',
  'Beach'],
 'Region': ['Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai',
  'Navi Mumbai'],
 'price': ['16,000',
  '20,000',
  '61,000',
  '56,500',
  '80,000',
  '40,000',
  '65,000',
  '55,000',
  '52,222',
  '15,000',
  '7000',
  '32,000',
  '25,500',
