In [64]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import json
from selenium import webdriver
import time

In [65]:
def get_region(soup):
    """Return the region field scraped from a listing page.

    The text of the third ``application/ld+json`` <script> tag is split on
    commas and the 8th field from the end is used. The text after that
    field's last ':' is returned with spaces and surrounding double quotes
    removed.

    Parameters:
        soup: parsed page exposing ``find_all`` (e.g. a BeautifulSoup object).

    Returns:
        The region string, or "N/A" when the script tag is missing, has no
        text, has 9 or fewer comma-separated fields, or the field is empty.
    """
    # Default up front: previously a script with too few fields left
    # ``new_region`` unassigned and the return raised UnboundLocalError.
    new_region = "N/A"

    try:
        region = soup.find_all("script", attrs={"type":'application/ld+json'})[2].text.strip().split(',')
        if len(region) > 9:
            new_region = region[-8].replace(" ", "").split()[0].split(":")[-1].strip('""')

    except (AttributeError, IndexError):
        # AttributeError: tag without usable text; IndexError: fewer than
        # three script tags or an empty field.
        new_region = "N/A"

    return new_region

def get_price(soup):
    """Return the listing price as an integer number of rupees when possible.

    The text of the ``div.mb-ldp__dtls__price`` element is read and stripped.

    * A plain amount such as "₹30,000" is returned as ``30000``.
    * An amount whose text ends in "c" is treated as lakh ("Lac",
      1 Lac = 100,000 — assumed from the original one-decimal handling),
      so "₹1.5 Lac" becomes ``150000``.
    * Any other text is returned unchanged as a string.

    Parameters:
        soup: parsed page exposing ``find`` (e.g. a BeautifulSoup object).

    Returns:
        int price, the raw price text when its format is not recognised,
        or "N/A" when the price element is missing or empty.
    """
    try:
        current_price = soup.find("div", attrs={"class":'mb-ldp__dtls__price'}).text.strip()
    except AttributeError:
        # Element not found (find returned None) or it carries no text.
        return "N/A"

    if not current_price:
        return "N/A"

    cleaned_string = current_price.replace("₹", "").replace(",", "").strip()

    # Plain rupee amount. Checking for digits instead of a trailing "0"
    # also converts amounts such as "₹24,555", which used to stay strings.
    if cleaned_string.isdecimal():
        return int(cleaned_string)

    if current_price[-1] == 'c':
        # Keep the decimal point so "1 Lac" and "1.25 Lac" scale correctly;
        # concatenating the digits and multiplying by 10000 was only right
        # for values with exactly one decimal place.
        numeric_string = ''.join(ch for ch in cleaned_string if ch.isdigit() or ch == '.')
        try:
            return int(round(float(numeric_string) * 100000))
        except ValueError:
            # No parseable number: fall back to the raw text like any
            # other unrecognised format.
            return current_price

    return current_price

def get_type(soup):
    """Return the apartment type (e.g. "3BHK") scraped from a listing page.

    The text of the third ``application/ld+json`` <script> tag is split on
    commas and the 11th field from the end is used. The text after that
    field's last ':' has spaces and surrounding double quotes removed, and
    "BHK" is appended.

    Parameters:
        soup: parsed page exposing ``find_all`` (e.g. a BeautifulSoup object).

    Returns:
        The type string, or "N/A" when the script tag is missing, has no
        text, has fewer than 12 comma-separated fields, or the field is empty.
    """
    # Default up front: previously a script with too few fields left
    # ``new_type`` unassigned and the return raised UnboundLocalError.
    new_type = "N/A"

    try:
        house_type = soup.find_all("script", attrs={"type":'application/ld+json'})[2].text.strip().split(',')
        if len(house_type) >= 12:
            new_type = house_type[-11].replace(" ", "").split()[0].split(":")[-1].strip('""') + "BHK"

    except (AttributeError, IndexError):
        # AttributeError: tag without usable text; IndexError: fewer than
        # three script tags or an empty field.
        new_type = "N/A"

    return new_type


def get_area(soup):
    """Return the area value scraped from a listing page.

    Takes the last comma-separated field of the third
    ``application/ld+json`` <script> tag, keeps the text after the field's
    last ':', removes spaces and surrounding double quotes, and drops the
    final three characters.

    Returns "N/A" when the script tag is missing, has no text, or the
    field is empty.
    """
    try:
        ld_scripts = soup.find_all("script", attrs={"type":'application/ld+json'})
        fields = ld_scripts[2].text.strip().split(',')
        last_field = fields[-1].replace(" ", "")
        raw_value = last_field.split()[0].split(":")[-1]
        # Trim the three trailing characters that follow the number.
        return raw_value.strip('""')[:-3]
    except (AttributeError, IndexError):
        return "N/A"

def get_lon(soup):
    """Return the longitude scraped from a listing page.

    Uses the 4th comma-separated field from the end of the third
    ``application/ld+json`` <script> tag: the text after the field's last
    ':' with spaces and surrounding double quotes removed.

    Returns "N/A" when the script tag is missing, has no text, or the
    field is absent or empty.
    """
    try:
        script_tag = soup.find_all("script", attrs={"type":'application/ld+json'})[2]
        fields = script_tag.text.strip().split(',')
        compact_field = fields[-4].replace(" ", "")
        quoted_value = compact_field.split()[0].split(":")[-1]
    except (AttributeError, IndexError):
        return "N/A"

    return quoted_value.strip('""')

def get_lat(soup):
    """Return the latitude scraped from a listing page.

    Uses the 3rd comma-separated field from the end of the third
    ``application/ld+json`` <script> tag: the text after the field's last
    ':' with spaces and surrounding double quotes removed.

    Returns "N/A" when the script tag is missing, has no text, or the
    field is absent or empty.
    """
    fallback = "N/A"
    try:
        all_scripts = soup.find_all("script", attrs={"type":'application/ld+json'})
        pieces = all_scripts[2].text.strip().split(',')
        token = pieces[-3].replace(" ", "").split()[0]
        value = token.split(":")[-1].strip('""')
    except AttributeError:
        value = fallback
    except IndexError:
        value = fallback
    return value

def get_locality(soup):
    """Return the locality field scraped from a listing page.

    The text of the third ``application/ld+json`` <script> tag is split on
    commas and the 9th field from the end is used. The text after that
    field's last ':' is returned with spaces and surrounding double quotes
    removed.

    Parameters:
        soup: parsed page exposing ``find_all`` (e.g. a BeautifulSoup object).

    Returns:
        The locality string, or "N/A" when the script tag is missing, has
        no text, has 10 or fewer comma-separated fields, or the field is
        empty.
    """
    # Default up front: previously a script with too few fields left
    # ``title_string`` unassigned and the return raised UnboundLocalError.
    title_string = "N/A"

    try:
        loco = soup.find_all("script", attrs={"type":'application/ld+json'})[2].text.strip().split(',')
        if len(loco) > 10:
            title_string = loco[-9].replace(" ", "").split()[0].split(":")[-1].strip('""')

    except (AttributeError, IndexError):
        # AttributeError: tag without usable text; IndexError: fewer than
        # three script tags or an empty field.
        title_string = "N/A"

    return title_string




In [66]:

if __name__ == '__main__':

    # Browser-like headers sent with every per-listing HTTP request.
    HEADERS = ({'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36', 'Accept-Language': 'en-US, en;q=0.5'})

    # The search-results page whose listings are scraped.
    url='https://www.magicbricks.com/property-for-rent/residential-real-estate?bedroom=2,3&proptype=Multistorey-Apartment,Builder-Floor-Apartment,Penthouse,Studio-Apartment,Service-Apartment,Residential-House,Villa&cityName=Navi-Mumbai'

    # Scrolling parameters: the page is scrolled SCROLL_STEPS times, each
    # time SCROLL_STEP_PX further down, pausing SCROLL_PAUSE_S seconds so
    # the page can load more results.
    SCROLL_STEPS = 1000
    SCROLL_STEP_PX = 1000
    SCROLL_PAUSE_S = 1
    # Upper bound for each listing request so one stalled connection
    # cannot hang the whole run.
    REQUEST_TIMEOUT_S = 30

    options = webdriver.ChromeOptions()
    driver = webdriver.Chrome(options=options)
    try:
        driver.maximize_window()
        driver.get(url)
        for step in range(1, SCROLL_STEPS + 1):
            driver.execute_script("window.scrollTo(0, "+str(step * SCROLL_STEP_PX)+")")
            time.sleep(SCROLL_PAUSE_S)

        page_source = driver.page_source
    finally:
        # quit() ends the whole browser session; the finally clause makes
        # sure this happens even when scrolling fails part-way.
        driver.quit()

    # Soup object containing the fully scrolled results page.
    soup = BeautifulSoup(page_source, "html.parser")

    # JSON-LD script tags; the ones carrying a 'url' key supply the links.
    script_tags = soup.find_all("script", type='application/ld+json')

    # Store the links
    links = []

    # Loop for extracting links from Tag Objects
    for script in script_tags:
        try:
            json_data = json.loads(script.string)
        except (TypeError, ValueError):
            # Empty script tag (string is None) or invalid JSON: skip it.
            continue
        if isinstance(json_data, dict) and 'url' in json_data:
            links.append(json_data['url'])

    d = {"Locality":[], "Region":[],"longitude":[],"latitude":[],"price":[],"Apartment Type":[],"Area":[]}

    # Loop for extracting listing details from each link.
    # NOTE(review): the last collected link is left out, exactly as the
    # original index loop did — confirm whether that is intentional.
    for new_link in links[:-1]:
        try:
            new_webpage = requests.get(new_link, headers=HEADERS, timeout=REQUEST_TIMEOUT_S)
        except requests.RequestException as exc:
            # One unreachable listing should not discard the whole scrape.
            print("Skipping " + str(new_link) + ": " + str(exc))
            continue

        new_soup = BeautifulSoup(new_webpage.content, "html.parser")

        # One value per column for this listing.
        d['Locality'].append(get_locality(new_soup))
        d['Region'].append(get_region(new_soup))
        d['longitude'].append(get_lon(new_soup))
        d['latitude'].append(get_lat(new_soup))
        d['price'].append(get_price(new_soup))
        d['Apartment Type'].append(get_type(new_soup))
        d['Area'].append(get_area(new_soup))


    acre_df = pd.DataFrame.from_dict(d)
    # Assign the result back instead of a chained inplace replace, which
    # acts on a temporary under pandas Copy-on-Write.
    acre_df['Locality'] = acre_df['Locality'].replace('', np.nan)
    # Rows whose locality was blank are dropped before saving.
    acre_df = acre_df.dropna(subset=['Locality'])
    acre_df.to_csv("magic_data.csv", header=True, index=False)

In [67]:
# Number of listing URLs collected from the search-results page.
len(links)

1501

In [68]:
# Prints every collected listing URL in one list (very long output).
print(links)

['https://www.magicbricks.com/propertyDetails/3-BHK-1850-Sq-ft-Multistorey-Apartment-FOR-Rent-Panvel-in-Navi-Mumbai&id=4d423637323234323939', 'https://www.magicbricks.com/propertyDetails/2-BHK-1284-Sq-ft-Multistorey-Apartment-FOR-Rent-Ghansoli-in-Navi-Mumbai&id=4d423638323635303537', 'https://www.magicbricks.com/propertyDetails/3-BHK-1710-Sq-ft-Multistorey-Apartment-FOR-Rent-Kharghar-in-Navi-Mumbai-r1&id=4d423533313832313233', 'https://www.magicbricks.com/propertyDetails/2-BHK-1300-Sq-ft-Multistorey-Apartment-FOR-Rent-Nerul-in-Navi-Mumbai&id=4d423638323637373139', 'https://www.magicbricks.com/propertyDetails/2-BHK-950-Sq-ft-Multistorey-Apartment-FOR-Rent-Sector-19-Kharghar-in-Navi-Mumbai&id=4d423639323637323431', 'https://www.magicbricks.com/propertyDetails/2-BHK-1135-Sq-ft-Multistorey-Apartment-FOR-Rent-Taloja-in-Navi-Mumbai&id=4d423639323533303031', 'https://www.magicbricks.com/propertyDetails/3-BHK-1530-Sq-ft-Multistorey-Apartment-FOR-Rent-Kharghar-in-Navi-Mumbai&id=4d42363836323932

In [72]:
# Requests up to 2000 rows; the index shown below ends at 1498, so this
# returns the whole frame.
acre_df.head(2000)

Unnamed: 0,Locality,Region,longitude,latitude,price,Apartment Type,Area
0,Panvel,NaviMumbai,73.1175162,18.9894007,30000,3BHK,1850
1,Ghansoli,NaviMumbai,72.9936439460767,19.1195451112663,50000,2BHK,1284
2,Kharghar,NaviMumbai,73.075974,19.0719517828309,35000,3BHK,1710
3,Nerul,NaviMumbai,73.0168543040566,19.0104070124285,52000,2BHK,1300
4,Sector19Kharghar,NaviMumbai,73.0762783248371,19.0501678475747,32000,2BHK,950
...,...,...,...,...,...,...,...
1495,Sector35IKharghar,NaviMumbai,73.0703141649291,19.0704024103758,24500,2BHK,965
1496,Sector35GKharghar,NaviMumbai,73.0707643075594,19.0702708557873,25000,2BHK,1060
1497,Sector10Kharghar,NaviMumbai,73.07655744,19.03569902,35000,2BHK,1385
1498,Ghansoli,NaviMumbai,73.000026,19.113057,45000,2BHK,1200
