## Real Estate Scraper 

In [1]:
from selenium.common.exceptions import NoSuchElementException
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager  
import time
import csv
import pandas as pd

In [2]:
def scrape_flat_card(flat_card):
    """Collect the listing fields shown on a single flat card.

    Every field is looked up by an XPath relative to ``flat_card``. When the
    element for a field is not present, the placeholder ``"NI"`` is used
    for that field instead.

    Args:
        flat_card: Card element exposing ``find_element(by, value)``.

    Returns:
        Tuple ``(room_number, region, area, price, price_sq_m, floor,
        layout)`` holding the text of each located element.
    """
    # XPaths listed in the same order as the fields of the returned tuple.
    field_xpaths = (
        './/span[@class="item__r-count"]',             # room number
        './/span[@class="item__location"]',            # region
        './/span[@class="item__yardage"]',             # flat area
        './/div[@class="card-item-prices__current"]',  # full price
        './/div[@class="card-item-prices__metr"]',     # price per square metre
        './/div[@class="item__level"]',                # floor number
        './/div[@class="item__d-row th-row"]',         # apartment layout
    )

    collected = []
    for xpath in field_xpaths:
        try:
            text = flat_card.find_element(By.XPATH, xpath).text
        except NoSuchElementException:
            text = "NI"
        collected.append(text)

    return tuple(collected)

In [4]:
def flats_scraper(num_flat_cards, waiting_time, url):
    """Scrape flat cards from ``url`` page by page and save them to CSV.

    Opens a Chrome session, reads the cards on the current page with
    ``scrape_flat_card``, and follows the "next page" link until
    ``num_flat_cards`` records are collected. Scraping stops early when a
    page yields no cards or when no "next page" link is present. Whatever
    was collected is passed to ``save_data_to_csv_file``.

    Args:
        num_flat_cards: Maximum number of flat records to collect.
        waiting_time: Seconds to sleep after clicking "next page".
        url: Address of the first listing page to open.
    """
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    flats = []
    try:
        driver.set_window_size(1120, 1000)
        driver.get(url)
        while len(flats) < num_flat_cards:
            print(f"Progress: {len(flats)}/{num_flat_cards}")
            flat_cards_list = driver.find_elements(
                By.XPATH,
                './/div[@class="col-12 col-md-6 col-lg-4 js-item-container"]',
            )
            if not flat_cards_list:
                # Without this guard an empty page would loop forever,
                # because nothing is appended and no navigation happens.
                print("No flat cards found on the page; stopping early.")
                break

            for flat_card in flat_cards_list:
                print(f"Progress: {len(flats)}/{num_flat_cards}")
                if len(flats) >= num_flat_cards:
                    break
                flats.append(scrape_flat_card(flat_card))

            if len(flats) >= num_flat_cards:
                # Enough records collected; no need to load another page.
                break

            # The whole page was consumed: move on to the next one.
            try:
                next_link = driver.find_element(
                    By.XPATH, './/a[@class="paginator__next"]'
                )
            except NoSuchElementException:
                # No further page: keep what was scraped instead of crashing.
                print("No next page link found; stopping early.")
                break
            next_link.click()
            time.sleep(waiting_time)
    finally:
        # Always release the browser and the driver process.
        driver.quit()

    save_data_to_csv_file(flats)

In [5]:
def save_data_to_csv_file(records):
    """Write the scraped records to ``Real_estate_info.csv``.

    The file is created (or overwritten) in the current working directory,
    encoded as UTF-8, with a header row followed by one row per record.

    Args:
        records: Iterable of row sequences, one per scraped flat.
    """
    header = ('room_number', 'region', 'area', 'price', 'price_sq_m', 'floor', 'layout')
    with open('Real_estate_info.csv', 'w', newline='', encoding='utf-8') as csv_file:
        out = csv.writer(csv_file)
        out.writerow(header)
        for row in records:
            out.writerow(row)

In [6]:
# Scrape five flat cards starting from the apartments listing page.
url = 'https://arevera.ru/apartments/'
flats_scraper(url=url, waiting_time=2, num_flat_cards=5)

Progress: 0/5
Progress: 0/5
Progress: 1/5
Progress: 2/5
Progress: 3/5
Progress: 4/5
Progress: 5/5


In [8]:
# Load the CSV file with the results and display its first five rows.
df = pd.read_csv("./Real_estate_info.csv")
df.head(n=5)

Unnamed: 0,room_number,region,area,price,price_sq_m,floor,layout
0,1-комн.,"Кировский р-н, Кутузова, 1",38.60 м 2,3 474 000 руб.,90 000 руб./м2,эт. из 20,"Кировский р-н, Кутузова, 1\nПланировка: новая\..."
1,1-комн.,"Советский р-н, Шумяцкого, 7",29.70 м 2,3 340 000 руб.,112 458 руб./м2,1 эт. из 9,"Советский р-н, Шумяцкого, 7\nПланировка: новая..."
2,3-комн.,"Советский р-н, 60 лет Образования СССР, 20",64.10 м 2,6 500 000 руб.,101 404 руб./м2,2 эт. из 9,"Советский р-н, 60 лет Образования СССР, 20\nПл..."
3,3-комн.,"Свердловский р-н, Семафорная, 241",58.40 м 2,4 990 000 руб.,85 445 руб./м2,1 эт. из 5,"Свердловский р-н, Семафорная, 241\nПланировка:..."
4,3-комн.,"Свердловский р-н, Южная набережная, 10",92.30 м 2,12 400 000 руб.,134 345 руб./м2,5 эт. из 6,"Свердловский р-н, Южная набережная, 10\nПланир..."
